flock-core 0.5.10__py3-none-any.whl → 0.5.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flock-core might be problematic; the original diff page links to more details.

Files changed (91)
  1. flock/__init__.py +1 -1
  2. flock/agent/__init__.py +30 -0
  3. flock/agent/builder_helpers.py +192 -0
  4. flock/agent/builder_validator.py +169 -0
  5. flock/agent/component_lifecycle.py +325 -0
  6. flock/agent/context_resolver.py +141 -0
  7. flock/agent/mcp_integration.py +212 -0
  8. flock/agent/output_processor.py +304 -0
  9. flock/api/__init__.py +20 -0
  10. flock/api/models.py +283 -0
  11. flock/{service.py → api/service.py} +121 -63
  12. flock/cli.py +2 -2
  13. flock/components/__init__.py +41 -0
  14. flock/components/agent/__init__.py +22 -0
  15. flock/{components.py → components/agent/base.py} +4 -3
  16. flock/{utility/output_utility_component.py → components/agent/output_utility.py} +12 -7
  17. flock/components/orchestrator/__init__.py +22 -0
  18. flock/{orchestrator_component.py → components/orchestrator/base.py} +5 -293
  19. flock/components/orchestrator/circuit_breaker.py +95 -0
  20. flock/components/orchestrator/collection.py +143 -0
  21. flock/components/orchestrator/deduplication.py +78 -0
  22. flock/core/__init__.py +30 -0
  23. flock/core/agent.py +953 -0
  24. flock/{artifacts.py → core/artifacts.py} +1 -1
  25. flock/{context_provider.py → core/context_provider.py} +3 -3
  26. flock/core/orchestrator.py +1102 -0
  27. flock/{store.py → core/store.py} +99 -454
  28. flock/{subscription.py → core/subscription.py} +1 -1
  29. flock/dashboard/collector.py +5 -5
  30. flock/dashboard/graph_builder.py +7 -7
  31. flock/dashboard/routes/__init__.py +21 -0
  32. flock/dashboard/routes/control.py +327 -0
  33. flock/dashboard/routes/helpers.py +340 -0
  34. flock/dashboard/routes/themes.py +76 -0
  35. flock/dashboard/routes/traces.py +521 -0
  36. flock/dashboard/routes/websocket.py +108 -0
  37. flock/dashboard/service.py +44 -1294
  38. flock/engines/dspy/__init__.py +20 -0
  39. flock/engines/dspy/artifact_materializer.py +216 -0
  40. flock/engines/dspy/signature_builder.py +474 -0
  41. flock/engines/dspy/streaming_executor.py +858 -0
  42. flock/engines/dspy_engine.py +45 -1330
  43. flock/engines/examples/simple_batch_engine.py +2 -2
  44. flock/examples.py +7 -7
  45. flock/logging/logging.py +1 -16
  46. flock/models/__init__.py +10 -0
  47. flock/models/system_artifacts.py +33 -0
  48. flock/orchestrator/__init__.py +45 -0
  49. flock/{artifact_collector.py → orchestrator/artifact_collector.py} +3 -3
  50. flock/orchestrator/artifact_manager.py +168 -0
  51. flock/{batch_accumulator.py → orchestrator/batch_accumulator.py} +2 -2
  52. flock/orchestrator/component_runner.py +389 -0
  53. flock/orchestrator/context_builder.py +167 -0
  54. flock/{correlation_engine.py → orchestrator/correlation_engine.py} +2 -2
  55. flock/orchestrator/event_emitter.py +167 -0
  56. flock/orchestrator/initialization.py +184 -0
  57. flock/orchestrator/lifecycle_manager.py +226 -0
  58. flock/orchestrator/mcp_manager.py +202 -0
  59. flock/orchestrator/scheduler.py +189 -0
  60. flock/orchestrator/server_manager.py +234 -0
  61. flock/orchestrator/tracing.py +147 -0
  62. flock/storage/__init__.py +10 -0
  63. flock/storage/artifact_aggregator.py +158 -0
  64. flock/storage/in_memory/__init__.py +6 -0
  65. flock/storage/in_memory/artifact_filter.py +114 -0
  66. flock/storage/in_memory/history_aggregator.py +115 -0
  67. flock/storage/sqlite/__init__.py +10 -0
  68. flock/storage/sqlite/agent_history_queries.py +154 -0
  69. flock/storage/sqlite/consumption_loader.py +100 -0
  70. flock/storage/sqlite/query_builder.py +112 -0
  71. flock/storage/sqlite/query_params_builder.py +91 -0
  72. flock/storage/sqlite/schema_manager.py +168 -0
  73. flock/storage/sqlite/summary_queries.py +194 -0
  74. flock/utils/__init__.py +14 -0
  75. flock/utils/async_utils.py +67 -0
  76. flock/{runtime.py → utils/runtime.py} +3 -3
  77. flock/utils/time_utils.py +53 -0
  78. flock/utils/type_resolution.py +38 -0
  79. flock/{utilities.py → utils/utilities.py} +2 -2
  80. flock/utils/validation.py +57 -0
  81. flock/utils/visibility.py +79 -0
  82. flock/utils/visibility_utils.py +134 -0
  83. {flock_core-0.5.10.dist-info → flock_core-0.5.20.dist-info}/METADATA +69 -61
  84. {flock_core-0.5.10.dist-info → flock_core-0.5.20.dist-info}/RECORD +89 -31
  85. flock/agent.py +0 -1578
  86. flock/orchestrator.py +0 -1746
  87. flock/{visibility.py → core/visibility.py} +0 -0
  88. flock/{helper → utils}/cli_helper.py +0 -0
  89. {flock_core-0.5.10.dist-info → flock_core-0.5.20.dist-info}/WHEEL +0 -0
  90. {flock_core-0.5.10.dist-info → flock_core-0.5.20.dist-info}/entry_points.txt +0 -0
  91. {flock_core-0.5.10.dist-info → flock_core-0.5.20.dist-info}/licenses/LICENSE +0 -0
flock/orchestrator.py DELETED
@@ -1,1746 +0,0 @@
1
- """Blackboard orchestrator and scheduling runtime."""
2
-
3
- from __future__ import annotations
4
-
5
- import asyncio
6
- import logging
7
- import os
8
- from asyncio import Task
9
- from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence
10
- from contextlib import asynccontextmanager
11
- from datetime import UTC, datetime
12
- from pathlib import Path
13
- from typing import TYPE_CHECKING, Any
14
- from uuid import uuid4
15
-
16
- from opentelemetry import trace
17
- from opentelemetry.trace import Status, StatusCode
18
- from pydantic import BaseModel
19
-
20
- from flock.agent import Agent, AgentBuilder
21
- from flock.artifact_collector import ArtifactCollector
22
- from flock.artifacts import Artifact
23
- from flock.batch_accumulator import BatchEngine
24
- from flock.correlation_engine import CorrelationEngine
25
- from flock.helper.cli_helper import init_console
26
- from flock.logging.auto_trace import AutoTracedMeta
27
- from flock.mcp import (
28
- FlockMCPClientManager,
29
- FlockMCPConfiguration,
30
- FlockMCPConnectionConfiguration,
31
- FlockMCPFeatureConfiguration,
32
- ServerParameters,
33
- )
34
- from flock.orchestrator_component import (
35
- CollectionResult,
36
- OrchestratorComponent,
37
- ScheduleDecision,
38
- )
39
- from flock.registry import type_registry
40
- from flock.runtime import Context
41
- from flock.store import BlackboardStore, ConsumptionRecord, InMemoryBlackboardStore
42
- from flock.subscription import Subscription
43
- from flock.visibility import AgentIdentity, PublicVisibility, Visibility
44
-
45
-
46
- if TYPE_CHECKING:
47
- import builtins
48
-
49
-
50
- class BoardHandle:
51
- """Handle exposed to components for publishing and inspection."""
52
-
53
- def __init__(self, orchestrator: Flock) -> None:
54
- self._orchestrator = orchestrator
55
-
56
- async def publish(self, artifact: Artifact) -> None:
57
- await self._orchestrator._persist_and_schedule(artifact)
58
-
59
- async def get(self, artifact_id) -> Artifact | None:
60
- return await self._orchestrator.store.get(artifact_id)
61
-
62
- async def list(self) -> builtins.list[Artifact]:
63
- return await self._orchestrator.store.list()
64
-
65
-
66
- class Flock(metaclass=AutoTracedMeta):
67
- """Main orchestrator for blackboard-based agent coordination.
68
-
69
- All public methods are automatically traced via OpenTelemetry.
70
- """
71
-
72
- def _patch_litellm_proxy_imports(self) -> None:
73
- """Stub litellm proxy_server to avoid optional proxy deps when not used.
74
-
75
- Some litellm versions import `litellm.proxy.proxy_server` during standard logging
76
- to read `general_settings`, which pulls in optional dependencies like `apscheduler`.
77
- We provide a stub so imports succeed but cold storage remains disabled.
78
- """
79
- try:
80
- import sys
81
- import types
82
-
83
- if "litellm.proxy.proxy_server" not in sys.modules:
84
- stub = types.ModuleType("litellm.proxy.proxy_server")
85
- # Minimal surface that cold_storage_handler accesses
86
- stub.general_settings = {}
87
- sys.modules["litellm.proxy.proxy_server"] = stub
88
- except Exception: # nosec B110 - Safe to ignore; worst case litellm will log a warning
89
- # logger.debug(f"Failed to stub litellm proxy_server: {e}")
90
- pass
91
-
92
- def __init__(
93
- self,
94
- model: str | None = None,
95
- *,
96
- store: BlackboardStore | None = None,
97
- max_agent_iterations: int = 1000,
98
- context_provider: Any = None,
99
- ) -> None:
100
- """Initialize the Flock orchestrator for blackboard-based agent coordination.
101
-
102
- Args:
103
- model: Default LLM model for agents (e.g., "openai/gpt-4.1").
104
- Can be overridden per-agent. If None, uses DEFAULT_MODEL env var.
105
- store: Custom blackboard storage backend. Defaults to InMemoryBlackboardStore.
106
- max_agent_iterations: Circuit breaker limit to prevent runaway agent loops.
107
- Defaults to 1000 iterations per agent before reset.
108
- context_provider: Global context provider for all agents (Phase 3 security fix).
109
- If None, agents use DefaultContextProvider. Can be overridden per-agent.
110
-
111
- Examples:
112
- >>> # Basic initialization with default model
113
- >>> flock = Flock("openai/gpt-4.1")
114
-
115
- >>> # Custom storage backend
116
- >>> flock = Flock("openai/gpt-4o", store=CustomBlackboardStore())
117
-
118
- >>> # Circuit breaker configuration
119
- >>> flock = Flock("openai/gpt-4.1", max_agent_iterations=500)
120
-
121
- >>> # Global context provider (Phase 3 security fix)
122
- >>> from flock.context_provider import DefaultContextProvider
123
- >>> flock = Flock(
124
- ... "openai/gpt-4.1", context_provider=DefaultContextProvider()
125
- ... )
126
- """
127
- self._patch_litellm_proxy_imports()
128
- self._logger = logging.getLogger(__name__)
129
- self.model = model
130
-
131
- try:
132
- init_console(clear_screen=True, show_banner=True, model=self.model)
133
- except (UnicodeEncodeError, UnicodeDecodeError):
134
- # Skip banner on Windows consoles with encoding issues (e.g., tests, CI)
135
- pass
136
-
137
- self.store: BlackboardStore = store or InMemoryBlackboardStore()
138
- self._agents: dict[str, Agent] = {}
139
- self._tasks: set[Task[Any]] = set()
140
- self._processed: set[tuple[str, str]] = set()
141
- self._lock = asyncio.Lock()
142
- self.metrics: dict[str, float] = {"artifacts_published": 0, "agent_runs": 0}
143
- # Phase 3: Global context provider (security fix)
144
- self._default_context_provider = context_provider
145
- # MCP integration
146
- self._mcp_configs: dict[str, FlockMCPConfiguration] = {}
147
- self._mcp_manager: FlockMCPClientManager | None = None
148
- # T068: Circuit breaker for runaway agents
149
- self.max_agent_iterations: int = max_agent_iterations
150
- self._agent_iteration_count: dict[str, int] = {}
151
- self.is_dashboard: bool = False
152
- # AND gate logic: Artifact collection for multi-type subscriptions
153
- self._artifact_collector = ArtifactCollector()
154
- # JoinSpec logic: Correlation engine for correlated AND gates
155
- self._correlation_engine = CorrelationEngine()
156
- # Background task for checking correlation expiry (time-based JoinSpec)
157
- self._correlation_cleanup_task: Task[Any] | None = None
158
- self._correlation_cleanup_interval: float = 0.1 # Check every 100ms
159
- # BatchSpec logic: Batch accumulator for size/timeout batching
160
- self._batch_engine = BatchEngine()
161
- # Background task for checking batch timeouts
162
- self._batch_timeout_task: Task[Any] | None = None
163
- self._batch_timeout_interval: float = 0.1 # Check every 100ms
164
- # Phase 1.2: WebSocket manager for real-time dashboard events (set by serve())
165
- self._websocket_manager: Any = None
166
- # Unified tracing support
167
- self._workflow_span = None
168
- self._auto_workflow_enabled = os.getenv(
169
- "FLOCK_AUTO_WORKFLOW_TRACE", "false"
170
- ).lower() in {
171
- "true",
172
- "1",
173
- "yes",
174
- "on",
175
- }
176
-
177
- # Phase 2: OrchestratorComponent system
178
- self._components: list[OrchestratorComponent] = []
179
- self._components_initialized: bool = False
180
-
181
- # Auto-add built-in components
182
- from flock.orchestrator_component import (
183
- BuiltinCollectionComponent,
184
- CircuitBreakerComponent,
185
- DeduplicationComponent,
186
- )
187
-
188
- self.add_component(CircuitBreakerComponent(max_iterations=max_agent_iterations))
189
- self.add_component(DeduplicationComponent())
190
- self.add_component(BuiltinCollectionComponent())
191
-
192
- # Log orchestrator initialization
193
- self._logger.debug("Orchestrator initialized: components=[]")
194
-
195
- if not model:
196
- self.model = os.getenv("DEFAULT_MODEL")
197
-
198
- # Agent management -----------------------------------------------------
199
-
200
- def agent(self, name: str) -> AgentBuilder:
201
- """Create a new agent using the fluent builder API.
202
-
203
- Args:
204
- name: Unique identifier for the agent. Used for visibility controls and metrics.
205
-
206
- Returns:
207
- AgentBuilder for fluent configuration
208
-
209
- Raises:
210
- ValueError: If an agent with this name already exists
211
-
212
- Examples:
213
- >>> # Basic agent
214
- >>> pizza_agent = (
215
- ... flock.agent("pizza_master")
216
- ... .description("Creates delicious pizza recipes")
217
- ... .consumes(DreamPizza)
218
- ... .publishes(Pizza)
219
- ... )
220
-
221
- >>> # Advanced agent with filtering
222
- >>> critic = (
223
- ... flock.agent("critic")
224
- ... .consumes(Movie, where=lambda m: m.rating >= 8)
225
- ... .publishes(Review)
226
- ... .with_utilities(RateLimiter(max_calls=10))
227
- ... )
228
- """
229
- if name in self._agents:
230
- raise ValueError(f"Agent '{name}' already registered.")
231
- return AgentBuilder(self, name)
232
-
233
- def register_agent(self, agent: Agent) -> None:
234
- if agent.name in self._agents:
235
- raise ValueError(f"Agent '{agent.name}' already registered.")
236
- self._agents[agent.name] = agent
237
-
238
- def get_agent(self, name: str) -> Agent:
239
- return self._agents[name]
240
-
241
- @property
242
- def agents(self) -> list[Agent]:
243
- return list(self._agents.values())
244
-
245
- # Component management -------------------------------------------------
246
-
247
- def add_component(self, component: OrchestratorComponent) -> Flock:
248
- """Add an OrchestratorComponent to this orchestrator.
249
-
250
- Components execute in priority order (lower priority number = earlier).
251
- Multiple components can have the same priority.
252
-
253
- Args:
254
- component: Component to add (must be an OrchestratorComponent instance)
255
-
256
- Returns:
257
- Self for method chaining
258
-
259
- Examples:
260
- >>> # Add single component
261
- >>> flock = Flock("openai/gpt-4.1")
262
- >>> flock.add_component(CircuitBreakerComponent(max_iterations=500))
263
-
264
- >>> # Method chaining
265
- >>> flock.add_component(CircuitBreakerComponent()) \\
266
- ... .add_component(MetricsComponent()) \\
267
- ... .add_component(DeduplicationComponent())
268
-
269
- >>> # Custom priority (lower = earlier)
270
- >>> flock.add_component(
271
- ... CustomComponent(priority=5, name="early_component")
272
- ... )
273
- """
274
- self._components.append(component)
275
- self._components.sort(key=lambda c: c.priority)
276
-
277
- # Log component addition
278
- comp_name = component.name or component.__class__.__name__
279
- self._logger.info(
280
- f"Component added: name={comp_name}, "
281
- f"priority={component.priority}, total_components={len(self._components)}"
282
- )
283
-
284
- return self
285
-
286
- # MCP management -------------------------------------------------------
287
-
288
- def add_mcp(
289
- self,
290
- name: str,
291
- connection_params: ServerParameters,
292
- *,
293
- enable_tools_feature: bool = True,
294
- enable_prompts_feature: bool = True,
295
- enable_sampling_feature: bool = True,
296
- enable_roots_feature: bool = True,
297
- mount_points: list[str] | None = None,
298
- tool_whitelist: list[str] | None = None,
299
- read_timeout_seconds: float = 300,
300
- max_retries: int = 3,
301
- **kwargs,
302
- ) -> Flock:
303
- """Register an MCP server for use by agents.
304
-
305
- Architecture Decision: AD001 - Two-Level Architecture
306
- MCP servers are registered at orchestrator level and assigned to agents.
307
-
308
- Args:
309
- name: Unique identifier for this MCP server
310
- connection_params: Server connection parameters
311
- enable_tools_feature: Enable tool execution
312
- enable_prompts_feature: Enable prompt templates
313
- enable_sampling_feature: Enable LLM sampling requests
314
- enable_roots_feature: Enable filesystem roots
315
- tool_whitelist: Optional list of tool names to allow
316
- read_timeout_seconds: Timeout for server communications
317
- max_retries: Connection retry attempts
318
-
319
- Returns:
320
- self for method chaining
321
-
322
- Raises:
323
- ValueError: If server name already registered
324
- """
325
- if name in self._mcp_configs:
326
- raise ValueError(f"MCP server '{name}' is already registered.")
327
-
328
- # Detect transport type
329
- from flock.mcp.types import (
330
- SseServerParameters,
331
- StdioServerParameters,
332
- StreamableHttpServerParameters,
333
- WebsocketServerParameters,
334
- )
335
-
336
- if isinstance(connection_params, StdioServerParameters):
337
- transport_type = "stdio"
338
- elif isinstance(connection_params, WebsocketServerParameters):
339
- transport_type = "websockets"
340
- elif isinstance(connection_params, SseServerParameters):
341
- transport_type = "sse"
342
- elif isinstance(connection_params, StreamableHttpServerParameters):
343
- transport_type = "streamable_http"
344
- else:
345
- transport_type = "custom"
346
-
347
- mcp_roots = None
348
- if mount_points:
349
- from pathlib import Path as PathLib
350
-
351
- from flock.mcp.types import MCPRoot
352
-
353
- mcp_roots = []
354
- for path in mount_points:
355
- # Normalize the path
356
- if path.startswith("file://"):
357
- # Already a file URI
358
- uri = path
359
- # Extract path from URI for name
360
- path_str = path.replace("file://", "")
361
- # the test:// path-prefix is used by testing servers such as the mcp-everything server.
362
- elif path.startswith("test://"):
363
- # Already a test URI
364
- uri = path
365
- # Extract path from URI for name
366
- path_str = path.replace("test://", "")
367
- else:
368
- # Convert to absolute path and create URI
369
- abs_path = PathLib(path).resolve()
370
- uri = f"file://{abs_path}"
371
- path_str = str(abs_path)
372
-
373
- # Extract a meaningful name (last component of path)
374
- name = (
375
- PathLib(path_str).name
376
- or path_str.rstrip("/").split("/")[-1]
377
- or "root"
378
- )
379
- mcp_roots.append(MCPRoot(uri=uri, name=name))
380
-
381
- # Build configuration
382
- connection_config = FlockMCPConnectionConfiguration(
383
- max_retries=max_retries,
384
- connection_parameters=connection_params,
385
- transport_type=transport_type,
386
- read_timeout_seconds=read_timeout_seconds,
387
- mount_points=mcp_roots,
388
- )
389
-
390
- feature_config = FlockMCPFeatureConfiguration(
391
- tools_enabled=enable_tools_feature,
392
- prompts_enabled=enable_prompts_feature,
393
- sampling_enabled=enable_sampling_feature,
394
- roots_enabled=enable_roots_feature,
395
- tool_whitelist=tool_whitelist,
396
- )
397
-
398
- mcp_config = FlockMCPConfiguration(
399
- name=name,
400
- connection_config=connection_config,
401
- feature_config=feature_config,
402
- )
403
-
404
- self._mcp_configs[name] = mcp_config
405
- return self
406
-
407
- def get_mcp_manager(self) -> FlockMCPClientManager:
408
- """Get or create the MCP client manager.
409
-
410
- Architecture Decision: AD005 - Lazy Connection Establishment
411
- """
412
- if not self._mcp_configs:
413
- raise RuntimeError("No MCP servers registered. Call add_mcp() first.")
414
-
415
- if self._mcp_manager is None:
416
- self._mcp_manager = FlockMCPClientManager(self._mcp_configs)
417
-
418
- return self._mcp_manager
419
-
420
- # Unified Tracing ------------------------------------------------------
421
-
422
- @asynccontextmanager
423
- async def traced_run(self, name: str = "workflow") -> AsyncGenerator[Any, None]:
424
- """Context manager for wrapping an entire execution in a single unified trace.
425
-
426
- This creates a parent span that encompasses all operations (publish, run_until_idle, etc.)
427
- within the context, ensuring they all belong to the same trace_id for better observability.
428
-
429
- Args:
430
- name: Name for the workflow trace (default: "workflow")
431
-
432
- Yields:
433
- The workflow span for optional manual attribute setting
434
-
435
- Examples:
436
- # Explicit workflow tracing (recommended)
437
- async with flock.traced_run("pizza_workflow"):
438
- await flock.publish(pizza_idea)
439
- await flock.run_until_idle()
440
- # All operations now share the same trace_id!
441
-
442
- # Custom attributes
443
- async with flock.traced_run("data_pipeline") as span:
444
- span.set_attribute("pipeline.version", "2.0")
445
- await flock.publish(data)
446
- await flock.run_until_idle()
447
- """
448
- tracer = trace.get_tracer(__name__)
449
- with tracer.start_as_current_span(name) as span:
450
- # Set workflow-level attributes
451
- span.set_attribute("flock.workflow", True)
452
- span.set_attribute("workflow.name", name)
453
- span.set_attribute("workflow.flock_id", str(id(self)))
454
-
455
- # Store span for nested operations to use
456
- prev_workflow_span = self._workflow_span
457
- self._workflow_span = span
458
-
459
- try:
460
- yield span
461
- span.set_status(Status(StatusCode.OK))
462
- except Exception as e:
463
- span.set_status(Status(StatusCode.ERROR, str(e)))
464
- span.record_exception(e)
465
- raise
466
- finally:
467
- # Restore previous workflow span
468
- self._workflow_span = prev_workflow_span
469
-
470
- @staticmethod
471
- def clear_traces(db_path: str = ".flock/traces.duckdb") -> dict[str, Any]:
472
- """Clear all traces from the DuckDB database.
473
-
474
- Useful for resetting debug sessions or cleaning up test data.
475
-
476
- Args:
477
- db_path: Path to the DuckDB database file (default: ".flock/traces.duckdb")
478
-
479
- Returns:
480
- Dictionary with operation results:
481
- - deleted_count: Number of spans deleted
482
- - success: Whether operation succeeded
483
- - error: Error message if failed
484
-
485
- Examples:
486
- # Clear all traces
487
- result = Flock.clear_traces()
488
- print(f"Deleted {result['deleted_count']} spans")
489
-
490
- # Custom database path
491
- result = Flock.clear_traces(".flock/custom_traces.duckdb")
492
-
493
- # Check if operation succeeded
494
- if result['success']:
495
- print("Traces cleared successfully!")
496
- else:
497
- print(f"Error: {result['error']}")
498
- """
499
- try:
500
- from pathlib import Path
501
-
502
- import duckdb
503
-
504
- db_file = Path(db_path)
505
- if not db_file.exists():
506
- return {
507
- "success": False,
508
- "deleted_count": 0,
509
- "error": f"Database file not found: {db_path}",
510
- }
511
-
512
- # Connect and clear
513
- conn = duckdb.connect(str(db_file))
514
- try:
515
- # Get count before deletion
516
- count_result = conn.execute("SELECT COUNT(*) FROM spans").fetchone()
517
- deleted_count = count_result[0] if count_result else 0
518
-
519
- # Delete all spans
520
- conn.execute("DELETE FROM spans")
521
-
522
- # Vacuum to reclaim space
523
- conn.execute("VACUUM")
524
-
525
- return {"success": True, "deleted_count": deleted_count, "error": None}
526
-
527
- finally:
528
- conn.close()
529
-
530
- except Exception as e:
531
- return {"success": False, "deleted_count": 0, "error": str(e)}
532
-
533
- # Runtime --------------------------------------------------------------
534
-
535
- async def run_until_idle(self) -> None:
536
- """Wait for all scheduled agent tasks to complete.
537
-
538
- This method blocks until the blackboard reaches a stable state where no
539
- agents are queued for execution. Essential for batch processing and ensuring
540
- all agent cascades complete before continuing.
541
-
542
- Note:
543
- Automatically resets circuit breaker counters and shuts down MCP connections
544
- when idle. Used with publish() for event-driven workflows.
545
-
546
- Examples:
547
- >>> # Event-driven workflow (recommended)
548
- >>> await flock.publish(task1)
549
- >>> await flock.publish(task2)
550
- >>> await flock.run_until_idle() # Wait for all cascades
551
- >>> # All agents have finished processing
552
-
553
- >>> # Parallel batch processing
554
- >>> await flock.publish_many([task1, task2, task3])
555
- >>> await flock.run_until_idle() # All tasks processed in parallel
556
-
557
- See Also:
558
- - publish(): Event-driven artifact publishing
559
- - publish_many(): Batch publishing for parallel execution
560
- - invoke(): Direct agent invocation without cascade
561
- """
562
- while self._tasks:
563
- await asyncio.sleep(0.01)
564
- pending = {task for task in self._tasks if not task.done()}
565
- self._tasks = pending
566
-
567
- # Determine whether any deferred work (timeouts/cleanup) is still pending.
568
- pending_batches = any(
569
- accumulator.artifacts for accumulator in self._batch_engine.batches.values()
570
- )
571
- pending_correlations = any(
572
- groups and any(group.waiting_artifacts for group in groups.values())
573
- for groups in self._correlation_engine.correlation_groups.values()
574
- )
575
-
576
- # Ensure watchdog loops remain active while pending work exists.
577
- if pending_batches and (
578
- self._batch_timeout_task is None or self._batch_timeout_task.done()
579
- ):
580
- self._batch_timeout_task = asyncio.create_task(
581
- self._batch_timeout_checker_loop()
582
- )
583
-
584
- if pending_correlations and (
585
- self._correlation_cleanup_task is None
586
- or self._correlation_cleanup_task.done()
587
- ):
588
- self._correlation_cleanup_task = asyncio.create_task(
589
- self._correlation_cleanup_loop()
590
- )
591
-
592
- # If deferred work is still outstanding, consider the orchestrator quiescent for
593
- # now but leave watchdog tasks running to finish the job.
594
- if pending_batches or pending_correlations:
595
- self._agent_iteration_count.clear()
596
- return
597
-
598
- # Notify components that orchestrator reached idle state
599
- if self._components_initialized:
600
- await self._run_idle()
601
-
602
- # T068: Reset circuit breaker counters when idle
603
- self._agent_iteration_count.clear()
604
-
605
- # Automatically shutdown MCP connections when idle
606
- await self.shutdown(include_components=False)
607
-
608
- async def direct_invoke(
609
- self, agent: Agent, inputs: Sequence[BaseModel | Mapping[str, Any] | Artifact]
610
- ) -> list[Artifact]:
611
- artifacts = [
612
- self._normalize_input(value, produced_by="__direct__") for value in inputs
613
- ]
614
- for artifact in artifacts:
615
- self._mark_processed(artifact, agent)
616
- await self._persist_and_schedule(artifact)
617
-
618
- # Phase 8: Evaluate context BEFORE creating Context (security fix)
619
- # Provider resolution: per-agent > global > DefaultContextProvider
620
- from flock.context_provider import (
621
- BoundContextProvider,
622
- ContextRequest,
623
- DefaultContextProvider,
624
- )
625
-
626
- inner_provider = (
627
- getattr(agent, "context_provider", None)
628
- or self._default_context_provider
629
- or DefaultContextProvider()
630
- )
631
-
632
- # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
633
- provider = BoundContextProvider(inner_provider, agent.identity)
634
-
635
- # Evaluate context using provider (orchestrator controls this!)
636
- # Engines will receive pre-filtered artifacts via ctx.artifacts
637
- correlation_id = (
638
- artifacts[0].correlation_id
639
- if artifacts and artifacts[0].correlation_id
640
- else uuid4()
641
- )
642
- request = ContextRequest(
643
- agent=agent,
644
- correlation_id=correlation_id,
645
- store=self.store,
646
- agent_identity=agent.identity,
647
- exclude_ids={a.id for a in artifacts}, # Exclude input artifacts
648
- )
649
- context_artifacts = await provider(request)
650
-
651
- # Phase 8: Create Context with pre-filtered data (no capabilities!)
652
- # SECURITY: Context is now just data - engines can't query anything
653
- ctx = Context(
654
- artifacts=context_artifacts, # Pre-filtered conversation context
655
- agent_identity=agent.identity,
656
- task_id=str(uuid4()),
657
- correlation_id=correlation_id,
658
- )
659
- self._record_agent_run(agent)
660
- return await agent.execute(ctx, artifacts)
661
-
662
- async def arun(
663
- self, agent_builder: AgentBuilder, *inputs: BaseModel
664
- ) -> list[Artifact]:
665
- """Execute an agent with inputs and wait for all cascades to complete (async).
666
-
667
- Convenience method that combines direct agent invocation with run_until_idle().
668
- Useful for testing and synchronous request-response patterns.
669
-
670
- Args:
671
- agent_builder: Agent to execute (from flock.agent())
672
- *inputs: Input objects (BaseModel instances)
673
-
674
- Returns:
675
- Artifacts produced by the agent and any triggered cascades
676
-
677
- Examples:
678
- >>> # Test a single agent
679
- >>> flock = Flock("openai/gpt-4.1")
680
- >>> pizza_agent = flock.agent("pizza").consumes(Idea).publishes(Pizza)
681
- >>> results = await flock.arun(pizza_agent, Idea(topic="Margherita"))
682
-
683
- >>> # Multiple inputs
684
- >>> results = await flock.arun(
685
- ... task_agent, Task(name="deploy"), Task(name="test")
686
- ... )
687
-
688
- Note:
689
- For event-driven workflows, prefer publish() + run_until_idle() for better
690
- control over execution timing and parallel processing.
691
- """
692
- artifacts = await self.direct_invoke(agent_builder.agent, list(inputs))
693
- await self.run_until_idle()
694
- return artifacts
695
-
696
- def run(self, agent_builder: AgentBuilder, *inputs: BaseModel) -> list[Artifact]:
697
- """Synchronous wrapper for arun() - executes agent and waits for completion.
698
-
699
- Args:
700
- agent_builder: Agent to execute (from flock.agent())
701
- *inputs: Input objects (BaseModel instances)
702
-
703
- Returns:
704
- Artifacts produced by the agent and any triggered cascades
705
-
706
- Examples:
707
- >>> # Synchronous execution (blocks until complete)
708
- >>> flock = Flock("openai/gpt-4o-mini")
709
- >>> agent = flock.agent("analyzer").consumes(Data).publishes(Report)
710
- >>> results = flock.run(agent, Data(value=42))
711
-
712
- Warning:
713
- Cannot be called from within an async context. Use arun() instead
714
- if already in an async function.
715
- """
716
- return asyncio.run(self.arun(agent_builder, *inputs))
717
-
718
- async def shutdown(self, *, include_components: bool = True) -> None:
719
- """Shutdown orchestrator and clean up resources.
720
-
721
- Args:
722
- include_components: Whether to invoke component shutdown hooks.
723
- Internal callers (e.g., run_until_idle) disable this to avoid
724
- tearing down component state between cascades.
725
- """
726
- if include_components and self._components_initialized:
727
- await self._run_shutdown()
728
-
729
- # Cancel correlation cleanup task if running
730
- if self._correlation_cleanup_task and not self._correlation_cleanup_task.done():
731
- self._correlation_cleanup_task.cancel()
732
- try:
733
- await self._correlation_cleanup_task
734
- except asyncio.CancelledError:
735
- pass
736
-
737
- # Cancel batch timeout checker if running
738
- if self._batch_timeout_task and not self._batch_timeout_task.done():
739
- self._batch_timeout_task.cancel()
740
- try:
741
- await self._batch_timeout_task
742
- except asyncio.CancelledError:
743
- pass
744
-
745
- if self._mcp_manager is not None:
746
- await self._mcp_manager.cleanup_all()
747
- self._mcp_manager = None
748
-
749
- def cli(self) -> Flock:
750
- # Placeholder for CLI wiring (rich UI in Step 3)
751
- return self
752
-
753
- async def serve(
754
- self,
755
- *,
756
- dashboard: bool = False,
757
- dashboard_v2: bool = False,
758
- host: str = "127.0.0.1",
759
- port: int = 8344,
760
- ) -> None:
761
- """Start HTTP service for the orchestrator (blocking).
762
-
763
- Args:
764
- dashboard: Enable real-time dashboard with WebSocket support (default: False)
765
- dashboard_v2: Launch the new dashboard v2 frontend (implies dashboard=True)
766
- host: Host to bind to (default: "127.0.0.1")
767
- port: Port to bind to (default: 8344)
768
-
769
- Examples:
770
- # Basic HTTP API (no dashboard) - runs until interrupted
771
- await orchestrator.serve()
772
-
773
- # With dashboard (WebSocket + browser launch) - runs until interrupted
774
- await orchestrator.serve(dashboard=True)
775
- """
776
- if dashboard_v2:
777
- dashboard = True
778
-
779
- if not dashboard:
780
- # Standard service without dashboard
781
- from flock.service import BlackboardHTTPService
782
-
783
- service = BlackboardHTTPService(self)
784
- await service.run_async(host=host, port=port)
785
- return
786
-
787
- # Dashboard mode: integrate event collection and WebSocket
788
- from flock.dashboard.collector import DashboardEventCollector
789
- from flock.dashboard.launcher import DashboardLauncher
790
- from flock.dashboard.service import DashboardHTTPService
791
- from flock.dashboard.websocket import WebSocketManager
792
-
793
- # Create dashboard components
794
- websocket_manager = WebSocketManager()
795
- event_collector = DashboardEventCollector(store=self.store)
796
- event_collector.set_websocket_manager(websocket_manager)
797
- await event_collector.load_persistent_snapshots()
798
-
799
- # Store collector reference for agents added later
800
- self._dashboard_collector = event_collector
801
- # Store websocket manager for real-time event emission (Phase 1.2)
802
- self._websocket_manager = websocket_manager
803
-
804
- # Phase 6+7: Set class-level WebSocket broadcast wrapper (dashboard mode)
805
- async def _broadcast_wrapper(event):
806
- """Isolated broadcast wrapper - no reference chain to orchestrator."""
807
- return await websocket_manager.broadcast(event)
808
-
809
- from flock.agent import Agent
810
-
811
- Agent._websocket_broadcast_global = _broadcast_wrapper
812
-
813
- # Inject event collector into all existing agents
814
- for agent in self._agents.values():
815
- # Add dashboard collector with priority ordering handled by agent
816
- agent._add_utilities([event_collector])
817
-
818
- # Start dashboard launcher (npm process + browser)
819
- launcher_kwargs: dict[str, Any] = {"port": port}
820
- if dashboard_v2:
821
- dashboard_pkg_dir = Path(__file__).parent / "dashboard"
822
- launcher_kwargs["frontend_dir"] = dashboard_pkg_dir.parent / "frontend_v2"
823
- launcher_kwargs["static_dir"] = dashboard_pkg_dir / "static_v2"
824
-
825
- launcher = DashboardLauncher(**launcher_kwargs)
826
- launcher.start()
827
-
828
- # Create dashboard HTTP service
829
- service = DashboardHTTPService(
830
- orchestrator=self,
831
- websocket_manager=websocket_manager,
832
- event_collector=event_collector,
833
- use_v2=dashboard_v2,
834
- )
835
-
836
- # Store launcher for cleanup
837
- self._dashboard_launcher = launcher
838
-
839
- # Run service (blocking call)
840
- try:
841
- await service.run_async(host=host, port=port)
842
- finally:
843
- # Cleanup on exit
844
- launcher.stop()
845
-
846
- # Scheduling -----------------------------------------------------------
847
-
848
- async def publish(
849
- self,
850
- obj: BaseModel | dict | Artifact,
851
- *,
852
- visibility: Visibility | None = None,
853
- correlation_id: str | None = None,
854
- partition_key: str | None = None,
855
- tags: set[str] | None = None,
856
- is_dashboard: bool = False,
857
- ) -> Artifact:
858
- """Publish an artifact to the blackboard (event-driven).
859
-
860
- All agents with matching subscriptions will be triggered according to
861
- their filters (type, predicates, visibility, etc).
862
-
863
- Args:
864
- obj: Object to publish (BaseModel instance, dict, or Artifact)
865
- visibility: Access control (defaults to PublicVisibility)
866
- correlation_id: Optional correlation ID for request tracing
867
- partition_key: Optional partition key for sharding
868
- tags: Optional tags for channel-based routing
869
-
870
- Returns:
871
- The published Artifact
872
-
873
- Examples:
874
- >>> # Publish a model instance (recommended)
875
- >>> task = Task(name="Deploy", priority=5)
876
- >>> await orchestrator.publish(task)
877
-
878
- >>> # Publish with custom visibility
879
- >>> await orchestrator.publish(
880
- ... task, visibility=PrivateVisibility(agents={"admin"})
881
- ... )
882
-
883
- >>> # Publish with tags for channel routing
884
- >>> await orchestrator.publish(task, tags={"urgent", "backend"})
885
- """
886
- # Handle different input types
887
- if isinstance(obj, Artifact):
888
- # Already an artifact - publish as-is
889
- artifact = obj
890
- elif isinstance(obj, BaseModel):
891
- # BaseModel instance - get type from registry
892
- type_name = type_registry.name_for(type(obj))
893
- artifact = Artifact(
894
- type=type_name,
895
- payload=obj.model_dump(),
896
- produced_by="external",
897
- visibility=visibility or PublicVisibility(),
898
- correlation_id=correlation_id or uuid4(),
899
- partition_key=partition_key,
900
- tags=tags or set(),
901
- )
902
- elif isinstance(obj, dict):
903
- # Dict must have 'type' key
904
- if "type" not in obj:
905
- raise ValueError(
906
- "Dict input must contain 'type' key. "
907
- "Example: {'type': 'Task', 'name': 'foo', 'priority': 5}"
908
- )
909
- # Support both {'type': 'X', 'payload': {...}} and {'type': 'X', ...}
910
- type_name = obj["type"]
911
- if "payload" in obj:
912
- payload = obj["payload"]
913
- else:
914
- payload = {k: v for k, v in obj.items() if k != "type"}
915
-
916
- artifact = Artifact(
917
- type=type_name,
918
- payload=payload,
919
- produced_by="external",
920
- visibility=visibility or PublicVisibility(),
921
- correlation_id=correlation_id,
922
- partition_key=partition_key,
923
- tags=tags or set(),
924
- )
925
- else:
926
- raise TypeError(
927
- f"Cannot publish object of type {type(obj).__name__}. "
928
- "Expected BaseModel, dict, or Artifact."
929
- )
930
-
931
- # Persist and schedule matching agents
932
- await self._persist_and_schedule(artifact)
933
- return artifact
934
-
935
- async def publish_many(
936
- self, objects: Iterable[BaseModel | dict | Artifact], **kwargs: Any
937
- ) -> list[Artifact]:
938
- """Publish multiple artifacts at once (event-driven).
939
-
940
- Args:
941
- objects: Iterable of objects to publish
942
- **kwargs: Passed to each publish() call (visibility, tags, etc)
943
-
944
- Returns:
945
- List of published Artifacts
946
-
947
- Example:
948
- >>> tasks = [
949
- ... Task(name="Deploy", priority=5),
950
- ... Task(name="Test", priority=3),
951
- ... Task(name="Document", priority=1),
952
- ... ]
953
- >>> await orchestrator.publish_many(tasks, tags={"sprint-3"})
954
- """
955
- artifacts = []
956
- for obj in objects:
957
- artifact = await self.publish(obj, **kwargs)
958
- artifacts.append(artifact)
959
- return artifacts
960
-
961
- # -----------------------------------------------------------------------------
962
- # NEW DIRECT INVOCATION API - Explicit Control
963
- # -----------------------------------------------------------------------------
964
-
965
- async def invoke(
966
- self,
967
- agent: Agent | AgentBuilder,
968
- obj: BaseModel,
969
- *,
970
- publish_outputs: bool = True,
971
- timeout: float | None = None,
972
- ) -> list[Artifact]:
973
- """Directly invoke a specific agent (bypasses subscription matching).
974
-
975
- This executes the agent immediately without checking subscriptions or
976
- predicates. Useful for testing or synchronous request-response patterns.
977
-
978
- Args:
979
- agent: Agent or AgentBuilder to invoke
980
- obj: Input object (BaseModel instance)
981
- publish_outputs: If True, publish outputs to blackboard for cascade
982
- timeout: Optional timeout in seconds
983
-
984
- Returns:
985
- Artifacts produced by the agent
986
-
987
- Warning:
988
- This bypasses subscription filters and predicates. For event-driven
989
- coordination, use publish() instead.
990
-
991
- Examples:
992
- >>> # Testing: Execute agent without triggering others
993
- >>> results = await orchestrator.invoke(
994
- ... agent, Task(name="test", priority=5), publish_outputs=False
995
- ... )
996
-
997
- >>> # HTTP endpoint: Execute specific agent, allow cascade
998
- >>> results = await orchestrator.invoke(
999
- ... movie_agent, Idea(topic="AI", genre="comedy"), publish_outputs=True
1000
- ... )
1001
- >>> await orchestrator.run_until_idle()
1002
- """
1003
- from asyncio import wait_for
1004
- from uuid import uuid4
1005
-
1006
- # Get Agent instance
1007
- agent_obj = agent.agent if isinstance(agent, AgentBuilder) else agent
1008
-
1009
- # Create artifact (don't publish to blackboard yet)
1010
- type_name = type_registry.name_for(type(obj))
1011
- artifact = Artifact(
1012
- type=type_name,
1013
- payload=obj.model_dump(),
1014
- produced_by="__direct__",
1015
- visibility=PublicVisibility(),
1016
- )
1017
-
1018
- # Phase 8: Evaluate context BEFORE creating Context (security fix)
1019
- # Provider resolution: per-agent > global > DefaultContextProvider
1020
- from flock.context_provider import (
1021
- BoundContextProvider,
1022
- ContextRequest,
1023
- DefaultContextProvider,
1024
- )
1025
-
1026
- inner_provider = (
1027
- getattr(agent_obj, "context_provider", None)
1028
- or self._default_context_provider
1029
- or DefaultContextProvider()
1030
- )
1031
-
1032
- # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
1033
- provider = BoundContextProvider(inner_provider, agent_obj.identity)
1034
-
1035
- # Evaluate context using provider (orchestrator controls this!)
1036
- correlation_id = artifact.correlation_id if artifact.correlation_id else uuid4()
1037
- request = ContextRequest(
1038
- agent=agent_obj,
1039
- correlation_id=correlation_id,
1040
- store=self.store,
1041
- agent_identity=agent_obj.identity,
1042
- exclude_ids={artifact.id}, # Exclude input artifact
1043
- )
1044
- context_artifacts = await provider(request)
1045
-
1046
- # Phase 8: Create Context with pre-filtered data (no capabilities!)
1047
- # SECURITY: Context is now just data - engines can't query anything
1048
- ctx = Context(
1049
- artifacts=context_artifacts, # Pre-filtered conversation context
1050
- agent_identity=agent_obj.identity,
1051
- task_id=str(uuid4()),
1052
- correlation_id=correlation_id,
1053
- )
1054
- self._record_agent_run(agent_obj)
1055
-
1056
- # Execute with optional timeout
1057
- if timeout:
1058
- execution = agent_obj.execute(ctx, [artifact])
1059
- outputs = await wait_for(execution, timeout=timeout)
1060
- else:
1061
- outputs = await agent_obj.execute(ctx, [artifact])
1062
-
1063
- # Phase 6: Orchestrator publishes outputs (security fix)
1064
- # Agents return artifacts, orchestrator validates and publishes
1065
- if publish_outputs:
1066
- for output in outputs:
1067
- await self._persist_and_schedule(output)
1068
-
1069
- return outputs
1070
-
1071
- async def _persist_and_schedule(self, artifact: Artifact) -> None:
1072
- await self.store.publish(artifact)
1073
- self.metrics["artifacts_published"] += 1
1074
- await self._schedule_artifact(artifact)
1075
-
1076
- # Component Hook Runners ───────────────────────────────────────
1077
-
1078
- async def _run_initialize(self) -> None:
1079
- """Initialize all components in priority order (called once).
1080
-
1081
- Executes on_initialize hook for each component. Sets _components_initialized
1082
- flag to prevent multiple initializations.
1083
- """
1084
- if self._components_initialized:
1085
- return
1086
-
1087
- self._logger.info(
1088
- f"Initializing {len(self._components)} orchestrator components"
1089
- )
1090
-
1091
- for component in self._components:
1092
- comp_name = component.name or component.__class__.__name__
1093
- self._logger.debug(
1094
- f"Initializing component: name={comp_name}, priority={component.priority}"
1095
- )
1096
-
1097
- try:
1098
- await component.on_initialize(self)
1099
- except Exception as e:
1100
- self._logger.exception(
1101
- f"Component initialization failed: name={comp_name}, error={e!s}"
1102
- )
1103
- raise
1104
-
1105
- self._components_initialized = True
1106
- self._logger.info(f"All components initialized: count={len(self._components)}")
1107
-
1108
- async def _run_artifact_published(self, artifact: Artifact) -> Artifact | None:
1109
- """Run on_artifact_published hooks (returns modified artifact or None to block).
1110
-
1111
- Components execute in priority order, each receiving the artifact from the
1112
- previous component (chaining). If any component returns None, the artifact
1113
- is blocked and scheduling stops.
1114
- """
1115
- current_artifact = artifact
1116
-
1117
- for component in self._components:
1118
- comp_name = component.name or component.__class__.__name__
1119
- self._logger.debug(
1120
- f"Running on_artifact_published: component={comp_name}, "
1121
- f"artifact_type={current_artifact.type}, artifact_id={current_artifact.id}"
1122
- )
1123
-
1124
- try:
1125
- result = await component.on_artifact_published(self, current_artifact)
1126
-
1127
- if result is None:
1128
- self._logger.info(
1129
- f"Artifact blocked by component: component={comp_name}, "
1130
- f"artifact_type={current_artifact.type}, artifact_id={current_artifact.id}"
1131
- )
1132
- return None
1133
-
1134
- current_artifact = result
1135
- except Exception as e:
1136
- self._logger.exception(
1137
- f"Component hook failed: component={comp_name}, "
1138
- f"hook=on_artifact_published, error={e!s}"
1139
- )
1140
- raise
1141
-
1142
- return current_artifact
1143
-
1144
- async def _run_before_schedule(
1145
- self, artifact: Artifact, agent: Agent, subscription: Subscription
1146
- ) -> ScheduleDecision:
1147
- """Run on_before_schedule hooks (returns CONTINUE, SKIP, or DEFER).
1148
-
1149
- Components execute in priority order. First component to return SKIP or
1150
- DEFER stops execution and returns that decision.
1151
- """
1152
- from flock.orchestrator_component import ScheduleDecision
1153
-
1154
- for component in self._components:
1155
- comp_name = component.name or component.__class__.__name__
1156
-
1157
- self._logger.debug(
1158
- f"Running on_before_schedule: component={comp_name}, "
1159
- f"agent={agent.name}, artifact_type={artifact.type}"
1160
- )
1161
-
1162
- try:
1163
- decision = await component.on_before_schedule(
1164
- self, artifact, agent, subscription
1165
- )
1166
-
1167
- if decision == ScheduleDecision.SKIP:
1168
- self._logger.info(
1169
- f"Scheduling skipped by component: component={comp_name}, "
1170
- f"agent={agent.name}, artifact_type={artifact.type}, decision=SKIP"
1171
- )
1172
- return ScheduleDecision.SKIP
1173
-
1174
- if decision == ScheduleDecision.DEFER:
1175
- self._logger.debug(
1176
- f"Scheduling deferred by component: component={comp_name}, "
1177
- f"agent={agent.name}, decision=DEFER"
1178
- )
1179
- return ScheduleDecision.DEFER
1180
-
1181
- except Exception as e:
1182
- self._logger.exception(
1183
- f"Component hook failed: component={comp_name}, "
1184
- f"hook=on_before_schedule, error={e!s}"
1185
- )
1186
- raise
1187
-
1188
- return ScheduleDecision.CONTINUE
1189
-
1190
- async def _run_collect_artifacts(
1191
- self, artifact: Artifact, agent: Agent, subscription: Subscription
1192
- ) -> CollectionResult:
1193
- """Run on_collect_artifacts hooks (returns first non-None result).
1194
-
1195
- Components execute in priority order. First component to return non-None
1196
- wins (short-circuit). If all return None, default is immediate scheduling.
1197
- """
1198
- from flock.orchestrator_component import CollectionResult
1199
-
1200
- for component in self._components:
1201
- comp_name = component.name or component.__class__.__name__
1202
-
1203
- self._logger.debug(
1204
- f"Running on_collect_artifacts: component={comp_name}, "
1205
- f"agent={agent.name}, artifact_type={artifact.type}"
1206
- )
1207
-
1208
- try:
1209
- result = await component.on_collect_artifacts(
1210
- self, artifact, agent, subscription
1211
- )
1212
-
1213
- if result is not None:
1214
- self._logger.debug(
1215
- f"Collection handled by component: component={comp_name}, "
1216
- f"complete={result.complete}, artifact_count={len(result.artifacts)}"
1217
- )
1218
- return result
1219
- except Exception as e:
1220
- self._logger.exception(
1221
- f"Component hook failed: component={comp_name}, "
1222
- f"hook=on_collect_artifacts, error={e!s}"
1223
- )
1224
- raise
1225
-
1226
- # Default: immediate scheduling with single artifact
1227
- self._logger.debug(
1228
- f"No component handled collection, using default: "
1229
- f"agent={agent.name}, artifact_type={artifact.type}"
1230
- )
1231
- return CollectionResult.immediate([artifact])
1232
-
1233
- async def _run_before_agent_schedule(
1234
- self, agent: Agent, artifacts: list[Artifact]
1235
- ) -> list[Artifact] | None:
1236
- """Run on_before_agent_schedule hooks (returns modified artifacts or None to block).
1237
-
1238
- Components execute in priority order, each receiving artifacts from the
1239
- previous component (chaining). If any component returns None, scheduling
1240
- is blocked.
1241
- """
1242
- current_artifacts = artifacts
1243
-
1244
- for component in self._components:
1245
- comp_name = component.name or component.__class__.__name__
1246
-
1247
- self._logger.debug(
1248
- f"Running on_before_agent_schedule: component={comp_name}, "
1249
- f"agent={agent.name}, artifact_count={len(current_artifacts)}"
1250
- )
1251
-
1252
- try:
1253
- result = await component.on_before_agent_schedule(
1254
- self, agent, current_artifacts
1255
- )
1256
-
1257
- if result is None:
1258
- self._logger.info(
1259
- f"Agent scheduling blocked by component: component={comp_name}, "
1260
- f"agent={agent.name}"
1261
- )
1262
- return None
1263
-
1264
- current_artifacts = result
1265
- except Exception as e:
1266
- self._logger.exception(
1267
- f"Component hook failed: component={comp_name}, "
1268
- f"hook=on_before_agent_schedule, error={e!s}"
1269
- )
1270
- raise
1271
-
1272
- return current_artifacts
1273
-
1274
- async def _run_agent_scheduled(
1275
- self, agent: Agent, artifacts: list[Artifact], task: Task[Any]
1276
- ) -> None:
1277
- """Run on_agent_scheduled hooks (notification only, non-blocking).
1278
-
1279
- Components execute in priority order. Exceptions are logged but don't
1280
- prevent other components from executing or block scheduling.
1281
- """
1282
- for component in self._components:
1283
- comp_name = component.name or component.__class__.__name__
1284
-
1285
- self._logger.debug(
1286
- f"Running on_agent_scheduled: component={comp_name}, "
1287
- f"agent={agent.name}, artifact_count={len(artifacts)}"
1288
- )
1289
-
1290
- try:
1291
- await component.on_agent_scheduled(self, agent, artifacts, task)
1292
- except Exception as e:
1293
- self._logger.warning(
1294
- f"Component notification hook failed (non-critical): "
1295
- f"component={comp_name}, hook=on_agent_scheduled, error={e!s}"
1296
- )
1297
- # Don't propagate - this is a notification hook
1298
-
1299
- async def _run_idle(self) -> None:
1300
- """Run on_orchestrator_idle hooks when orchestrator becomes idle.
1301
-
1302
- Components execute in priority order. Exceptions are logged but don't
1303
- prevent other components from executing.
1304
- """
1305
- self._logger.debug(
1306
- f"Running on_orchestrator_idle hooks: component_count={len(self._components)}"
1307
- )
1308
-
1309
- for component in self._components:
1310
- comp_name = component.name or component.__class__.__name__
1311
-
1312
- try:
1313
- await component.on_orchestrator_idle(self)
1314
- except Exception as e:
1315
- self._logger.warning(
1316
- f"Component idle hook failed (non-critical): "
1317
- f"component={comp_name}, hook=on_orchestrator_idle, error={e!s}"
1318
- )
1319
-
1320
- async def _run_shutdown(self) -> None:
1321
- """Run on_shutdown hooks when orchestrator shuts down.
1322
-
1323
- Components execute in priority order. Exceptions are logged but don't
1324
- prevent shutdown of other components (best-effort cleanup).
1325
- """
1326
- self._logger.info(
1327
- f"Shutting down {len(self._components)} orchestrator components"
1328
- )
1329
-
1330
- for component in self._components:
1331
- comp_name = component.name or component.__class__.__name__
1332
- self._logger.debug(f"Shutting down component: name={comp_name}")
1333
-
1334
- try:
1335
- await component.on_shutdown(self)
1336
- except Exception as e:
1337
- self._logger.exception(
1338
- f"Component shutdown failed: component={comp_name}, "
1339
- f"hook=on_shutdown, error={e!s}"
1340
- )
1341
- # Continue shutting down other components
1342
-
1343
- # Scheduling ───────────────────────────────────────────────────
1344
-
1345
- async def _schedule_artifact(self, artifact: Artifact) -> None:
1346
- """Schedule agents for an artifact using component hooks.
1347
-
1348
- Refactored to use OrchestratorComponent hook system for extensibility.
1349
- Components can modify artifact, control scheduling, and handle collection.
1350
- """
1351
- # Phase 3: Initialize components on first artifact
1352
- if not self._components_initialized:
1353
- await self._run_initialize()
1354
-
1355
- # Phase 3: Component hook - artifact published (can transform or block)
1356
- artifact = await self._run_artifact_published(artifact)
1357
- if artifact is None:
1358
- return # Artifact blocked by component
1359
-
1360
- for agent in self.agents:
1361
- identity = agent.identity
1362
- for subscription in agent.subscriptions:
1363
- if not subscription.accepts_events():
1364
- continue
1365
-
1366
- # T066: Check prevent_self_trigger
1367
- if agent.prevent_self_trigger and artifact.produced_by == agent.name:
1368
- continue # Skip - agent produced this artifact (prevents feedback loops)
1369
-
1370
- # Visibility check
1371
- if not self._check_visibility(artifact, identity):
1372
- continue
1373
-
1374
- # Subscription match check
1375
- if not subscription.matches(artifact):
1376
- continue
1377
-
1378
- # Phase 3: Component hook - before schedule (circuit breaker, deduplication, etc.)
1379
- from flock.orchestrator_component import ScheduleDecision
1380
-
1381
- decision = await self._run_before_schedule(
1382
- artifact, agent, subscription
1383
- )
1384
- if decision == ScheduleDecision.SKIP:
1385
- continue # Skip this subscription
1386
- if decision == ScheduleDecision.DEFER:
1387
- continue # Defer for later (batching/correlation)
1388
-
1389
- # Phase 3: Component hook - collect artifacts (handles AND gates, correlation, batching)
1390
- collection = await self._run_collect_artifacts(
1391
- artifact, agent, subscription
1392
- )
1393
- if not collection.complete:
1394
- continue # Still collecting (AND gate, correlation, or batch incomplete)
1395
-
1396
- artifacts = collection.artifacts
1397
-
1398
- # Phase 3: Component hook - before agent schedule (final validation/transformation)
1399
- artifacts = await self._run_before_agent_schedule(agent, artifacts)
1400
- if artifacts is None:
1401
- continue # Scheduling blocked by component
1402
-
1403
- # Complete! Schedule agent with collected artifacts
1404
- # Schedule agent task
1405
- is_batch_execution = subscription.batch is not None
1406
- task = self._schedule_task(
1407
- agent, artifacts, is_batch=is_batch_execution
1408
- )
1409
-
1410
- # Phase 3: Component hook - agent scheduled (notification)
1411
- await self._run_agent_scheduled(agent, artifacts, task)
1412
-
1413
- def _schedule_task(
1414
- self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
1415
- ) -> Task[Any]:
1416
- """Schedule agent task and return the task handle."""
1417
- task = asyncio.create_task(
1418
- self._run_agent_task(agent, artifacts, is_batch=is_batch)
1419
- )
1420
- self._tasks.add(task)
1421
- task.add_done_callback(self._tasks.discard)
1422
- return task
1423
-
1424
- def _record_agent_run(self, agent: Agent) -> None:
1425
- self.metrics["agent_runs"] += 1
1426
-
1427
- def _mark_processed(self, artifact: Artifact, agent: Agent) -> None:
1428
- key = (str(artifact.id), agent.name)
1429
- self._processed.add(key)
1430
-
1431
- def _seen_before(self, artifact: Artifact, agent: Agent) -> bool:
1432
- key = (str(artifact.id), agent.name)
1433
- return key in self._processed
1434
-
1435
- async def _run_agent_task(
1436
- self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
1437
- ) -> None:
1438
- correlation_id = artifacts[0].correlation_id if artifacts else uuid4()
1439
-
1440
- # Phase 8: Evaluate context BEFORE creating Context (security fix)
1441
- # Provider resolution: per-agent > global > DefaultContextProvider
1442
- from flock.context_provider import (
1443
- BoundContextProvider,
1444
- ContextRequest,
1445
- DefaultContextProvider,
1446
- )
1447
-
1448
- inner_provider = (
1449
- getattr(agent, "context_provider", None)
1450
- or self._default_context_provider
1451
- or DefaultContextProvider()
1452
- )
1453
-
1454
- # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
1455
- provider = BoundContextProvider(inner_provider, agent.identity)
1456
-
1457
- # Evaluate context using provider (orchestrator controls this!)
1458
- # Engines will receive pre-filtered artifacts via ctx.artifacts
1459
- request = ContextRequest(
1460
- agent=agent,
1461
- correlation_id=correlation_id,
1462
- store=self.store,
1463
- agent_identity=agent.identity,
1464
- exclude_ids={a.id for a in artifacts}, # Exclude input artifacts
1465
- )
1466
- context_artifacts = await provider(request)
1467
-
1468
- # Phase 8: Create Context with pre-filtered data (no capabilities!)
1469
- # SECURITY: Context is now just data - engines can't query anything
1470
- ctx = Context(
1471
- artifacts=context_artifacts, # Pre-filtered conversation context
1472
- agent_identity=agent.identity,
1473
- task_id=str(uuid4()),
1474
- correlation_id=correlation_id,
1475
- is_batch=is_batch,
1476
- )
1477
- self._record_agent_run(agent)
1478
-
1479
- # Phase 6: Execute agent (returns artifacts, doesn't publish)
1480
- outputs = await agent.execute(ctx, artifacts)
1481
-
1482
- # Phase 6: Orchestrator publishes outputs (security fix)
1483
- # This fixes Vulnerability #2 (WRITE Bypass) - agents can't bypass validation
1484
- for output in outputs:
1485
- await self._persist_and_schedule(output)
1486
-
1487
- if artifacts:
1488
- try:
1489
- timestamp = datetime.now(UTC)
1490
- records = [
1491
- ConsumptionRecord(
1492
- artifact_id=artifact.id,
1493
- consumer=agent.name,
1494
- run_id=ctx.task_id,
1495
- correlation_id=str(correlation_id) if correlation_id else None,
1496
- consumed_at=timestamp,
1497
- )
1498
- for artifact in artifacts
1499
- ]
1500
- await self.store.record_consumptions(records)
1501
- except NotImplementedError:
1502
- pass
1503
- except Exception as exc: # pragma: no cover - defensive logging
1504
- self._logger.exception("Failed to record artifact consumption: %s", exc)
1505
-
1506
- # Phase 1.2: Logic Operations Event Emission ----------------------------
1507
-
1508
- async def _emit_correlation_updated_event(
1509
- self, *, agent_name: str, subscription_index: int, artifact: Artifact
1510
- ) -> None:
1511
- """Emit CorrelationGroupUpdatedEvent for real-time dashboard updates.
1512
-
1513
- Called when an artifact is added to a correlation group that is not yet complete.
1514
-
1515
- Args:
1516
- agent_name: Name of the agent with the JoinSpec subscription
1517
- subscription_index: Index of the subscription in the agent's subscriptions list
1518
- artifact: The artifact that triggered this update
1519
- """
1520
- # Only emit if dashboard is enabled
1521
- if self._websocket_manager is None:
1522
- return
1523
-
1524
- # Import _get_correlation_groups helper from dashboard service
1525
- from flock.dashboard.service import _get_correlation_groups
1526
-
1527
- # Get current correlation groups state from engine
1528
- groups = _get_correlation_groups(
1529
- self._correlation_engine, agent_name, subscription_index
1530
- )
1531
-
1532
- if not groups:
1533
- return # No groups to report (shouldn't happen, but defensive)
1534
-
1535
- # Find the group that was just updated (match by last updated time or artifact ID)
1536
- # For now, we'll emit an event for the FIRST group that's still waiting
1537
- # In practice, the artifact we just added should be in one of these groups
1538
- for group_state in groups:
1539
- if not group_state["is_complete"]:
1540
- # Import CorrelationGroupUpdatedEvent
1541
- from flock.dashboard.events import CorrelationGroupUpdatedEvent
1542
-
1543
- # Build and emit event
1544
- event = CorrelationGroupUpdatedEvent(
1545
- agent_name=agent_name,
1546
- subscription_index=subscription_index,
1547
- correlation_key=group_state["correlation_key"],
1548
- collected_types=group_state["collected_types"],
1549
- required_types=group_state["required_types"],
1550
- waiting_for=group_state["waiting_for"],
1551
- elapsed_seconds=group_state["elapsed_seconds"],
1552
- expires_in_seconds=group_state["expires_in_seconds"],
1553
- expires_in_artifacts=group_state["expires_in_artifacts"],
1554
- artifact_id=str(artifact.id),
1555
- artifact_type=artifact.type,
1556
- is_complete=group_state["is_complete"],
1557
- )
1558
-
1559
- # Broadcast via WebSocket
1560
- await self._websocket_manager.broadcast(event)
1561
- break # Only emit one event per artifact addition
1562
-
1563
async def _emit_batch_item_added_event(
    self,
    *,
    agent_name: str,
    subscription_index: int,
    subscription: Subscription,  # noqa: F821
    artifact: Artifact,
) -> None:
    """Broadcast a ``BatchItemAddedEvent`` describing the current batch state.

    The call is a no-op when no websocket manager is configured, or when the
    dashboard helper ``_get_batch_state`` returns a falsy state for this
    agent/subscription pair.

    Args:
        agent_name: Name of the agent that owns the batched subscription.
        subscription_index: Position of the subscription in the agent's
            subscription list.
        subscription: Subscription whose ``batch`` attribute is handed to
            ``_get_batch_state``.
        artifact: Artifact whose ``id`` and ``type`` are reported in the event.
    """
    manager = self._websocket_manager
    if manager is None:
        # No websocket manager configured: nothing to notify.
        return

    # Imported lazily, only once a websocket manager is known to exist.
    from flock.dashboard.service import _get_batch_state

    state = _get_batch_state(
        self._batch_engine, agent_name, subscription_index, subscription.batch
    )
    if not state:
        # Defensive: the helper reported nothing for this subscription.
        return

    from flock.dashboard.events import BatchItemAddedEvent

    # Required keys use indexing (KeyError if absent); optional ones use .get().
    fields = {
        "agent_name": agent_name,
        "subscription_index": subscription_index,
        "items_collected": state["items_collected"],
        "items_target": state.get("items_target"),
        "items_remaining": state.get("items_remaining"),
        "elapsed_seconds": state["elapsed_seconds"],
        "timeout_seconds": state.get("timeout_seconds"),
        "timeout_remaining_seconds": state.get("timeout_remaining_seconds"),
        "will_flush": state["will_flush"],
        "artifact_id": str(artifact.id),
        "artifact_type": artifact.type,
    }
    await manager.broadcast(BatchItemAddedEvent(**fields))
1616
-
1617
- # Batch Helpers --------------------------------------------------------
1618
-
1619
async def _correlation_cleanup_loop(self) -> None:
    """Background loop that repeatedly cleans up correlation groups.

    Each pass sleeps for ``self._correlation_cleanup_interval`` and then calls
    ``_cleanup_expired_correlations``. The loop ends once the correlation
    engine holds no more groups. On both normal exit and cancellation the
    stored task handle ``self._correlation_cleanup_task`` is reset to ``None``.

    Raises:
        asyncio.CancelledError: Re-raised after the task handle is cleared.
    """
    try:
        groups_pending = True
        while groups_pending:
            await asyncio.sleep(self._correlation_cleanup_interval)
            self._cleanup_expired_correlations()
            groups_pending = bool(self._correlation_engine.correlation_groups)
        # Nothing left to watch: drop the handle before the task finishes.
        self._correlation_cleanup_task = None
    except asyncio.CancelledError:
        # Cancelled (e.g. shutdown): clear the handle and propagate.
        self._correlation_cleanup_task = None
        raise
1638
-
1639
def _cleanup_expired_correlations(self) -> None:
    """Run ``cleanup_expired`` for every key currently in the correlation engine.

    Keys of ``correlation_groups`` are ``(agent_name, subscription_index)``
    pairs; each pair is forwarded to the engine's ``cleanup_expired``.
    """
    # Snapshot the keys so the engine may drop groups while we iterate.
    active_keys = list(self._correlation_engine.correlation_groups.keys())
    for key in active_keys:
        agent_name, subscription_index = key
        self._correlation_engine.cleanup_expired(agent_name, subscription_index)
1650
-
1651
async def _batch_timeout_checker_loop(self) -> None:
    """Background loop that repeatedly checks batches for timeouts.

    Each pass sleeps for ``self._batch_timeout_interval`` and then awaits
    ``_check_batch_timeouts``. The loop ends once the batch engine holds no
    more batches. On both normal exit and cancellation the stored task handle
    ``self._batch_timeout_task`` is reset to ``None``.

    Raises:
        asyncio.CancelledError: Re-raised after the task handle is cleared.
    """
    try:
        batches_pending = True
        while batches_pending:
            await asyncio.sleep(self._batch_timeout_interval)
            await self._check_batch_timeouts()
            batches_pending = bool(self._batch_engine.batches)
        # Nothing left to watch: drop the handle before the task finishes.
        self._batch_timeout_task = None
    except asyncio.CancelledError:
        # Cancelled (e.g. shutdown): clear the handle and propagate.
        self._batch_timeout_task = None
        raise
1670
-
1671
async def _check_batch_timeouts(self) -> None:
    """Flush every batch the engine reports as timed out and schedule its agent.

    For each ``(agent_name, subscription_index)`` returned by
    ``check_timeouts`` the batch is flushed. When the flush yields artifacts
    and the agent is still registered in ``self._agents``, the agent is
    scheduled with those artifacts as a batch. Entries whose flush returns
    ``None`` or whose agent is unknown are skipped.
    """
    for agent_name, subscription_index in self._batch_engine.check_timeouts():
        flushed = self._batch_engine.flush_batch(agent_name, subscription_index)

        # Look the agent up only when the flush actually produced something.
        target = self._agents.get(agent_name) if flushed is not None else None
        if target is not None:
            self._schedule_task(target, flushed, is_batch=True)
1693
-
1694
async def _flush_all_batches(self) -> None:
    """Flush every partial batch, schedule the owning agents, then wait for idle.

    ``flush_all`` yields ``(agent_name, subscription_index, artifacts)``
    triples. Each triple whose agent is registered in ``self._agents`` is
    scheduled as a batch run; triples for unknown agents are skipped.
    Finally ``run_until_idle`` is awaited.
    """
    for agent_name, _subscription_index, artifacts in self._batch_engine.flush_all():
        owner = self._agents.get(agent_name)
        if owner is not None:
            self._schedule_task(owner, artifacts, is_batch=True)

    # Let everything that was just scheduled run to completion.
    await self.run_until_idle()
1709
-
1710
- # Helpers --------------------------------------------------------------
1711
-
1712
def _normalize_input(
    self, value: BaseModel | Mapping[str, Any] | Artifact, *, produced_by: str
) -> Artifact:
    """Convert a direct-invoke input into an ``Artifact``.

    Args:
        value: An ``Artifact`` (returned unchanged), a ``BaseModel`` (its class
            is registered with ``type_registry`` and its ``model_dump()``
            becomes the payload), or a mapping with a ``"type"`` key and an
            optional ``"payload"`` key (defaulting to ``{}``).
        produced_by: Producer name recorded on a newly built artifact.

    Returns:
        The original artifact, or a new one built from ``value``.

    Raises:
        ValueError: If a mapping input has no ``"type"`` key.
        TypeError: If ``value`` is none of the supported kinds.
    """
    # Already an artifact: pass it through untouched.
    if isinstance(value, Artifact):
        return value

    if isinstance(value, BaseModel):
        registered_name = type_registry.register(type(value))
        return Artifact(
            type=registered_name,
            payload=value.model_dump(),
            produced_by=produced_by,
        )

    if not isinstance(value, Mapping):  # pragma: no cover - defensive
        raise TypeError("Unsupported input for direct invoke.")

    if "type" not in value:
        raise ValueError("Mapping input must contain 'type'.")
    return Artifact(
        type=value["type"],
        payload=value.get("payload", {}),
        produced_by=produced_by,
    )
1729
-
1730
def _check_visibility(self, artifact: Artifact, identity: AgentIdentity) -> bool:
    """Return whether ``artifact.visibility`` allows ``identity``.

    Any ``AttributeError`` raised while evaluating
    ``artifact.visibility.allows(identity)`` is treated as "visible" and
    yields ``True``. This covers, for example, a dict-style visibility value
    that has no ``allows`` method.
    """
    try:
        permitted = artifact.visibility.allows(identity)
    except AttributeError:  # pragma: no cover - fallback for dict vis
        permitted = True
    return permitted
1735
-
1736
-
1737
@asynccontextmanager
async def start_orchestrator(orchestrator: Flock):  # pragma: no cover - CLI helper
    """Async context manager that yields ``orchestrator`` and drains it on exit.

    The orchestrator is yielded as-is. When the ``async with`` body finishes
    without raising, ``orchestrator.run_until_idle()`` is awaited. If the body
    raises, the exception propagates and the idle wait is not performed.
    """
    yield orchestrator
    await orchestrator.run_until_idle()
1744
-
1745
-
1746
- __all__ = ["Flock", "start_orchestrator"]