flock-core 0.5.11__py3-none-any.whl → 0.5.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flock-core might be problematic; see the registry's advisory page for more details.

Files changed (94)
  1. flock/__init__.py +1 -1
  2. flock/agent/__init__.py +30 -0
  3. flock/agent/builder_helpers.py +192 -0
  4. flock/agent/builder_validator.py +169 -0
  5. flock/agent/component_lifecycle.py +325 -0
  6. flock/agent/context_resolver.py +141 -0
  7. flock/agent/mcp_integration.py +212 -0
  8. flock/agent/output_processor.py +304 -0
  9. flock/api/__init__.py +20 -0
  10. flock/{api_models.py → api/models.py} +0 -2
  11. flock/{service.py → api/service.py} +3 -3
  12. flock/cli.py +2 -2
  13. flock/components/__init__.py +41 -0
  14. flock/components/agent/__init__.py +22 -0
  15. flock/{components.py → components/agent/base.py} +4 -3
  16. flock/{utility/output_utility_component.py → components/agent/output_utility.py} +12 -7
  17. flock/components/orchestrator/__init__.py +22 -0
  18. flock/{orchestrator_component.py → components/orchestrator/base.py} +5 -293
  19. flock/components/orchestrator/circuit_breaker.py +95 -0
  20. flock/components/orchestrator/collection.py +143 -0
  21. flock/components/orchestrator/deduplication.py +78 -0
  22. flock/core/__init__.py +30 -0
  23. flock/core/agent.py +953 -0
  24. flock/{artifacts.py → core/artifacts.py} +1 -1
  25. flock/{context_provider.py → core/context_provider.py} +3 -3
  26. flock/core/orchestrator.py +1102 -0
  27. flock/{store.py → core/store.py} +99 -454
  28. flock/{subscription.py → core/subscription.py} +1 -1
  29. flock/dashboard/collector.py +5 -5
  30. flock/dashboard/events.py +1 -1
  31. flock/dashboard/graph_builder.py +7 -7
  32. flock/dashboard/routes/__init__.py +21 -0
  33. flock/dashboard/routes/control.py +327 -0
  34. flock/dashboard/routes/helpers.py +340 -0
  35. flock/dashboard/routes/themes.py +76 -0
  36. flock/dashboard/routes/traces.py +521 -0
  37. flock/dashboard/routes/websocket.py +108 -0
  38. flock/dashboard/service.py +43 -1316
  39. flock/engines/dspy/__init__.py +20 -0
  40. flock/engines/dspy/artifact_materializer.py +216 -0
  41. flock/engines/dspy/signature_builder.py +474 -0
  42. flock/engines/dspy/streaming_executor.py +812 -0
  43. flock/engines/dspy_engine.py +45 -1330
  44. flock/engines/examples/simple_batch_engine.py +2 -2
  45. flock/engines/streaming/__init__.py +3 -0
  46. flock/engines/streaming/sinks.py +489 -0
  47. flock/examples.py +7 -7
  48. flock/logging/logging.py +1 -16
  49. flock/models/__init__.py +10 -0
  50. flock/orchestrator/__init__.py +45 -0
  51. flock/{artifact_collector.py → orchestrator/artifact_collector.py} +3 -3
  52. flock/orchestrator/artifact_manager.py +168 -0
  53. flock/{batch_accumulator.py → orchestrator/batch_accumulator.py} +2 -2
  54. flock/orchestrator/component_runner.py +389 -0
  55. flock/orchestrator/context_builder.py +167 -0
  56. flock/{correlation_engine.py → orchestrator/correlation_engine.py} +2 -2
  57. flock/orchestrator/event_emitter.py +167 -0
  58. flock/orchestrator/initialization.py +184 -0
  59. flock/orchestrator/lifecycle_manager.py +226 -0
  60. flock/orchestrator/mcp_manager.py +202 -0
  61. flock/orchestrator/scheduler.py +189 -0
  62. flock/orchestrator/server_manager.py +234 -0
  63. flock/orchestrator/tracing.py +147 -0
  64. flock/storage/__init__.py +10 -0
  65. flock/storage/artifact_aggregator.py +158 -0
  66. flock/storage/in_memory/__init__.py +6 -0
  67. flock/storage/in_memory/artifact_filter.py +114 -0
  68. flock/storage/in_memory/history_aggregator.py +115 -0
  69. flock/storage/sqlite/__init__.py +10 -0
  70. flock/storage/sqlite/agent_history_queries.py +154 -0
  71. flock/storage/sqlite/consumption_loader.py +100 -0
  72. flock/storage/sqlite/query_builder.py +112 -0
  73. flock/storage/sqlite/query_params_builder.py +91 -0
  74. flock/storage/sqlite/schema_manager.py +168 -0
  75. flock/storage/sqlite/summary_queries.py +194 -0
  76. flock/utils/__init__.py +14 -0
  77. flock/utils/async_utils.py +67 -0
  78. flock/{runtime.py → utils/runtime.py} +3 -3
  79. flock/utils/time_utils.py +53 -0
  80. flock/utils/type_resolution.py +38 -0
  81. flock/{utilities.py → utils/utilities.py} +2 -2
  82. flock/utils/validation.py +57 -0
  83. flock/utils/visibility.py +79 -0
  84. flock/utils/visibility_utils.py +134 -0
  85. {flock_core-0.5.11.dist-info → flock_core-0.5.21.dist-info}/METADATA +19 -5
  86. {flock_core-0.5.11.dist-info → flock_core-0.5.21.dist-info}/RECORD +92 -34
  87. flock/agent.py +0 -1578
  88. flock/orchestrator.py +0 -1983
  89. /flock/{visibility.py → core/visibility.py} +0 -0
  90. /flock/{system_artifacts.py → models/system_artifacts.py} +0 -0
  91. /flock/{helper → utils}/cli_helper.py +0 -0
  92. {flock_core-0.5.11.dist-info → flock_core-0.5.21.dist-info}/WHEEL +0 -0
  93. {flock_core-0.5.11.dist-info → flock_core-0.5.21.dist-info}/entry_points.txt +0 -0
  94. {flock_core-0.5.11.dist-info → flock_core-0.5.21.dist-info}/licenses/LICENSE +0 -0
flock/orchestrator.py DELETED
@@ -1,1983 +0,0 @@
1
- """Blackboard orchestrator and scheduling runtime."""
2
-
3
- from __future__ import annotations
4
-
5
- import asyncio
6
- import logging
7
- import os
8
- from asyncio import Task
9
- from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence
10
- from contextlib import asynccontextmanager
11
- from datetime import UTC, datetime
12
- from pathlib import Path
13
- from typing import TYPE_CHECKING, Any
14
- from uuid import UUID, uuid4
15
-
16
- from opentelemetry import trace
17
- from opentelemetry.trace import Status, StatusCode
18
- from pydantic import BaseModel
19
-
20
- from flock.agent import Agent, AgentBuilder
21
- from flock.artifact_collector import ArtifactCollector
22
- from flock.artifacts import Artifact
23
- from flock.batch_accumulator import BatchEngine
24
- from flock.correlation_engine import CorrelationEngine
25
- from flock.helper.cli_helper import init_console
26
- from flock.logging.auto_trace import AutoTracedMeta
27
- from flock.mcp import (
28
- FlockMCPClientManager,
29
- FlockMCPConfiguration,
30
- FlockMCPConnectionConfiguration,
31
- FlockMCPFeatureConfiguration,
32
- ServerParameters,
33
- )
34
- from flock.orchestrator_component import (
35
- CollectionResult,
36
- OrchestratorComponent,
37
- ScheduleDecision,
38
- )
39
- from flock.registry import type_registry
40
- from flock.runtime import Context
41
- from flock.store import BlackboardStore, ConsumptionRecord, InMemoryBlackboardStore
42
- from flock.subscription import Subscription
43
- from flock.visibility import AgentIdentity, PublicVisibility, Visibility
44
-
45
-
46
- if TYPE_CHECKING:
47
- import builtins
48
-
49
-
50
- class BoardHandle:
51
- """Handle exposed to components for publishing and inspection."""
52
-
53
- def __init__(self, orchestrator: Flock) -> None:
54
- self._orchestrator = orchestrator
55
-
56
- async def publish(self, artifact: Artifact) -> None:
57
- await self._orchestrator._persist_and_schedule(artifact)
58
-
59
- async def get(self, artifact_id) -> Artifact | None:
60
- return await self._orchestrator.store.get(artifact_id)
61
-
62
- async def list(self) -> builtins.list[Artifact]:
63
- return await self._orchestrator.store.list()
64
-
65
-
66
- class Flock(metaclass=AutoTracedMeta):
67
- """Main orchestrator for blackboard-based agent coordination.
68
-
69
- All public methods are automatically traced via OpenTelemetry.
70
- """
71
-
72
- def _patch_litellm_proxy_imports(self) -> None:
73
- """Stub litellm proxy_server to avoid optional proxy deps when not used.
74
-
75
- Some litellm versions import `litellm.proxy.proxy_server` during standard logging
76
- to read `general_settings`, which pulls in optional dependencies like `apscheduler`.
77
- We provide a stub so imports succeed but cold storage remains disabled.
78
- """
79
- try:
80
- import sys
81
- import types
82
-
83
- if "litellm.proxy.proxy_server" not in sys.modules:
84
- stub = types.ModuleType("litellm.proxy.proxy_server")
85
- # Minimal surface that cold_storage_handler accesses
86
- stub.general_settings = {}
87
- sys.modules["litellm.proxy.proxy_server"] = stub
88
- except Exception: # nosec B110 - Safe to ignore; worst case litellm will log a warning
89
- # logger.debug(f"Failed to stub litellm proxy_server: {e}")
90
- pass
91
-
92
- def __init__(
93
- self,
94
- model: str | None = None,
95
- *,
96
- store: BlackboardStore | None = None,
97
- max_agent_iterations: int = 1000,
98
- context_provider: Any = None,
99
- ) -> None:
100
- """Initialize the Flock orchestrator for blackboard-based agent coordination.
101
-
102
- Args:
103
- model: Default LLM model for agents (e.g., "openai/gpt-4.1").
104
- Can be overridden per-agent. If None, uses DEFAULT_MODEL env var.
105
- store: Custom blackboard storage backend. Defaults to InMemoryBlackboardStore.
106
- max_agent_iterations: Circuit breaker limit to prevent runaway agent loops.
107
- Defaults to 1000 iterations per agent before reset.
108
- context_provider: Global context provider for all agents (Phase 3 security fix).
109
- If None, agents use DefaultContextProvider. Can be overridden per-agent.
110
-
111
- Examples:
112
- >>> # Basic initialization with default model
113
- >>> flock = Flock("openai/gpt-4.1")
114
-
115
- >>> # Custom storage backend
116
- >>> flock = Flock("openai/gpt-4o", store=CustomBlackboardStore())
117
-
118
- >>> # Circuit breaker configuration
119
- >>> flock = Flock("openai/gpt-4.1", max_agent_iterations=500)
120
-
121
- >>> # Global context provider (Phase 3 security fix)
122
- >>> from flock.context_provider import DefaultContextProvider
123
- >>> flock = Flock(
124
- ... "openai/gpt-4.1", context_provider=DefaultContextProvider()
125
- ... )
126
- """
127
- self._patch_litellm_proxy_imports()
128
- self._logger = logging.getLogger(__name__)
129
- self.model = model
130
-
131
- try:
132
- init_console(clear_screen=True, show_banner=True, model=self.model)
133
- except (UnicodeEncodeError, UnicodeDecodeError):
134
- # Skip banner on Windows consoles with encoding issues (e.g., tests, CI)
135
- pass
136
-
137
- self.store: BlackboardStore = store or InMemoryBlackboardStore()
138
- self._agents: dict[str, Agent] = {}
139
- self._tasks: set[Task[Any]] = set()
140
- self._correlation_tasks: dict[
141
- UUID, set[Task[Any]]
142
- ] = {} # Track tasks by correlation_id
143
- self._processed: set[tuple[str, str]] = set()
144
- self._lock = asyncio.Lock()
145
- self.metrics: dict[str, float] = {"artifacts_published": 0, "agent_runs": 0}
146
- # Phase 3: Global context provider (security fix)
147
- self._default_context_provider = context_provider
148
- # MCP integration
149
- self._mcp_configs: dict[str, FlockMCPConfiguration] = {}
150
- self._mcp_manager: FlockMCPClientManager | None = None
151
- # T068: Circuit breaker for runaway agents
152
- self.max_agent_iterations: int = max_agent_iterations
153
- self._agent_iteration_count: dict[str, int] = {}
154
- self.is_dashboard: bool = False
155
- # AND gate logic: Artifact collection for multi-type subscriptions
156
- self._artifact_collector = ArtifactCollector()
157
- # JoinSpec logic: Correlation engine for correlated AND gates
158
- self._correlation_engine = CorrelationEngine()
159
- # Background task for checking correlation expiry (time-based JoinSpec)
160
- self._correlation_cleanup_task: Task[Any] | None = None
161
- self._correlation_cleanup_interval: float = 0.1 # Check every 100ms
162
- # BatchSpec logic: Batch accumulator for size/timeout batching
163
- self._batch_engine = BatchEngine()
164
- # Background task for checking batch timeouts
165
- self._batch_timeout_task: Task[Any] | None = None
166
- self._batch_timeout_interval: float = 0.1 # Check every 100ms
167
- # Phase 1.2: WebSocket manager for real-time dashboard events (set by serve())
168
- self._websocket_manager: Any = None
169
- # Dashboard server task and launcher (for non-blocking serve)
170
- self._server_task: Task[None] | None = None
171
- self._dashboard_launcher: Any = None
172
- # Unified tracing support
173
- self._workflow_span = None
174
- self._auto_workflow_enabled = os.getenv(
175
- "FLOCK_AUTO_WORKFLOW_TRACE", "false"
176
- ).lower() in {
177
- "true",
178
- "1",
179
- "yes",
180
- "on",
181
- }
182
-
183
- # Phase 2: OrchestratorComponent system
184
- self._components: list[OrchestratorComponent] = []
185
- self._components_initialized: bool = False
186
-
187
- # Auto-add built-in components
188
- from flock.orchestrator_component import (
189
- BuiltinCollectionComponent,
190
- CircuitBreakerComponent,
191
- DeduplicationComponent,
192
- )
193
-
194
- self.add_component(CircuitBreakerComponent(max_iterations=max_agent_iterations))
195
- self.add_component(DeduplicationComponent())
196
- self.add_component(BuiltinCollectionComponent())
197
-
198
- # Log orchestrator initialization
199
- self._logger.debug("Orchestrator initialized: components=[]")
200
-
201
- if not model:
202
- self.model = os.getenv("DEFAULT_MODEL")
203
-
204
- # Agent management -----------------------------------------------------
205
-
206
- def agent(self, name: str) -> AgentBuilder:
207
- """Create a new agent using the fluent builder API.
208
-
209
- Args:
210
- name: Unique identifier for the agent. Used for visibility controls and metrics.
211
-
212
- Returns:
213
- AgentBuilder for fluent configuration
214
-
215
- Raises:
216
- ValueError: If an agent with this name already exists
217
-
218
- Examples:
219
- >>> # Basic agent
220
- >>> pizza_agent = (
221
- ... flock.agent("pizza_master")
222
- ... .description("Creates delicious pizza recipes")
223
- ... .consumes(DreamPizza)
224
- ... .publishes(Pizza)
225
- ... )
226
-
227
- >>> # Advanced agent with filtering
228
- >>> critic = (
229
- ... flock.agent("critic")
230
- ... .consumes(Movie, where=lambda m: m.rating >= 8)
231
- ... .publishes(Review)
232
- ... .with_utilities(RateLimiter(max_calls=10))
233
- ... )
234
- """
235
- if name in self._agents:
236
- raise ValueError(f"Agent '{name}' already registered.")
237
- return AgentBuilder(self, name)
238
-
239
- def register_agent(self, agent: Agent) -> None:
240
- if agent.name in self._agents:
241
- raise ValueError(f"Agent '{agent.name}' already registered.")
242
- self._agents[agent.name] = agent
243
-
244
- def get_agent(self, name: str) -> Agent:
245
- return self._agents[name]
246
-
247
- @property
248
- def agents(self) -> list[Agent]:
249
- return list(self._agents.values())
250
-
251
- async def get_correlation_status(self, correlation_id: str) -> dict[str, Any]:
252
- """Get the status of a workflow by correlation ID.
253
-
254
- Args:
255
- correlation_id: The correlation ID to check
256
-
257
- Returns:
258
- Dictionary containing workflow status information:
259
- - state: "active" if work is pending, "completed" otherwise
260
- - has_pending_work: True if orchestrator has pending work for this correlation
261
- - artifact_count: Total number of artifacts with this correlation_id
262
- - error_count: Number of WorkflowError artifacts
263
- - started_at: Timestamp of first artifact (if any)
264
- - last_activity_at: Timestamp of most recent artifact (if any)
265
- """
266
- from uuid import UUID
267
-
268
- try:
269
- correlation_uuid = UUID(correlation_id)
270
- except ValueError as exc:
271
- raise ValueError(
272
- f"Invalid correlation_id format: {correlation_id}"
273
- ) from exc
274
-
275
- # Check if orchestrator has pending work for this correlation
276
- # 1. Check active tasks for this correlation_id
277
- has_active_tasks = correlation_uuid in self._correlation_tasks and bool(
278
- self._correlation_tasks[correlation_uuid]
279
- )
280
-
281
- # 2. Check correlation groups (for agents with JoinSpec that haven't yielded yet)
282
- has_pending_groups = False
283
- for groups in self._correlation_engine.correlation_groups.values():
284
- for group_key, group in groups.items():
285
- # Check if this group belongs to our correlation
286
- for type_name, artifacts in group.waiting_artifacts.items():
287
- if any(
288
- artifact.correlation_id == correlation_uuid
289
- for artifact in artifacts
290
- ):
291
- has_pending_groups = True
292
- break
293
- if has_pending_groups:
294
- break
295
- if has_pending_groups:
296
- break
297
-
298
- # Workflow has pending work if EITHER tasks are active OR groups are waiting
299
- has_pending_work = has_active_tasks or has_pending_groups
300
-
301
- # Query artifacts for this correlation
302
- from flock.store import FilterConfig
303
-
304
- filters = FilterConfig(correlation_id=correlation_id)
305
- artifacts, total = await self.store.query_artifacts(
306
- filters, limit=1000, offset=0
307
- )
308
-
309
- # Count errors
310
- error_count = sum(
311
- 1
312
- for artifact in artifacts
313
- if artifact.type == "flock.system_artifacts.WorkflowError"
314
- )
315
-
316
- # Get timestamps
317
- started_at = None
318
- last_activity_at = None
319
- if artifacts:
320
- timestamps = [artifact.created_at for artifact in artifacts]
321
- started_at = min(timestamps).isoformat()
322
- last_activity_at = max(timestamps).isoformat()
323
-
324
- # Determine state
325
- if has_pending_work:
326
- state = "active"
327
- elif total == 0:
328
- state = "not_found"
329
- elif error_count > 0 and total == error_count:
330
- state = "failed" # Only error artifacts exist
331
- else:
332
- state = "completed"
333
-
334
- return {
335
- "correlation_id": correlation_id,
336
- "state": state,
337
- "has_pending_work": has_pending_work,
338
- "artifact_count": total,
339
- "error_count": error_count,
340
- "started_at": started_at,
341
- "last_activity_at": last_activity_at,
342
- }
343
-
344
- # Component management -------------------------------------------------
345
-
346
- def add_component(self, component: OrchestratorComponent) -> Flock:
347
- """Add an OrchestratorComponent to this orchestrator.
348
-
349
- Components execute in priority order (lower priority number = earlier).
350
- Multiple components can have the same priority.
351
-
352
- Args:
353
- component: Component to add (must be an OrchestratorComponent instance)
354
-
355
- Returns:
356
- Self for method chaining
357
-
358
- Examples:
359
- >>> # Add single component
360
- >>> flock = Flock("openai/gpt-4.1")
361
- >>> flock.add_component(CircuitBreakerComponent(max_iterations=500))
362
-
363
- >>> # Method chaining
364
- >>> flock.add_component(CircuitBreakerComponent()) \\
365
- ... .add_component(MetricsComponent()) \\
366
- ... .add_component(DeduplicationComponent())
367
-
368
- >>> # Custom priority (lower = earlier)
369
- >>> flock.add_component(
370
- ... CustomComponent(priority=5, name="early_component")
371
- ... )
372
- """
373
- self._components.append(component)
374
- self._components.sort(key=lambda c: c.priority)
375
-
376
- # Log component addition
377
- comp_name = component.name or component.__class__.__name__
378
- self._logger.info(
379
- f"Component added: name={comp_name}, "
380
- f"priority={component.priority}, total_components={len(self._components)}"
381
- )
382
-
383
- return self
384
-
385
- # MCP management -------------------------------------------------------
386
-
387
- def add_mcp(
388
- self,
389
- name: str,
390
- connection_params: ServerParameters,
391
- *,
392
- enable_tools_feature: bool = True,
393
- enable_prompts_feature: bool = True,
394
- enable_sampling_feature: bool = True,
395
- enable_roots_feature: bool = True,
396
- mount_points: list[str] | None = None,
397
- tool_whitelist: list[str] | None = None,
398
- read_timeout_seconds: float = 300,
399
- max_retries: int = 3,
400
- **kwargs,
401
- ) -> Flock:
402
- """Register an MCP server for use by agents.
403
-
404
- Architecture Decision: AD001 - Two-Level Architecture
405
- MCP servers are registered at orchestrator level and assigned to agents.
406
-
407
- Args:
408
- name: Unique identifier for this MCP server
409
- connection_params: Server connection parameters
410
- enable_tools_feature: Enable tool execution
411
- enable_prompts_feature: Enable prompt templates
412
- enable_sampling_feature: Enable LLM sampling requests
413
- enable_roots_feature: Enable filesystem roots
414
- tool_whitelist: Optional list of tool names to allow
415
- read_timeout_seconds: Timeout for server communications
416
- max_retries: Connection retry attempts
417
-
418
- Returns:
419
- self for method chaining
420
-
421
- Raises:
422
- ValueError: If server name already registered
423
- """
424
- if name in self._mcp_configs:
425
- raise ValueError(f"MCP server '{name}' is already registered.")
426
-
427
- # Detect transport type
428
- from flock.mcp.types import (
429
- SseServerParameters,
430
- StdioServerParameters,
431
- StreamableHttpServerParameters,
432
- WebsocketServerParameters,
433
- )
434
-
435
- if isinstance(connection_params, StdioServerParameters):
436
- transport_type = "stdio"
437
- elif isinstance(connection_params, WebsocketServerParameters):
438
- transport_type = "websockets"
439
- elif isinstance(connection_params, SseServerParameters):
440
- transport_type = "sse"
441
- elif isinstance(connection_params, StreamableHttpServerParameters):
442
- transport_type = "streamable_http"
443
- else:
444
- transport_type = "custom"
445
-
446
- mcp_roots = None
447
- if mount_points:
448
- from pathlib import Path as PathLib
449
-
450
- from flock.mcp.types import MCPRoot
451
-
452
- mcp_roots = []
453
- for path in mount_points:
454
- # Normalize the path
455
- if path.startswith("file://"):
456
- # Already a file URI
457
- uri = path
458
- # Extract path from URI for name
459
- path_str = path.replace("file://", "")
460
- # the test:// path-prefix is used by testing servers such as the mcp-everything server.
461
- elif path.startswith("test://"):
462
- # Already a test URI
463
- uri = path
464
- # Extract path from URI for name
465
- path_str = path.replace("test://", "")
466
- else:
467
- # Convert to absolute path and create URI
468
- abs_path = PathLib(path).resolve()
469
- uri = f"file://{abs_path}"
470
- path_str = str(abs_path)
471
-
472
- # Extract a meaningful name (last component of path)
473
- name = (
474
- PathLib(path_str).name
475
- or path_str.rstrip("/").split("/")[-1]
476
- or "root"
477
- )
478
- mcp_roots.append(MCPRoot(uri=uri, name=name))
479
-
480
- # Build configuration
481
- connection_config = FlockMCPConnectionConfiguration(
482
- max_retries=max_retries,
483
- connection_parameters=connection_params,
484
- transport_type=transport_type,
485
- read_timeout_seconds=read_timeout_seconds,
486
- mount_points=mcp_roots,
487
- )
488
-
489
- feature_config = FlockMCPFeatureConfiguration(
490
- tools_enabled=enable_tools_feature,
491
- prompts_enabled=enable_prompts_feature,
492
- sampling_enabled=enable_sampling_feature,
493
- roots_enabled=enable_roots_feature,
494
- tool_whitelist=tool_whitelist,
495
- )
496
-
497
- mcp_config = FlockMCPConfiguration(
498
- name=name,
499
- connection_config=connection_config,
500
- feature_config=feature_config,
501
- )
502
-
503
- self._mcp_configs[name] = mcp_config
504
- return self
505
-
506
- def get_mcp_manager(self) -> FlockMCPClientManager:
507
- """Get or create the MCP client manager.
508
-
509
- Architecture Decision: AD005 - Lazy Connection Establishment
510
- """
511
- if not self._mcp_configs:
512
- raise RuntimeError("No MCP servers registered. Call add_mcp() first.")
513
-
514
- if self._mcp_manager is None:
515
- self._mcp_manager = FlockMCPClientManager(self._mcp_configs)
516
-
517
- return self._mcp_manager
518
-
519
- # Unified Tracing ------------------------------------------------------
520
-
521
- @asynccontextmanager
522
- async def traced_run(self, name: str = "workflow") -> AsyncGenerator[Any, None]:
523
- """Context manager for wrapping an entire execution in a single unified trace.
524
-
525
- This creates a parent span that encompasses all operations (publish, run_until_idle, etc.)
526
- within the context, ensuring they all belong to the same trace_id for better observability.
527
-
528
- Args:
529
- name: Name for the workflow trace (default: "workflow")
530
-
531
- Yields:
532
- The workflow span for optional manual attribute setting
533
-
534
- Examples:
535
- # Explicit workflow tracing (recommended)
536
- async with flock.traced_run("pizza_workflow"):
537
- await flock.publish(pizza_idea)
538
- await flock.run_until_idle()
539
- # All operations now share the same trace_id!
540
-
541
- # Custom attributes
542
- async with flock.traced_run("data_pipeline") as span:
543
- span.set_attribute("pipeline.version", "2.0")
544
- await flock.publish(data)
545
- await flock.run_until_idle()
546
- """
547
- tracer = trace.get_tracer(__name__)
548
- with tracer.start_as_current_span(name) as span:
549
- # Set workflow-level attributes
550
- span.set_attribute("flock.workflow", True)
551
- span.set_attribute("workflow.name", name)
552
- span.set_attribute("workflow.flock_id", str(id(self)))
553
-
554
- # Store span for nested operations to use
555
- prev_workflow_span = self._workflow_span
556
- self._workflow_span = span
557
-
558
- try:
559
- yield span
560
- span.set_status(Status(StatusCode.OK))
561
- except Exception as e:
562
- span.set_status(Status(StatusCode.ERROR, str(e)))
563
- span.record_exception(e)
564
- raise
565
- finally:
566
- # Restore previous workflow span
567
- self._workflow_span = prev_workflow_span
568
-
569
- @staticmethod
570
- def clear_traces(db_path: str = ".flock/traces.duckdb") -> dict[str, Any]:
571
- """Clear all traces from the DuckDB database.
572
-
573
- Useful for resetting debug sessions or cleaning up test data.
574
-
575
- Args:
576
- db_path: Path to the DuckDB database file (default: ".flock/traces.duckdb")
577
-
578
- Returns:
579
- Dictionary with operation results:
580
- - deleted_count: Number of spans deleted
581
- - success: Whether operation succeeded
582
- - error: Error message if failed
583
-
584
- Examples:
585
- # Clear all traces
586
- result = Flock.clear_traces()
587
- print(f"Deleted {result['deleted_count']} spans")
588
-
589
- # Custom database path
590
- result = Flock.clear_traces(".flock/custom_traces.duckdb")
591
-
592
- # Check if operation succeeded
593
- if result['success']:
594
- print("Traces cleared successfully!")
595
- else:
596
- print(f"Error: {result['error']}")
597
- """
598
- try:
599
- from pathlib import Path
600
-
601
- import duckdb
602
-
603
- db_file = Path(db_path)
604
- if not db_file.exists():
605
- return {
606
- "success": False,
607
- "deleted_count": 0,
608
- "error": f"Database file not found: {db_path}",
609
- }
610
-
611
- # Connect and clear
612
- conn = duckdb.connect(str(db_file))
613
- try:
614
- # Get count before deletion
615
- count_result = conn.execute("SELECT COUNT(*) FROM spans").fetchone()
616
- deleted_count = count_result[0] if count_result else 0
617
-
618
- # Delete all spans
619
- conn.execute("DELETE FROM spans")
620
-
621
- # Vacuum to reclaim space
622
- conn.execute("VACUUM")
623
-
624
- return {"success": True, "deleted_count": deleted_count, "error": None}
625
-
626
- finally:
627
- conn.close()
628
-
629
- except Exception as e:
630
- return {"success": False, "deleted_count": 0, "error": str(e)}
631
-
632
- # Runtime --------------------------------------------------------------
633
-
634
- async def run_until_idle(self) -> None:
635
- """Wait for all scheduled agent tasks to complete.
636
-
637
- This method blocks until the blackboard reaches a stable state where no
638
- agents are queued for execution. Essential for batch processing and ensuring
639
- all agent cascades complete before continuing.
640
-
641
- Note:
642
- Automatically resets circuit breaker counters and shuts down MCP connections
643
- when idle. Used with publish() for event-driven workflows.
644
-
645
- Examples:
646
- >>> # Event-driven workflow (recommended)
647
- >>> await flock.publish(task1)
648
- >>> await flock.publish(task2)
649
- >>> await flock.run_until_idle() # Wait for all cascades
650
- >>> # All agents have finished processing
651
-
652
- >>> # Parallel batch processing
653
- >>> await flock.publish_many([task1, task2, task3])
654
- >>> await flock.run_until_idle() # All tasks processed in parallel
655
-
656
- See Also:
657
- - publish(): Event-driven artifact publishing
658
- - publish_many(): Batch publishing for parallel execution
659
- - invoke(): Direct agent invocation without cascade
660
- """
661
- while self._tasks:
662
- await asyncio.sleep(0.01)
663
- pending = {task for task in self._tasks if not task.done()}
664
- self._tasks = pending
665
-
666
- # Determine whether any deferred work (timeouts/cleanup) is still pending.
667
- pending_batches = any(
668
- accumulator.artifacts for accumulator in self._batch_engine.batches.values()
669
- )
670
- pending_correlations = any(
671
- groups and any(group.waiting_artifacts for group in groups.values())
672
- for groups in self._correlation_engine.correlation_groups.values()
673
- )
674
-
675
- # Ensure watchdog loops remain active while pending work exists.
676
- if pending_batches and (
677
- self._batch_timeout_task is None or self._batch_timeout_task.done()
678
- ):
679
- self._batch_timeout_task = asyncio.create_task(
680
- self._batch_timeout_checker_loop()
681
- )
682
-
683
- if pending_correlations and (
684
- self._correlation_cleanup_task is None
685
- or self._correlation_cleanup_task.done()
686
- ):
687
- self._correlation_cleanup_task = asyncio.create_task(
688
- self._correlation_cleanup_loop()
689
- )
690
-
691
- # If deferred work is still outstanding, consider the orchestrator quiescent for
692
- # now but leave watchdog tasks running to finish the job.
693
- if pending_batches or pending_correlations:
694
- self._agent_iteration_count.clear()
695
- return
696
-
697
- # Notify components that orchestrator reached idle state
698
- if self._components_initialized:
699
- await self._run_idle()
700
-
701
- # T068: Reset circuit breaker counters when idle
702
- self._agent_iteration_count.clear()
703
-
704
- # Automatically shutdown MCP connections when idle
705
- await self.shutdown(include_components=False)
706
-
707
- async def direct_invoke(
708
- self, agent: Agent, inputs: Sequence[BaseModel | Mapping[str, Any] | Artifact]
709
- ) -> list[Artifact]:
710
- artifacts = [
711
- self._normalize_input(value, produced_by="__direct__") for value in inputs
712
- ]
713
- for artifact in artifacts:
714
- self._mark_processed(artifact, agent)
715
- await self._persist_and_schedule(artifact)
716
-
717
- # Phase 8: Evaluate context BEFORE creating Context (security fix)
718
- # Provider resolution: per-agent > global > DefaultContextProvider
719
- from flock.context_provider import (
720
- BoundContextProvider,
721
- ContextRequest,
722
- DefaultContextProvider,
723
- )
724
-
725
- inner_provider = (
726
- getattr(agent, "context_provider", None)
727
- or self._default_context_provider
728
- or DefaultContextProvider()
729
- )
730
-
731
- # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
732
- provider = BoundContextProvider(inner_provider, agent.identity)
733
-
734
- # Evaluate context using provider (orchestrator controls this!)
735
- # Engines will receive pre-filtered artifacts via ctx.artifacts
736
- correlation_id = (
737
- artifacts[0].correlation_id
738
- if artifacts and artifacts[0].correlation_id
739
- else uuid4()
740
- )
741
- request = ContextRequest(
742
- agent=agent,
743
- correlation_id=correlation_id,
744
- store=self.store,
745
- agent_identity=agent.identity,
746
- exclude_ids={a.id for a in artifacts}, # Exclude input artifacts
747
- )
748
- context_artifacts = await provider(request)
749
-
750
- # Phase 8: Create Context with pre-filtered data (no capabilities!)
751
- # SECURITY: Context is now just data - engines can't query anything
752
- ctx = Context(
753
- artifacts=context_artifacts, # Pre-filtered conversation context
754
- agent_identity=agent.identity,
755
- task_id=str(uuid4()),
756
- correlation_id=correlation_id,
757
- )
758
- self._record_agent_run(agent)
759
- return await agent.execute(ctx, artifacts)
760
-
761
- async def arun(
762
- self, agent_builder: AgentBuilder, *inputs: BaseModel
763
- ) -> list[Artifact]:
764
- """Execute an agent with inputs and wait for all cascades to complete (async).
765
-
766
- Convenience method that combines direct agent invocation with run_until_idle().
767
- Useful for testing and synchronous request-response patterns.
768
-
769
- Args:
770
- agent_builder: Agent to execute (from flock.agent())
771
- *inputs: Input objects (BaseModel instances)
772
-
773
- Returns:
774
- Artifacts produced by the agent and any triggered cascades
775
-
776
- Examples:
777
- >>> # Test a single agent
778
- >>> flock = Flock("openai/gpt-4.1")
779
- >>> pizza_agent = flock.agent("pizza").consumes(Idea).publishes(Pizza)
780
- >>> results = await flock.arun(pizza_agent, Idea(topic="Margherita"))
781
-
782
- >>> # Multiple inputs
783
- >>> results = await flock.arun(
784
- ... task_agent, Task(name="deploy"), Task(name="test")
785
- ... )
786
-
787
- Note:
788
- For event-driven workflows, prefer publish() + run_until_idle() for better
789
- control over execution timing and parallel processing.
790
- """
791
- artifacts = await self.direct_invoke(agent_builder.agent, list(inputs))
792
- await self.run_until_idle()
793
- return artifacts
794
-
795
- def run(self, agent_builder: AgentBuilder, *inputs: BaseModel) -> list[Artifact]:
796
- """Synchronous wrapper for arun() - executes agent and waits for completion.
797
-
798
- Args:
799
- agent_builder: Agent to execute (from flock.agent())
800
- *inputs: Input objects (BaseModel instances)
801
-
802
- Returns:
803
- Artifacts produced by the agent and any triggered cascades
804
-
805
- Examples:
806
- >>> # Synchronous execution (blocks until complete)
807
- >>> flock = Flock("openai/gpt-4o-mini")
808
- >>> agent = flock.agent("analyzer").consumes(Data).publishes(Report)
809
- >>> results = flock.run(agent, Data(value=42))
810
-
811
- Warning:
812
- Cannot be called from within an async context. Use arun() instead
813
- if already in an async function.
814
- """
815
- return asyncio.run(self.arun(agent_builder, *inputs))
816
-
817
- async def shutdown(self, *, include_components: bool = True) -> None:
818
- """Shutdown orchestrator and clean up resources.
819
-
820
- Args:
821
- include_components: Whether to invoke component shutdown hooks.
822
- Internal callers (e.g., run_until_idle) disable this to avoid
823
- tearing down component state between cascades.
824
- """
825
- if include_components and self._components_initialized:
826
- await self._run_shutdown()
827
-
828
- # Cancel correlation cleanup task if running
829
- if self._correlation_cleanup_task and not self._correlation_cleanup_task.done():
830
- self._correlation_cleanup_task.cancel()
831
- try:
832
- await self._correlation_cleanup_task
833
- except asyncio.CancelledError:
834
- pass
835
-
836
- # Cancel batch timeout checker if running
837
- if self._batch_timeout_task and not self._batch_timeout_task.done():
838
- self._batch_timeout_task.cancel()
839
- try:
840
- await self._batch_timeout_task
841
- except asyncio.CancelledError:
842
- pass
843
-
844
- # Cancel background server task if running
845
- if self._server_task and not self._server_task.done():
846
- self._server_task.cancel()
847
- try:
848
- await self._server_task
849
- except asyncio.CancelledError:
850
- pass
851
- # Note: _cleanup_server_callback will handle launcher.stop()
852
-
853
- if self._mcp_manager is not None:
854
- await self._mcp_manager.cleanup_all()
855
- self._mcp_manager = None
856
-
857
- def cli(self) -> Flock:
858
- # Placeholder for CLI wiring (rich UI in Step 3)
859
- return self
860
-
861
- async def serve(
862
- self,
863
- *,
864
- dashboard: bool = False,
865
- dashboard_v2: bool = False,
866
- host: str = "127.0.0.1",
867
- port: int = 8344,
868
- blocking: bool = True,
869
- ) -> Task[None] | None:
870
- """Start HTTP service for the orchestrator.
871
-
872
- Args:
873
- dashboard: Enable real-time dashboard with WebSocket support (default: False)
874
- dashboard_v2: Launch the new dashboard v2 frontend (implies dashboard=True)
875
- host: Host to bind to (default: "127.0.0.1")
876
- port: Port to bind to (default: 8344)
877
- blocking: If True, blocks until server stops. If False, starts server
878
- in background and returns task handle (default: True)
879
-
880
- Returns:
881
- None if blocking=True, or Task handle if blocking=False
882
-
883
- Examples:
884
- # Basic HTTP API (no dashboard) - runs until interrupted
885
- await orchestrator.serve()
886
-
887
- # With dashboard (WebSocket + browser launch) - runs until interrupted
888
- await orchestrator.serve(dashboard=True)
889
-
890
- # Non-blocking mode - start server in background
891
- await orchestrator.serve(dashboard=True, blocking=False)
892
- # Now you can publish messages and run other logic
893
- await orchestrator.publish(my_message)
894
- await orchestrator.run_until_idle()
895
- """
896
- # If non-blocking, start server in background task
897
- if not blocking:
898
- self._server_task = asyncio.create_task(
899
- self._serve_impl(
900
- dashboard=dashboard,
901
- dashboard_v2=dashboard_v2,
902
- host=host,
903
- port=port,
904
- )
905
- )
906
- # Add cleanup callback
907
- self._server_task.add_done_callback(self._cleanup_server_callback)
908
- # Give server a moment to start
909
- await asyncio.sleep(0.1)
910
- return self._server_task
911
-
912
- # Blocking mode - run server directly with cleanup
913
- try:
914
- await self._serve_impl(
915
- dashboard=dashboard,
916
- dashboard_v2=dashboard_v2,
917
- host=host,
918
- port=port,
919
- )
920
- finally:
921
- # In blocking mode, manually cleanup dashboard launcher
922
- if self._dashboard_launcher is not None:
923
- self._dashboard_launcher.stop()
924
- self._dashboard_launcher = None
925
- return None
926
-
927
- def _cleanup_server_callback(self, task: Task[None]) -> None:
928
- """Cleanup callback when background server task completes."""
929
- # Stop dashboard launcher if it was started
930
- if self._dashboard_launcher is not None:
931
- try:
932
- self._dashboard_launcher.stop()
933
- except Exception as e:
934
- self._logger.warning(f"Failed to stop dashboard launcher: {e}")
935
- finally:
936
- self._dashboard_launcher = None
937
-
938
- # Clear server task reference
939
- self._server_task = None
940
-
941
- # Log any exceptions from the task
942
- try:
943
- exc = task.exception()
944
- if exc and not isinstance(exc, asyncio.CancelledError):
945
- self._logger.error(f"Server task failed: {exc}", exc_info=exc)
946
- except asyncio.CancelledError:
947
- pass # Normal cancellation
948
-
949
- async def _serve_impl(
950
- self,
951
- *,
952
- dashboard: bool = False,
953
- dashboard_v2: bool = False,
954
- host: str = "127.0.0.1",
955
- port: int = 8344,
956
- ) -> None:
957
- """Internal implementation of serve() - actual server logic."""
958
- if dashboard_v2:
959
- dashboard = True
960
-
961
- if not dashboard:
962
- # Standard service without dashboard
963
- from flock.service import BlackboardHTTPService
964
-
965
- service = BlackboardHTTPService(self)
966
- await service.run_async(host=host, port=port)
967
- return
968
-
969
- # Dashboard mode: integrate event collection and WebSocket
970
- from flock.dashboard.collector import DashboardEventCollector
971
- from flock.dashboard.launcher import DashboardLauncher
972
- from flock.dashboard.service import DashboardHTTPService
973
- from flock.dashboard.websocket import WebSocketManager
974
-
975
- # Create dashboard components
976
- websocket_manager = WebSocketManager()
977
- event_collector = DashboardEventCollector(store=self.store)
978
- event_collector.set_websocket_manager(websocket_manager)
979
- await event_collector.load_persistent_snapshots()
980
-
981
- # Store collector reference for agents added later
982
- self._dashboard_collector = event_collector
983
- # Store websocket manager for real-time event emission (Phase 1.2)
984
- self._websocket_manager = websocket_manager
985
-
986
- # Phase 6+7: Set class-level WebSocket broadcast wrapper (dashboard mode)
987
- async def _broadcast_wrapper(event):
988
- """Isolated broadcast wrapper - no reference chain to orchestrator."""
989
- return await websocket_manager.broadcast(event)
990
-
991
- from flock.agent import Agent
992
-
993
- Agent._websocket_broadcast_global = _broadcast_wrapper
994
-
995
- # Inject event collector into all existing agents
996
- for agent in self._agents.values():
997
- # Add dashboard collector with priority ordering handled by agent
998
- agent._add_utilities([event_collector])
999
-
1000
- # Start dashboard launcher (npm process + browser)
1001
- launcher_kwargs: dict[str, Any] = {"port": port}
1002
- if dashboard_v2:
1003
- dashboard_pkg_dir = Path(__file__).parent / "dashboard"
1004
- launcher_kwargs["frontend_dir"] = dashboard_pkg_dir.parent / "frontend_v2"
1005
- launcher_kwargs["static_dir"] = dashboard_pkg_dir / "static_v2"
1006
-
1007
- launcher = DashboardLauncher(**launcher_kwargs)
1008
- launcher.start()
1009
-
1010
- # Create dashboard HTTP service
1011
- service = DashboardHTTPService(
1012
- orchestrator=self,
1013
- websocket_manager=websocket_manager,
1014
- event_collector=event_collector,
1015
- use_v2=dashboard_v2,
1016
- )
1017
-
1018
- # Store launcher for cleanup
1019
- self._dashboard_launcher = launcher
1020
-
1021
- # Run service (blocking call)
1022
- # Note: Cleanup is handled by serve() (blocking mode) or callback (non-blocking mode)
1023
- await service.run_async(host=host, port=port)
1024
-
1025
- # Scheduling -----------------------------------------------------------
1026
-
1027
- async def publish(
1028
- self,
1029
- obj: BaseModel | dict | Artifact,
1030
- *,
1031
- visibility: Visibility | None = None,
1032
- correlation_id: str | None = None,
1033
- partition_key: str | None = None,
1034
- tags: set[str] | None = None,
1035
- is_dashboard: bool = False,
1036
- ) -> Artifact:
1037
- """Publish an artifact to the blackboard (event-driven).
1038
-
1039
- All agents with matching subscriptions will be triggered according to
1040
- their filters (type, predicates, visibility, etc).
1041
-
1042
- Args:
1043
- obj: Object to publish (BaseModel instance, dict, or Artifact)
1044
- visibility: Access control (defaults to PublicVisibility)
1045
- correlation_id: Optional correlation ID for request tracing
1046
- partition_key: Optional partition key for sharding
1047
- tags: Optional tags for channel-based routing
1048
-
1049
- Returns:
1050
- The published Artifact
1051
-
1052
- Examples:
1053
- >>> # Publish a model instance (recommended)
1054
- >>> task = Task(name="Deploy", priority=5)
1055
- >>> await orchestrator.publish(task)
1056
-
1057
- >>> # Publish with custom visibility
1058
- >>> await orchestrator.publish(
1059
- ... task, visibility=PrivateVisibility(agents={"admin"})
1060
- ... )
1061
-
1062
- >>> # Publish with tags for channel routing
1063
- >>> await orchestrator.publish(task, tags={"urgent", "backend"})
1064
- """
1065
- # Handle different input types
1066
- if isinstance(obj, Artifact):
1067
- # Already an artifact - publish as-is
1068
- artifact = obj
1069
- elif isinstance(obj, BaseModel):
1070
- # BaseModel instance - get type from registry
1071
- type_name = type_registry.name_for(type(obj))
1072
- artifact = Artifact(
1073
- type=type_name,
1074
- payload=obj.model_dump(),
1075
- produced_by="external",
1076
- visibility=visibility or PublicVisibility(),
1077
- correlation_id=correlation_id or uuid4(),
1078
- partition_key=partition_key,
1079
- tags=tags or set(),
1080
- )
1081
- elif isinstance(obj, dict):
1082
- # Dict must have 'type' key
1083
- if "type" not in obj:
1084
- raise ValueError(
1085
- "Dict input must contain 'type' key. "
1086
- "Example: {'type': 'Task', 'name': 'foo', 'priority': 5}"
1087
- )
1088
- # Support both {'type': 'X', 'payload': {...}} and {'type': 'X', ...}
1089
- type_name = obj["type"]
1090
- if "payload" in obj:
1091
- payload = obj["payload"]
1092
- else:
1093
- payload = {k: v for k, v in obj.items() if k != "type"}
1094
-
1095
- artifact = Artifact(
1096
- type=type_name,
1097
- payload=payload,
1098
- produced_by="external",
1099
- visibility=visibility or PublicVisibility(),
1100
- correlation_id=correlation_id,
1101
- partition_key=partition_key,
1102
- tags=tags or set(),
1103
- )
1104
- else:
1105
- raise TypeError(
1106
- f"Cannot publish object of type {type(obj).__name__}. "
1107
- "Expected BaseModel, dict, or Artifact."
1108
- )
1109
-
1110
- # Persist and schedule matching agents
1111
- await self._persist_and_schedule(artifact)
1112
- return artifact
1113
-
1114
- async def publish_many(
1115
- self, objects: Iterable[BaseModel | dict | Artifact], **kwargs: Any
1116
- ) -> list[Artifact]:
1117
- """Publish multiple artifacts at once (event-driven).
1118
-
1119
- Args:
1120
- objects: Iterable of objects to publish
1121
- **kwargs: Passed to each publish() call (visibility, tags, etc)
1122
-
1123
- Returns:
1124
- List of published Artifacts
1125
-
1126
- Example:
1127
- >>> tasks = [
1128
- ... Task(name="Deploy", priority=5),
1129
- ... Task(name="Test", priority=3),
1130
- ... Task(name="Document", priority=1),
1131
- ... ]
1132
- >>> await orchestrator.publish_many(tasks, tags={"sprint-3"})
1133
- """
1134
- artifacts = []
1135
- for obj in objects:
1136
- artifact = await self.publish(obj, **kwargs)
1137
- artifacts.append(artifact)
1138
- return artifacts
1139
-
1140
- # -----------------------------------------------------------------------------
1141
- # NEW DIRECT INVOCATION API - Explicit Control
1142
- # -----------------------------------------------------------------------------
1143
-
1144
- async def invoke(
1145
- self,
1146
- agent: Agent | AgentBuilder,
1147
- obj: BaseModel,
1148
- *,
1149
- publish_outputs: bool = True,
1150
- timeout: float | None = None,
1151
- ) -> list[Artifact]:
1152
- """Directly invoke a specific agent (bypasses subscription matching).
1153
-
1154
- This executes the agent immediately without checking subscriptions or
1155
- predicates. Useful for testing or synchronous request-response patterns.
1156
-
1157
- Args:
1158
- agent: Agent or AgentBuilder to invoke
1159
- obj: Input object (BaseModel instance)
1160
- publish_outputs: If True, publish outputs to blackboard for cascade
1161
- timeout: Optional timeout in seconds
1162
-
1163
- Returns:
1164
- Artifacts produced by the agent
1165
-
1166
- Warning:
1167
- This bypasses subscription filters and predicates. For event-driven
1168
- coordination, use publish() instead.
1169
-
1170
- Examples:
1171
- >>> # Testing: Execute agent without triggering others
1172
- >>> results = await orchestrator.invoke(
1173
- ... agent, Task(name="test", priority=5), publish_outputs=False
1174
- ... )
1175
-
1176
- >>> # HTTP endpoint: Execute specific agent, allow cascade
1177
- >>> results = await orchestrator.invoke(
1178
- ... movie_agent, Idea(topic="AI", genre="comedy"), publish_outputs=True
1179
- ... )
1180
- >>> await orchestrator.run_until_idle()
1181
- """
1182
- from asyncio import wait_for
1183
- from uuid import uuid4
1184
-
1185
- # Get Agent instance
1186
- agent_obj = agent.agent if isinstance(agent, AgentBuilder) else agent
1187
-
1188
- # Create artifact (don't publish to blackboard yet)
1189
- type_name = type_registry.name_for(type(obj))
1190
- artifact = Artifact(
1191
- type=type_name,
1192
- payload=obj.model_dump(),
1193
- produced_by="__direct__",
1194
- visibility=PublicVisibility(),
1195
- )
1196
-
1197
- # Phase 8: Evaluate context BEFORE creating Context (security fix)
1198
- # Provider resolution: per-agent > global > DefaultContextProvider
1199
- from flock.context_provider import (
1200
- BoundContextProvider,
1201
- ContextRequest,
1202
- DefaultContextProvider,
1203
- )
1204
-
1205
- inner_provider = (
1206
- getattr(agent_obj, "context_provider", None)
1207
- or self._default_context_provider
1208
- or DefaultContextProvider()
1209
- )
1210
-
1211
- # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
1212
- provider = BoundContextProvider(inner_provider, agent_obj.identity)
1213
-
1214
- # Evaluate context using provider (orchestrator controls this!)
1215
- correlation_id = artifact.correlation_id if artifact.correlation_id else uuid4()
1216
- request = ContextRequest(
1217
- agent=agent_obj,
1218
- correlation_id=correlation_id,
1219
- store=self.store,
1220
- agent_identity=agent_obj.identity,
1221
- exclude_ids={artifact.id}, # Exclude input artifact
1222
- )
1223
- context_artifacts = await provider(request)
1224
-
1225
- # Phase 8: Create Context with pre-filtered data (no capabilities!)
1226
- # SECURITY: Context is now just data - engines can't query anything
1227
- ctx = Context(
1228
- artifacts=context_artifacts, # Pre-filtered conversation context
1229
- agent_identity=agent_obj.identity,
1230
- task_id=str(uuid4()),
1231
- correlation_id=correlation_id,
1232
- )
1233
- self._record_agent_run(agent_obj)
1234
-
1235
- # Execute with optional timeout
1236
- if timeout:
1237
- execution = agent_obj.execute(ctx, [artifact])
1238
- outputs = await wait_for(execution, timeout=timeout)
1239
- else:
1240
- outputs = await agent_obj.execute(ctx, [artifact])
1241
-
1242
- # Phase 6: Orchestrator publishes outputs (security fix)
1243
- # Agents return artifacts, orchestrator validates and publishes
1244
- if publish_outputs:
1245
- for output in outputs:
1246
- await self._persist_and_schedule(output)
1247
-
1248
- return outputs
1249
-
1250
- async def _persist_and_schedule(self, artifact: Artifact) -> None:
1251
- await self.store.publish(artifact)
1252
- self.metrics["artifacts_published"] += 1
1253
- await self._schedule_artifact(artifact)
1254
-
1255
- # Component Hook Runners ───────────────────────────────────────
1256
-
1257
- async def _run_initialize(self) -> None:
1258
- """Initialize all components in priority order (called once).
1259
-
1260
- Executes on_initialize hook for each component. Sets _components_initialized
1261
- flag to prevent multiple initializations.
1262
- """
1263
- if self._components_initialized:
1264
- return
1265
-
1266
- self._logger.info(
1267
- f"Initializing {len(self._components)} orchestrator components"
1268
- )
1269
-
1270
- for component in self._components:
1271
- comp_name = component.name or component.__class__.__name__
1272
- self._logger.debug(
1273
- f"Initializing component: name={comp_name}, priority={component.priority}"
1274
- )
1275
-
1276
- try:
1277
- await component.on_initialize(self)
1278
- except Exception as e:
1279
- self._logger.exception(
1280
- f"Component initialization failed: name={comp_name}, error={e!s}"
1281
- )
1282
- raise
1283
-
1284
- self._components_initialized = True
1285
- self._logger.info(f"All components initialized: count={len(self._components)}")
1286
-
1287
- async def _run_artifact_published(self, artifact: Artifact) -> Artifact | None:
1288
- """Run on_artifact_published hooks (returns modified artifact or None to block).
1289
-
1290
- Components execute in priority order, each receiving the artifact from the
1291
- previous component (chaining). If any component returns None, the artifact
1292
- is blocked and scheduling stops.
1293
- """
1294
- current_artifact = artifact
1295
-
1296
- for component in self._components:
1297
- comp_name = component.name or component.__class__.__name__
1298
- self._logger.debug(
1299
- f"Running on_artifact_published: component={comp_name}, "
1300
- f"artifact_type={current_artifact.type}, artifact_id={current_artifact.id}"
1301
- )
1302
-
1303
- try:
1304
- result = await component.on_artifact_published(self, current_artifact)
1305
-
1306
- if result is None:
1307
- self._logger.info(
1308
- f"Artifact blocked by component: component={comp_name}, "
1309
- f"artifact_type={current_artifact.type}, artifact_id={current_artifact.id}"
1310
- )
1311
- return None
1312
-
1313
- current_artifact = result
1314
- except Exception as e:
1315
- self._logger.exception(
1316
- f"Component hook failed: component={comp_name}, "
1317
- f"hook=on_artifact_published, error={e!s}"
1318
- )
1319
- raise
1320
-
1321
- return current_artifact
1322
-
1323
- async def _run_before_schedule(
1324
- self, artifact: Artifact, agent: Agent, subscription: Subscription
1325
- ) -> ScheduleDecision:
1326
- """Run on_before_schedule hooks (returns CONTINUE, SKIP, or DEFER).
1327
-
1328
- Components execute in priority order. First component to return SKIP or
1329
- DEFER stops execution and returns that decision.
1330
- """
1331
- from flock.orchestrator_component import ScheduleDecision
1332
-
1333
- for component in self._components:
1334
- comp_name = component.name or component.__class__.__name__
1335
-
1336
- self._logger.debug(
1337
- f"Running on_before_schedule: component={comp_name}, "
1338
- f"agent={agent.name}, artifact_type={artifact.type}"
1339
- )
1340
-
1341
- try:
1342
- decision = await component.on_before_schedule(
1343
- self, artifact, agent, subscription
1344
- )
1345
-
1346
- if decision == ScheduleDecision.SKIP:
1347
- self._logger.info(
1348
- f"Scheduling skipped by component: component={comp_name}, "
1349
- f"agent={agent.name}, artifact_type={artifact.type}, decision=SKIP"
1350
- )
1351
- return ScheduleDecision.SKIP
1352
-
1353
- if decision == ScheduleDecision.DEFER:
1354
- self._logger.debug(
1355
- f"Scheduling deferred by component: component={comp_name}, "
1356
- f"agent={agent.name}, decision=DEFER"
1357
- )
1358
- return ScheduleDecision.DEFER
1359
-
1360
- except Exception as e:
1361
- self._logger.exception(
1362
- f"Component hook failed: component={comp_name}, "
1363
- f"hook=on_before_schedule, error={e!s}"
1364
- )
1365
- raise
1366
-
1367
- return ScheduleDecision.CONTINUE
1368
-
1369
- async def _run_collect_artifacts(
1370
- self, artifact: Artifact, agent: Agent, subscription: Subscription
1371
- ) -> CollectionResult:
1372
- """Run on_collect_artifacts hooks (returns first non-None result).
1373
-
1374
- Components execute in priority order. First component to return non-None
1375
- wins (short-circuit). If all return None, default is immediate scheduling.
1376
- """
1377
- from flock.orchestrator_component import CollectionResult
1378
-
1379
- for component in self._components:
1380
- comp_name = component.name or component.__class__.__name__
1381
-
1382
- self._logger.debug(
1383
- f"Running on_collect_artifacts: component={comp_name}, "
1384
- f"agent={agent.name}, artifact_type={artifact.type}"
1385
- )
1386
-
1387
- try:
1388
- result = await component.on_collect_artifacts(
1389
- self, artifact, agent, subscription
1390
- )
1391
-
1392
- if result is not None:
1393
- self._logger.debug(
1394
- f"Collection handled by component: component={comp_name}, "
1395
- f"complete={result.complete}, artifact_count={len(result.artifacts)}"
1396
- )
1397
- return result
1398
- except Exception as e:
1399
- self._logger.exception(
1400
- f"Component hook failed: component={comp_name}, "
1401
- f"hook=on_collect_artifacts, error={e!s}"
1402
- )
1403
- raise
1404
-
1405
- # Default: immediate scheduling with single artifact
1406
- self._logger.debug(
1407
- f"No component handled collection, using default: "
1408
- f"agent={agent.name}, artifact_type={artifact.type}"
1409
- )
1410
- return CollectionResult.immediate([artifact])
1411
-
1412
- async def _run_before_agent_schedule(
1413
- self, agent: Agent, artifacts: list[Artifact]
1414
- ) -> list[Artifact] | None:
1415
- """Run on_before_agent_schedule hooks (returns modified artifacts or None to block).
1416
-
1417
- Components execute in priority order, each receiving artifacts from the
1418
- previous component (chaining). If any component returns None, scheduling
1419
- is blocked.
1420
- """
1421
- current_artifacts = artifacts
1422
-
1423
- for component in self._components:
1424
- comp_name = component.name or component.__class__.__name__
1425
-
1426
- self._logger.debug(
1427
- f"Running on_before_agent_schedule: component={comp_name}, "
1428
- f"agent={agent.name}, artifact_count={len(current_artifacts)}"
1429
- )
1430
-
1431
- try:
1432
- result = await component.on_before_agent_schedule(
1433
- self, agent, current_artifacts
1434
- )
1435
-
1436
- if result is None:
1437
- self._logger.info(
1438
- f"Agent scheduling blocked by component: component={comp_name}, "
1439
- f"agent={agent.name}"
1440
- )
1441
- return None
1442
-
1443
- current_artifacts = result
1444
- except Exception as e:
1445
- self._logger.exception(
1446
- f"Component hook failed: component={comp_name}, "
1447
- f"hook=on_before_agent_schedule, error={e!s}"
1448
- )
1449
- raise
1450
-
1451
- return current_artifacts
1452
-
1453
- async def _run_agent_scheduled(
1454
- self, agent: Agent, artifacts: list[Artifact], task: Task[Any]
1455
- ) -> None:
1456
- """Run on_agent_scheduled hooks (notification only, non-blocking).
1457
-
1458
- Components execute in priority order. Exceptions are logged but don't
1459
- prevent other components from executing or block scheduling.
1460
- """
1461
- for component in self._components:
1462
- comp_name = component.name or component.__class__.__name__
1463
-
1464
- self._logger.debug(
1465
- f"Running on_agent_scheduled: component={comp_name}, "
1466
- f"agent={agent.name}, artifact_count={len(artifacts)}"
1467
- )
1468
-
1469
- try:
1470
- await component.on_agent_scheduled(self, agent, artifacts, task)
1471
- except Exception as e:
1472
- self._logger.warning(
1473
- f"Component notification hook failed (non-critical): "
1474
- f"component={comp_name}, hook=on_agent_scheduled, error={e!s}"
1475
- )
1476
- # Don't propagate - this is a notification hook
1477
-
1478
- async def _run_idle(self) -> None:
1479
- """Run on_orchestrator_idle hooks when orchestrator becomes idle.
1480
-
1481
- Components execute in priority order. Exceptions are logged but don't
1482
- prevent other components from executing.
1483
- """
1484
- self._logger.debug(
1485
- f"Running on_orchestrator_idle hooks: component_count={len(self._components)}"
1486
- )
1487
-
1488
- for component in self._components:
1489
- comp_name = component.name or component.__class__.__name__
1490
-
1491
- try:
1492
- await component.on_orchestrator_idle(self)
1493
- except Exception as e:
1494
- self._logger.warning(
1495
- f"Component idle hook failed (non-critical): "
1496
- f"component={comp_name}, hook=on_orchestrator_idle, error={e!s}"
1497
- )
1498
-
1499
- async def _run_shutdown(self) -> None:
1500
- """Run on_shutdown hooks when orchestrator shuts down.
1501
-
1502
- Components execute in priority order. Exceptions are logged but don't
1503
- prevent shutdown of other components (best-effort cleanup).
1504
- """
1505
- self._logger.info(
1506
- f"Shutting down {len(self._components)} orchestrator components"
1507
- )
1508
-
1509
- for component in self._components:
1510
- comp_name = component.name or component.__class__.__name__
1511
- self._logger.debug(f"Shutting down component: name={comp_name}")
1512
-
1513
- try:
1514
- await component.on_shutdown(self)
1515
- except Exception as e:
1516
- self._logger.exception(
1517
- f"Component shutdown failed: component={comp_name}, "
1518
- f"hook=on_shutdown, error={e!s}"
1519
- )
1520
- # Continue shutting down other components
1521
-
1522
- # Scheduling ───────────────────────────────────────────────────
1523
-
1524
- async def _schedule_artifact(self, artifact: Artifact) -> None:
1525
- """Schedule agents for an artifact using component hooks.
1526
-
1527
- Refactored to use OrchestratorComponent hook system for extensibility.
1528
- Components can modify artifact, control scheduling, and handle collection.
1529
- """
1530
- # Phase 3: Initialize components on first artifact
1531
- if not self._components_initialized:
1532
- await self._run_initialize()
1533
-
1534
- # Phase 3: Component hook - artifact published (can transform or block)
1535
- artifact = await self._run_artifact_published(artifact)
1536
- if artifact is None:
1537
- return # Artifact blocked by component
1538
-
1539
- for agent in self.agents:
1540
- identity = agent.identity
1541
- for subscription in agent.subscriptions:
1542
- if not subscription.accepts_events():
1543
- continue
1544
-
1545
- # T066: Check prevent_self_trigger
1546
- if agent.prevent_self_trigger and artifact.produced_by == agent.name:
1547
- continue # Skip - agent produced this artifact (prevents feedback loops)
1548
-
1549
- # Visibility check
1550
- if not self._check_visibility(artifact, identity):
1551
- continue
1552
-
1553
- # Subscription match check
1554
- if not subscription.matches(artifact):
1555
- continue
1556
-
1557
- # Phase 3: Component hook - before schedule (circuit breaker, deduplication, etc.)
1558
- from flock.orchestrator_component import ScheduleDecision
1559
-
1560
- decision = await self._run_before_schedule(
1561
- artifact, agent, subscription
1562
- )
1563
- if decision == ScheduleDecision.SKIP:
1564
- continue # Skip this subscription
1565
- if decision == ScheduleDecision.DEFER:
1566
- continue # Defer for later (batching/correlation)
1567
-
1568
- # Phase 3: Component hook - collect artifacts (handles AND gates, correlation, batching)
1569
- collection = await self._run_collect_artifacts(
1570
- artifact, agent, subscription
1571
- )
1572
- if not collection.complete:
1573
- continue # Still collecting (AND gate, correlation, or batch incomplete)
1574
-
1575
- artifacts = collection.artifacts
1576
-
1577
- # Phase 3: Component hook - before agent schedule (final validation/transformation)
1578
- artifacts = await self._run_before_agent_schedule(agent, artifacts)
1579
- if artifacts is None:
1580
- continue # Scheduling blocked by component
1581
-
1582
- # Complete! Schedule agent with collected artifacts
1583
- # Schedule agent task
1584
- is_batch_execution = subscription.batch is not None
1585
- task = self._schedule_task(
1586
- agent, artifacts, is_batch=is_batch_execution
1587
- )
1588
-
1589
- # Phase 3: Component hook - agent scheduled (notification)
1590
- await self._run_agent_scheduled(agent, artifacts, task)
1591
-
1592
- def _schedule_task(
1593
- self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
1594
- ) -> Task[Any]:
1595
- """Schedule agent task and return the task handle."""
1596
- task = asyncio.create_task(
1597
- self._run_agent_task(agent, artifacts, is_batch=is_batch)
1598
- )
1599
- self._tasks.add(task)
1600
- task.add_done_callback(self._tasks.discard)
1601
-
1602
- # Track task by correlation_id for workflow status tracking
1603
- correlation_id = artifacts[0].correlation_id if artifacts else None
1604
- if correlation_id:
1605
- if correlation_id not in self._correlation_tasks:
1606
- self._correlation_tasks[correlation_id] = set()
1607
- self._correlation_tasks[correlation_id].add(task)
1608
-
1609
- # Clean up correlation tracking when task completes
1610
- def cleanup_correlation(t: Task[Any]) -> None:
1611
- if correlation_id in self._correlation_tasks:
1612
- self._correlation_tasks[correlation_id].discard(t)
1613
- # Remove empty sets to prevent memory leaks
1614
- if not self._correlation_tasks[correlation_id]:
1615
- del self._correlation_tasks[correlation_id]
1616
-
1617
- task.add_done_callback(cleanup_correlation)
1618
-
1619
- return task
1620
-
1621
- def _record_agent_run(self, agent: Agent) -> None:
1622
- self.metrics["agent_runs"] += 1
1623
-
1624
- def _mark_processed(self, artifact: Artifact, agent: Agent) -> None:
1625
- key = (str(artifact.id), agent.name)
1626
- self._processed.add(key)
1627
-
1628
- def _seen_before(self, artifact: Artifact, agent: Agent) -> bool:
1629
- key = (str(artifact.id), agent.name)
1630
- return key in self._processed
1631
-
1632
- async def _run_agent_task(
1633
- self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
1634
- ) -> None:
1635
- correlation_id = artifacts[0].correlation_id if artifacts else uuid4()
1636
-
1637
- # Phase 8: Evaluate context BEFORE creating Context (security fix)
1638
- # Provider resolution: per-agent > global > DefaultContextProvider
1639
- from flock.context_provider import (
1640
- BoundContextProvider,
1641
- ContextRequest,
1642
- DefaultContextProvider,
1643
- )
1644
-
1645
- inner_provider = (
1646
- getattr(agent, "context_provider", None)
1647
- or self._default_context_provider
1648
- or DefaultContextProvider()
1649
- )
1650
-
1651
- # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
1652
- provider = BoundContextProvider(inner_provider, agent.identity)
1653
-
1654
- # Evaluate context using provider (orchestrator controls this!)
1655
- # Engines will receive pre-filtered artifacts via ctx.artifacts
1656
- request = ContextRequest(
1657
- agent=agent,
1658
- correlation_id=correlation_id,
1659
- store=self.store,
1660
- agent_identity=agent.identity,
1661
- exclude_ids={a.id for a in artifacts}, # Exclude input artifacts
1662
- )
1663
- context_artifacts = await provider(request)
1664
-
1665
- # Phase 8: Create Context with pre-filtered data (no capabilities!)
1666
- # SECURITY: Context is now just data - engines can't query anything
1667
- ctx = Context(
1668
- artifacts=context_artifacts, # Pre-filtered conversation context
1669
- agent_identity=agent.identity,
1670
- task_id=str(uuid4()),
1671
- correlation_id=correlation_id,
1672
- is_batch=is_batch,
1673
- )
1674
- self._record_agent_run(agent)
1675
-
1676
- # Phase 6: Execute agent (returns artifacts, doesn't publish)
1677
- # Wrap in try/catch to handle agent failures gracefully
1678
- try:
1679
- outputs = await agent.execute(ctx, artifacts)
1680
- except asyncio.CancelledError:
1681
- # Re-raise cancellations immediately (shutdown, user cancellation)
1682
- # Do NOT treat these as errors - they're intentional interruptions
1683
- self._logger.debug(
1684
- f"Agent '{agent.name}' task cancelled (task={ctx.task_id})"
1685
- )
1686
- raise # Propagate cancellation so task.cancelled() == True
1687
- except Exception as exc:
1688
- # Agent already called component.on_error hooks before re-raising
1689
- # Now orchestrator publishes error artifact and continues workflow
1690
- from flock.system_artifacts import WorkflowError
1691
-
1692
- error_artifact_data = WorkflowError(
1693
- failed_agent=agent.name,
1694
- error_type=type(exc).__name__,
1695
- error_message=str(exc),
1696
- timestamp=datetime.now(UTC),
1697
- task_id=ctx.task_id,
1698
- )
1699
-
1700
- # Build and publish error artifact with correlation_id
1701
- from flock.artifacts import ArtifactSpec
1702
-
1703
- error_spec = ArtifactSpec.from_model(WorkflowError)
1704
- error_artifact = error_spec.build(
1705
- produced_by=f"orchestrator#{agent.name}",
1706
- data=error_artifact_data.model_dump(),
1707
- correlation_id=correlation_id,
1708
- )
1709
-
1710
- await self._persist_and_schedule(error_artifact)
1711
-
1712
- # Log error but don't re-raise - workflow continues
1713
- self._logger.error(
1714
- f"Agent '{agent.name}' failed (task={ctx.task_id}): {exc}",
1715
- exc_info=True,
1716
- )
1717
- return # Exit early - no outputs to publish
1718
-
1719
- # Phase 6: Orchestrator publishes outputs (security fix)
1720
- # This fixes Vulnerability #2 (WRITE Bypass) - agents can't bypass validation
1721
- for output in outputs:
1722
- await self._persist_and_schedule(output)
1723
-
1724
- if artifacts:
1725
- try:
1726
- timestamp = datetime.now(UTC)
1727
- records = [
1728
- ConsumptionRecord(
1729
- artifact_id=artifact.id,
1730
- consumer=agent.name,
1731
- run_id=ctx.task_id,
1732
- correlation_id=str(correlation_id) if correlation_id else None,
1733
- consumed_at=timestamp,
1734
- )
1735
- for artifact in artifacts
1736
- ]
1737
- await self.store.record_consumptions(records)
1738
- except NotImplementedError:
1739
- pass
1740
- except Exception as exc: # pragma: no cover - defensive logging
1741
- self._logger.exception("Failed to record artifact consumption: %s", exc)
1742
-
1743
- # Phase 1.2: Logic Operations Event Emission ----------------------------
1744
-
1745
- async def _emit_correlation_updated_event(
1746
- self, *, agent_name: str, subscription_index: int, artifact: Artifact
1747
- ) -> None:
1748
- """Emit CorrelationGroupUpdatedEvent for real-time dashboard updates.
1749
-
1750
- Called when an artifact is added to a correlation group that is not yet complete.
1751
-
1752
- Args:
1753
- agent_name: Name of the agent with the JoinSpec subscription
1754
- subscription_index: Index of the subscription in the agent's subscriptions list
1755
- artifact: The artifact that triggered this update
1756
- """
1757
- # Only emit if dashboard is enabled
1758
- if self._websocket_manager is None:
1759
- return
1760
-
1761
- # Import _get_correlation_groups helper from dashboard service
1762
- from flock.dashboard.service import _get_correlation_groups
1763
-
1764
- # Get current correlation groups state from engine
1765
- groups = _get_correlation_groups(
1766
- self._correlation_engine, agent_name, subscription_index
1767
- )
1768
-
1769
- if not groups:
1770
- return # No groups to report (shouldn't happen, but defensive)
1771
-
1772
- # Find the group that was just updated (match by last updated time or artifact ID)
1773
- # For now, we'll emit an event for the FIRST group that's still waiting
1774
- # In practice, the artifact we just added should be in one of these groups
1775
- for group_state in groups:
1776
- if not group_state["is_complete"]:
1777
- # Import CorrelationGroupUpdatedEvent
1778
- from flock.dashboard.events import CorrelationGroupUpdatedEvent
1779
-
1780
- # Build and emit event
1781
- event = CorrelationGroupUpdatedEvent(
1782
- agent_name=agent_name,
1783
- subscription_index=subscription_index,
1784
- correlation_key=group_state["correlation_key"],
1785
- collected_types=group_state["collected_types"],
1786
- required_types=group_state["required_types"],
1787
- waiting_for=group_state["waiting_for"],
1788
- elapsed_seconds=group_state["elapsed_seconds"],
1789
- expires_in_seconds=group_state["expires_in_seconds"],
1790
- expires_in_artifacts=group_state["expires_in_artifacts"],
1791
- artifact_id=str(artifact.id),
1792
- artifact_type=artifact.type,
1793
- is_complete=group_state["is_complete"],
1794
- )
1795
-
1796
- # Broadcast via WebSocket
1797
- await self._websocket_manager.broadcast(event)
1798
- break # Only emit one event per artifact addition
1799
-
1800
- async def _emit_batch_item_added_event(
1801
- self,
1802
- *,
1803
- agent_name: str,
1804
- subscription_index: int,
1805
- subscription: Subscription, # noqa: F821
1806
- artifact: Artifact,
1807
- ) -> None:
1808
- """Emit BatchItemAddedEvent for real-time dashboard updates.
1809
-
1810
- Called when an artifact is added to a batch that hasn't reached flush threshold.
1811
-
1812
- Args:
1813
- agent_name: Name of the agent with the BatchSpec subscription
1814
- subscription_index: Index of the subscription in the agent's subscriptions list
1815
- subscription: The subscription with BatchSpec configuration
1816
- artifact: The artifact that triggered this update
1817
- """
1818
- # Only emit if dashboard is enabled
1819
- if self._websocket_manager is None:
1820
- return
1821
-
1822
- # Import _get_batch_state helper from dashboard service
1823
- from flock.dashboard.service import _get_batch_state
1824
-
1825
- # Get current batch state from engine
1826
- batch_state = _get_batch_state(
1827
- self._batch_engine, agent_name, subscription_index, subscription.batch
1828
- )
1829
-
1830
- if not batch_state:
1831
- return # No batch to report (shouldn't happen, but defensive)
1832
-
1833
- # Import BatchItemAddedEvent
1834
- from flock.dashboard.events import BatchItemAddedEvent
1835
-
1836
- # Build and emit event
1837
- event = BatchItemAddedEvent(
1838
- agent_name=agent_name,
1839
- subscription_index=subscription_index,
1840
- items_collected=batch_state["items_collected"],
1841
- items_target=batch_state.get("items_target"),
1842
- items_remaining=batch_state.get("items_remaining"),
1843
- elapsed_seconds=batch_state["elapsed_seconds"],
1844
- timeout_seconds=batch_state.get("timeout_seconds"),
1845
- timeout_remaining_seconds=batch_state.get("timeout_remaining_seconds"),
1846
- will_flush=batch_state["will_flush"],
1847
- artifact_id=str(artifact.id),
1848
- artifact_type=artifact.type,
1849
- )
1850
-
1851
- # Broadcast via WebSocket
1852
- await self._websocket_manager.broadcast(event)
1853
-
1854
- # Batch Helpers --------------------------------------------------------
1855
-
1856
- async def _correlation_cleanup_loop(self) -> None:
1857
- """Background task that periodically cleans up expired correlation groups.
1858
-
1859
- Runs continuously until all correlation groups are cleared or orchestrator shuts down.
1860
- Checks every 100ms for time-based expired correlations and discards them.
1861
- """
1862
- try:
1863
- while True:
1864
- await asyncio.sleep(self._correlation_cleanup_interval)
1865
- self._cleanup_expired_correlations()
1866
-
1867
- # Stop if no correlation groups remain
1868
- if not self._correlation_engine.correlation_groups:
1869
- self._correlation_cleanup_task = None
1870
- break
1871
- except asyncio.CancelledError:
1872
- # Clean shutdown
1873
- self._correlation_cleanup_task = None
1874
- raise
1875
-
1876
- def _cleanup_expired_correlations(self) -> None:
1877
- """Clean up all expired correlation groups across all subscriptions.
1878
-
1879
- Called periodically by background task to enforce time-based correlation windows.
1880
- Discards incomplete correlations that have exceeded their time window.
1881
- """
1882
- # Get all active subscription keys
1883
- for agent_name, subscription_index in list(
1884
- self._correlation_engine.correlation_groups.keys()
1885
- ):
1886
- self._correlation_engine.cleanup_expired(agent_name, subscription_index)
1887
-
1888
- async def _batch_timeout_checker_loop(self) -> None:
1889
- """Background task that periodically checks for batch timeouts.
1890
-
1891
- Runs continuously until all batches are cleared or orchestrator shuts down.
1892
- Checks every 100ms for expired batches and flushes them.
1893
- """
1894
- try:
1895
- while True:
1896
- await asyncio.sleep(self._batch_timeout_interval)
1897
- await self._check_batch_timeouts()
1898
-
1899
- # Stop if no batches remain
1900
- if not self._batch_engine.batches:
1901
- self._batch_timeout_task = None
1902
- break
1903
- except asyncio.CancelledError:
1904
- # Clean shutdown
1905
- self._batch_timeout_task = None
1906
- raise
1907
-
1908
- async def _check_batch_timeouts(self) -> None:
1909
- """Check all batches for timeout expiry and flush expired batches.
1910
-
1911
- This method is called periodically by the background timeout checker
1912
- or manually (in tests) to enforce timeout-based batching.
1913
- """
1914
- expired_batches = self._batch_engine.check_timeouts()
1915
-
1916
- for agent_name, subscription_index in expired_batches:
1917
- # Flush the expired batch
1918
- artifacts = self._batch_engine.flush_batch(agent_name, subscription_index)
1919
-
1920
- if artifacts is None:
1921
- continue
1922
-
1923
- # Get the agent
1924
- agent = self._agents.get(agent_name)
1925
- if agent is None:
1926
- continue
1927
-
1928
- # Schedule agent with batched artifacts (timeout flush)
1929
- self._schedule_task(agent, artifacts, is_batch=True)
1930
-
1931
- async def _flush_all_batches(self) -> None:
1932
- """Flush all partial batches (for shutdown - ensures zero data loss)."""
1933
- all_batches = self._batch_engine.flush_all()
1934
-
1935
- for agent_name, _subscription_index, artifacts in all_batches:
1936
- # Get the agent
1937
- agent = self._agents.get(agent_name)
1938
- if agent is None:
1939
- continue
1940
-
1941
- # Schedule agent with partial batch (shutdown flush)
1942
- self._schedule_task(agent, artifacts, is_batch=True)
1943
-
1944
- # Wait for all scheduled tasks to complete
1945
- await self.run_until_idle()
1946
-
1947
- # Helpers --------------------------------------------------------------
1948
-
1949
- def _normalize_input(
1950
- self, value: BaseModel | Mapping[str, Any] | Artifact, *, produced_by: str
1951
- ) -> Artifact:
1952
- if isinstance(value, Artifact):
1953
- return value
1954
- if isinstance(value, BaseModel):
1955
- model_cls = type(value)
1956
- type_name = type_registry.register(model_cls)
1957
- payload = value.model_dump()
1958
- elif isinstance(value, Mapping):
1959
- if "type" not in value:
1960
- raise ValueError("Mapping input must contain 'type'.")
1961
- type_name = value["type"]
1962
- payload = value.get("payload", {})
1963
- else: # pragma: no cover - defensive
1964
- raise TypeError("Unsupported input for direct invoke.")
1965
- return Artifact(type=type_name, payload=payload, produced_by=produced_by)
1966
-
1967
- def _check_visibility(self, artifact: Artifact, identity: AgentIdentity) -> bool:
1968
- try:
1969
- return artifact.visibility.allows(identity)
1970
- except AttributeError: # pragma: no cover - fallback for dict vis
1971
- return True
1972
-
1973
-
1974
- @asynccontextmanager
1975
- async def start_orchestrator(orchestrator: Flock): # pragma: no cover - CLI helper
1976
- try:
1977
- yield orchestrator
1978
- await orchestrator.run_until_idle()
1979
- finally:
1980
- pass
1981
-
1982
-
1983
- __all__ = ["Flock", "start_orchestrator"]