flock-core 0.5.11-py3-none-any.whl → 0.5.20-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flock/__init__.py +1 -1
- flock/agent/__init__.py +30 -0
- flock/agent/builder_helpers.py +192 -0
- flock/agent/builder_validator.py +169 -0
- flock/agent/component_lifecycle.py +325 -0
- flock/agent/context_resolver.py +141 -0
- flock/agent/mcp_integration.py +212 -0
- flock/agent/output_processor.py +304 -0
- flock/api/__init__.py +20 -0
- flock/{api_models.py → api/models.py} +0 -2
- flock/{service.py → api/service.py} +3 -3
- flock/cli.py +2 -2
- flock/components/__init__.py +41 -0
- flock/components/agent/__init__.py +22 -0
- flock/{components.py → components/agent/base.py} +4 -3
- flock/{utility/output_utility_component.py → components/agent/output_utility.py} +12 -7
- flock/components/orchestrator/__init__.py +22 -0
- flock/{orchestrator_component.py → components/orchestrator/base.py} +5 -293
- flock/components/orchestrator/circuit_breaker.py +95 -0
- flock/components/orchestrator/collection.py +143 -0
- flock/components/orchestrator/deduplication.py +78 -0
- flock/core/__init__.py +30 -0
- flock/core/agent.py +953 -0
- flock/{artifacts.py → core/artifacts.py} +1 -1
- flock/{context_provider.py → core/context_provider.py} +3 -3
- flock/core/orchestrator.py +1102 -0
- flock/{store.py → core/store.py} +99 -454
- flock/{subscription.py → core/subscription.py} +1 -1
- flock/dashboard/collector.py +5 -5
- flock/dashboard/graph_builder.py +7 -7
- flock/dashboard/routes/__init__.py +21 -0
- flock/dashboard/routes/control.py +327 -0
- flock/dashboard/routes/helpers.py +340 -0
- flock/dashboard/routes/themes.py +76 -0
- flock/dashboard/routes/traces.py +521 -0
- flock/dashboard/routes/websocket.py +108 -0
- flock/dashboard/service.py +43 -1316
- flock/engines/dspy/__init__.py +20 -0
- flock/engines/dspy/artifact_materializer.py +216 -0
- flock/engines/dspy/signature_builder.py +474 -0
- flock/engines/dspy/streaming_executor.py +858 -0
- flock/engines/dspy_engine.py +45 -1330
- flock/engines/examples/simple_batch_engine.py +2 -2
- flock/examples.py +7 -7
- flock/logging/logging.py +1 -16
- flock/models/__init__.py +10 -0
- flock/orchestrator/__init__.py +45 -0
- flock/{artifact_collector.py → orchestrator/artifact_collector.py} +3 -3
- flock/orchestrator/artifact_manager.py +168 -0
- flock/{batch_accumulator.py → orchestrator/batch_accumulator.py} +2 -2
- flock/orchestrator/component_runner.py +389 -0
- flock/orchestrator/context_builder.py +167 -0
- flock/{correlation_engine.py → orchestrator/correlation_engine.py} +2 -2
- flock/orchestrator/event_emitter.py +167 -0
- flock/orchestrator/initialization.py +184 -0
- flock/orchestrator/lifecycle_manager.py +226 -0
- flock/orchestrator/mcp_manager.py +202 -0
- flock/orchestrator/scheduler.py +189 -0
- flock/orchestrator/server_manager.py +234 -0
- flock/orchestrator/tracing.py +147 -0
- flock/storage/__init__.py +10 -0
- flock/storage/artifact_aggregator.py +158 -0
- flock/storage/in_memory/__init__.py +6 -0
- flock/storage/in_memory/artifact_filter.py +114 -0
- flock/storage/in_memory/history_aggregator.py +115 -0
- flock/storage/sqlite/__init__.py +10 -0
- flock/storage/sqlite/agent_history_queries.py +154 -0
- flock/storage/sqlite/consumption_loader.py +100 -0
- flock/storage/sqlite/query_builder.py +112 -0
- flock/storage/sqlite/query_params_builder.py +91 -0
- flock/storage/sqlite/schema_manager.py +168 -0
- flock/storage/sqlite/summary_queries.py +194 -0
- flock/utils/__init__.py +14 -0
- flock/utils/async_utils.py +67 -0
- flock/{runtime.py → utils/runtime.py} +3 -3
- flock/utils/time_utils.py +53 -0
- flock/utils/type_resolution.py +38 -0
- flock/{utilities.py → utils/utilities.py} +2 -2
- flock/utils/validation.py +57 -0
- flock/utils/visibility.py +79 -0
- flock/utils/visibility_utils.py +134 -0
- {flock_core-0.5.11.dist-info → flock_core-0.5.20.dist-info}/METADATA +18 -4
- {flock_core-0.5.11.dist-info → flock_core-0.5.20.dist-info}/RECORD +89 -33
- flock/agent.py +0 -1578
- flock/orchestrator.py +0 -1983
- /flock/{visibility.py → core/visibility.py} +0 -0
- /flock/{system_artifacts.py → models/system_artifacts.py} +0 -0
- /flock/{helper → utils}/cli_helper.py +0 -0
- {flock_core-0.5.11.dist-info → flock_core-0.5.20.dist-info}/WHEEL +0 -0
- {flock_core-0.5.11.dist-info → flock_core-0.5.20.dist-info}/entry_points.txt +0 -0
- {flock_core-0.5.11.dist-info → flock_core-0.5.20.dist-info}/licenses/LICENSE +0 -0
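Most of this release is a mechanical reorganization: the monolithic flock/agent.py and flock/orchestrator.py are removed and their contents split across the new flock/agent/, flock/core/, flock/components/, flock/orchestrator/, flock/storage/, and flock/utils/ packages. For callers that imported the relocated modules directly, the renames above imply import-path changes along the lines of the sketch below (module paths are taken from the rename list only; whether 0.5.20 also keeps compatibility re-exports at the old locations is not visible in this diff):

    # 0.5.11 import paths (modules at the package root):
    #   from flock.artifacts import Artifact
    #   from flock.store import InMemoryBlackboardStore
    #   from flock.runtime import Context

    # 0.5.20 equivalents, following the renames listed above:
    from flock.core.artifacts import Artifact
    from flock.core.store import InMemoryBlackboardStore
    from flock.utils.runtime import Context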
flock/orchestrator.py
DELETED
|
@@ -1,1983 +0,0 @@
|
|
|
1
|
-
"""Blackboard orchestrator and scheduling runtime."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import asyncio
|
|
6
|
-
import logging
|
|
7
|
-
import os
|
|
8
|
-
from asyncio import Task
|
|
9
|
-
from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence
|
|
10
|
-
from contextlib import asynccontextmanager
|
|
11
|
-
from datetime import UTC, datetime
|
|
12
|
-
from pathlib import Path
|
|
13
|
-
from typing import TYPE_CHECKING, Any
|
|
14
|
-
from uuid import UUID, uuid4
|
|
15
|
-
|
|
16
|
-
from opentelemetry import trace
|
|
17
|
-
from opentelemetry.trace import Status, StatusCode
|
|
18
|
-
from pydantic import BaseModel
|
|
19
|
-
|
|
20
|
-
from flock.agent import Agent, AgentBuilder
|
|
21
|
-
from flock.artifact_collector import ArtifactCollector
|
|
22
|
-
from flock.artifacts import Artifact
|
|
23
|
-
from flock.batch_accumulator import BatchEngine
|
|
24
|
-
from flock.correlation_engine import CorrelationEngine
|
|
25
|
-
from flock.helper.cli_helper import init_console
|
|
26
|
-
from flock.logging.auto_trace import AutoTracedMeta
|
|
27
|
-
from flock.mcp import (
|
|
28
|
-
FlockMCPClientManager,
|
|
29
|
-
FlockMCPConfiguration,
|
|
30
|
-
FlockMCPConnectionConfiguration,
|
|
31
|
-
FlockMCPFeatureConfiguration,
|
|
32
|
-
ServerParameters,
|
|
33
|
-
)
|
|
34
|
-
from flock.orchestrator_component import (
|
|
35
|
-
CollectionResult,
|
|
36
|
-
OrchestratorComponent,
|
|
37
|
-
ScheduleDecision,
|
|
38
|
-
)
|
|
39
|
-
from flock.registry import type_registry
|
|
40
|
-
from flock.runtime import Context
|
|
41
|
-
from flock.store import BlackboardStore, ConsumptionRecord, InMemoryBlackboardStore
|
|
42
|
-
from flock.subscription import Subscription
|
|
43
|
-
from flock.visibility import AgentIdentity, PublicVisibility, Visibility
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
if TYPE_CHECKING:
|
|
47
|
-
import builtins
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
class BoardHandle:
|
|
51
|
-
"""Handle exposed to components for publishing and inspection."""
|
|
52
|
-
|
|
53
|
-
def __init__(self, orchestrator: Flock) -> None:
|
|
54
|
-
self._orchestrator = orchestrator
|
|
55
|
-
|
|
56
|
-
async def publish(self, artifact: Artifact) -> None:
|
|
57
|
-
await self._orchestrator._persist_and_schedule(artifact)
|
|
58
|
-
|
|
59
|
-
async def get(self, artifact_id) -> Artifact | None:
|
|
60
|
-
return await self._orchestrator.store.get(artifact_id)
|
|
61
|
-
|
|
62
|
-
async def list(self) -> builtins.list[Artifact]:
|
|
63
|
-
return await self._orchestrator.store.list()
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
class Flock(metaclass=AutoTracedMeta):
|
|
67
|
-
"""Main orchestrator for blackboard-based agent coordination.
|
|
68
|
-
|
|
69
|
-
All public methods are automatically traced via OpenTelemetry.
|
|
70
|
-
"""
|
|
71
|
-
|
|
72
|
-
def _patch_litellm_proxy_imports(self) -> None:
|
|
73
|
-
"""Stub litellm proxy_server to avoid optional proxy deps when not used.
|
|
74
|
-
|
|
75
|
-
Some litellm versions import `litellm.proxy.proxy_server` during standard logging
|
|
76
|
-
to read `general_settings`, which pulls in optional dependencies like `apscheduler`.
|
|
77
|
-
We provide a stub so imports succeed but cold storage remains disabled.
|
|
78
|
-
"""
|
|
79
|
-
try:
|
|
80
|
-
import sys
|
|
81
|
-
import types
|
|
82
|
-
|
|
83
|
-
if "litellm.proxy.proxy_server" not in sys.modules:
|
|
84
|
-
stub = types.ModuleType("litellm.proxy.proxy_server")
|
|
85
|
-
# Minimal surface that cold_storage_handler accesses
|
|
86
|
-
stub.general_settings = {}
|
|
87
|
-
sys.modules["litellm.proxy.proxy_server"] = stub
|
|
88
|
-
except Exception: # nosec B110 - Safe to ignore; worst case litellm will log a warning
|
|
89
|
-
# logger.debug(f"Failed to stub litellm proxy_server: {e}")
|
|
90
|
-
pass
|
|
91
|
-
|
|
92
|
-
def __init__(
|
|
93
|
-
self,
|
|
94
|
-
model: str | None = None,
|
|
95
|
-
*,
|
|
96
|
-
store: BlackboardStore | None = None,
|
|
97
|
-
max_agent_iterations: int = 1000,
|
|
98
|
-
context_provider: Any = None,
|
|
99
|
-
) -> None:
|
|
100
|
-
"""Initialize the Flock orchestrator for blackboard-based agent coordination.
|
|
101
|
-
|
|
102
|
-
Args:
|
|
103
|
-
model: Default LLM model for agents (e.g., "openai/gpt-4.1").
|
|
104
|
-
Can be overridden per-agent. If None, uses DEFAULT_MODEL env var.
|
|
105
|
-
store: Custom blackboard storage backend. Defaults to InMemoryBlackboardStore.
|
|
106
|
-
max_agent_iterations: Circuit breaker limit to prevent runaway agent loops.
|
|
107
|
-
Defaults to 1000 iterations per agent before reset.
|
|
108
|
-
context_provider: Global context provider for all agents (Phase 3 security fix).
|
|
109
|
-
If None, agents use DefaultContextProvider. Can be overridden per-agent.
|
|
110
|
-
|
|
111
|
-
Examples:
|
|
112
|
-
>>> # Basic initialization with default model
|
|
113
|
-
>>> flock = Flock("openai/gpt-4.1")
|
|
114
|
-
|
|
115
|
-
>>> # Custom storage backend
|
|
116
|
-
>>> flock = Flock("openai/gpt-4o", store=CustomBlackboardStore())
|
|
117
|
-
|
|
118
|
-
>>> # Circuit breaker configuration
|
|
119
|
-
>>> flock = Flock("openai/gpt-4.1", max_agent_iterations=500)
|
|
120
|
-
|
|
121
|
-
>>> # Global context provider (Phase 3 security fix)
|
|
122
|
-
>>> from flock.context_provider import DefaultContextProvider
|
|
123
|
-
>>> flock = Flock(
|
|
124
|
-
... "openai/gpt-4.1", context_provider=DefaultContextProvider()
|
|
125
|
-
... )
|
|
126
|
-
"""
|
|
127
|
-
self._patch_litellm_proxy_imports()
|
|
128
|
-
self._logger = logging.getLogger(__name__)
|
|
129
|
-
self.model = model
|
|
130
|
-
|
|
131
|
-
try:
|
|
132
|
-
init_console(clear_screen=True, show_banner=True, model=self.model)
|
|
133
|
-
except (UnicodeEncodeError, UnicodeDecodeError):
|
|
134
|
-
# Skip banner on Windows consoles with encoding issues (e.g., tests, CI)
|
|
135
|
-
pass
|
|
136
|
-
|
|
137
|
-
self.store: BlackboardStore = store or InMemoryBlackboardStore()
|
|
138
|
-
self._agents: dict[str, Agent] = {}
|
|
139
|
-
self._tasks: set[Task[Any]] = set()
|
|
140
|
-
self._correlation_tasks: dict[
|
|
141
|
-
UUID, set[Task[Any]]
|
|
142
|
-
] = {} # Track tasks by correlation_id
|
|
143
|
-
self._processed: set[tuple[str, str]] = set()
|
|
144
|
-
self._lock = asyncio.Lock()
|
|
145
|
-
self.metrics: dict[str, float] = {"artifacts_published": 0, "agent_runs": 0}
|
|
146
|
-
# Phase 3: Global context provider (security fix)
|
|
147
|
-
self._default_context_provider = context_provider
|
|
148
|
-
# MCP integration
|
|
149
|
-
self._mcp_configs: dict[str, FlockMCPConfiguration] = {}
|
|
150
|
-
self._mcp_manager: FlockMCPClientManager | None = None
|
|
151
|
-
# T068: Circuit breaker for runaway agents
|
|
152
|
-
self.max_agent_iterations: int = max_agent_iterations
|
|
153
|
-
self._agent_iteration_count: dict[str, int] = {}
|
|
154
|
-
self.is_dashboard: bool = False
|
|
155
|
-
# AND gate logic: Artifact collection for multi-type subscriptions
|
|
156
|
-
self._artifact_collector = ArtifactCollector()
|
|
157
|
-
# JoinSpec logic: Correlation engine for correlated AND gates
|
|
158
|
-
self._correlation_engine = CorrelationEngine()
|
|
159
|
-
# Background task for checking correlation expiry (time-based JoinSpec)
|
|
160
|
-
self._correlation_cleanup_task: Task[Any] | None = None
|
|
161
|
-
self._correlation_cleanup_interval: float = 0.1 # Check every 100ms
|
|
162
|
-
# BatchSpec logic: Batch accumulator for size/timeout batching
|
|
163
|
-
self._batch_engine = BatchEngine()
|
|
164
|
-
# Background task for checking batch timeouts
|
|
165
|
-
self._batch_timeout_task: Task[Any] | None = None
|
|
166
|
-
self._batch_timeout_interval: float = 0.1 # Check every 100ms
|
|
167
|
-
# Phase 1.2: WebSocket manager for real-time dashboard events (set by serve())
|
|
168
|
-
self._websocket_manager: Any = None
|
|
169
|
-
# Dashboard server task and launcher (for non-blocking serve)
|
|
170
|
-
self._server_task: Task[None] | None = None
|
|
171
|
-
self._dashboard_launcher: Any = None
|
|
172
|
-
# Unified tracing support
|
|
173
|
-
self._workflow_span = None
|
|
174
|
-
self._auto_workflow_enabled = os.getenv(
|
|
175
|
-
"FLOCK_AUTO_WORKFLOW_TRACE", "false"
|
|
176
|
-
).lower() in {
|
|
177
|
-
"true",
|
|
178
|
-
"1",
|
|
179
|
-
"yes",
|
|
180
|
-
"on",
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
# Phase 2: OrchestratorComponent system
|
|
184
|
-
self._components: list[OrchestratorComponent] = []
|
|
185
|
-
self._components_initialized: bool = False
|
|
186
|
-
|
|
187
|
-
# Auto-add built-in components
|
|
188
|
-
from flock.orchestrator_component import (
|
|
189
|
-
BuiltinCollectionComponent,
|
|
190
|
-
CircuitBreakerComponent,
|
|
191
|
-
DeduplicationComponent,
|
|
192
|
-
)
|
|
193
|
-
|
|
194
|
-
self.add_component(CircuitBreakerComponent(max_iterations=max_agent_iterations))
|
|
195
|
-
self.add_component(DeduplicationComponent())
|
|
196
|
-
self.add_component(BuiltinCollectionComponent())
|
|
197
|
-
|
|
198
|
-
# Log orchestrator initialization
|
|
199
|
-
self._logger.debug("Orchestrator initialized: components=[]")
|
|
200
|
-
|
|
201
|
-
if not model:
|
|
202
|
-
self.model = os.getenv("DEFAULT_MODEL")
|
|
203
|
-
|
|
204
|
-
# Agent management -----------------------------------------------------
|
|
205
|
-
|
|
206
|
-
def agent(self, name: str) -> AgentBuilder:
|
|
207
|
-
"""Create a new agent using the fluent builder API.
|
|
208
|
-
|
|
209
|
-
Args:
|
|
210
|
-
name: Unique identifier for the agent. Used for visibility controls and metrics.
|
|
211
|
-
|
|
212
|
-
Returns:
|
|
213
|
-
AgentBuilder for fluent configuration
|
|
214
|
-
|
|
215
|
-
Raises:
|
|
216
|
-
ValueError: If an agent with this name already exists
|
|
217
|
-
|
|
218
|
-
Examples:
|
|
219
|
-
>>> # Basic agent
|
|
220
|
-
>>> pizza_agent = (
|
|
221
|
-
... flock.agent("pizza_master")
|
|
222
|
-
... .description("Creates delicious pizza recipes")
|
|
223
|
-
... .consumes(DreamPizza)
|
|
224
|
-
... .publishes(Pizza)
|
|
225
|
-
... )
|
|
226
|
-
|
|
227
|
-
>>> # Advanced agent with filtering
|
|
228
|
-
>>> critic = (
|
|
229
|
-
... flock.agent("critic")
|
|
230
|
-
... .consumes(Movie, where=lambda m: m.rating >= 8)
|
|
231
|
-
... .publishes(Review)
|
|
232
|
-
... .with_utilities(RateLimiter(max_calls=10))
|
|
233
|
-
... )
|
|
234
|
-
"""
|
|
235
|
-
if name in self._agents:
|
|
236
|
-
raise ValueError(f"Agent '{name}' already registered.")
|
|
237
|
-
return AgentBuilder(self, name)
|
|
238
|
-
|
|
239
|
-
def register_agent(self, agent: Agent) -> None:
|
|
240
|
-
if agent.name in self._agents:
|
|
241
|
-
raise ValueError(f"Agent '{agent.name}' already registered.")
|
|
242
|
-
self._agents[agent.name] = agent
|
|
243
|
-
|
|
244
|
-
def get_agent(self, name: str) -> Agent:
|
|
245
|
-
return self._agents[name]
|
|
246
|
-
|
|
247
|
-
@property
|
|
248
|
-
def agents(self) -> list[Agent]:
|
|
249
|
-
return list(self._agents.values())
|
|
250
|
-
|
|
251
|
-
async def get_correlation_status(self, correlation_id: str) -> dict[str, Any]:
|
|
252
|
-
"""Get the status of a workflow by correlation ID.
|
|
253
|
-
|
|
254
|
-
Args:
|
|
255
|
-
correlation_id: The correlation ID to check
|
|
256
|
-
|
|
257
|
-
Returns:
|
|
258
|
-
Dictionary containing workflow status information:
|
|
259
|
-
- state: "active" if work is pending, "completed" otherwise
|
|
260
|
-
- has_pending_work: True if orchestrator has pending work for this correlation
|
|
261
|
-
- artifact_count: Total number of artifacts with this correlation_id
|
|
262
|
-
- error_count: Number of WorkflowError artifacts
|
|
263
|
-
- started_at: Timestamp of first artifact (if any)
|
|
264
|
-
- last_activity_at: Timestamp of most recent artifact (if any)
|
|
265
|
-
"""
|
|
266
|
-
from uuid import UUID
|
|
267
|
-
|
|
268
|
-
try:
|
|
269
|
-
correlation_uuid = UUID(correlation_id)
|
|
270
|
-
except ValueError as exc:
|
|
271
|
-
raise ValueError(
|
|
272
|
-
f"Invalid correlation_id format: {correlation_id}"
|
|
273
|
-
) from exc
|
|
274
|
-
|
|
275
|
-
# Check if orchestrator has pending work for this correlation
|
|
276
|
-
# 1. Check active tasks for this correlation_id
|
|
277
|
-
has_active_tasks = correlation_uuid in self._correlation_tasks and bool(
|
|
278
|
-
self._correlation_tasks[correlation_uuid]
|
|
279
|
-
)
|
|
280
|
-
|
|
281
|
-
# 2. Check correlation groups (for agents with JoinSpec that haven't yielded yet)
|
|
282
|
-
has_pending_groups = False
|
|
283
|
-
for groups in self._correlation_engine.correlation_groups.values():
|
|
284
|
-
for group_key, group in groups.items():
|
|
285
|
-
# Check if this group belongs to our correlation
|
|
286
|
-
for type_name, artifacts in group.waiting_artifacts.items():
|
|
287
|
-
if any(
|
|
288
|
-
artifact.correlation_id == correlation_uuid
|
|
289
|
-
for artifact in artifacts
|
|
290
|
-
):
|
|
291
|
-
has_pending_groups = True
|
|
292
|
-
break
|
|
293
|
-
if has_pending_groups:
|
|
294
|
-
break
|
|
295
|
-
if has_pending_groups:
|
|
296
|
-
break
|
|
297
|
-
|
|
298
|
-
# Workflow has pending work if EITHER tasks are active OR groups are waiting
|
|
299
|
-
has_pending_work = has_active_tasks or has_pending_groups
|
|
300
|
-
|
|
301
|
-
# Query artifacts for this correlation
|
|
302
|
-
from flock.store import FilterConfig
|
|
303
|
-
|
|
304
|
-
filters = FilterConfig(correlation_id=correlation_id)
|
|
305
|
-
artifacts, total = await self.store.query_artifacts(
|
|
306
|
-
filters, limit=1000, offset=0
|
|
307
|
-
)
|
|
308
|
-
|
|
309
|
-
# Count errors
|
|
310
|
-
error_count = sum(
|
|
311
|
-
1
|
|
312
|
-
for artifact in artifacts
|
|
313
|
-
if artifact.type == "flock.system_artifacts.WorkflowError"
|
|
314
|
-
)
|
|
315
|
-
|
|
316
|
-
# Get timestamps
|
|
317
|
-
started_at = None
|
|
318
|
-
last_activity_at = None
|
|
319
|
-
if artifacts:
|
|
320
|
-
timestamps = [artifact.created_at for artifact in artifacts]
|
|
321
|
-
started_at = min(timestamps).isoformat()
|
|
322
|
-
last_activity_at = max(timestamps).isoformat()
|
|
323
|
-
|
|
324
|
-
# Determine state
|
|
325
|
-
if has_pending_work:
|
|
326
|
-
state = "active"
|
|
327
|
-
elif total == 0:
|
|
328
|
-
state = "not_found"
|
|
329
|
-
elif error_count > 0 and total == error_count:
|
|
330
|
-
state = "failed" # Only error artifacts exist
|
|
331
|
-
else:
|
|
332
|
-
state = "completed"
|
|
333
|
-
|
|
334
|
-
return {
|
|
335
|
-
"correlation_id": correlation_id,
|
|
336
|
-
"state": state,
|
|
337
|
-
"has_pending_work": has_pending_work,
|
|
338
|
-
"artifact_count": total,
|
|
339
|
-
"error_count": error_count,
|
|
340
|
-
"started_at": started_at,
|
|
341
|
-
"last_activity_at": last_activity_at,
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
# Component management -------------------------------------------------
|
|
345
|
-
|
|
346
|
-
def add_component(self, component: OrchestratorComponent) -> Flock:
|
|
347
|
-
"""Add an OrchestratorComponent to this orchestrator.
|
|
348
|
-
|
|
349
|
-
Components execute in priority order (lower priority number = earlier).
|
|
350
|
-
Multiple components can have the same priority.
|
|
351
|
-
|
|
352
|
-
Args:
|
|
353
|
-
component: Component to add (must be an OrchestratorComponent instance)
|
|
354
|
-
|
|
355
|
-
Returns:
|
|
356
|
-
Self for method chaining
|
|
357
|
-
|
|
358
|
-
Examples:
|
|
359
|
-
>>> # Add single component
|
|
360
|
-
>>> flock = Flock("openai/gpt-4.1")
|
|
361
|
-
>>> flock.add_component(CircuitBreakerComponent(max_iterations=500))
|
|
362
|
-
|
|
363
|
-
>>> # Method chaining
|
|
364
|
-
>>> flock.add_component(CircuitBreakerComponent()) \\
|
|
365
|
-
... .add_component(MetricsComponent()) \\
|
|
366
|
-
... .add_component(DeduplicationComponent())
|
|
367
|
-
|
|
368
|
-
>>> # Custom priority (lower = earlier)
|
|
369
|
-
>>> flock.add_component(
|
|
370
|
-
... CustomComponent(priority=5, name="early_component")
|
|
371
|
-
... )
|
|
372
|
-
"""
|
|
373
|
-
self._components.append(component)
|
|
374
|
-
self._components.sort(key=lambda c: c.priority)
|
|
375
|
-
|
|
376
|
-
# Log component addition
|
|
377
|
-
comp_name = component.name or component.__class__.__name__
|
|
378
|
-
self._logger.info(
|
|
379
|
-
f"Component added: name={comp_name}, "
|
|
380
|
-
f"priority={component.priority}, total_components={len(self._components)}"
|
|
381
|
-
)
|
|
382
|
-
|
|
383
|
-
return self
|
|
384
|
-
|
|
385
|
-
# MCP management -------------------------------------------------------
|
|
386
|
-
|
|
387
|
-
def add_mcp(
|
|
388
|
-
self,
|
|
389
|
-
name: str,
|
|
390
|
-
connection_params: ServerParameters,
|
|
391
|
-
*,
|
|
392
|
-
enable_tools_feature: bool = True,
|
|
393
|
-
enable_prompts_feature: bool = True,
|
|
394
|
-
enable_sampling_feature: bool = True,
|
|
395
|
-
enable_roots_feature: bool = True,
|
|
396
|
-
mount_points: list[str] | None = None,
|
|
397
|
-
tool_whitelist: list[str] | None = None,
|
|
398
|
-
read_timeout_seconds: float = 300,
|
|
399
|
-
max_retries: int = 3,
|
|
400
|
-
**kwargs,
|
|
401
|
-
) -> Flock:
|
|
402
|
-
"""Register an MCP server for use by agents.
|
|
403
|
-
|
|
404
|
-
Architecture Decision: AD001 - Two-Level Architecture
|
|
405
|
-
MCP servers are registered at orchestrator level and assigned to agents.
|
|
406
|
-
|
|
407
|
-
Args:
|
|
408
|
-
name: Unique identifier for this MCP server
|
|
409
|
-
connection_params: Server connection parameters
|
|
410
|
-
enable_tools_feature: Enable tool execution
|
|
411
|
-
enable_prompts_feature: Enable prompt templates
|
|
412
|
-
enable_sampling_feature: Enable LLM sampling requests
|
|
413
|
-
enable_roots_feature: Enable filesystem roots
|
|
414
|
-
tool_whitelist: Optional list of tool names to allow
|
|
415
|
-
read_timeout_seconds: Timeout for server communications
|
|
416
|
-
max_retries: Connection retry attempts
|
|
417
|
-
|
|
418
|
-
Returns:
|
|
419
|
-
self for method chaining
|
|
420
|
-
|
|
421
|
-
Raises:
|
|
422
|
-
ValueError: If server name already registered
|
|
423
|
-
"""
|
|
424
|
-
if name in self._mcp_configs:
|
|
425
|
-
raise ValueError(f"MCP server '{name}' is already registered.")
|
|
426
|
-
|
|
427
|
-
# Detect transport type
|
|
428
|
-
from flock.mcp.types import (
|
|
429
|
-
SseServerParameters,
|
|
430
|
-
StdioServerParameters,
|
|
431
|
-
StreamableHttpServerParameters,
|
|
432
|
-
WebsocketServerParameters,
|
|
433
|
-
)
|
|
434
|
-
|
|
435
|
-
if isinstance(connection_params, StdioServerParameters):
|
|
436
|
-
transport_type = "stdio"
|
|
437
|
-
elif isinstance(connection_params, WebsocketServerParameters):
|
|
438
|
-
transport_type = "websockets"
|
|
439
|
-
elif isinstance(connection_params, SseServerParameters):
|
|
440
|
-
transport_type = "sse"
|
|
441
|
-
elif isinstance(connection_params, StreamableHttpServerParameters):
|
|
442
|
-
transport_type = "streamable_http"
|
|
443
|
-
else:
|
|
444
|
-
transport_type = "custom"
|
|
445
|
-
|
|
446
|
-
mcp_roots = None
|
|
447
|
-
if mount_points:
|
|
448
|
-
from pathlib import Path as PathLib
|
|
449
|
-
|
|
450
|
-
from flock.mcp.types import MCPRoot
|
|
451
|
-
|
|
452
|
-
mcp_roots = []
|
|
453
|
-
for path in mount_points:
|
|
454
|
-
# Normalize the path
|
|
455
|
-
if path.startswith("file://"):
|
|
456
|
-
# Already a file URI
|
|
457
|
-
uri = path
|
|
458
|
-
# Extract path from URI for name
|
|
459
|
-
path_str = path.replace("file://", "")
|
|
460
|
-
# the test:// path-prefix is used by testing servers such as the mcp-everything server.
|
|
461
|
-
elif path.startswith("test://"):
|
|
462
|
-
# Already a test URI
|
|
463
|
-
uri = path
|
|
464
|
-
# Extract path from URI for name
|
|
465
|
-
path_str = path.replace("test://", "")
|
|
466
|
-
else:
|
|
467
|
-
# Convert to absolute path and create URI
|
|
468
|
-
abs_path = PathLib(path).resolve()
|
|
469
|
-
uri = f"file://{abs_path}"
|
|
470
|
-
path_str = str(abs_path)
|
|
471
|
-
|
|
472
|
-
# Extract a meaningful name (last component of path)
|
|
473
|
-
name = (
|
|
474
|
-
PathLib(path_str).name
|
|
475
|
-
or path_str.rstrip("/").split("/")[-1]
|
|
476
|
-
or "root"
|
|
477
|
-
)
|
|
478
|
-
mcp_roots.append(MCPRoot(uri=uri, name=name))
|
|
479
|
-
|
|
480
|
-
# Build configuration
|
|
481
|
-
connection_config = FlockMCPConnectionConfiguration(
|
|
482
|
-
max_retries=max_retries,
|
|
483
|
-
connection_parameters=connection_params,
|
|
484
|
-
transport_type=transport_type,
|
|
485
|
-
read_timeout_seconds=read_timeout_seconds,
|
|
486
|
-
mount_points=mcp_roots,
|
|
487
|
-
)
|
|
488
|
-
|
|
489
|
-
feature_config = FlockMCPFeatureConfiguration(
|
|
490
|
-
tools_enabled=enable_tools_feature,
|
|
491
|
-
prompts_enabled=enable_prompts_feature,
|
|
492
|
-
sampling_enabled=enable_sampling_feature,
|
|
493
|
-
roots_enabled=enable_roots_feature,
|
|
494
|
-
tool_whitelist=tool_whitelist,
|
|
495
|
-
)
|
|
496
|
-
|
|
497
|
-
mcp_config = FlockMCPConfiguration(
|
|
498
|
-
name=name,
|
|
499
|
-
connection_config=connection_config,
|
|
500
|
-
feature_config=feature_config,
|
|
501
|
-
)
|
|
502
|
-
|
|
503
|
-
self._mcp_configs[name] = mcp_config
|
|
504
|
-
return self
|
|
505
|
-
|
|
506
|
-
def get_mcp_manager(self) -> FlockMCPClientManager:
|
|
507
|
-
"""Get or create the MCP client manager.
|
|
508
|
-
|
|
509
|
-
Architecture Decision: AD005 - Lazy Connection Establishment
|
|
510
|
-
"""
|
|
511
|
-
if not self._mcp_configs:
|
|
512
|
-
raise RuntimeError("No MCP servers registered. Call add_mcp() first.")
|
|
513
|
-
|
|
514
|
-
if self._mcp_manager is None:
|
|
515
|
-
self._mcp_manager = FlockMCPClientManager(self._mcp_configs)
|
|
516
|
-
|
|
517
|
-
return self._mcp_manager
|
|
518
|
-
|
|
519
|
-
# Unified Tracing ------------------------------------------------------
|
|
520
|
-
|
|
521
|
-
@asynccontextmanager
|
|
522
|
-
async def traced_run(self, name: str = "workflow") -> AsyncGenerator[Any, None]:
|
|
523
|
-
"""Context manager for wrapping an entire execution in a single unified trace.
|
|
524
|
-
|
|
525
|
-
This creates a parent span that encompasses all operations (publish, run_until_idle, etc.)
|
|
526
|
-
within the context, ensuring they all belong to the same trace_id for better observability.
|
|
527
|
-
|
|
528
|
-
Args:
|
|
529
|
-
name: Name for the workflow trace (default: "workflow")
|
|
530
|
-
|
|
531
|
-
Yields:
|
|
532
|
-
The workflow span for optional manual attribute setting
|
|
533
|
-
|
|
534
|
-
Examples:
|
|
535
|
-
# Explicit workflow tracing (recommended)
|
|
536
|
-
async with flock.traced_run("pizza_workflow"):
|
|
537
|
-
await flock.publish(pizza_idea)
|
|
538
|
-
await flock.run_until_idle()
|
|
539
|
-
# All operations now share the same trace_id!
|
|
540
|
-
|
|
541
|
-
# Custom attributes
|
|
542
|
-
async with flock.traced_run("data_pipeline") as span:
|
|
543
|
-
span.set_attribute("pipeline.version", "2.0")
|
|
544
|
-
await flock.publish(data)
|
|
545
|
-
await flock.run_until_idle()
|
|
546
|
-
"""
|
|
547
|
-
tracer = trace.get_tracer(__name__)
|
|
548
|
-
with tracer.start_as_current_span(name) as span:
|
|
549
|
-
# Set workflow-level attributes
|
|
550
|
-
span.set_attribute("flock.workflow", True)
|
|
551
|
-
span.set_attribute("workflow.name", name)
|
|
552
|
-
span.set_attribute("workflow.flock_id", str(id(self)))
|
|
553
|
-
|
|
554
|
-
# Store span for nested operations to use
|
|
555
|
-
prev_workflow_span = self._workflow_span
|
|
556
|
-
self._workflow_span = span
|
|
557
|
-
|
|
558
|
-
try:
|
|
559
|
-
yield span
|
|
560
|
-
span.set_status(Status(StatusCode.OK))
|
|
561
|
-
except Exception as e:
|
|
562
|
-
span.set_status(Status(StatusCode.ERROR, str(e)))
|
|
563
|
-
span.record_exception(e)
|
|
564
|
-
raise
|
|
565
|
-
finally:
|
|
566
|
-
# Restore previous workflow span
|
|
567
|
-
self._workflow_span = prev_workflow_span
|
|
568
|
-
|
|
569
|
-
@staticmethod
|
|
570
|
-
def clear_traces(db_path: str = ".flock/traces.duckdb") -> dict[str, Any]:
|
|
571
|
-
"""Clear all traces from the DuckDB database.
|
|
572
|
-
|
|
573
|
-
Useful for resetting debug sessions or cleaning up test data.
|
|
574
|
-
|
|
575
|
-
Args:
|
|
576
|
-
db_path: Path to the DuckDB database file (default: ".flock/traces.duckdb")
|
|
577
|
-
|
|
578
|
-
Returns:
|
|
579
|
-
Dictionary with operation results:
|
|
580
|
-
- deleted_count: Number of spans deleted
|
|
581
|
-
- success: Whether operation succeeded
|
|
582
|
-
- error: Error message if failed
|
|
583
|
-
|
|
584
|
-
Examples:
|
|
585
|
-
# Clear all traces
|
|
586
|
-
result = Flock.clear_traces()
|
|
587
|
-
print(f"Deleted {result['deleted_count']} spans")
|
|
588
|
-
|
|
589
|
-
# Custom database path
|
|
590
|
-
result = Flock.clear_traces(".flock/custom_traces.duckdb")
|
|
591
|
-
|
|
592
|
-
# Check if operation succeeded
|
|
593
|
-
if result['success']:
|
|
594
|
-
print("Traces cleared successfully!")
|
|
595
|
-
else:
|
|
596
|
-
print(f"Error: {result['error']}")
|
|
597
|
-
"""
|
|
598
|
-
try:
|
|
599
|
-
from pathlib import Path
|
|
600
|
-
|
|
601
|
-
import duckdb
|
|
602
|
-
|
|
603
|
-
db_file = Path(db_path)
|
|
604
|
-
if not db_file.exists():
|
|
605
|
-
return {
|
|
606
|
-
"success": False,
|
|
607
|
-
"deleted_count": 0,
|
|
608
|
-
"error": f"Database file not found: {db_path}",
|
|
609
|
-
}
|
|
610
|
-
|
|
611
|
-
# Connect and clear
|
|
612
|
-
conn = duckdb.connect(str(db_file))
|
|
613
|
-
try:
|
|
614
|
-
# Get count before deletion
|
|
615
|
-
count_result = conn.execute("SELECT COUNT(*) FROM spans").fetchone()
|
|
616
|
-
deleted_count = count_result[0] if count_result else 0
|
|
617
|
-
|
|
618
|
-
# Delete all spans
|
|
619
|
-
conn.execute("DELETE FROM spans")
|
|
620
|
-
|
|
621
|
-
# Vacuum to reclaim space
|
|
622
|
-
conn.execute("VACUUM")
|
|
623
|
-
|
|
624
|
-
return {"success": True, "deleted_count": deleted_count, "error": None}
|
|
625
|
-
|
|
626
|
-
finally:
|
|
627
|
-
conn.close()
|
|
628
|
-
|
|
629
|
-
except Exception as e:
|
|
630
|
-
return {"success": False, "deleted_count": 0, "error": str(e)}
|
|
631
|
-
|
|
632
|
-
# Runtime --------------------------------------------------------------
|
|
633
|
-
|
|
634
|
-
async def run_until_idle(self) -> None:
|
|
635
|
-
"""Wait for all scheduled agent tasks to complete.
|
|
636
|
-
|
|
637
|
-
This method blocks until the blackboard reaches a stable state where no
|
|
638
|
-
agents are queued for execution. Essential for batch processing and ensuring
|
|
639
|
-
all agent cascades complete before continuing.
|
|
640
|
-
|
|
641
|
-
Note:
|
|
642
|
-
Automatically resets circuit breaker counters and shuts down MCP connections
|
|
643
|
-
when idle. Used with publish() for event-driven workflows.
|
|
644
|
-
|
|
645
|
-
Examples:
|
|
646
|
-
>>> # Event-driven workflow (recommended)
|
|
647
|
-
>>> await flock.publish(task1)
|
|
648
|
-
>>> await flock.publish(task2)
|
|
649
|
-
>>> await flock.run_until_idle() # Wait for all cascades
|
|
650
|
-
>>> # All agents have finished processing
|
|
651
|
-
|
|
652
|
-
>>> # Parallel batch processing
|
|
653
|
-
>>> await flock.publish_many([task1, task2, task3])
|
|
654
|
-
>>> await flock.run_until_idle() # All tasks processed in parallel
|
|
655
|
-
|
|
656
|
-
See Also:
|
|
657
|
-
- publish(): Event-driven artifact publishing
|
|
658
|
-
- publish_many(): Batch publishing for parallel execution
|
|
659
|
-
- invoke(): Direct agent invocation without cascade
|
|
660
|
-
"""
|
|
661
|
-
while self._tasks:
|
|
662
|
-
await asyncio.sleep(0.01)
|
|
663
|
-
pending = {task for task in self._tasks if not task.done()}
|
|
664
|
-
self._tasks = pending
|
|
665
|
-
|
|
666
|
-
# Determine whether any deferred work (timeouts/cleanup) is still pending.
|
|
667
|
-
pending_batches = any(
|
|
668
|
-
accumulator.artifacts for accumulator in self._batch_engine.batches.values()
|
|
669
|
-
)
|
|
670
|
-
pending_correlations = any(
|
|
671
|
-
groups and any(group.waiting_artifacts for group in groups.values())
|
|
672
|
-
for groups in self._correlation_engine.correlation_groups.values()
|
|
673
|
-
)
|
|
674
|
-
|
|
675
|
-
# Ensure watchdog loops remain active while pending work exists.
|
|
676
|
-
if pending_batches and (
|
|
677
|
-
self._batch_timeout_task is None or self._batch_timeout_task.done()
|
|
678
|
-
):
|
|
679
|
-
self._batch_timeout_task = asyncio.create_task(
|
|
680
|
-
self._batch_timeout_checker_loop()
|
|
681
|
-
)
|
|
682
|
-
|
|
683
|
-
if pending_correlations and (
|
|
684
|
-
self._correlation_cleanup_task is None
|
|
685
|
-
or self._correlation_cleanup_task.done()
|
|
686
|
-
):
|
|
687
|
-
self._correlation_cleanup_task = asyncio.create_task(
|
|
688
|
-
self._correlation_cleanup_loop()
|
|
689
|
-
)
|
|
690
|
-
|
|
691
|
-
# If deferred work is still outstanding, consider the orchestrator quiescent for
|
|
692
|
-
# now but leave watchdog tasks running to finish the job.
|
|
693
|
-
if pending_batches or pending_correlations:
|
|
694
|
-
self._agent_iteration_count.clear()
|
|
695
|
-
return
|
|
696
|
-
|
|
697
|
-
# Notify components that orchestrator reached idle state
|
|
698
|
-
if self._components_initialized:
|
|
699
|
-
await self._run_idle()
|
|
700
|
-
|
|
701
|
-
# T068: Reset circuit breaker counters when idle
|
|
702
|
-
self._agent_iteration_count.clear()
|
|
703
|
-
|
|
704
|
-
# Automatically shutdown MCP connections when idle
|
|
705
|
-
await self.shutdown(include_components=False)
|
|
706
|
-
|
|
707
|
-
async def direct_invoke(
|
|
708
|
-
self, agent: Agent, inputs: Sequence[BaseModel | Mapping[str, Any] | Artifact]
|
|
709
|
-
) -> list[Artifact]:
|
|
710
|
-
artifacts = [
|
|
711
|
-
self._normalize_input(value, produced_by="__direct__") for value in inputs
|
|
712
|
-
]
|
|
713
|
-
for artifact in artifacts:
|
|
714
|
-
self._mark_processed(artifact, agent)
|
|
715
|
-
await self._persist_and_schedule(artifact)
|
|
716
|
-
|
|
717
|
-
# Phase 8: Evaluate context BEFORE creating Context (security fix)
|
|
718
|
-
# Provider resolution: per-agent > global > DefaultContextProvider
|
|
719
|
-
from flock.context_provider import (
|
|
720
|
-
BoundContextProvider,
|
|
721
|
-
ContextRequest,
|
|
722
|
-
DefaultContextProvider,
|
|
723
|
-
)
|
|
724
|
-
|
|
725
|
-
inner_provider = (
|
|
726
|
-
getattr(agent, "context_provider", None)
|
|
727
|
-
or self._default_context_provider
|
|
728
|
-
or DefaultContextProvider()
|
|
729
|
-
)
|
|
730
|
-
|
|
731
|
-
# SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
|
|
732
|
-
provider = BoundContextProvider(inner_provider, agent.identity)
|
|
733
|
-
|
|
734
|
-
# Evaluate context using provider (orchestrator controls this!)
|
|
735
|
-
# Engines will receive pre-filtered artifacts via ctx.artifacts
|
|
736
|
-
correlation_id = (
|
|
737
|
-
artifacts[0].correlation_id
|
|
738
|
-
if artifacts and artifacts[0].correlation_id
|
|
739
|
-
else uuid4()
|
|
740
|
-
)
|
|
741
|
-
request = ContextRequest(
|
|
742
|
-
agent=agent,
|
|
743
|
-
correlation_id=correlation_id,
|
|
744
|
-
store=self.store,
|
|
745
|
-
agent_identity=agent.identity,
|
|
746
|
-
exclude_ids={a.id for a in artifacts}, # Exclude input artifacts
|
|
747
|
-
)
|
|
748
|
-
context_artifacts = await provider(request)
|
|
749
|
-
|
|
750
|
-
# Phase 8: Create Context with pre-filtered data (no capabilities!)
|
|
751
|
-
# SECURITY: Context is now just data - engines can't query anything
|
|
752
|
-
ctx = Context(
|
|
753
|
-
artifacts=context_artifacts, # Pre-filtered conversation context
|
|
754
|
-
agent_identity=agent.identity,
|
|
755
|
-
task_id=str(uuid4()),
|
|
756
|
-
correlation_id=correlation_id,
|
|
757
|
-
)
|
|
758
|
-
self._record_agent_run(agent)
|
|
759
|
-
return await agent.execute(ctx, artifacts)
|
|
760
|
-
|
|
761
|
-
async def arun(
|
|
762
|
-
self, agent_builder: AgentBuilder, *inputs: BaseModel
|
|
763
|
-
) -> list[Artifact]:
|
|
764
|
-
"""Execute an agent with inputs and wait for all cascades to complete (async).
|
|
765
|
-
|
|
766
|
-
Convenience method that combines direct agent invocation with run_until_idle().
|
|
767
|
-
Useful for testing and synchronous request-response patterns.
|
|
768
|
-
|
|
769
|
-
Args:
|
|
770
|
-
agent_builder: Agent to execute (from flock.agent())
|
|
771
|
-
*inputs: Input objects (BaseModel instances)
|
|
772
|
-
|
|
773
|
-
Returns:
|
|
774
|
-
Artifacts produced by the agent and any triggered cascades
|
|
775
|
-
|
|
776
|
-
Examples:
|
|
777
|
-
>>> # Test a single agent
|
|
778
|
-
>>> flock = Flock("openai/gpt-4.1")
|
|
779
|
-
>>> pizza_agent = flock.agent("pizza").consumes(Idea).publishes(Pizza)
|
|
780
|
-
>>> results = await flock.arun(pizza_agent, Idea(topic="Margherita"))
|
|
781
|
-
|
|
782
|
-
>>> # Multiple inputs
|
|
783
|
-
>>> results = await flock.arun(
|
|
784
|
-
... task_agent, Task(name="deploy"), Task(name="test")
|
|
785
|
-
... )
|
|
786
|
-
|
|
787
|
-
Note:
|
|
788
|
-
For event-driven workflows, prefer publish() + run_until_idle() for better
|
|
789
|
-
control over execution timing and parallel processing.
|
|
790
|
-
"""
|
|
791
|
-
artifacts = await self.direct_invoke(agent_builder.agent, list(inputs))
|
|
792
|
-
await self.run_until_idle()
|
|
793
|
-
return artifacts
|
|
794
|
-
|
|
795
|
-
def run(self, agent_builder: AgentBuilder, *inputs: BaseModel) -> list[Artifact]:
|
|
796
|
-
"""Synchronous wrapper for arun() - executes agent and waits for completion.
|
|
797
|
-
|
|
798
|
-
Args:
|
|
799
|
-
agent_builder: Agent to execute (from flock.agent())
|
|
800
|
-
*inputs: Input objects (BaseModel instances)
|
|
801
|
-
|
|
802
|
-
Returns:
|
|
803
|
-
Artifacts produced by the agent and any triggered cascades
|
|
804
|
-
|
|
805
|
-
Examples:
|
|
806
|
-
>>> # Synchronous execution (blocks until complete)
|
|
807
|
-
>>> flock = Flock("openai/gpt-4o-mini")
|
|
808
|
-
>>> agent = flock.agent("analyzer").consumes(Data).publishes(Report)
|
|
809
|
-
>>> results = flock.run(agent, Data(value=42))
|
|
810
|
-
|
|
811
|
-
Warning:
|
|
812
|
-
Cannot be called from within an async context. Use arun() instead
|
|
813
|
-
if already in an async function.
|
|
814
|
-
"""
|
|
815
|
-
return asyncio.run(self.arun(agent_builder, *inputs))
|
|
816
|
-
|
|
817
|
-
async def shutdown(self, *, include_components: bool = True) -> None:
|
|
818
|
-
"""Shutdown orchestrator and clean up resources.
|
|
819
|
-
|
|
820
|
-
Args:
|
|
821
|
-
include_components: Whether to invoke component shutdown hooks.
|
|
822
|
-
Internal callers (e.g., run_until_idle) disable this to avoid
|
|
823
|
-
tearing down component state between cascades.
|
|
824
|
-
"""
|
|
825
|
-
if include_components and self._components_initialized:
|
|
826
|
-
await self._run_shutdown()
|
|
827
|
-
|
|
828
|
-
# Cancel correlation cleanup task if running
|
|
829
|
-
if self._correlation_cleanup_task and not self._correlation_cleanup_task.done():
|
|
830
|
-
self._correlation_cleanup_task.cancel()
|
|
831
|
-
try:
|
|
832
|
-
await self._correlation_cleanup_task
|
|
833
|
-
except asyncio.CancelledError:
|
|
834
|
-
pass
|
|
835
|
-
|
|
836
|
-
# Cancel batch timeout checker if running
|
|
837
|
-
if self._batch_timeout_task and not self._batch_timeout_task.done():
|
|
838
|
-
self._batch_timeout_task.cancel()
|
|
839
|
-
try:
|
|
840
|
-
await self._batch_timeout_task
|
|
841
|
-
except asyncio.CancelledError:
|
|
842
|
-
pass
|
|
843
|
-
|
|
844
|
-
# Cancel background server task if running
|
|
845
|
-
if self._server_task and not self._server_task.done():
|
|
846
|
-
self._server_task.cancel()
|
|
847
|
-
try:
|
|
848
|
-
await self._server_task
|
|
849
|
-
except asyncio.CancelledError:
|
|
850
|
-
pass
|
|
851
|
-
# Note: _cleanup_server_callback will handle launcher.stop()
|
|
852
|
-
|
|
853
|
-
if self._mcp_manager is not None:
|
|
854
|
-
await self._mcp_manager.cleanup_all()
|
|
855
|
-
self._mcp_manager = None
|
|
856
|
-
|
|
857
|
-
def cli(self) -> Flock:
|
|
858
|
-
# Placeholder for CLI wiring (rich UI in Step 3)
|
|
859
|
-
return self
|
|
860
|
-
|
|
861
|
-
async def serve(
|
|
862
|
-
self,
|
|
863
|
-
*,
|
|
864
|
-
dashboard: bool = False,
|
|
865
|
-
dashboard_v2: bool = False,
|
|
866
|
-
host: str = "127.0.0.1",
|
|
867
|
-
port: int = 8344,
|
|
868
|
-
blocking: bool = True,
|
|
869
|
-
) -> Task[None] | None:
|
|
870
|
-
"""Start HTTP service for the orchestrator.
|
|
871
|
-
|
|
872
|
-
Args:
|
|
873
|
-
dashboard: Enable real-time dashboard with WebSocket support (default: False)
|
|
874
|
-
dashboard_v2: Launch the new dashboard v2 frontend (implies dashboard=True)
|
|
875
|
-
host: Host to bind to (default: "127.0.0.1")
|
|
876
|
-
port: Port to bind to (default: 8344)
|
|
877
|
-
blocking: If True, blocks until server stops. If False, starts server
|
|
878
|
-
in background and returns task handle (default: True)
|
|
879
|
-
|
|
880
|
-
Returns:
|
|
881
|
-
None if blocking=True, or Task handle if blocking=False
|
|
882
|
-
|
|
883
|
-
Examples:
|
|
884
|
-
# Basic HTTP API (no dashboard) - runs until interrupted
|
|
885
|
-
await orchestrator.serve()
|
|
886
|
-
|
|
887
|
-
# With dashboard (WebSocket + browser launch) - runs until interrupted
|
|
888
|
-
await orchestrator.serve(dashboard=True)
|
|
889
|
-
|
|
890
|
-
# Non-blocking mode - start server in background
|
|
891
|
-
await orchestrator.serve(dashboard=True, blocking=False)
|
|
892
|
-
# Now you can publish messages and run other logic
|
|
893
|
-
await orchestrator.publish(my_message)
|
|
894
|
-
await orchestrator.run_until_idle()
|
|
895
|
-
"""
|
|
896
|
-
# If non-blocking, start server in background task
|
|
897
|
-
if not blocking:
|
|
898
|
-
self._server_task = asyncio.create_task(
|
|
899
|
-
self._serve_impl(
|
|
900
|
-
dashboard=dashboard,
|
|
901
|
-
dashboard_v2=dashboard_v2,
|
|
902
|
-
host=host,
|
|
903
|
-
port=port,
|
|
904
|
-
)
|
|
905
|
-
)
|
|
906
|
-
# Add cleanup callback
|
|
907
|
-
self._server_task.add_done_callback(self._cleanup_server_callback)
|
|
908
|
-
# Give server a moment to start
|
|
909
|
-
await asyncio.sleep(0.1)
|
|
910
|
-
return self._server_task
|
|
911
|
-
|
|
912
|
-
# Blocking mode - run server directly with cleanup
|
|
913
|
-
try:
|
|
914
|
-
await self._serve_impl(
|
|
915
|
-
dashboard=dashboard,
|
|
916
|
-
dashboard_v2=dashboard_v2,
|
|
917
|
-
host=host,
|
|
918
|
-
port=port,
|
|
919
|
-
)
|
|
920
|
-
finally:
|
|
921
|
-
# In blocking mode, manually cleanup dashboard launcher
|
|
922
|
-
if self._dashboard_launcher is not None:
|
|
923
|
-
self._dashboard_launcher.stop()
|
|
924
|
-
self._dashboard_launcher = None
|
|
925
|
-
return None
|
|
926
|
-
|
|
927
|
-
def _cleanup_server_callback(self, task: Task[None]) -> None:
|
|
928
|
-
"""Cleanup callback when background server task completes."""
|
|
929
|
-
# Stop dashboard launcher if it was started
|
|
930
|
-
if self._dashboard_launcher is not None:
|
|
931
|
-
try:
|
|
932
|
-
self._dashboard_launcher.stop()
|
|
933
|
-
except Exception as e:
|
|
934
|
-
self._logger.warning(f"Failed to stop dashboard launcher: {e}")
|
|
935
|
-
finally:
|
|
936
|
-
self._dashboard_launcher = None
|
|
937
|
-
|
|
938
|
-
# Clear server task reference
|
|
939
|
-
self._server_task = None
|
|
940
|
-
|
|
941
|
-
# Log any exceptions from the task
|
|
942
|
-
try:
|
|
943
|
-
exc = task.exception()
|
|
944
|
-
if exc and not isinstance(exc, asyncio.CancelledError):
|
|
945
|
-
self._logger.error(f"Server task failed: {exc}", exc_info=exc)
|
|
946
|
-
except asyncio.CancelledError:
|
|
947
|
-
pass # Normal cancellation
|
|
948
|
-
|
|
949
|
-
async def _serve_impl(
|
|
950
|
-
self,
|
|
951
|
-
*,
|
|
952
|
-
dashboard: bool = False,
|
|
953
|
-
dashboard_v2: bool = False,
|
|
954
|
-
host: str = "127.0.0.1",
|
|
955
|
-
port: int = 8344,
|
|
956
|
-
) -> None:
|
|
957
|
-
"""Internal implementation of serve() - actual server logic."""
|
|
958
|
-
if dashboard_v2:
|
|
959
|
-
dashboard = True
|
|
960
|
-
|
|
961
|
-
if not dashboard:
|
|
962
|
-
# Standard service without dashboard
|
|
963
|
-
from flock.service import BlackboardHTTPService
|
|
964
|
-
|
|
965
|
-
service = BlackboardHTTPService(self)
|
|
966
|
-
await service.run_async(host=host, port=port)
|
|
967
|
-
return
|
|
968
|
-
|
|
969
|
-
# Dashboard mode: integrate event collection and WebSocket
|
|
970
|
-
from flock.dashboard.collector import DashboardEventCollector
|
|
971
|
-
from flock.dashboard.launcher import DashboardLauncher
|
|
972
|
-
from flock.dashboard.service import DashboardHTTPService
|
|
973
|
-
from flock.dashboard.websocket import WebSocketManager
|
|
974
|
-
|
|
975
|
-
# Create dashboard components
|
|
976
|
-
websocket_manager = WebSocketManager()
|
|
977
|
-
event_collector = DashboardEventCollector(store=self.store)
|
|
978
|
-
event_collector.set_websocket_manager(websocket_manager)
|
|
979
|
-
await event_collector.load_persistent_snapshots()
|
|
980
|
-
|
|
981
|
-
# Store collector reference for agents added later
|
|
982
|
-
self._dashboard_collector = event_collector
|
|
983
|
-
# Store websocket manager for real-time event emission (Phase 1.2)
|
|
984
|
-
self._websocket_manager = websocket_manager
|
|
985
|
-
|
|
986
|
-
# Phase 6+7: Set class-level WebSocket broadcast wrapper (dashboard mode)
|
|
987
|
-
async def _broadcast_wrapper(event):
|
|
988
|
-
"""Isolated broadcast wrapper - no reference chain to orchestrator."""
|
|
989
|
-
return await websocket_manager.broadcast(event)
|
|
990
|
-
|
|
991
|
-
from flock.agent import Agent
|
|
992
|
-
|
|
993
|
-
Agent._websocket_broadcast_global = _broadcast_wrapper
|
|
994
|
-
|
|
995
|
-
# Inject event collector into all existing agents
|
|
996
|
-
for agent in self._agents.values():
|
|
997
|
-
# Add dashboard collector with priority ordering handled by agent
|
|
998
|
-
agent._add_utilities([event_collector])
|
|
999
|
-
|
|
1000
|
-
# Start dashboard launcher (npm process + browser)
|
|
1001
|
-
launcher_kwargs: dict[str, Any] = {"port": port}
|
|
1002
|
-
if dashboard_v2:
|
|
1003
|
-
dashboard_pkg_dir = Path(__file__).parent / "dashboard"
|
|
1004
|
-
launcher_kwargs["frontend_dir"] = dashboard_pkg_dir.parent / "frontend_v2"
|
|
1005
|
-
launcher_kwargs["static_dir"] = dashboard_pkg_dir / "static_v2"
|
|
1006
|
-
|
|
1007
|
-
launcher = DashboardLauncher(**launcher_kwargs)
|
|
1008
|
-
launcher.start()
|
|
1009
|
-
|
|
1010
|
-
# Create dashboard HTTP service
|
|
1011
|
-
service = DashboardHTTPService(
|
|
1012
|
-
orchestrator=self,
|
|
1013
|
-
websocket_manager=websocket_manager,
|
|
1014
|
-
event_collector=event_collector,
|
|
1015
|
-
use_v2=dashboard_v2,
|
|
1016
|
-
)
|
|
1017
|
-
|
|
1018
|
-
# Store launcher for cleanup
|
|
1019
|
-
self._dashboard_launcher = launcher
|
|
1020
|
-
|
|
1021
|
-
# Run service (blocking call)
|
|
1022
|
-
# Note: Cleanup is handled by serve() (blocking mode) or callback (non-blocking mode)
|
|
1023
|
-
await service.run_async(host=host, port=port)
|
|
1024
|
-
|
|
1025
|
-
# Scheduling -----------------------------------------------------------
|
|
1026
|
-
|
|
1027
|
-
async def publish(
|
|
1028
|
-
self,
|
|
1029
|
-
obj: BaseModel | dict | Artifact,
|
|
1030
|
-
*,
|
|
1031
|
-
visibility: Visibility | None = None,
|
|
1032
|
-
correlation_id: str | None = None,
|
|
1033
|
-
partition_key: str | None = None,
|
|
1034
|
-
tags: set[str] | None = None,
|
|
1035
|
-
is_dashboard: bool = False,
|
|
1036
|
-
) -> Artifact:
|
|
1037
|
-
"""Publish an artifact to the blackboard (event-driven).
|
|
1038
|
-
|
|
1039
|
-
All agents with matching subscriptions will be triggered according to
|
|
1040
|
-
their filters (type, predicates, visibility, etc).
|
|
1041
|
-
|
|
1042
|
-
Args:
|
|
1043
|
-
obj: Object to publish (BaseModel instance, dict, or Artifact)
|
|
1044
|
-
visibility: Access control (defaults to PublicVisibility)
|
|
1045
|
-
correlation_id: Optional correlation ID for request tracing
|
|
1046
|
-
partition_key: Optional partition key for sharding
|
|
1047
|
-
tags: Optional tags for channel-based routing
|
|
1048
|
-
|
|
1049
|
-
Returns:
|
|
1050
|
-
The published Artifact
|
|
1051
|
-
|
|
1052
|
-
Examples:
|
|
1053
|
-
>>> # Publish a model instance (recommended)
|
|
1054
|
-
>>> task = Task(name="Deploy", priority=5)
|
|
1055
|
-
>>> await orchestrator.publish(task)
|
|
1056
|
-
|
|
1057
|
-
>>> # Publish with custom visibility
|
|
1058
|
-
>>> await orchestrator.publish(
|
|
1059
|
-
... task, visibility=PrivateVisibility(agents={"admin"})
|
|
1060
|
-
... )
|
|
1061
|
-
|
|
1062
|
-
>>> # Publish with tags for channel routing
|
|
1063
|
-
>>> await orchestrator.publish(task, tags={"urgent", "backend"})
|
|
1064
|
-
"""
|
|
1065
|
-
# Handle different input types
|
|
1066
|
-
if isinstance(obj, Artifact):
|
|
1067
|
-
# Already an artifact - publish as-is
|
|
1068
|
-
artifact = obj
|
|
1069
|
-
elif isinstance(obj, BaseModel):
|
|
1070
|
-
# BaseModel instance - get type from registry
|
|
1071
|
-
type_name = type_registry.name_for(type(obj))
|
|
1072
|
-
artifact = Artifact(
|
|
1073
|
-
type=type_name,
|
|
1074
|
-
payload=obj.model_dump(),
|
|
1075
|
-
produced_by="external",
|
|
1076
|
-
visibility=visibility or PublicVisibility(),
|
|
1077
|
-
correlation_id=correlation_id or uuid4(),
|
|
1078
|
-
partition_key=partition_key,
|
|
1079
|
-
tags=tags or set(),
|
|
1080
|
-
)
|
|
1081
|
-
elif isinstance(obj, dict):
|
|
1082
|
-
# Dict must have 'type' key
|
|
1083
|
-
if "type" not in obj:
|
|
1084
|
-
raise ValueError(
|
|
1085
|
-
"Dict input must contain 'type' key. "
|
|
1086
|
-
"Example: {'type': 'Task', 'name': 'foo', 'priority': 5}"
|
|
1087
|
-
)
|
|
1088
|
-
# Support both {'type': 'X', 'payload': {...}} and {'type': 'X', ...}
|
|
1089
|
-
type_name = obj["type"]
|
|
1090
|
-
if "payload" in obj:
|
|
1091
|
-
payload = obj["payload"]
|
|
1092
|
-
else:
|
|
1093
|
-
payload = {k: v for k, v in obj.items() if k != "type"}
|
|
1094
|
-
|
|
1095
|
-
artifact = Artifact(
|
|
1096
|
-
type=type_name,
|
|
1097
|
-
payload=payload,
|
|
1098
|
-
produced_by="external",
|
|
1099
|
-
visibility=visibility or PublicVisibility(),
|
|
1100
|
-
correlation_id=correlation_id,
|
|
1101
|
-
partition_key=partition_key,
|
|
1102
|
-
tags=tags or set(),
|
|
1103
|
-
)
|
|
1104
|
-
else:
|
|
1105
|
-
raise TypeError(
|
|
1106
|
-
f"Cannot publish object of type {type(obj).__name__}. "
|
|
1107
|
-
"Expected BaseModel, dict, or Artifact."
|
|
1108
|
-
)
|
|
1109
|
-
|
|
1110
|
-
# Persist and schedule matching agents
|
|
1111
|
-
await self._persist_and_schedule(artifact)
|
|
1112
|
-
return artifact
|
|
1113
|
-
|
|
1114
|
-
async def publish_many(
|
|
1115
|
-
self, objects: Iterable[BaseModel | dict | Artifact], **kwargs: Any
|
|
1116
|
-
) -> list[Artifact]:
|
|
1117
|
-
"""Publish multiple artifacts at once (event-driven).
|
|
1118
|
-
|
|
1119
|
-
Args:
|
|
1120
|
-
objects: Iterable of objects to publish
|
|
1121
|
-
**kwargs: Passed to each publish() call (visibility, tags, etc)
|
|
1122
|
-
|
|
1123
|
-
Returns:
|
|
1124
|
-
List of published Artifacts
|
|
1125
|
-
|
|
1126
|
-
Example:
|
|
1127
|
-
>>> tasks = [
|
|
1128
|
-
... Task(name="Deploy", priority=5),
|
|
1129
|
-
... Task(name="Test", priority=3),
|
|
1130
|
-
... Task(name="Document", priority=1),
|
|
1131
|
-
... ]
|
|
1132
|
-
>>> await orchestrator.publish_many(tasks, tags={"sprint-3"})
|
|
1133
|
-
"""
|
|
1134
|
-
artifacts = []
|
|
1135
|
-
for obj in objects:
|
|
1136
|
-
artifact = await self.publish(obj, **kwargs)
|
|
1137
|
-
artifacts.append(artifact)
|
|
1138
|
-
return artifacts
|
|
1139
|
-
|
|
1140
|
-
# -----------------------------------------------------------------------------
|
|
1141
|
-
# NEW DIRECT INVOCATION API - Explicit Control
|
|
1142
|
-
# -----------------------------------------------------------------------------
|
|
1143
|
-
|
|
1144
|
-
async def invoke(
|
|
1145
|
-
self,
|
|
1146
|
-
agent: Agent | AgentBuilder,
|
|
1147
|
-
obj: BaseModel,
|
|
1148
|
-
*,
|
|
1149
|
-
publish_outputs: bool = True,
|
|
1150
|
-
timeout: float | None = None,
|
|
1151
|
-
) -> list[Artifact]:
|
|
1152
|
-
"""Directly invoke a specific agent (bypasses subscription matching).
|
|
1153
|
-
|
|
1154
|
-
This executes the agent immediately without checking subscriptions or
|
|
1155
|
-
predicates. Useful for testing or synchronous request-response patterns.
|
|
1156
|
-
|
|
1157
|
-
Args:
|
|
1158
|
-
agent: Agent or AgentBuilder to invoke
|
|
1159
|
-
obj: Input object (BaseModel instance)
|
|
1160
|
-
publish_outputs: If True, publish outputs to blackboard for cascade
|
|
1161
|
-
            timeout: Optional timeout in seconds

        Returns:
            Artifacts produced by the agent.

        Warning:
            This bypasses subscription filters and predicates. For event-driven
            coordination, use publish() instead.

        Examples:
            >>> # Testing: execute an agent without triggering others
            >>> results = await orchestrator.invoke(
            ...     agent, Task(name="test", priority=5), publish_outputs=False
            ... )

            >>> # HTTP endpoint: execute a specific agent, allow the cascade
            >>> results = await orchestrator.invoke(
            ...     movie_agent, Idea(topic="AI", genre="comedy"), publish_outputs=True
            ... )
            >>> await orchestrator.run_until_idle()
        """
        from asyncio import wait_for
        from uuid import uuid4

        # Get the Agent instance
        agent_obj = agent.agent if isinstance(agent, AgentBuilder) else agent

        # Create the artifact (don't publish to the blackboard yet)
        type_name = type_registry.name_for(type(obj))
        artifact = Artifact(
            type=type_name,
            payload=obj.model_dump(),
            produced_by="__direct__",
            visibility=PublicVisibility(),
        )

        # Phase 8: Evaluate context BEFORE creating Context (security fix).
        # Provider resolution: per-agent > global > DefaultContextProvider.
        from flock.context_provider import (
            BoundContextProvider,
            ContextRequest,
            DefaultContextProvider,
        )

        inner_provider = (
            getattr(agent_obj, "context_provider", None)
            or self._default_context_provider
            or DefaultContextProvider()
        )

        # SECURITY FIX: wrap the provider with BoundContextProvider to prevent
        # identity spoofing.
        provider = BoundContextProvider(inner_provider, agent_obj.identity)

        # Evaluate context using the provider (the orchestrator controls this!)
        correlation_id = artifact.correlation_id if artifact.correlation_id else uuid4()
        request = ContextRequest(
            agent=agent_obj,
            correlation_id=correlation_id,
            store=self.store,
            agent_identity=agent_obj.identity,
            exclude_ids={artifact.id},  # Exclude the input artifact
        )
        context_artifacts = await provider(request)

        # Phase 8: Create the Context with pre-filtered data (no capabilities!).
        # SECURITY: Context is now just data - engines can't query anything.
        ctx = Context(
            artifacts=context_artifacts,  # Pre-filtered conversation context
            agent_identity=agent_obj.identity,
            task_id=str(uuid4()),
            correlation_id=correlation_id,
        )
        self._record_agent_run(agent_obj)

        # Execute with an optional timeout
        if timeout:
            execution = agent_obj.execute(ctx, [artifact])
            outputs = await wait_for(execution, timeout=timeout)
        else:
            outputs = await agent_obj.execute(ctx, [artifact])

        # Phase 6: The orchestrator publishes outputs (security fix).
        # Agents return artifacts; the orchestrator validates and publishes.
        if publish_outputs:
            for output in outputs:
                await self._persist_and_schedule(output)

        return outputs

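# --- Illustrative sketch (editor's addition, not part of this module) ---
# invoke()'s timeout parameter is implemented with asyncio.wait_for above, so
# expiry surfaces to the caller as TimeoutError (asyncio.TimeoutError on
# Python < 3.11). Names follow the docstring examples; the fallback is invented.
async def invoke_with_deadline(orchestrator, agent, payload):
    try:
        return await orchestrator.invoke(agent, payload, timeout=5.0)
    except TimeoutError:
        return []  # deadline hit: the caller decides how to degrade
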
    async def _persist_and_schedule(self, artifact: Artifact) -> None:
        await self.store.publish(artifact)
        self.metrics["artifacts_published"] += 1
        await self._schedule_artifact(artifact)

    # Component Hook Runners ───────────────────────────────────────

    async def _run_initialize(self) -> None:
        """Initialize all components in priority order (called once).

        Executes the on_initialize hook for each component. Sets the
        _components_initialized flag to prevent repeated initialization.
        """
        if self._components_initialized:
            return

        self._logger.info(
            f"Initializing {len(self._components)} orchestrator components"
        )

        for component in self._components:
            comp_name = component.name or component.__class__.__name__
            self._logger.debug(
                f"Initializing component: name={comp_name}, priority={component.priority}"
            )

            try:
                await component.on_initialize(self)
            except Exception as e:
                self._logger.exception(
                    f"Component initialization failed: name={comp_name}, error={e!s}"
                )
                raise

        self._components_initialized = True
        self._logger.info(f"All components initialized: count={len(self._components)}")

    async def _run_artifact_published(self, artifact: Artifact) -> Artifact | None:
        """Run on_artifact_published hooks (returns the modified artifact, or None to block).

        Components execute in priority order, each receiving the artifact from the
        previous component (chaining). If any component returns None, the artifact
        is blocked and scheduling stops.
        """
        current_artifact = artifact

        for component in self._components:
            comp_name = component.name or component.__class__.__name__
            self._logger.debug(
                f"Running on_artifact_published: component={comp_name}, "
                f"artifact_type={current_artifact.type}, artifact_id={current_artifact.id}"
            )

            try:
                result = await component.on_artifact_published(self, current_artifact)

                if result is None:
                    self._logger.info(
                        f"Artifact blocked by component: component={comp_name}, "
                        f"artifact_type={current_artifact.type}, artifact_id={current_artifact.id}"
                    )
                    return None

                current_artifact = result
            except Exception as e:
                self._logger.exception(
                    f"Component hook failed: component={comp_name}, "
                    f"hook=on_artifact_published, error={e!s}"
                )
                raise

        return current_artifact

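# --- Illustrative sketch (editor's addition, not part of this module) ---
# A minimal component for the on_artifact_published contract above: return the
# (possibly modified) artifact to continue the chain, or None to block it.
# OrchestratorComponent is assumed importable from the same module as the
# ScheduleDecision/CollectionResult used in this file; the rule is invented.
from flock.orchestrator_component import OrchestratorComponent

class DropInternalArtifacts(OrchestratorComponent):
    async def on_artifact_published(self, orchestrator, artifact):
        if artifact.type.startswith("internal."):
            return None  # blocked: never reaches subscribers
        return artifact  # handed unchanged to the next component
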
    async def _run_before_schedule(
        self, artifact: Artifact, agent: Agent, subscription: Subscription
    ) -> ScheduleDecision:
        """Run on_before_schedule hooks (returns CONTINUE, SKIP, or DEFER).

        Components execute in priority order. The first component to return SKIP
        or DEFER stops execution, and that decision is returned.
        """
        from flock.orchestrator_component import ScheduleDecision

        for component in self._components:
            comp_name = component.name or component.__class__.__name__

            self._logger.debug(
                f"Running on_before_schedule: component={comp_name}, "
                f"agent={agent.name}, artifact_type={artifact.type}"
            )

            try:
                decision = await component.on_before_schedule(
                    self, artifact, agent, subscription
                )

                if decision == ScheduleDecision.SKIP:
                    self._logger.info(
                        f"Scheduling skipped by component: component={comp_name}, "
                        f"agent={agent.name}, artifact_type={artifact.type}, decision=SKIP"
                    )
                    return ScheduleDecision.SKIP

                if decision == ScheduleDecision.DEFER:
                    self._logger.debug(
                        f"Scheduling deferred by component: component={comp_name}, "
                        f"agent={agent.name}, decision=DEFER"
                    )
                    return ScheduleDecision.DEFER

            except Exception as e:
                self._logger.exception(
                    f"Component hook failed: component={comp_name}, "
                    f"hook=on_before_schedule, error={e!s}"
                )
                raise

        return ScheduleDecision.CONTINUE

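# --- Illustrative sketch (editor's addition, not part of this module) ---
# A veto component for on_before_schedule: the first SKIP or DEFER wins, so
# component priority matters. OrchestratorComponent is an assumed import; the
# "quiet hours" rule is invented for the example.
from datetime import datetime

from flock.orchestrator_component import OrchestratorComponent, ScheduleDecision

class QuietHours(OrchestratorComponent):
    async def on_before_schedule(self, orchestrator, artifact, agent, subscription):
        if datetime.now().hour < 6:
            return ScheduleDecision.SKIP  # drop this match entirely
        return ScheduleDecision.CONTINUE  # let later components decide
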
    async def _run_collect_artifacts(
        self, artifact: Artifact, agent: Agent, subscription: Subscription
    ) -> CollectionResult:
        """Run on_collect_artifacts hooks (returns the first non-None result).

        Components execute in priority order. The first component to return a
        non-None result wins (short-circuit). If all return None, the default is
        immediate scheduling.
        """
        from flock.orchestrator_component import CollectionResult

        for component in self._components:
            comp_name = component.name or component.__class__.__name__

            self._logger.debug(
                f"Running on_collect_artifacts: component={comp_name}, "
                f"agent={agent.name}, artifact_type={artifact.type}"
            )

            try:
                result = await component.on_collect_artifacts(
                    self, artifact, agent, subscription
                )

                if result is not None:
                    self._logger.debug(
                        f"Collection handled by component: component={comp_name}, "
                        f"complete={result.complete}, artifact_count={len(result.artifacts)}"
                    )
                    return result
            except Exception as e:
                self._logger.exception(
                    f"Component hook failed: component={comp_name}, "
                    f"hook=on_collect_artifacts, error={e!s}"
                )
                raise

        # Default: immediate scheduling with the single artifact
        self._logger.debug(
            f"No component handled collection, using default: "
            f"agent={agent.name}, artifact_type={artifact.type}"
        )
        return CollectionResult.immediate([artifact])

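# --- Illustrative sketch (editor's addition, not part of this module) ---
# Collection components either claim the artifact (non-None result) or defer
# by returning None, per the short-circuit above. Only the CollectionResult
# API visible in this file (immediate(), .complete, .artifacts) is used;
# OrchestratorComponent is an assumed import.
from flock.orchestrator_component import CollectionResult, OrchestratorComponent

class PassThroughCollector(OrchestratorComponent):
    async def on_collect_artifacts(self, orchestrator, artifact, agent, subscription):
        if subscription.batch is not None:
            return None  # not ours: let a batching component (or the default) handle it
        return CollectionResult.immediate([artifact])  # schedule right away
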
    async def _run_before_agent_schedule(
        self, agent: Agent, artifacts: list[Artifact]
    ) -> list[Artifact] | None:
        """Run on_before_agent_schedule hooks (returns modified artifacts, or None to block).

        Components execute in priority order, each receiving the artifacts from
        the previous component (chaining). If any component returns None,
        scheduling is blocked.
        """
        current_artifacts = artifacts

        for component in self._components:
            comp_name = component.name or component.__class__.__name__

            self._logger.debug(
                f"Running on_before_agent_schedule: component={comp_name}, "
                f"agent={agent.name}, artifact_count={len(current_artifacts)}"
            )

            try:
                result = await component.on_before_agent_schedule(
                    self, agent, current_artifacts
                )

                if result is None:
                    self._logger.info(
                        f"Agent scheduling blocked by component: component={comp_name}, "
                        f"agent={agent.name}"
                    )
                    return None

                current_artifacts = result
            except Exception as e:
                self._logger.exception(
                    f"Component hook failed: component={comp_name}, "
                    f"hook=on_before_agent_schedule, error={e!s}"
                )
                raise

        return current_artifacts

    async def _run_agent_scheduled(
        self, agent: Agent, artifacts: list[Artifact], task: Task[Any]
    ) -> None:
        """Run on_agent_scheduled hooks (notification only, non-blocking).

        Components execute in priority order. Exceptions are logged but neither
        prevent other components from executing nor block scheduling.
        """
        for component in self._components:
            comp_name = component.name or component.__class__.__name__

            self._logger.debug(
                f"Running on_agent_scheduled: component={comp_name}, "
                f"agent={agent.name}, artifact_count={len(artifacts)}"
            )

            try:
                await component.on_agent_scheduled(self, agent, artifacts, task)
            except Exception as e:
                self._logger.warning(
                    f"Component notification hook failed (non-critical): "
                    f"component={comp_name}, hook=on_agent_scheduled, error={e!s}"
                )
                # Don't propagate - this is a notification hook

    async def _run_idle(self) -> None:
        """Run on_orchestrator_idle hooks when the orchestrator becomes idle.

        Components execute in priority order. Exceptions are logged but don't
        prevent other components from executing.
        """
        self._logger.debug(
            f"Running on_orchestrator_idle hooks: component_count={len(self._components)}"
        )

        for component in self._components:
            comp_name = component.name or component.__class__.__name__

            try:
                await component.on_orchestrator_idle(self)
            except Exception as e:
                self._logger.warning(
                    f"Component idle hook failed (non-critical): "
                    f"component={comp_name}, hook=on_orchestrator_idle, error={e!s}"
                )

    async def _run_shutdown(self) -> None:
        """Run on_shutdown hooks when the orchestrator shuts down.

        Components execute in priority order. Exceptions are logged but don't
        prevent shutdown of the remaining components (best-effort cleanup).
        """
        self._logger.info(
            f"Shutting down {len(self._components)} orchestrator components"
        )

        for component in self._components:
            comp_name = component.name or component.__class__.__name__
            self._logger.debug(f"Shutting down component: name={comp_name}")

            try:
                await component.on_shutdown(self)
            except Exception as e:
                self._logger.exception(
                    f"Component shutdown failed: component={comp_name}, "
                    f"hook=on_shutdown, error={e!s}"
                )
                # Continue shutting down the other components

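# --- Illustrative sketch (editor's addition, not part of this module) ---
# Pairing on_initialize with on_shutdown matches the lifecycle above:
# initialization errors abort startup (re-raised), shutdown is best-effort.
# Assumes an attribute-friendly base class; aiosqlite is only an example
# resource, not a dependency of this package.
from flock.orchestrator_component import OrchestratorComponent

class ConnectionComponent(OrchestratorComponent):
    _conn = None

    async def on_initialize(self, orchestrator):
        import aiosqlite  # assumed dependency, illustration only
        self._conn = await aiosqlite.connect(":memory:")

    async def on_shutdown(self, orchestrator):
        if self._conn is not None:
            await self._conn.close()
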
    # Scheduling ───────────────────────────────────────────────────

    async def _schedule_artifact(self, artifact: Artifact) -> None:
        """Schedule agents for an artifact using component hooks.

        Refactored to use the OrchestratorComponent hook system for extensibility.
        Components can modify the artifact, control scheduling, and handle collection.
        """
        from flock.orchestrator_component import ScheduleDecision

        # Phase 3: Initialize components on the first artifact
        if not self._components_initialized:
            await self._run_initialize()

        # Phase 3: Component hook - artifact published (can transform or block)
        artifact = await self._run_artifact_published(artifact)
        if artifact is None:
            return  # Artifact blocked by a component

        for agent in self.agents:
            identity = agent.identity
            for subscription in agent.subscriptions:
                if not subscription.accepts_events():
                    continue

                # T066: Check prevent_self_trigger
                if agent.prevent_self_trigger and artifact.produced_by == agent.name:
                    continue  # Skip - the agent produced this artifact (prevents feedback loops)

                # Visibility check
                if not self._check_visibility(artifact, identity):
                    continue

                # Subscription match check
                if not subscription.matches(artifact):
                    continue

                # Phase 3: Component hook - before schedule (circuit breaker, deduplication, etc.)
                decision = await self._run_before_schedule(
                    artifact, agent, subscription
                )
                if decision == ScheduleDecision.SKIP:
                    continue  # Skip this subscription
                if decision == ScheduleDecision.DEFER:
                    continue  # Defer for later (batching/correlation)

                # Phase 3: Component hook - collect artifacts (handles AND gates, correlation, batching)
                collection = await self._run_collect_artifacts(
                    artifact, agent, subscription
                )
                if not collection.complete:
                    continue  # Still collecting (AND gate, correlation, or batch incomplete)

                artifacts = collection.artifacts

                # Phase 3: Component hook - before agent schedule (final validation/transformation)
                artifacts = await self._run_before_agent_schedule(agent, artifacts)
                if artifacts is None:
                    continue  # Scheduling blocked by a component

                # Collection complete: schedule the agent with the collected artifacts
                is_batch_execution = subscription.batch is not None
                task = self._schedule_task(
                    agent, artifacts, is_batch=is_batch_execution
                )

                # Phase 3: Component hook - agent scheduled (notification)
                await self._run_agent_scheduled(agent, artifacts, task)

    def _schedule_task(
        self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
    ) -> Task[Any]:
        """Schedule an agent task and return the task handle."""
        task = asyncio.create_task(
            self._run_agent_task(agent, artifacts, is_batch=is_batch)
        )
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)

        # Track the task by correlation_id for workflow status tracking
        correlation_id = artifacts[0].correlation_id if artifacts else None
        if correlation_id:
            if correlation_id not in self._correlation_tasks:
                self._correlation_tasks[correlation_id] = set()
            self._correlation_tasks[correlation_id].add(task)

            # Clean up correlation tracking when the task completes
            def cleanup_correlation(t: Task[Any]) -> None:
                if correlation_id in self._correlation_tasks:
                    self._correlation_tasks[correlation_id].discard(t)
                    # Remove empty sets to prevent memory leaks
                    if not self._correlation_tasks[correlation_id]:
                        del self._correlation_tasks[correlation_id]

            task.add_done_callback(cleanup_correlation)

        return task

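# --- Illustrative sketch (editor's addition, not part of this module) ---
# The per-correlation task tracking above enables "wait for one workflow".
# _correlation_tasks is private state, so this is conceptual, not a public API.
import asyncio

async def wait_for_workflow(orchestrator, correlation_id):
    tasks = orchestrator._correlation_tasks.get(correlation_id, set())
    if tasks:
        await asyncio.gather(*tasks, return_exceptions=True)
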
    def _record_agent_run(self, agent: Agent) -> None:
        self.metrics["agent_runs"] += 1

    def _mark_processed(self, artifact: Artifact, agent: Agent) -> None:
        key = (str(artifact.id), agent.name)
        self._processed.add(key)

    def _seen_before(self, artifact: Artifact, agent: Agent) -> bool:
        key = (str(artifact.id), agent.name)
        return key in self._processed

    async def _run_agent_task(
        self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
    ) -> None:
        correlation_id = artifacts[0].correlation_id if artifacts else uuid4()

        # Phase 8: Evaluate context BEFORE creating Context (security fix).
        # Provider resolution: per-agent > global > DefaultContextProvider.
        from flock.context_provider import (
            BoundContextProvider,
            ContextRequest,
            DefaultContextProvider,
        )

        inner_provider = (
            getattr(agent, "context_provider", None)
            or self._default_context_provider
            or DefaultContextProvider()
        )

        # SECURITY FIX: wrap the provider with BoundContextProvider to prevent
        # identity spoofing.
        provider = BoundContextProvider(inner_provider, agent.identity)

        # Evaluate context using the provider (the orchestrator controls this!).
        # Engines will receive pre-filtered artifacts via ctx.artifacts.
        request = ContextRequest(
            agent=agent,
            correlation_id=correlation_id,
            store=self.store,
            agent_identity=agent.identity,
            exclude_ids={a.id for a in artifacts},  # Exclude the input artifacts
        )
        context_artifacts = await provider(request)

        # Phase 8: Create the Context with pre-filtered data (no capabilities!).
        # SECURITY: Context is now just data - engines can't query anything.
        ctx = Context(
            artifacts=context_artifacts,  # Pre-filtered conversation context
            agent_identity=agent.identity,
            task_id=str(uuid4()),
            correlation_id=correlation_id,
            is_batch=is_batch,
        )
        self._record_agent_run(agent)

        # Phase 6: Execute the agent (returns artifacts, doesn't publish).
        # Wrapped in try/except to handle agent failures gracefully.
        try:
            outputs = await agent.execute(ctx, artifacts)
        except asyncio.CancelledError:
            # Re-raise cancellations immediately (shutdown, user cancellation).
            # These are NOT errors - they're intentional interruptions.
            self._logger.debug(
                f"Agent '{agent.name}' task cancelled (task={ctx.task_id})"
            )
            raise  # Propagate cancellation so task.cancelled() == True
        except Exception as exc:
            # The agent already called component on_error hooks before re-raising.
            # Now the orchestrator publishes an error artifact and continues the workflow.
            from flock.system_artifacts import WorkflowError

            error_artifact_data = WorkflowError(
                failed_agent=agent.name,
                error_type=type(exc).__name__,
                error_message=str(exc),
                timestamp=datetime.now(UTC),
                task_id=ctx.task_id,
            )

            # Build and publish the error artifact with the correlation_id
            from flock.artifacts import ArtifactSpec

            error_spec = ArtifactSpec.from_model(WorkflowError)
            error_artifact = error_spec.build(
                produced_by=f"orchestrator#{agent.name}",
                data=error_artifact_data.model_dump(),
                correlation_id=correlation_id,
            )

            await self._persist_and_schedule(error_artifact)

            # Log the error but don't re-raise - the workflow continues
            self._logger.error(
                f"Agent '{agent.name}' failed (task={ctx.task_id}): {exc}",
                exc_info=True,
            )
            return  # Exit early - no outputs to publish

        # Phase 6: The orchestrator publishes outputs (security fix).
        # This fixes Vulnerability #2 (WRITE bypass) - agents can't bypass validation.
        for output in outputs:
            await self._persist_and_schedule(output)

        if artifacts:
            try:
                timestamp = datetime.now(UTC)
                records = [
                    ConsumptionRecord(
                        artifact_id=artifact.id,
                        consumer=agent.name,
                        run_id=ctx.task_id,
                        correlation_id=str(correlation_id) if correlation_id else None,
                        consumed_at=timestamp,
                    )
                    for artifact in artifacts
                ]
                await self.store.record_consumptions(records)
            except NotImplementedError:
                pass
            except Exception as exc:  # pragma: no cover - defensive logging
                self._logger.exception("Failed to record artifact consumption: %s", exc)

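# --- Illustrative sketch (editor's addition, not part of this module) ---
# Failures become WorkflowError artifacts above instead of crashing the
# workflow, so downstream code can detect them by registered type name.
# type_registry refers to the module-level registry already used in this file.
from flock.system_artifacts import WorkflowError

def is_workflow_error(artifact) -> bool:
    return artifact.type == type_registry.name_for(WorkflowError)
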
    # Phase 1.2: Logic Operations Event Emission ----------------------------

    async def _emit_correlation_updated_event(
        self, *, agent_name: str, subscription_index: int, artifact: Artifact
    ) -> None:
        """Emit a CorrelationGroupUpdatedEvent for real-time dashboard updates.

        Called when an artifact is added to a correlation group that is not yet
        complete.

        Args:
            agent_name: Name of the agent with the JoinSpec subscription
            subscription_index: Index of the subscription in the agent's subscriptions list
            artifact: The artifact that triggered this update
        """
        # Only emit if the dashboard is enabled
        if self._websocket_manager is None:
            return

        from flock.dashboard.events import CorrelationGroupUpdatedEvent
        from flock.dashboard.service import _get_correlation_groups

        # Get the current correlation-group state from the engine
        groups = _get_correlation_groups(
            self._correlation_engine, agent_name, subscription_index
        )

        if not groups:
            return  # No groups to report (shouldn't happen, but defensive)

        # Find the group that was just updated. For now, emit an event for the
        # FIRST group that is still waiting; in practice, the artifact we just
        # added should be in one of these groups.
        for group_state in groups:
            if not group_state["is_complete"]:
                # Build and emit the event
                event = CorrelationGroupUpdatedEvent(
                    agent_name=agent_name,
                    subscription_index=subscription_index,
                    correlation_key=group_state["correlation_key"],
                    collected_types=group_state["collected_types"],
                    required_types=group_state["required_types"],
                    waiting_for=group_state["waiting_for"],
                    elapsed_seconds=group_state["elapsed_seconds"],
                    expires_in_seconds=group_state["expires_in_seconds"],
                    expires_in_artifacts=group_state["expires_in_artifacts"],
                    artifact_id=str(artifact.id),
                    artifact_type=artifact.type,
                    is_complete=group_state["is_complete"],
                )

                # Broadcast via WebSocket
                await self._websocket_manager.broadcast(event)
                break  # Only emit one event per artifact addition

    async def _emit_batch_item_added_event(
        self,
        *,
        agent_name: str,
        subscription_index: int,
        subscription: Subscription,  # noqa: F821
        artifact: Artifact,
    ) -> None:
        """Emit a BatchItemAddedEvent for real-time dashboard updates.

        Called when an artifact is added to a batch that hasn't reached its
        flush threshold.

        Args:
            agent_name: Name of the agent with the BatchSpec subscription
            subscription_index: Index of the subscription in the agent's subscriptions list
            subscription: The subscription with the BatchSpec configuration
            artifact: The artifact that triggered this update
        """
        # Only emit if the dashboard is enabled
        if self._websocket_manager is None:
            return

        from flock.dashboard.events import BatchItemAddedEvent
        from flock.dashboard.service import _get_batch_state

        # Get the current batch state from the engine
        batch_state = _get_batch_state(
            self._batch_engine, agent_name, subscription_index, subscription.batch
        )

        if not batch_state:
            return  # No batch to report (shouldn't happen, but defensive)

        # Build and emit the event
        event = BatchItemAddedEvent(
            agent_name=agent_name,
            subscription_index=subscription_index,
            items_collected=batch_state["items_collected"],
            items_target=batch_state.get("items_target"),
            items_remaining=batch_state.get("items_remaining"),
            elapsed_seconds=batch_state["elapsed_seconds"],
            timeout_seconds=batch_state.get("timeout_seconds"),
            timeout_remaining_seconds=batch_state.get("timeout_remaining_seconds"),
            will_flush=batch_state["will_flush"],
            artifact_id=str(artifact.id),
            artifact_type=artifact.type,
        )

        # Broadcast via WebSocket
        await self._websocket_manager.broadcast(event)

    # Batch Helpers --------------------------------------------------------

    async def _correlation_cleanup_loop(self) -> None:
        """Background task that periodically cleans up expired correlation groups.

        Runs until all correlation groups are cleared or the orchestrator shuts
        down. Every `_correlation_cleanup_interval` seconds, checks for
        time-expired correlations and discards them.
        """
        try:
            while True:
                await asyncio.sleep(self._correlation_cleanup_interval)
                self._cleanup_expired_correlations()

                # Stop if no correlation groups remain
                if not self._correlation_engine.correlation_groups:
                    self._correlation_cleanup_task = None
                    break
        except asyncio.CancelledError:
            # Clean shutdown
            self._correlation_cleanup_task = None
            raise

    def _cleanup_expired_correlations(self) -> None:
        """Clean up all expired correlation groups across all subscriptions.

        Called periodically by the background task to enforce time-based
        correlation windows. Discards incomplete correlations that have exceeded
        their time window.
        """
        # Iterate over all active subscription keys
        for agent_name, subscription_index in list(
            self._correlation_engine.correlation_groups.keys()
        ):
            self._correlation_engine.cleanup_expired(agent_name, subscription_index)

    async def _batch_timeout_checker_loop(self) -> None:
        """Background task that periodically checks for batch timeouts.

        Runs until all batches are cleared or the orchestrator shuts down.
        Every `_batch_timeout_interval` seconds, checks for expired batches and
        flushes them.
        """
        try:
            while True:
                await asyncio.sleep(self._batch_timeout_interval)
                await self._check_batch_timeouts()

                # Stop if no batches remain
                if not self._batch_engine.batches:
                    self._batch_timeout_task = None
                    break
        except asyncio.CancelledError:
            # Clean shutdown
            self._batch_timeout_task = None
            raise

    async def _check_batch_timeouts(self) -> None:
        """Check all batches for timeout expiry and flush the expired ones.

        Called periodically by the background timeout checker, or manually (in
        tests), to enforce timeout-based batching.
        """
        expired_batches = self._batch_engine.check_timeouts()

        for agent_name, subscription_index in expired_batches:
            # Flush the expired batch
            artifacts = self._batch_engine.flush_batch(agent_name, subscription_index)

            if artifacts is None:
                continue

            # Get the agent
            agent = self._agents.get(agent_name)
            if agent is None:
                continue

            # Schedule the agent with the batched artifacts (timeout flush)
            self._schedule_task(agent, artifacts, is_batch=True)

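# --- Illustrative sketch (editor's addition, not part of this module) ---
# Per the docstring above, _check_batch_timeouts() may be called manually in
# tests to force a timeout flush deterministically instead of waiting for the
# background loop:
async def force_batch_flush(orchestrator):
    await orchestrator._check_batch_timeouts()  # flush whatever has expired
    await orchestrator.run_until_idle()  # let the scheduled agents finish
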
    async def _flush_all_batches(self) -> None:
        """Flush all partial batches (for shutdown - ensures zero data loss)."""
        all_batches = self._batch_engine.flush_all()

        for agent_name, _subscription_index, artifacts in all_batches:
            # Get the agent
            agent = self._agents.get(agent_name)
            if agent is None:
                continue

            # Schedule the agent with the partial batch (shutdown flush)
            self._schedule_task(agent, artifacts, is_batch=True)

        # Wait for all scheduled tasks to complete
        await self.run_until_idle()

    # Helpers --------------------------------------------------------------

    def _normalize_input(
        self, value: BaseModel | Mapping[str, Any] | Artifact, *, produced_by: str
    ) -> Artifact:
        if isinstance(value, Artifact):
            return value
        if isinstance(value, BaseModel):
            model_cls = type(value)
            type_name = type_registry.register(model_cls)
            payload = value.model_dump()
        elif isinstance(value, Mapping):
            if "type" not in value:
                raise ValueError("Mapping input must contain 'type'.")
            type_name = value["type"]
            payload = value.get("payload", {})
        else:  # pragma: no cover - defensive
            raise TypeError("Unsupported input for direct invoke.")
        return Artifact(type=type_name, payload=payload, produced_by=produced_by)

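# --- Illustrative sketch (editor's addition, not part of this module) ---
# The three input shapes _normalize_input() accepts, one per branch above.
# `idea` stands in for any registered Pydantic model instance; the method is
# private, so this only illustrates the contract.
def normalize_examples(orchestrator, idea, existing_artifact):
    a1 = orchestrator._normalize_input(idea, produced_by="api")  # BaseModel
    a2 = orchestrator._normalize_input(
        {"type": "Idea", "payload": {"topic": "AI"}},  # Mapping: "type" required
        produced_by="api",
    )
    a3 = orchestrator._normalize_input(existing_artifact, produced_by="api")  # passthrough
    return a1, a2, a3
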
    def _check_visibility(self, artifact: Artifact, identity: AgentIdentity) -> bool:
        try:
            return artifact.visibility.allows(identity)
        except AttributeError:  # pragma: no cover - fallback for dict visibility
            return True


@asynccontextmanager
async def start_orchestrator(orchestrator: Flock):  # pragma: no cover - CLI helper
    try:
        yield orchestrator
        await orchestrator.run_until_idle()
    finally:
        pass


__all__ = ["Flock", "start_orchestrator"]
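
# --- Illustrative sketch (editor's addition, not part of this module) ---
# Using the start_orchestrator helper exported above: leaving the block
# normally awaits run_until_idle(), so scheduled agents finish. publish() is
# the event-driven entry point named in the invoke() docstring; its exact
# signature is assumed here.
async def main(orchestrator: Flock, idea) -> None:
    async with start_orchestrator(orchestrator) as flock:
        await flock.publish(idea)
    # by this point, run_until_idle() has completed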