flock-core 0.5.9-py3-none-any.whl → 0.5.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of flock-core might be problematic.
- flock/agent.py +149 -62
- flock/api/themes.py +6 -2
- flock/api_models.py +285 -0
- flock/artifact_collector.py +6 -3
- flock/batch_accumulator.py +3 -1
- flock/cli.py +3 -1
- flock/components.py +45 -56
- flock/context_provider.py +531 -0
- flock/correlation_engine.py +8 -4
- flock/dashboard/collector.py +48 -29
- flock/dashboard/events.py +10 -4
- flock/dashboard/launcher.py +3 -1
- flock/dashboard/models/graph.py +9 -3
- flock/dashboard/service.py +187 -93
- flock/dashboard/websocket.py +17 -4
- flock/engines/dspy_engine.py +174 -98
- flock/engines/examples/simple_batch_engine.py +9 -3
- flock/examples.py +6 -2
- flock/frontend/src/services/indexeddb.test.ts +4 -4
- flock/frontend/src/services/indexeddb.ts +1 -1
- flock/helper/cli_helper.py +14 -1
- flock/logging/auto_trace.py +6 -1
- flock/logging/formatters/enum_builder.py +3 -1
- flock/logging/formatters/theme_builder.py +32 -17
- flock/logging/formatters/themed_formatter.py +38 -22
- flock/logging/logging.py +21 -7
- flock/logging/telemetry.py +9 -3
- flock/logging/telemetry_exporter/duckdb_exporter.py +27 -25
- flock/logging/trace_and_logged.py +14 -5
- flock/mcp/__init__.py +3 -6
- flock/mcp/client.py +49 -19
- flock/mcp/config.py +12 -6
- flock/mcp/manager.py +6 -2
- flock/mcp/servers/sse/flock_sse_server.py +9 -3
- flock/mcp/servers/streamable_http/flock_streamable_http_server.py +6 -2
- flock/mcp/tool.py +18 -6
- flock/mcp/types/handlers.py +3 -1
- flock/mcp/types/types.py +9 -3
- flock/orchestrator.py +449 -58
- flock/orchestrator_component.py +15 -5
- flock/patches/dspy_streaming_patch.py +12 -4
- flock/registry.py +9 -3
- flock/runtime.py +69 -18
- flock/service.py +135 -64
- flock/store.py +29 -10
- flock/subscription.py +6 -4
- flock/system_artifacts.py +33 -0
- flock/utilities.py +41 -13
- flock/utility/output_utility_component.py +31 -11
- {flock_core-0.5.9.dist-info → flock_core-0.5.11.dist-info}/METADATA +150 -26
- {flock_core-0.5.9.dist-info → flock_core-0.5.11.dist-info}/RECORD +54 -51
- {flock_core-0.5.9.dist-info → flock_core-0.5.11.dist-info}/WHEEL +0 -0
- {flock_core-0.5.9.dist-info → flock_core-0.5.11.dist-info}/entry_points.txt +0 -0
- {flock_core-0.5.9.dist-info → flock_core-0.5.11.dist-info}/licenses/LICENSE +0 -0
flock/orchestrator.py
CHANGED
@@ -11,7 +11,7 @@ from contextlib import asynccontextmanager
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
-from uuid import uuid4
+from uuid import UUID, uuid4
 
 from opentelemetry import trace
 from opentelemetry.trace import Status, StatusCode
@@ -95,6 +95,7 @@ class Flock(metaclass=AutoTracedMeta):
         *,
         store: BlackboardStore | None = None,
         max_agent_iterations: int = 1000,
+        context_provider: Any = None,
     ) -> None:
         """Initialize the Flock orchestrator for blackboard-based agent coordination.
 
@@ -104,32 +105,46 @@
             store: Custom blackboard storage backend. Defaults to InMemoryBlackboardStore.
             max_agent_iterations: Circuit breaker limit to prevent runaway agent loops.
                 Defaults to 1000 iterations per agent before reset.
+            context_provider: Global context provider for all agents (Phase 3 security fix).
+                If None, agents use DefaultContextProvider. Can be overridden per-agent.
 
         Examples:
             >>> # Basic initialization with default model
             >>> flock = Flock("openai/gpt-4.1")
 
             >>> # Custom storage backend
-            >>> flock = Flock(
-            ...     "openai/gpt-4o",
-            ...     store=CustomBlackboardStore()
-            ... )
+            >>> flock = Flock("openai/gpt-4o", store=CustomBlackboardStore())
 
             >>> # Circuit breaker configuration
+            >>> flock = Flock("openai/gpt-4.1", max_agent_iterations=500)
+
+            >>> # Global context provider (Phase 3 security fix)
+            >>> from flock.context_provider import DefaultContextProvider
             >>> flock = Flock(
-            ...     "openai/gpt-4.1",
-            ...     max_agent_iterations=500
+            ...     "openai/gpt-4.1", context_provider=DefaultContextProvider()
             ... )
         """
         self._patch_litellm_proxy_imports()
         self._logger = logging.getLogger(__name__)
         self.model = model
+
+        try:
+            init_console(clear_screen=True, show_banner=True, model=self.model)
+        except (UnicodeEncodeError, UnicodeDecodeError):
+            # Skip banner on Windows consoles with encoding issues (e.g., tests, CI)
+            pass
+
         self.store: BlackboardStore = store or InMemoryBlackboardStore()
         self._agents: dict[str, Agent] = {}
         self._tasks: set[Task[Any]] = set()
+        self._correlation_tasks: dict[
+            UUID, set[Task[Any]]
+        ] = {}  # Track tasks by correlation_id
         self._processed: set[tuple[str, str]] = set()
         self._lock = asyncio.Lock()
         self.metrics: dict[str, float] = {"artifacts_published": 0, "agent_runs": 0}
+        # Phase 3: Global context provider (security fix)
+        self._default_context_provider = context_provider
         # MCP integration
         self._mcp_configs: dict[str, FlockMCPConfiguration] = {}
         self._mcp_manager: FlockMCPClientManager | None = None
@@ -151,9 +166,14 @@
         self._batch_timeout_interval: float = 0.1  # Check every 100ms
         # Phase 1.2: WebSocket manager for real-time dashboard events (set by serve())
         self._websocket_manager: Any = None
+        # Dashboard server task and launcher (for non-blocking serve)
+        self._server_task: Task[None] | None = None
+        self._dashboard_launcher: Any = None
         # Unified tracing support
         self._workflow_span = None
-        self._auto_workflow_enabled = os.getenv("FLOCK_AUTO_WORKFLOW_TRACE", "false").lower() in {
+        self._auto_workflow_enabled = os.getenv(
+            "FLOCK_AUTO_WORKFLOW_TRACE", "false"
+        ).lower() in {
             "true",
             "1",
             "yes",
@@ -228,6 +248,99 @@
     def agents(self) -> list[Agent]:
         return list(self._agents.values())
 
+    async def get_correlation_status(self, correlation_id: str) -> dict[str, Any]:
+        """Get the status of a workflow by correlation ID.
+
+        Args:
+            correlation_id: The correlation ID to check
+
+        Returns:
+            Dictionary containing workflow status information:
+            - state: "active" if work is pending, "completed" otherwise
+            - has_pending_work: True if orchestrator has pending work for this correlation
+            - artifact_count: Total number of artifacts with this correlation_id
+            - error_count: Number of WorkflowError artifacts
+            - started_at: Timestamp of first artifact (if any)
+            - last_activity_at: Timestamp of most recent artifact (if any)
+        """
+        from uuid import UUID
+
+        try:
+            correlation_uuid = UUID(correlation_id)
+        except ValueError as exc:
+            raise ValueError(
+                f"Invalid correlation_id format: {correlation_id}"
+            ) from exc
+
+        # Check if orchestrator has pending work for this correlation
+        # 1. Check active tasks for this correlation_id
+        has_active_tasks = correlation_uuid in self._correlation_tasks and bool(
+            self._correlation_tasks[correlation_uuid]
+        )
+
+        # 2. Check correlation groups (for agents with JoinSpec that haven't yielded yet)
+        has_pending_groups = False
+        for groups in self._correlation_engine.correlation_groups.values():
+            for group_key, group in groups.items():
+                # Check if this group belongs to our correlation
+                for type_name, artifacts in group.waiting_artifacts.items():
+                    if any(
+                        artifact.correlation_id == correlation_uuid
+                        for artifact in artifacts
+                    ):
+                        has_pending_groups = True
+                        break
+                if has_pending_groups:
+                    break
+            if has_pending_groups:
+                break
+
+        # Workflow has pending work if EITHER tasks are active OR groups are waiting
+        has_pending_work = has_active_tasks or has_pending_groups
+
+        # Query artifacts for this correlation
+        from flock.store import FilterConfig
+
+        filters = FilterConfig(correlation_id=correlation_id)
+        artifacts, total = await self.store.query_artifacts(
+            filters, limit=1000, offset=0
+        )
+
+        # Count errors
+        error_count = sum(
+            1
+            for artifact in artifacts
+            if artifact.type == "flock.system_artifacts.WorkflowError"
+        )
+
+        # Get timestamps
+        started_at = None
+        last_activity_at = None
+        if artifacts:
+            timestamps = [artifact.created_at for artifact in artifacts]
+            started_at = min(timestamps).isoformat()
+            last_activity_at = max(timestamps).isoformat()
+
+        # Determine state
+        if has_pending_work:
+            state = "active"
+        elif total == 0:
+            state = "not_found"
+        elif error_count > 0 and total == error_count:
+            state = "failed"  # Only error artifacts exist
+        else:
+            state = "completed"
+
+        return {
+            "correlation_id": correlation_id,
+            "state": state,
+            "has_pending_work": has_pending_work,
+            "artifact_count": total,
+            "error_count": error_count,
+            "started_at": started_at,
+            "last_activity_at": last_activity_at,
+        }
+
     # Component management -------------------------------------------------
 
     def add_component(self, component: OrchestratorComponent) -> Flock:
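The status dictionary returned by `get_correlation_status()` makes workflows pollable from outside the orchestrator. A minimal sketch of such a polling loop, assuming a `Flock` instance named `flock` and a correlation ID captured from a published artifact (both placeholder names, not part of this diff):

```python
import asyncio


async def wait_for_workflow(flock, correlation_id: str, timeout: float = 30.0) -> dict:
    """Poll get_correlation_status() until the workflow leaves the 'active' state."""
    deadline = asyncio.get_running_loop().time() + timeout
    while True:
        status = await flock.get_correlation_status(correlation_id)
        # "completed", "failed", and "not_found" are all terminal states
        if status["state"] != "active":
            return status
        if asyncio.get_running_loop().time() > deadline:
            raise TimeoutError(f"workflow {correlation_id} still active after {timeout}s")
        await asyncio.sleep(0.5)  # back off between store queries
```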
@@ -357,7 +470,11 @@
             path_str = str(abs_path)
 
             # Extract a meaningful name (last component of path)
-            name = PathLib(path_str).name or path_str.rstrip("/").split("/")[-1] or "root"
+            name = (
+                PathLib(path_str).name
+                or path_str.rstrip("/").split("/")[-1]
+                or "root"
+            )
             mcp_roots.append(MCPRoot(uri=uri, name=name))
 
         # Build configuration
@@ -559,12 +676,17 @@
         if pending_batches and (
             self._batch_timeout_task is None or self._batch_timeout_task.done()
         ):
-            self._batch_timeout_task = asyncio.create_task(self._batch_timeout_checker_loop())
+            self._batch_timeout_task = asyncio.create_task(
+                self._batch_timeout_checker_loop()
+            )
 
         if pending_correlations and (
-            self._correlation_cleanup_task is None or self._correlation_cleanup_task.done()
+            self._correlation_cleanup_task is None
+            or self._correlation_cleanup_task.done()
         ):
-            self._correlation_cleanup_task = asyncio.create_task(self._correlation_cleanup_loop())
+            self._correlation_cleanup_task = asyncio.create_task(
+                self._correlation_cleanup_loop()
+            )
 
         # If deferred work is still outstanding, consider the orchestrator quiescent for
         # now but leave watchdog tasks running to finish the job.
@@ -585,15 +707,60 @@
     async def direct_invoke(
         self, agent: Agent, inputs: Sequence[BaseModel | Mapping[str, Any] | Artifact]
    ) -> list[Artifact]:
-        artifacts = [self._normalize_input(value, produced_by="__direct__") for value in inputs]
+        artifacts = [
+            self._normalize_input(value, produced_by="__direct__") for value in inputs
+        ]
         for artifact in artifacts:
             self._mark_processed(artifact, agent)
             await self._persist_and_schedule(artifact)
-
+
+        # Phase 8: Evaluate context BEFORE creating Context (security fix)
+        # Provider resolution: per-agent > global > DefaultContextProvider
+        from flock.context_provider import (
+            BoundContextProvider,
+            ContextRequest,
+            DefaultContextProvider,
+        )
+
+        inner_provider = (
+            getattr(agent, "context_provider", None)
+            or self._default_context_provider
+            or DefaultContextProvider()
+        )
+
+        # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
+        provider = BoundContextProvider(inner_provider, agent.identity)
+
+        # Evaluate context using provider (orchestrator controls this!)
+        # Engines will receive pre-filtered artifacts via ctx.artifacts
+        correlation_id = (
+            artifacts[0].correlation_id
+            if artifacts and artifacts[0].correlation_id
+            else uuid4()
+        )
+        request = ContextRequest(
+            agent=agent,
+            correlation_id=correlation_id,
+            store=self.store,
+            agent_identity=agent.identity,
+            exclude_ids={a.id for a in artifacts},  # Exclude input artifacts
+        )
+        context_artifacts = await provider(request)
+
+        # Phase 8: Create Context with pre-filtered data (no capabilities!)
+        # SECURITY: Context is now just data - engines can't query anything
+        ctx = Context(
+            artifacts=context_artifacts,  # Pre-filtered conversation context
+            agent_identity=agent.identity,
+            task_id=str(uuid4()),
+            correlation_id=correlation_id,
+        )
         self._record_agent_run(agent)
         return await agent.execute(ctx, artifacts)
 
-    async def arun(self, agent_builder: AgentBuilder, *inputs: BaseModel) -> list[Artifact]:
+    async def arun(
+        self, agent_builder: AgentBuilder, *inputs: BaseModel
+    ) -> list[Artifact]:
         """Execute an agent with inputs and wait for all cascades to complete (async).
 
         Convenience method that combines direct agent invocation with run_until_idle().
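The provider resolution above (per-agent attribute, then the orchestrator-wide default, then `DefaultContextProvider`) implies that a custom provider only needs to be an awaitable callable that takes a `ContextRequest` and returns the artifacts an agent may see; the orchestrator then wraps it in `BoundContextProvider` so the agent identity cannot be spoofed. A hedged sketch of such a provider; the producer-based filtering policy is invented for illustration and is not part of flock-core:

```python
from flock.context_provider import DefaultContextProvider


class AllowListContextProvider:
    """Illustrative only: restrict context to artifacts from trusted producers."""

    def __init__(self, allowed_producers: set[str]) -> None:
        self.allowed_producers = allowed_producers

    async def __call__(self, request):  # request: ContextRequest
        # Start from the default correlation-scoped context, then filter
        # by producer (a hypothetical policy, not a flock-core feature).
        artifacts = await DefaultContextProvider()(request)
        return [a for a in artifacts if a.produced_by in self.allowed_producers]
```

Passed as `Flock(..., context_provider=AllowListContextProvider({"planner"}))` or set per-agent, such a provider would be consulted on every scheduling path shown in this diff (direct_invoke, invoke, and _run_agent_task).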
@@ -614,9 +781,7 @@
 
             >>> # Multiple inputs
             >>> results = await flock.arun(
-            ...     task_agent,
-            ...     Task(name="deploy"),
-            ...     Task(name="test")
+            ...     task_agent, Task(name="deploy"), Task(name="test")
             ... )
 
         Note:
@@ -676,6 +841,15 @@
             except asyncio.CancelledError:
                 pass
 
+        # Cancel background server task if running
+        if self._server_task and not self._server_task.done():
+            self._server_task.cancel()
+            try:
+                await self._server_task
+            except asyncio.CancelledError:
+                pass
+            # Note: _cleanup_server_callback will handle launcher.stop()
+
         if self._mcp_manager is not None:
             await self._mcp_manager.cleanup_all()
             self._mcp_manager = None
@@ -691,14 +865,20 @@
         dashboard_v2: bool = False,
         host: str = "127.0.0.1",
         port: int = 8344,
-
-
+        blocking: bool = True,
+    ) -> Task[None] | None:
+        """Start HTTP service for the orchestrator.
 
         Args:
             dashboard: Enable real-time dashboard with WebSocket support (default: False)
             dashboard_v2: Launch the new dashboard v2 frontend (implies dashboard=True)
             host: Host to bind to (default: "127.0.0.1")
             port: Port to bind to (default: 8344)
+            blocking: If True, blocks until server stops. If False, starts server
+                in background and returns task handle (default: True)
+
+        Returns:
+            None if blocking=True, or Task handle if blocking=False
 
         Examples:
             # Basic HTTP API (no dashboard) - runs until interrupted
@@ -706,7 +886,75 @@
 
             # With dashboard (WebSocket + browser launch) - runs until interrupted
             await orchestrator.serve(dashboard=True)
+
+            # Non-blocking mode - start server in background
+            await orchestrator.serve(dashboard=True, blocking=False)
+            # Now you can publish messages and run other logic
+            await orchestrator.publish(my_message)
+            await orchestrator.run_until_idle()
         """
+        # If non-blocking, start server in background task
+        if not blocking:
+            self._server_task = asyncio.create_task(
+                self._serve_impl(
+                    dashboard=dashboard,
+                    dashboard_v2=dashboard_v2,
+                    host=host,
+                    port=port,
+                )
+            )
+            # Add cleanup callback
+            self._server_task.add_done_callback(self._cleanup_server_callback)
+            # Give server a moment to start
+            await asyncio.sleep(0.1)
+            return self._server_task
+
+        # Blocking mode - run server directly with cleanup
+        try:
+            await self._serve_impl(
+                dashboard=dashboard,
+                dashboard_v2=dashboard_v2,
+                host=host,
+                port=port,
+            )
+        finally:
+            # In blocking mode, manually cleanup dashboard launcher
+            if self._dashboard_launcher is not None:
+                self._dashboard_launcher.stop()
+                self._dashboard_launcher = None
+        return None
+
+    def _cleanup_server_callback(self, task: Task[None]) -> None:
+        """Cleanup callback when background server task completes."""
+        # Stop dashboard launcher if it was started
+        if self._dashboard_launcher is not None:
+            try:
+                self._dashboard_launcher.stop()
+            except Exception as e:
+                self._logger.warning(f"Failed to stop dashboard launcher: {e}")
+            finally:
+                self._dashboard_launcher = None
+
+        # Clear server task reference
+        self._server_task = None
+
+        # Log any exceptions from the task
+        try:
+            exc = task.exception()
+            if exc and not isinstance(exc, asyncio.CancelledError):
+                self._logger.error(f"Server task failed: {exc}", exc_info=exc)
+        except asyncio.CancelledError:
+            pass  # Normal cancellation
+
+    async def _serve_impl(
+        self,
+        *,
+        dashboard: bool = False,
+        dashboard_v2: bool = False,
+        host: str = "127.0.0.1",
+        port: int = 8344,
+    ) -> None:
+        """Internal implementation of serve() - actual server logic."""
         if dashboard_v2:
             dashboard = True
 
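Because `serve(blocking=False)` hands back the background task, a caller can drive a workflow while the dashboard is live and then shut the server down deterministically. A minimal sketch, assuming a configured `flock` orchestrator and a `my_message` model instance (both placeholders):

```python
import asyncio


async def main(flock, my_message) -> None:
    # Start the HTTP service and dashboard in the background.
    server_task = await flock.serve(dashboard=True, blocking=False)

    # Drive the workflow while the server runs.
    await flock.publish(my_message)
    await flock.run_until_idle()

    # Cancel the server; _cleanup_server_callback stops the dashboard
    # launcher and clears the task reference once it completes.
    server_task.cancel()
    try:
        await server_task
    except asyncio.CancelledError:
        pass
```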
@@ -735,6 +983,15 @@
         # Store websocket manager for real-time event emission (Phase 1.2)
         self._websocket_manager = websocket_manager
 
+        # Phase 6+7: Set class-level WebSocket broadcast wrapper (dashboard mode)
+        async def _broadcast_wrapper(event):
+            """Isolated broadcast wrapper - no reference chain to orchestrator."""
+            return await websocket_manager.broadcast(event)
+
+        from flock.agent import Agent
+
+        Agent._websocket_broadcast_global = _broadcast_wrapper
+
         # Inject event collector into all existing agents
         for agent in self._agents.values():
             # Add dashboard collector with priority ordering handled by agent
@@ -762,11 +1019,8 @@
         self._dashboard_launcher = launcher
 
         # Run service (blocking call)
-
-
-        finally:
-            # Cleanup on exit
-            launcher.stop()
+        # Note: Cleanup is handled by serve() (blocking mode) or callback (non-blocking mode)
+        await service.run_async(host=host, port=port)
 
     # Scheduling -----------------------------------------------------------
 
@@ -802,21 +1056,12 @@
 
             >>> # Publish with custom visibility
             >>> await orchestrator.publish(
-            ...     task,
-            ...     visibility=PrivateVisibility(agents={"admin"})
+            ...     task, visibility=PrivateVisibility(agents={"admin"})
             ... )
 
             >>> # Publish with tags for channel routing
             >>> await orchestrator.publish(task, tags={"urgent", "backend"})
         """
-        self.is_dashboard = is_dashboard
-        # Only show banner in CLI mode, not dashboard mode
-        if not self.is_dashboard:
-            try:
-                init_console(clear_screen=True, show_banner=True, model=self.model)
-            except (UnicodeEncodeError, UnicodeDecodeError):
-                # Skip banner on Windows consoles with encoding issues (e.g., tests, CI)
-                pass
         # Handle different input types
         if isinstance(obj, Artifact):
             # Already an artifact - publish as-is
@@ -925,16 +1170,12 @@
         Examples:
             >>> # Testing: Execute agent without triggering others
             >>> results = await orchestrator.invoke(
-            ...     agent,
-            ...     Task(name="test", priority=5),
-            ...     publish_outputs=False
+            ...     agent, Task(name="test", priority=5), publish_outputs=False
             ... )
 
             >>> # HTTP endpoint: Execute specific agent, allow cascade
             >>> results = await orchestrator.invoke(
-            ...     movie_agent,
-            ...     Idea(topic="AI", genre="comedy"),
-            ...     publish_outputs=True
+            ...     movie_agent, Idea(topic="AI", genre="comedy"), publish_outputs=True
             ... )
             >>> await orchestrator.run_until_idle()
         """
@@ -953,8 +1194,42 @@
             visibility=PublicVisibility(),
         )
 
-        #
-
+        # Phase 8: Evaluate context BEFORE creating Context (security fix)
+        # Provider resolution: per-agent > global > DefaultContextProvider
+        from flock.context_provider import (
+            BoundContextProvider,
+            ContextRequest,
+            DefaultContextProvider,
+        )
+
+        inner_provider = (
+            getattr(agent_obj, "context_provider", None)
+            or self._default_context_provider
+            or DefaultContextProvider()
+        )
+
+        # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
+        provider = BoundContextProvider(inner_provider, agent_obj.identity)
+
+        # Evaluate context using provider (orchestrator controls this!)
+        correlation_id = artifact.correlation_id if artifact.correlation_id else uuid4()
+        request = ContextRequest(
+            agent=agent_obj,
+            correlation_id=correlation_id,
+            store=self.store,
+            agent_identity=agent_obj.identity,
+            exclude_ids={artifact.id},  # Exclude input artifact
+        )
+        context_artifacts = await provider(request)
+
+        # Phase 8: Create Context with pre-filtered data (no capabilities!)
+        # SECURITY: Context is now just data - engines can't query anything
+        ctx = Context(
+            artifacts=context_artifacts,  # Pre-filtered conversation context
+            agent_identity=agent_obj.identity,
+            task_id=str(uuid4()),
+            correlation_id=correlation_id,
+        )
         self._record_agent_run(agent_obj)
 
         # Execute with optional timeout
@@ -964,7 +1239,8 @@
         else:
             outputs = await agent_obj.execute(ctx, [artifact])
 
-        #
+        # Phase 6: Orchestrator publishes outputs (security fix)
+        # Agents return artifacts, orchestrator validates and publishes
         if publish_outputs:
             for output in outputs:
                 await self._persist_and_schedule(output)
@@ -987,7 +1263,9 @@
         if self._components_initialized:
             return
 
-        self._logger.info(f"Initializing {len(self._components)} orchestrator components")
+        self._logger.info(
+            f"Initializing {len(self._components)} orchestrator components"
+        )
 
         for component in self._components:
             comp_name = component.name or component.__class__.__name__
@@ -1061,7 +1339,9 @@
             )
 
            try:
-                decision = await component.on_before_schedule(self, artifact, agent, subscription)
+                decision = await component.on_before_schedule(
+                    self, artifact, agent, subscription
+                )
 
                 if decision == ScheduleDecision.SKIP:
                     self._logger.info(
@@ -1105,7 +1385,9 @@
             )
 
             try:
-                result = await component.on_collect_artifacts(self, artifact, agent, subscription)
+                result = await component.on_collect_artifacts(
+                    self, artifact, agent, subscription
+                )
 
                 if result is not None:
                     self._logger.debug(
@@ -1147,7 +1429,9 @@
             )
 
             try:
-                result = await component.on_before_agent_schedule(self, agent, current_artifacts)
+                result = await component.on_before_agent_schedule(
+                    self, agent, current_artifacts
+                )
 
                 if result is None:
                     self._logger.info(
@@ -1218,7 +1502,9 @@
         Components execute in priority order. Exceptions are logged but don't
         prevent shutdown of other components (best-effort cleanup).
         """
-        self._logger.info(f"Shutting down {len(self._components)} orchestrator components")
+        self._logger.info(
+            f"Shutting down {len(self._components)} orchestrator components"
+        )
 
         for component in self._components:
             comp_name = component.name or component.__class__.__name__
@@ -1271,14 +1557,18 @@
                 # Phase 3: Component hook - before schedule (circuit breaker, deduplication, etc.)
                 from flock.orchestrator_component import ScheduleDecision
 
-                decision = await self._run_before_schedule(artifact, agent, subscription)
+                decision = await self._run_before_schedule(
+                    artifact, agent, subscription
+                )
                 if decision == ScheduleDecision.SKIP:
                     continue  # Skip this subscription
                 if decision == ScheduleDecision.DEFER:
                     continue  # Defer for later (batching/correlation)
 
                 # Phase 3: Component hook - collect artifacts (handles AND gates, correlation, batching)
-                collection = await self._run_collect_artifacts(artifact, agent, subscription)
+                collection = await self._run_collect_artifacts(
+                    artifact, agent, subscription
+                )
                 if not collection.complete:
                     continue  # Still collecting (AND gate, correlation, or batch incomplete)
 
@@ -1292,7 +1582,9 @@
                 # Complete! Schedule agent with collected artifacts
                 # Schedule agent task
                 is_batch_execution = subscription.batch is not None
-                task = self._schedule_task(agent, artifacts, is_batch=is_batch_execution)
+                task = self._schedule_task(
+                    agent, artifacts, is_batch=is_batch_execution
+                )
 
                 # Phase 3: Component hook - agent scheduled (notification)
                 await self._run_agent_scheduled(agent, artifacts, task)
@@ -1301,9 +1593,29 @@
         self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
     ) -> Task[Any]:
         """Schedule agent task and return the task handle."""
-        task = asyncio.create_task(self._run_agent_task(agent, artifacts, is_batch=is_batch))
+        task = asyncio.create_task(
+            self._run_agent_task(agent, artifacts, is_batch=is_batch)
+        )
         self._tasks.add(task)
         task.add_done_callback(self._tasks.discard)
+
+        # Track task by correlation_id for workflow status tracking
+        correlation_id = artifacts[0].correlation_id if artifacts else None
+        if correlation_id:
+            if correlation_id not in self._correlation_tasks:
+                self._correlation_tasks[correlation_id] = set()
+            self._correlation_tasks[correlation_id].add(task)
+
+            # Clean up correlation tracking when task completes
+            def cleanup_correlation(t: Task[Any]) -> None:
+                if correlation_id in self._correlation_tasks:
+                    self._correlation_tasks[correlation_id].discard(t)
+                    # Remove empty sets to prevent memory leaks
+                    if not self._correlation_tasks[correlation_id]:
+                        del self._correlation_tasks[correlation_id]
+
+            task.add_done_callback(cleanup_correlation)
+
         return task
 
     def _record_agent_run(self, agent: Agent) -> None:
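The correlation bookkeeping in `_schedule_task` is a general asyncio pattern: index live tasks by a key and let each task unregister itself via a done callback so the index cannot leak. Stripped of Flock specifics, the same pattern looks like this (a generic sketch, not flock-core API):

```python
import asyncio
from uuid import UUID, uuid4

tasks_by_key: dict[UUID, set[asyncio.Task]] = {}


def track(key: UUID, task: asyncio.Task) -> None:
    tasks_by_key.setdefault(key, set()).add(task)

    def _cleanup(done: asyncio.Task) -> None:
        bucket = tasks_by_key.get(key)
        if bucket is not None:
            bucket.discard(done)
            if not bucket:  # drop empty buckets to avoid leaks
                del tasks_by_key[key]

    task.add_done_callback(_cleanup)


async def demo() -> None:
    key = uuid4()
    track(key, asyncio.create_task(asyncio.sleep(0.01)))
    await asyncio.sleep(0.05)
    assert key not in tasks_by_key  # cleanup ran on completion


asyncio.run(demo())
```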
@@ -1322,15 +1634,92 @@
     ) -> None:
         correlation_id = artifacts[0].correlation_id if artifacts else uuid4()
 
+        # Phase 8: Evaluate context BEFORE creating Context (security fix)
+        # Provider resolution: per-agent > global > DefaultContextProvider
+        from flock.context_provider import (
+            BoundContextProvider,
+            ContextRequest,
+            DefaultContextProvider,
+        )
+
+        inner_provider = (
+            getattr(agent, "context_provider", None)
+            or self._default_context_provider
+            or DefaultContextProvider()
+        )
+
+        # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
+        provider = BoundContextProvider(inner_provider, agent.identity)
+
+        # Evaluate context using provider (orchestrator controls this!)
+        # Engines will receive pre-filtered artifacts via ctx.artifacts
+        request = ContextRequest(
+            agent=agent,
+            correlation_id=correlation_id,
+            store=self.store,
+            agent_identity=agent.identity,
+            exclude_ids={a.id for a in artifacts},  # Exclude input artifacts
+        )
+        context_artifacts = await provider(request)
+
+        # Phase 8: Create Context with pre-filtered data (no capabilities!)
+        # SECURITY: Context is now just data - engines can't query anything
         ctx = Context(
-
-
+            artifacts=context_artifacts,  # Pre-filtered conversation context
+            agent_identity=agent.identity,
             task_id=str(uuid4()),
             correlation_id=correlation_id,
-            is_batch=is_batch,
+            is_batch=is_batch,
         )
         self._record_agent_run(agent)
-
+
+        # Phase 6: Execute agent (returns artifacts, doesn't publish)
+        # Wrap in try/catch to handle agent failures gracefully
+        try:
+            outputs = await agent.execute(ctx, artifacts)
+        except asyncio.CancelledError:
+            # Re-raise cancellations immediately (shutdown, user cancellation)
+            # Do NOT treat these as errors - they're intentional interruptions
+            self._logger.debug(
+                f"Agent '{agent.name}' task cancelled (task={ctx.task_id})"
+            )
+            raise  # Propagate cancellation so task.cancelled() == True
+        except Exception as exc:
+            # Agent already called component.on_error hooks before re-raising
+            # Now orchestrator publishes error artifact and continues workflow
+            from flock.system_artifacts import WorkflowError
+
+            error_artifact_data = WorkflowError(
+                failed_agent=agent.name,
+                error_type=type(exc).__name__,
+                error_message=str(exc),
+                timestamp=datetime.now(UTC),
+                task_id=ctx.task_id,
+            )
+
+            # Build and publish error artifact with correlation_id
+            from flock.artifacts import ArtifactSpec
+
+            error_spec = ArtifactSpec.from_model(WorkflowError)
+            error_artifact = error_spec.build(
+                produced_by=f"orchestrator#{agent.name}",
+                data=error_artifact_data.model_dump(),
+                correlation_id=correlation_id,
+            )
+
+            await self._persist_and_schedule(error_artifact)
+
+            # Log error but don't re-raise - workflow continues
+            self._logger.error(
+                f"Agent '{agent.name}' failed (task={ctx.task_id}): {exc}",
+                exc_info=True,
+            )
+            return  # Exit early - no outputs to publish
+
+        # Phase 6: Orchestrator publishes outputs (security fix)
+        # This fixes Vulnerability #2 (WRITE Bypass) - agents can't bypass validation
+        for output in outputs:
+            await self._persist_and_schedule(output)
 
         if artifacts:
             try:
@@ -1373,7 +1762,9 @@
         from flock.dashboard.service import _get_correlation_groups
 
         # Get current correlation groups state from engine
-        groups = _get_correlation_groups(self._correlation_engine, agent_name, subscription_index)
+        groups = _get_correlation_groups(
+            self._correlation_engine, agent_name, subscription_index
+        )
 
         if not groups:
             return  # No groups to report (shouldn't happen, but defensive)