flock-core 0.5.4-py3-none-any.whl → 0.5.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flock-core might be problematic.
- flock/agent.py +153 -17
- flock/components.py +36 -0
- flock/dashboard/collector.py +2 -0
- flock/dashboard/static_v2/assets/index-DFRnI_mt.js +1 -1
- flock/dashboard/static_v2/index.html +3 -3
- flock/engines/dspy_engine.py +41 -3
- flock/engines/examples/__init__.py +6 -0
- flock/engines/examples/simple_batch_engine.py +61 -0
- flock/frontend/README.md +4 -4
- flock/frontend/docs/DESIGN_SYSTEM.md +1 -1
- flock/frontend/package-lock.json +2 -2
- flock/frontend/package.json +1 -1
- flock/frontend/src/components/settings/SettingsPanel.css +1 -1
- flock/frontend/src/components/settings/ThemeSelector.tsx +2 -2
- flock/frontend/src/services/indexeddb.ts +1 -1
- flock/frontend/src/styles/variables.css +1 -1
- flock/orchestrator.py +500 -140
- flock/orchestrator_component.py +686 -0
- flock/runtime.py +3 -0
- {flock_core-0.5.4.dist-info → flock_core-0.5.6.dist-info}/METADATA +69 -3
- {flock_core-0.5.4.dist-info → flock_core-0.5.6.dist-info}/RECORD +24 -21
- {flock_core-0.5.4.dist-info → flock_core-0.5.6.dist-info}/WHEEL +0 -0
- {flock_core-0.5.4.dist-info → flock_core-0.5.6.dist-info}/entry_points.txt +0 -0
- {flock_core-0.5.4.dist-info → flock_core-0.5.6.dist-info}/licenses/LICENSE +0 -0
flock/orchestrator.py
CHANGED
@@ -31,9 +31,15 @@ from flock.mcp (
     FlockMCPFeatureConfiguration,
     ServerParameters,
 )
+from flock.orchestrator_component import (
+    CollectionResult,
+    OrchestratorComponent,
+    ScheduleDecision,
+)
 from flock.registry import type_registry
 from flock.runtime import Context
 from flock.store import BlackboardStore, ConsumptionRecord, InMemoryBlackboardStore
+from flock.subscription import Subscription
 from flock.visibility import AgentIdentity, PublicVisibility, Visibility
 
 
@@ -135,8 +141,14 @@ class Flock(metaclass=AutoTracedMeta):
         self._artifact_collector = ArtifactCollector()
         # JoinSpec logic: Correlation engine for correlated AND gates
         self._correlation_engine = CorrelationEngine()
+        # Background task for checking correlation expiry (time-based JoinSpec)
+        self._correlation_cleanup_task: Task[Any] | None = None
+        self._correlation_cleanup_interval: float = 0.1  # Check every 100ms
         # BatchSpec logic: Batch accumulator for size/timeout batching
         self._batch_engine = BatchEngine()
+        # Background task for checking batch timeouts
+        self._batch_timeout_task: Task[Any] | None = None
+        self._batch_timeout_interval: float = 0.1  # Check every 100ms
         # Phase 1.2: WebSocket manager for real-time dashboard events (set by serve())
         self._websocket_manager: Any = None
         # Unified tracing support
@@ -147,6 +159,25 @@ class Flock(metaclass=AutoTracedMeta):
             "yes",
             "on",
         }
+
+        # Phase 2: OrchestratorComponent system
+        self._components: list[OrchestratorComponent] = []
+        self._components_initialized: bool = False
+
+        # Auto-add built-in components
+        from flock.orchestrator_component import (
+            BuiltinCollectionComponent,
+            CircuitBreakerComponent,
+            DeduplicationComponent,
+        )
+
+        self.add_component(CircuitBreakerComponent(max_iterations=max_agent_iterations))
+        self.add_component(DeduplicationComponent())
+        self.add_component(BuiltinCollectionComponent())
+
+        # Log orchestrator initialization
+        self._logger.debug("Orchestrator initialized: components=[]")
+
         if not model:
             self.model = os.getenv("DEFAULT_MODEL")
 
@@ -197,6 +228,47 @@ class Flock(metaclass=AutoTracedMeta):
     def agents(self) -> list[Agent]:
         return list(self._agents.values())
 
+    # Component management -------------------------------------------------
+
+    def add_component(self, component: OrchestratorComponent) -> Flock:
+        """Add an OrchestratorComponent to this orchestrator.
+
+        Components execute in priority order (lower priority number = earlier).
+        Multiple components can have the same priority.
+
+        Args:
+            component: Component to add (must be an OrchestratorComponent instance)
+
+        Returns:
+            Self for method chaining
+
+        Examples:
+            >>> # Add single component
+            >>> flock = Flock("openai/gpt-4.1")
+            >>> flock.add_component(CircuitBreakerComponent(max_iterations=500))
+
+            >>> # Method chaining
+            >>> flock.add_component(CircuitBreakerComponent()) \\
+            ...     .add_component(MetricsComponent()) \\
+            ...     .add_component(DeduplicationComponent())
+
+            >>> # Custom priority (lower = earlier)
+            >>> flock.add_component(
+            ...     CustomComponent(priority=5, name="early_component")
+            ... )
+        """
+        self._components.append(component)
+        self._components.sort(key=lambda c: c.priority)
+
+        # Log component addition
+        comp_name = component.name or component.__class__.__name__
+        self._logger.info(
+            f"Component added: name={comp_name}, "
+            f"priority={component.priority}, total_components={len(self._components)}"
+        )
+
+        return self
+
     # MCP management -------------------------------------------------------
 
     def add_mcp(
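Note: the new `add_component` API above keeps the component list sorted by `priority` and returns `self` for chaining. A minimal custom component, sketched from the hook names and the `priority=`/`name=` keywords visible in this diff, might look like the following; the base-class constructor and exact hook signatures are assumptions, not confirmed by this release.

```python
# Sketch only: OrchestratorComponent's constructor and hook signatures are
# inferred from how they are called elsewhere in this diff.
from flock.orchestrator_component import OrchestratorComponent, ScheduleDecision


class AuditComponent(OrchestratorComponent):
    """Hypothetical component that records every artifact before scheduling."""

    async def on_artifact_published(self, orchestrator, artifact):
        # Return the (possibly modified) artifact; returning None blocks it.
        orchestrator._logger.info(f"audit: {artifact.type} from {artifact.produced_by}")
        return artifact

    async def on_before_schedule(self, orchestrator, artifact, agent, subscription):
        # CONTINUE lets the next component decide; SKIP or DEFER stops here.
        return ScheduleDecision.CONTINUE


# flock = Flock("openai/gpt-4.1")
# flock.add_component(AuditComponent(priority=5, name="audit"))
```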
@@ -473,11 +545,42 @@ class Flock(metaclass=AutoTracedMeta):
             await asyncio.sleep(0.01)
             pending = {task for task in self._tasks if not task.done()}
             self._tasks = pending
+
+        # Determine whether any deferred work (timeouts/cleanup) is still pending.
+        pending_batches = any(
+            accumulator.artifacts for accumulator in self._batch_engine.batches.values()
+        )
+        pending_correlations = any(
+            groups and any(group.waiting_artifacts for group in groups.values())
+            for groups in self._correlation_engine.correlation_groups.values()
+        )
+
+        # Ensure watchdog loops remain active while pending work exists.
+        if pending_batches and (
+            self._batch_timeout_task is None or self._batch_timeout_task.done()
+        ):
+            self._batch_timeout_task = asyncio.create_task(self._batch_timeout_checker_loop())
+
+        if pending_correlations and (
+            self._correlation_cleanup_task is None or self._correlation_cleanup_task.done()
+        ):
+            self._correlation_cleanup_task = asyncio.create_task(self._correlation_cleanup_loop())
+
+        # If deferred work is still outstanding, consider the orchestrator quiescent for
+        # now but leave watchdog tasks running to finish the job.
+        if pending_batches or pending_correlations:
+            self._agent_iteration_count.clear()
+            return
+
+        # Notify components that orchestrator reached idle state
+        if self._components_initialized:
+            await self._run_idle()
+
         # T068: Reset circuit breaker counters when idle
         self._agent_iteration_count.clear()
 
         # Automatically shutdown MCP connections when idle
-        await self.shutdown()
+        await self.shutdown(include_components=False)
 
     async def direct_invoke(
         self, agent: Agent, inputs: Sequence[BaseModel | Mapping[str, Any] | Artifact]
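Note: the reworked `run_until_idle` above treats outstanding batch/correlation work as "quiescent for now": it restarts a watchdog only when none is running (`task is None or task.done()`) and returns early so the watchdog can finish the job. The same restart-if-finished pattern in isolation, as plain asyncio with generic names that are not part of the flock API:

```python
# Standalone illustration of the watchdog restart check used above.
import asyncio


async def watchdog(state: dict) -> None:
    # Mirrors the 100ms polling interval of the batch/correlation checkers.
    while state["pending"] > 0:
        await asyncio.sleep(0.1)
        state["pending"] -= 1  # pretend one unit of deferred work completed


async def main() -> None:
    state = {"pending": 3}
    task: asyncio.Task | None = None

    # Equivalent of: if pending_work and (task is None or task.done()): create_task(...)
    if state["pending"] > 0 and (task is None or task.done()):
        task = asyncio.create_task(watchdog(state))

    if task is not None:
        await task
    assert state["pending"] == 0


asyncio.run(main())
```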
@@ -546,8 +649,33 @@ class Flock(metaclass=AutoTracedMeta):
         """
         return asyncio.run(self.arun(agent_builder, *inputs))
 
-    async def shutdown(self) -> None:
-        """Shutdown orchestrator and clean up resources."""
+    async def shutdown(self, *, include_components: bool = True) -> None:
+        """Shutdown orchestrator and clean up resources.
+
+        Args:
+            include_components: Whether to invoke component shutdown hooks.
+                Internal callers (e.g., run_until_idle) disable this to avoid
+                tearing down component state between cascades.
+        """
+        if include_components and self._components_initialized:
+            await self._run_shutdown()
+
+        # Cancel correlation cleanup task if running
+        if self._correlation_cleanup_task and not self._correlation_cleanup_task.done():
+            self._correlation_cleanup_task.cancel()
+            try:
+                await self._correlation_cleanup_task
+            except asyncio.CancelledError:
+                pass
+
+        # Cancel batch timeout checker if running
+        if self._batch_timeout_task and not self._batch_timeout_task.done():
+            self._batch_timeout_task.cancel()
+            try:
+                await self._batch_timeout_task
+            except asyncio.CancelledError:
+                pass
+
         if self._mcp_manager is not None:
             await self._mcp_manager.cleanup_all()
             self._mcp_manager = None
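Note: `shutdown()` is now split into an external and an internal form. A usage sketch, assuming an already-configured `Flock` instance and the import path implied by this file:

```python
import asyncio

from flock.orchestrator import Flock  # assumed import path for the class changed in this diff


async def tear_down(flock: Flock) -> None:
    # Internal callers such as run_until_idle() skip the component hooks so that
    # component state survives between publish/idle cascades:
    await flock.shutdown(include_components=False)

    # External callers run component on_shutdown hooks first, then cancel the
    # correlation/batch watchdog tasks and close MCP connections:
    await flock.shutdown()
```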
@@ -609,8 +737,8 @@ class Flock(metaclass=AutoTracedMeta):
 
         # Inject event collector into all existing agents
         for agent in self._agents.values():
-            #
-            agent.
+            # Add dashboard collector with priority ordering handled by agent
+            agent._add_utilities([event_collector])
 
         # Start dashboard launcher (npm process + browser)
         launcher_kwargs: dict[str, Any] = {"port": port}
@@ -843,163 +971,340 @@ class Flock(metaclass=AutoTracedMeta):
 
         return outputs
 
-
-
-    self
-
-
-
-
-
-
-        tags: set[str] | None = None,
-    ) -> Artifact:
-        """Deprecated: Use publish() instead.
+    async def _persist_and_schedule(self, artifact: Artifact) -> None:
+        await self.store.publish(artifact)
+        self.metrics["artifacts_published"] += 1
+        await self._schedule_artifact(artifact)
+
+    # Component Hook Runners ───────────────────────────────────────
+
+    async def _run_initialize(self) -> None:
+        """Initialize all components in priority order (called once).
 
-
+        Executes on_initialize hook for each component. Sets _components_initialized
+        flag to prevent multiple initializations.
         """
-
+        if self._components_initialized:
+            return
+
+        self._logger.info(f"Initializing {len(self._components)} orchestrator components")
+
+        for component in self._components:
+            comp_name = component.name or component.__class__.__name__
+            self._logger.debug(
+                f"Initializing component: name={comp_name}, priority={component.priority}"
+            )
+
+            try:
+                await component.on_initialize(self)
+            except Exception as e:
+                self._logger.exception(
+                    f"Component initialization failed: name={comp_name}, error={e!s}"
+                )
+                raise
+
+        self._components_initialized = True
+        self._logger.info(f"All components initialized: count={len(self._components)}")
+
+    async def _run_artifact_published(self, artifact: Artifact) -> Artifact | None:
+        """Run on_artifact_published hooks (returns modified artifact or None to block).
+
+        Components execute in priority order, each receiving the artifact from the
+        previous component (chaining). If any component returns None, the artifact
+        is blocked and scheduling stops.
+        """
+        current_artifact = artifact
+
+        for component in self._components:
+            comp_name = component.name or component.__class__.__name__
+            self._logger.debug(
+                f"Running on_artifact_published: component={comp_name}, "
+                f"artifact_type={current_artifact.type}, artifact_id={current_artifact.id}"
+            )
+
+            try:
+                result = await component.on_artifact_published(self, current_artifact)
+
+                if result is None:
+                    self._logger.info(
+                        f"Artifact blocked by component: component={comp_name}, "
+                        f"artifact_type={current_artifact.type}, artifact_id={current_artifact.id}"
+                    )
+                    return None
+
+                current_artifact = result
+            except Exception as e:
+                self._logger.exception(
+                    f"Component hook failed: component={comp_name}, "
+                    f"hook=on_artifact_published, error={e!s}"
+                )
+                raise
+
+        return current_artifact
+
+    async def _run_before_schedule(
+        self, artifact: Artifact, agent: Agent, subscription: Subscription
+    ) -> ScheduleDecision:
+        """Run on_before_schedule hooks (returns CONTINUE, SKIP, or DEFER).
+
+        Components execute in priority order. First component to return SKIP or
+        DEFER stops execution and returns that decision.
+        """
+        from flock.orchestrator_component import ScheduleDecision
+
+        for component in self._components:
+            comp_name = component.name or component.__class__.__name__
+
+            self._logger.debug(
+                f"Running on_before_schedule: component={comp_name}, "
+                f"agent={agent.name}, artifact_type={artifact.type}"
+            )
+
+            try:
+                decision = await component.on_before_schedule(self, artifact, agent, subscription)
+
+                if decision == ScheduleDecision.SKIP:
+                    self._logger.info(
+                        f"Scheduling skipped by component: component={comp_name}, "
+                        f"agent={agent.name}, artifact_type={artifact.type}, decision=SKIP"
+                    )
+                    return ScheduleDecision.SKIP
+
+                if decision == ScheduleDecision.DEFER:
+                    self._logger.debug(
+                        f"Scheduling deferred by component: component={comp_name}, "
+                        f"agent={agent.name}, decision=DEFER"
+                    )
+                    return ScheduleDecision.DEFER
 
-
-
-
-
+            except Exception as e:
+                self._logger.exception(
+                    f"Component hook failed: component={comp_name}, "
+                    f"hook=on_before_schedule, error={e!s}"
+                )
+                raise
+
+        return ScheduleDecision.CONTINUE
+
+    async def _run_collect_artifacts(
+        self, artifact: Artifact, agent: Agent, subscription: Subscription
+    ) -> CollectionResult:
+        """Run on_collect_artifacts hooks (returns first non-None result).
+
+        Components execute in priority order. First component to return non-None
+        wins (short-circuit). If all return None, default is immediate scheduling.
+        """
+        from flock.orchestrator_component import CollectionResult
+
+        for component in self._components:
+            comp_name = component.name or component.__class__.__name__
+
+            self._logger.debug(
+                f"Running on_collect_artifacts: component={comp_name}, "
+                f"agent={agent.name}, artifact_type={artifact.type}"
+            )
+
+            try:
+                result = await component.on_collect_artifacts(self, artifact, agent, subscription)
+
+                if result is not None:
+                    self._logger.debug(
+                        f"Collection handled by component: component={comp_name}, "
+                        f"complete={result.complete}, artifact_count={len(result.artifacts)}"
+                    )
+                    return result
+            except Exception as e:
+                self._logger.exception(
+                    f"Component hook failed: component={comp_name}, "
+                    f"hook=on_collect_artifacts, error={e!s}"
+                )
+                raise
+
+        # Default: immediate scheduling with single artifact
+        self._logger.debug(
+            f"No component handled collection, using default: "
+            f"agent={agent.name}, artifact_type={artifact.type}"
         )
-        return
-
-
-
-
-
+        return CollectionResult.immediate([artifact])
+
+    async def _run_before_agent_schedule(
+        self, agent: Agent, artifacts: list[Artifact]
+    ) -> list[Artifact] | None:
+        """Run on_before_agent_schedule hooks (returns modified artifacts or None to block).
+
+        Components execute in priority order, each receiving artifacts from the
+        previous component (chaining). If any component returns None, scheduling
+        is blocked.
+        """
+        current_artifacts = artifacts
+
+        for component in self._components:
+            comp_name = component.name or component.__class__.__name__
+
+            self._logger.debug(
+                f"Running on_before_agent_schedule: component={comp_name}, "
+                f"agent={agent.name}, artifact_count={len(current_artifacts)}"
+            )
+
+            try:
+                result = await component.on_before_agent_schedule(self, agent, current_artifacts)
+
+                if result is None:
+                    self._logger.info(
+                        f"Agent scheduling blocked by component: component={comp_name}, "
+                        f"agent={agent.name}"
+                    )
+                    return None
+
+                current_artifacts = result
+            except Exception as e:
+                self._logger.exception(
+                    f"Component hook failed: component={comp_name}, "
+                    f"hook=on_before_agent_schedule, error={e!s}"
+                )
+                raise
+
+        return current_artifacts
+
+    async def _run_agent_scheduled(
+        self, agent: Agent, artifacts: list[Artifact], task: Task[Any]
+    ) -> None:
+        """Run on_agent_scheduled hooks (notification only, non-blocking).
+
+        Components execute in priority order. Exceptions are logged but don't
+        prevent other components from executing or block scheduling.
+        """
+        for component in self._components:
+            comp_name = component.name or component.__class__.__name__
+
+            self._logger.debug(
+                f"Running on_agent_scheduled: component={comp_name}, "
+                f"agent={agent.name}, artifact_count={len(artifacts)}"
+            )
+
+            try:
+                await component.on_agent_scheduled(self, agent, artifacts, task)
+            except Exception as e:
+                self._logger.warning(
+                    f"Component notification hook failed (non-critical): "
+                    f"component={comp_name}, hook=on_agent_scheduled, error={e!s}"
+                )
+                # Don't propagate - this is a notification hook
+
+    async def _run_idle(self) -> None:
+        """Run on_orchestrator_idle hooks when orchestrator becomes idle.
+
+        Components execute in priority order. Exceptions are logged but don't
+        prevent other components from executing.
+        """
+        self._logger.debug(
+            f"Running on_orchestrator_idle hooks: component_count={len(self._components)}"
         )
 
-
-
-
-
+        for component in self._components:
+            comp_name = component.name or component.__class__.__name__
+
+            try:
+                await component.on_orchestrator_idle(self)
+            except Exception as e:
+                self._logger.warning(
+                    f"Component idle hook failed (non-critical): "
+                    f"component={comp_name}, hook=on_orchestrator_idle, error={e!s}"
+                )
+
+    async def _run_shutdown(self) -> None:
+        """Run on_shutdown hooks when orchestrator shuts down.
+
+        Components execute in priority order. Exceptions are logged but don't
+        prevent shutdown of other components (best-effort cleanup).
+        """
+        self._logger.info(f"Shutting down {len(self._components)} orchestrator components")
+
+        for component in self._components:
+            comp_name = component.name or component.__class__.__name__
+            self._logger.debug(f"Shutting down component: name={comp_name}")
+
+            try:
+                await component.on_shutdown(self)
+            except Exception as e:
+                self._logger.exception(
+                    f"Component shutdown failed: component={comp_name}, "
+                    f"hook=on_shutdown, error={e!s}"
+                )
+                # Continue shutting down other components
+
+    # Scheduling ───────────────────────────────────────────────────
 
     async def _schedule_artifact(self, artifact: Artifact) -> None:
+        """Schedule agents for an artifact using component hooks.
+
+        Refactored to use OrchestratorComponent hook system for extensibility.
+        Components can modify artifact, control scheduling, and handle collection.
+        """
+        # Phase 3: Initialize components on first artifact
+        if not self._components_initialized:
+            await self._run_initialize()
+
+        # Phase 3: Component hook - artifact published (can transform or block)
+        artifact = await self._run_artifact_published(artifact)
+        if artifact is None:
+            return  # Artifact blocked by component
+
         for agent in self.agents:
             identity = agent.identity
             for subscription in agent.subscriptions:
                 if not subscription.accepts_events():
                     continue
+
                 # T066: Check prevent_self_trigger
                 if agent.prevent_self_trigger and artifact.produced_by == agent.name:
                     continue  # Skip - agent produced this artifact (prevents feedback loops)
-
-
-                if iteration_count >= self.max_agent_iterations:
-                    # Agent hit iteration limit - possible infinite loop
-                    continue
+
+                # Visibility check
                 if not self._check_visibility(artifact, identity):
                     continue
+
+                # Subscription match check
                 if not subscription.matches(artifact):
                     continue
-                if self._seen_before(artifact, agent):
-                    continue
 
-                #
-
-                    # Use CorrelationEngine for JoinSpec (correlated AND gates)
-                    subscription_index = agent.subscriptions.index(subscription)
-                    completed_group = self._correlation_engine.add_artifact(
-                        artifact=artifact,
-                        subscription=subscription,
-                        subscription_index=subscription_index,
-                    )
+                # Phase 3: Component hook - before schedule (circuit breaker, deduplication, etc.)
+                from flock.orchestrator_component import ScheduleDecision
 
-
-
-
-
-
-                            subscription_index=subscription_index,
-                            artifact=artifact,
-                        )
-                        continue
-
-                    # Correlation complete! Get all correlated artifacts
-                    artifacts = completed_group.get_artifacts()
-                else:
-                    # AND GATE LOGIC: Use artifact collector for simple AND gates (no correlation)
-                    is_complete, artifacts = self._artifact_collector.add_artifact(
-                        agent, subscription, artifact
-                    )
-
-                    if not is_complete:
-                        # Still waiting for more types (AND gate incomplete)
-                        continue
-
-                # BatchSpec BATCHING: Check if subscription has batch accumulator
-                if subscription.batch is not None:
-                    # Add to batch accumulator
-                    subscription_index = agent.subscriptions.index(subscription)
-
-                    # COMBINED FEATURES: JoinSpec + BatchSpec
-                    # If we have JoinSpec, artifacts is a correlated GROUP - treat as single batch item
-                    # If we have AND gate, artifacts is a complete set - treat as single batch item
-                    # Otherwise (single type), add each artifact individually
-
-                    if subscription.join is not None or len(subscription.type_models) > 1:
-                        # JoinSpec or AND gate: Treat artifact group as ONE batch item
-                        should_flush = self._batch_engine.add_artifact_group(
-                            artifacts=artifacts,
-                            subscription=subscription,
-                            subscription_index=subscription_index,
-                        )
-                    else:
-                        # Single type subscription: Add each artifact individually
-                        should_flush = False
-                        for single_artifact in artifacts:
-                            should_flush = self._batch_engine.add_artifact(
-                                artifact=single_artifact,
-                                subscription=subscription,
-                                subscription_index=subscription_index,
-                            )
-
-                            if should_flush:
-                                # Size threshold reached! Flush batch now
-                                break
-
-                    if not should_flush:
-                        # Batch not full yet - wait for more artifacts
-                        # Phase 1.2: Emit real-time batch update event
-                        await self._emit_batch_item_added_event(
-                            agent_name=agent.name,
-                            subscription_index=subscription_index,
-                            subscription=subscription,
-                            artifact=artifact,
-                        )
-                        continue
-
-                    # Flush the batch and get all accumulated artifacts
-                    batched_artifacts = self._batch_engine.flush_batch(
-                        agent.name, subscription_index
-                    )
+                decision = await self._run_before_schedule(artifact, agent, subscription)
+                if decision == ScheduleDecision.SKIP:
+                    continue  # Skip this subscription
+                if decision == ScheduleDecision.DEFER:
+                    continue  # Defer for later (batching/correlation)
 
-
-
-
+                # Phase 3: Component hook - collect artifacts (handles AND gates, correlation, batching)
+                collection = await self._run_collect_artifacts(artifact, agent, subscription)
+                if not collection.complete:
+                    continue  # Still collecting (AND gate, correlation, or batch incomplete)
 
-
-                    artifacts = batched_artifacts
+                artifacts = collection.artifacts
 
-                #
-
-
+                # Phase 3: Component hook - before agent schedule (final validation/transformation)
+                artifacts = await self._run_before_agent_schedule(agent, artifacts)
+                if artifacts is None:
+                    continue  # Scheduling blocked by component
 
-                #
-
-
+                # Complete! Schedule agent with collected artifacts
+                # Schedule agent task
+                is_batch_execution = subscription.batch is not None
+                task = self._schedule_task(agent, artifacts, is_batch=is_batch_execution)
 
-                #
-                self.
+                # Phase 3: Component hook - agent scheduled (notification)
+                await self._run_agent_scheduled(agent, artifacts, task)
 
-    def _schedule_task(
-
+    def _schedule_task(
+        self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
+    ) -> Task[Any]:
+        """Schedule agent task and return the task handle."""
+        task = asyncio.create_task(self._run_agent_task(agent, artifacts, is_batch=is_batch))
         self._tasks.add(task)
         task.add_done_callback(self._tasks.discard)
+        return task
 
     def _record_agent_run(self, agent: Agent) -> None:
         self.metrics["agent_runs"] += 1
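Note: taken together, the hook runners above give `_schedule_artifact` a fixed per-artifact pipeline: `on_artifact_published` (transform or block) → per subscription `on_before_schedule` (CONTINUE/SKIP/DEFER) → `on_collect_artifacts` (first non-None result wins) → `on_before_agent_schedule` (transform or block) → `on_agent_scheduled` (notification). A custom component can plug into any stage; the sketch below uses two of them. The class, its parameters, and the constructor pass-through are hypothetical, only the hook names and return conventions come from this diff.

```python
from flock.orchestrator_component import OrchestratorComponent, ScheduleDecision


class ThrottleComponent(OrchestratorComponent):
    """Hypothetical: stop feeding an agent after `limit` artifacts."""

    def __init__(self, *args, limit: int = 100, **kwargs):
        super().__init__(*args, **kwargs)
        self.limit = limit
        self._seen: dict[str, int] = {}

    async def on_before_schedule(self, orchestrator, artifact, agent, subscription):
        count = self._seen.get(agent.name, 0)
        if count >= self.limit:
            return ScheduleDecision.SKIP  # stop scheduling this artifact for this agent
        self._seen[agent.name] = count + 1
        return ScheduleDecision.CONTINUE

    async def on_before_agent_schedule(self, orchestrator, agent, artifacts):
        # Returning a (possibly trimmed) list lets scheduling proceed;
        # returning None would block this agent run entirely.
        return artifacts[: self.limit]
```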
@@ -1012,14 +1317,17 @@ class Flock(metaclass=AutoTracedMeta):
         key = (str(artifact.id), agent.name)
         return key in self._processed
 
-    async def _run_agent_task(
+    async def _run_agent_task(
+        self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
+    ) -> None:
         correlation_id = artifacts[0].correlation_id if artifacts else uuid4()
 
         ctx = Context(
             board=BoardHandle(self),
             orchestrator=self,
             task_id=str(uuid4()),
-            correlation_id=correlation_id,
+            correlation_id=correlation_id,
+            is_batch=is_batch,  # NEW!
         )
         self._record_agent_run(agent)
         await agent.execute(ctx, artifacts)
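Note: the `is_batch` flag threaded through `_run_agent_task` into `Context` lets downstream code tell a batch flush apart from a single-artifact run. A hypothetical consumer (the handler and helper functions below are not part of flock):

```python
# Sketch: ctx.is_batch is the new Context field added above.
def process_one(artifact) -> None:
    print("single artifact:", artifact)


def process_many(artifacts) -> None:
    print("batch of", len(artifacts), "artifacts")


async def handle(ctx, artifacts) -> None:
    if ctx.is_batch:
        # Artifacts arrived via a batch flush (size threshold, timeout, or shutdown).
        process_many(artifacts)
    else:
        process_one(artifacts[0])
```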
@@ -1154,11 +1462,63 @@ class Flock(metaclass=AutoTracedMeta):
 
     # Batch Helpers --------------------------------------------------------
 
+    async def _correlation_cleanup_loop(self) -> None:
+        """Background task that periodically cleans up expired correlation groups.
+
+        Runs continuously until all correlation groups are cleared or orchestrator shuts down.
+        Checks every 100ms for time-based expired correlations and discards them.
+        """
+        try:
+            while True:
+                await asyncio.sleep(self._correlation_cleanup_interval)
+                self._cleanup_expired_correlations()
+
+                # Stop if no correlation groups remain
+                if not self._correlation_engine.correlation_groups:
+                    self._correlation_cleanup_task = None
+                    break
+        except asyncio.CancelledError:
+            # Clean shutdown
+            self._correlation_cleanup_task = None
+            raise
+
+    def _cleanup_expired_correlations(self) -> None:
+        """Clean up all expired correlation groups across all subscriptions.
+
+        Called periodically by background task to enforce time-based correlation windows.
+        Discards incomplete correlations that have exceeded their time window.
+        """
+        # Get all active subscription keys
+        for agent_name, subscription_index in list(
+            self._correlation_engine.correlation_groups.keys()
+        ):
+            self._correlation_engine.cleanup_expired(agent_name, subscription_index)
+
+    async def _batch_timeout_checker_loop(self) -> None:
+        """Background task that periodically checks for batch timeouts.
+
+        Runs continuously until all batches are cleared or orchestrator shuts down.
+        Checks every 100ms for expired batches and flushes them.
+        """
+        try:
+            while True:
+                await asyncio.sleep(self._batch_timeout_interval)
+                await self._check_batch_timeouts()
+
+                # Stop if no batches remain
+                if not self._batch_engine.batches:
+                    self._batch_timeout_task = None
+                    break
+        except asyncio.CancelledError:
+            # Clean shutdown
+            self._batch_timeout_task = None
+            raise
+
     async def _check_batch_timeouts(self) -> None:
         """Check all batches for timeout expiry and flush expired batches.
 
-        This method is called periodically
-        timeout-based batching.
+        This method is called periodically by the background timeout checker
+        or manually (in tests) to enforce timeout-based batching.
         """
         expired_batches = self._batch_engine.check_timeouts()
 
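Note: both background loops above follow the same life cycle: poll every 0.1 s, clear their own task handle and exit once no work remains, and re-raise `CancelledError` so `shutdown()` can cancel them cleanly. The pattern in isolation, as plain asyncio with names that are not part of the flock API:

```python
# Standalone sketch of the self-terminating watchdog pattern used by both loops.
import asyncio


class Checker:
    def __init__(self) -> None:
        self.pending = 2
        self.task: asyncio.Task | None = None

    async def loop(self) -> None:
        try:
            while True:
                await asyncio.sleep(0.1)   # 100ms poll, as in the diff
                self.pending -= 1          # stand-in for flushing expired batches
                if self.pending <= 0:      # stop once no work remains
                    self.task = None
                    break
        except asyncio.CancelledError:
            self.task = None               # clear the handle on clean shutdown
            raise


async def main() -> None:
    checker = Checker()
    checker.task = asyncio.create_task(checker.loop())
    await asyncio.sleep(0.15)
    if checker.task is not None:           # cancel-and-await, as shutdown() does
        checker.task.cancel()
        try:
            await checker.task
        except asyncio.CancelledError:
            pass


asyncio.run(main())
```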
@@ -1174,8 +1534,8 @@ class Flock(metaclass=AutoTracedMeta):
             if agent is None:
                 continue
 
-            # Schedule agent with batched artifacts
-            self._schedule_task(agent, artifacts)
+            # Schedule agent with batched artifacts (timeout flush)
+            self._schedule_task(agent, artifacts, is_batch=True)
 
     async def _flush_all_batches(self) -> None:
         """Flush all partial batches (for shutdown - ensures zero data loss)."""
@@ -1187,8 +1547,8 @@ class Flock(metaclass=AutoTracedMeta):
             if agent is None:
                 continue
 
-            # Schedule agent with partial batch
-            self._schedule_task(agent, artifacts)
+            # Schedule agent with partial batch (shutdown flush)
+            self._schedule_task(agent, artifacts, is_batch=True)
 
         # Wait for all scheduled tasks to complete
         await self.run_until_idle()