flock-core 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flock-core might be problematic. Click here for more details.

flock/orchestrator.py CHANGED
@@ -31,9 +31,15 @@ from flock.mcp import (
31
31
  FlockMCPFeatureConfiguration,
32
32
  ServerParameters,
33
33
  )
34
+ from flock.orchestrator_component import (
35
+ CollectionResult,
36
+ OrchestratorComponent,
37
+ ScheduleDecision,
38
+ )
34
39
  from flock.registry import type_registry
35
40
  from flock.runtime import Context
36
41
  from flock.store import BlackboardStore, ConsumptionRecord, InMemoryBlackboardStore
42
+ from flock.subscription import Subscription
37
43
  from flock.visibility import AgentIdentity, PublicVisibility, Visibility
38
44
 
39
45
 
@@ -135,8 +141,14 @@ class Flock(metaclass=AutoTracedMeta):
135
141
  self._artifact_collector = ArtifactCollector()
136
142
  # JoinSpec logic: Correlation engine for correlated AND gates
137
143
  self._correlation_engine = CorrelationEngine()
144
+ # Background task for checking correlation expiry (time-based JoinSpec)
145
+ self._correlation_cleanup_task: Task[Any] | None = None
146
+ self._correlation_cleanup_interval: float = 0.1 # Check every 100ms
138
147
  # BatchSpec logic: Batch accumulator for size/timeout batching
139
148
  self._batch_engine = BatchEngine()
149
+ # Background task for checking batch timeouts
150
+ self._batch_timeout_task: Task[Any] | None = None
151
+ self._batch_timeout_interval: float = 0.1 # Check every 100ms
140
152
  # Phase 1.2: WebSocket manager for real-time dashboard events (set by serve())
141
153
  self._websocket_manager: Any = None
142
154
  # Unified tracing support
@@ -147,6 +159,25 @@ class Flock(metaclass=AutoTracedMeta):
147
159
  "yes",
148
160
  "on",
149
161
  }
162
+
163
+ # Phase 2: OrchestratorComponent system
164
+ self._components: list[OrchestratorComponent] = []
165
+ self._components_initialized: bool = False
166
+
167
+ # Auto-add built-in components
168
+ from flock.orchestrator_component import (
169
+ BuiltinCollectionComponent,
170
+ CircuitBreakerComponent,
171
+ DeduplicationComponent,
172
+ )
173
+
174
+ self.add_component(CircuitBreakerComponent(max_iterations=max_agent_iterations))
175
+ self.add_component(DeduplicationComponent())
176
+ self.add_component(BuiltinCollectionComponent())
177
+
178
+ # Log orchestrator initialization
179
+ self._logger.debug("Orchestrator initialized: components=[]")
180
+
150
181
  if not model:
151
182
  self.model = os.getenv("DEFAULT_MODEL")
152
183
 
@@ -197,6 +228,47 @@ class Flock(metaclass=AutoTracedMeta):
197
228
  def agents(self) -> list[Agent]:
198
229
  return list(self._agents.values())
199
230
 
231
+ # Component management -------------------------------------------------
232
+
233
def add_component(self, component: OrchestratorComponent) -> Flock:
    """Register an orchestrator component and keep the registry priority-sorted.

    The component is appended to ``self._components`` and the list is then
    re-sorted by ``priority`` in ascending order, so a lower number runs
    earlier. ``list.sort`` is stable, which means components that share a
    priority keep the order in which they were added.

    Args:
        component: The OrchestratorComponent instance to register.

    Returns:
        This orchestrator, so that calls can be chained.

    Examples:
        >>> # Register one component
        >>> flock = Flock("openai/gpt-4.1")
        >>> flock.add_component(CircuitBreakerComponent(max_iterations=500))

        >>> # Chained registration
        >>> flock.add_component(CircuitBreakerComponent()) \\
        ...     .add_component(MetricsComponent()) \\
        ...     .add_component(DeduplicationComponent())

        >>> # Explicit priority (lower = earlier)
        >>> flock.add_component(
        ...     CustomComponent(priority=5, name="early_component")
        ... )
    """
    registry = self._components
    registry.append(component)
    registry.sort(key=lambda entry: entry.priority)

    # Fall back to the class name when the component carries no explicit name.
    label = component.name or component.__class__.__name__
    self._logger.info(
        f"Component added: name={label}, "
        f"priority={component.priority}, total_components={len(registry)}"
    )

    return self
271
+
200
272
  # MCP management -------------------------------------------------------
201
273
 
202
274
  def add_mcp(
@@ -473,11 +545,42 @@ class Flock(metaclass=AutoTracedMeta):
473
545
  await asyncio.sleep(0.01)
474
546
  pending = {task for task in self._tasks if not task.done()}
475
547
  self._tasks = pending
548
+
549
+ # Determine whether any deferred work (timeouts/cleanup) is still pending.
550
+ pending_batches = any(
551
+ accumulator.artifacts for accumulator in self._batch_engine.batches.values()
552
+ )
553
+ pending_correlations = any(
554
+ groups and any(group.waiting_artifacts for group in groups.values())
555
+ for groups in self._correlation_engine.correlation_groups.values()
556
+ )
557
+
558
+ # Ensure watchdog loops remain active while pending work exists.
559
+ if pending_batches and (
560
+ self._batch_timeout_task is None or self._batch_timeout_task.done()
561
+ ):
562
+ self._batch_timeout_task = asyncio.create_task(self._batch_timeout_checker_loop())
563
+
564
+ if pending_correlations and (
565
+ self._correlation_cleanup_task is None or self._correlation_cleanup_task.done()
566
+ ):
567
+ self._correlation_cleanup_task = asyncio.create_task(self._correlation_cleanup_loop())
568
+
569
+ # If deferred work is still outstanding, consider the orchestrator quiescent for
570
+ # now but leave watchdog tasks running to finish the job.
571
+ if pending_batches or pending_correlations:
572
+ self._agent_iteration_count.clear()
573
+ return
574
+
575
+ # Notify components that orchestrator reached idle state
576
+ if self._components_initialized:
577
+ await self._run_idle()
578
+
476
579
  # T068: Reset circuit breaker counters when idle
477
580
  self._agent_iteration_count.clear()
478
581
 
479
582
  # Automatically shutdown MCP connections when idle
480
- await self.shutdown()
583
+ await self.shutdown(include_components=False)
481
584
 
482
585
  async def direct_invoke(
483
586
  self, agent: Agent, inputs: Sequence[BaseModel | Mapping[str, Any] | Artifact]
@@ -546,8 +649,33 @@ class Flock(metaclass=AutoTracedMeta):
546
649
  """
547
650
  return asyncio.run(self.arun(agent_builder, *inputs))
548
651
 
549
- async def shutdown(self) -> None:
550
- """Shutdown orchestrator and clean up resources."""
652
+ async def shutdown(self, *, include_components: bool = True) -> None:
653
+ """Shutdown orchestrator and clean up resources.
654
+
655
+ Args:
656
+ include_components: Whether to invoke component shutdown hooks.
657
+ Internal callers (e.g., run_until_idle) disable this to avoid
658
+ tearing down component state between cascades.
659
+ """
660
+ if include_components and self._components_initialized:
661
+ await self._run_shutdown()
662
+
663
+ # Cancel correlation cleanup task if running
664
+ if self._correlation_cleanup_task and not self._correlation_cleanup_task.done():
665
+ self._correlation_cleanup_task.cancel()
666
+ try:
667
+ await self._correlation_cleanup_task
668
+ except asyncio.CancelledError:
669
+ pass
670
+
671
+ # Cancel batch timeout checker if running
672
+ if self._batch_timeout_task and not self._batch_timeout_task.done():
673
+ self._batch_timeout_task.cancel()
674
+ try:
675
+ await self._batch_timeout_task
676
+ except asyncio.CancelledError:
677
+ pass
678
+
551
679
  if self._mcp_manager is not None:
552
680
  await self._mcp_manager.cleanup_all()
553
681
  self._mcp_manager = None
@@ -609,8 +737,8 @@ class Flock(metaclass=AutoTracedMeta):
609
737
 
610
738
  # Inject event collector into all existing agents
611
739
  for agent in self._agents.values():
612
- # Insert at beginning of utilities list (highest priority)
613
- agent.utilities.insert(0, event_collector)
740
+ # Add dashboard collector with priority ordering handled by agent
741
+ agent._add_utilities([event_collector])
614
742
 
615
743
  # Start dashboard launcher (npm process + browser)
616
744
  launcher_kwargs: dict[str, Any] = {"port": port}
@@ -843,163 +971,340 @@ class Flock(metaclass=AutoTracedMeta):
843
971
 
844
972
  return outputs
845
973
 
846
- # Keep publish_external as deprecated alias
847
- async def publish_external(
848
- self,
849
- type_name: str,
850
- payload: dict[str, Any],
851
- *,
852
- visibility: Visibility | None = None,
853
- correlation_id: str | None = None,
854
- partition_key: str | None = None,
855
- tags: set[str] | None = None,
856
- ) -> Artifact:
857
- """Deprecated: Use publish() instead.
974
async def _persist_and_schedule(self, artifact: Artifact) -> None:
    """Store an artifact, count it, then hand it to the scheduler.

    The steps run strictly in this order: the artifact is published to
    ``self.store``, the ``artifacts_published`` metric is incremented, and
    the artifact is finally passed to ``_schedule_artifact``.

    Args:
        artifact: The artifact to persist and schedule.
    """
    await self.store.publish(artifact)

    published_so_far = self.metrics["artifacts_published"]
    self.metrics["artifacts_published"] = published_so_far + 1

    await self._schedule_artifact(artifact)
978
+
979
+ # Component Hook Runners ───────────────────────────────────────
980
+
981
+ async def _run_initialize(self) -> None:
982
+ """Initialize all components in priority order (called once).
858
983
 
859
- This method will be removed in v2.0.
984
+ Executes on_initialize hook for each component. Sets _components_initialized
985
+ flag to prevent multiple initializations.
860
986
  """
861
- import warnings
987
+ if self._components_initialized:
988
+ return
989
+
990
+ self._logger.info(f"Initializing {len(self._components)} orchestrator components")
991
+
992
+ for component in self._components:
993
+ comp_name = component.name or component.__class__.__name__
994
+ self._logger.debug(
995
+ f"Initializing component: name={comp_name}, priority={component.priority}"
996
+ )
997
+
998
+ try:
999
+ await component.on_initialize(self)
1000
+ except Exception as e:
1001
+ self._logger.exception(
1002
+ f"Component initialization failed: name={comp_name}, error={e!s}"
1003
+ )
1004
+ raise
1005
+
1006
+ self._components_initialized = True
1007
+ self._logger.info(f"All components initialized: count={len(self._components)}")
1008
+
1009
async def _run_artifact_published(self, artifact: Artifact) -> Artifact | None:
    """Pass an artifact through every component's ``on_artifact_published`` hook.

    Hooks are awaited in the order of ``self._components`` (which
    ``add_component`` keeps sorted by priority). Each hook receives whatever
    the previous hook returned, so components can transform the artifact in
    a chain. A hook that returns ``None`` vetoes the artifact: ``None`` is
    returned immediately and later components are not consulted.

    Args:
        artifact: The artifact that was just published.

    Returns:
        The artifact as left by the last hook, or ``None`` if a component
        blocked it.

    Raises:
        Exception: Whatever a hook raises is logged and re-raised unchanged.
    """
    candidate = artifact

    for component in self._components:
        hook_owner = component.name or component.__class__.__name__
        self._logger.debug(
            f"Running on_artifact_published: component={hook_owner}, "
            f"artifact_type={candidate.type}, artifact_id={candidate.id}"
        )

        try:
            outcome = await component.on_artifact_published(self, candidate)

            if outcome is not None:
                # Feed the (possibly transformed) artifact to the next component.
                candidate = outcome
                continue

            self._logger.info(
                f"Artifact blocked by component: component={hook_owner}, "
                f"artifact_type={candidate.type}, artifact_id={candidate.id}"
            )
            return None
        except Exception as exc:
            self._logger.exception(
                f"Component hook failed: component={hook_owner}, "
                f"hook=on_artifact_published, error={exc!s}"
            )
            raise

    return candidate
1044
+
1045
+ async def _run_before_schedule(
1046
+ self, artifact: Artifact, agent: Agent, subscription: Subscription
1047
+ ) -> ScheduleDecision:
1048
+ """Run on_before_schedule hooks (returns CONTINUE, SKIP, or DEFER).
1049
+
1050
+ Components execute in priority order. First component to return SKIP or
1051
+ DEFER stops execution and returns that decision.
1052
+ """
1053
+ from flock.orchestrator_component import ScheduleDecision
1054
+
1055
+ for component in self._components:
1056
+ comp_name = component.name or component.__class__.__name__
1057
+
1058
+ self._logger.debug(
1059
+ f"Running on_before_schedule: component={comp_name}, "
1060
+ f"agent={agent.name}, artifact_type={artifact.type}"
1061
+ )
1062
+
1063
+ try:
1064
+ decision = await component.on_before_schedule(self, artifact, agent, subscription)
1065
+
1066
+ if decision == ScheduleDecision.SKIP:
1067
+ self._logger.info(
1068
+ f"Scheduling skipped by component: component={comp_name}, "
1069
+ f"agent={agent.name}, artifact_type={artifact.type}, decision=SKIP"
1070
+ )
1071
+ return ScheduleDecision.SKIP
1072
+
1073
+ if decision == ScheduleDecision.DEFER:
1074
+ self._logger.debug(
1075
+ f"Scheduling deferred by component: component={comp_name}, "
1076
+ f"agent={agent.name}, decision=DEFER"
1077
+ )
1078
+ return ScheduleDecision.DEFER
862
1079
 
863
- warnings.warn(
864
- "publish_external() is deprecated. Use publish(obj) instead.",
865
- DeprecationWarning,
866
- stacklevel=2,
1080
+ except Exception as e:
1081
+ self._logger.exception(
1082
+ f"Component hook failed: component={comp_name}, "
1083
+ f"hook=on_before_schedule, error={e!s}"
1084
+ )
1085
+ raise
1086
+
1087
+ return ScheduleDecision.CONTINUE
1088
+
1089
+ async def _run_collect_artifacts(
1090
+ self, artifact: Artifact, agent: Agent, subscription: Subscription
1091
+ ) -> CollectionResult:
1092
+ """Run on_collect_artifacts hooks (returns first non-None result).
1093
+
1094
+ Components execute in priority order. First component to return non-None
1095
+ wins (short-circuit). If all return None, default is immediate scheduling.
1096
+ """
1097
+ from flock.orchestrator_component import CollectionResult
1098
+
1099
+ for component in self._components:
1100
+ comp_name = component.name or component.__class__.__name__
1101
+
1102
+ self._logger.debug(
1103
+ f"Running on_collect_artifacts: component={comp_name}, "
1104
+ f"agent={agent.name}, artifact_type={artifact.type}"
1105
+ )
1106
+
1107
+ try:
1108
+ result = await component.on_collect_artifacts(self, artifact, agent, subscription)
1109
+
1110
+ if result is not None:
1111
+ self._logger.debug(
1112
+ f"Collection handled by component: component={comp_name}, "
1113
+ f"complete={result.complete}, artifact_count={len(result.artifacts)}"
1114
+ )
1115
+ return result
1116
+ except Exception as e:
1117
+ self._logger.exception(
1118
+ f"Component hook failed: component={comp_name}, "
1119
+ f"hook=on_collect_artifacts, error={e!s}"
1120
+ )
1121
+ raise
1122
+
1123
+ # Default: immediate scheduling with single artifact
1124
+ self._logger.debug(
1125
+ f"No component handled collection, using default: "
1126
+ f"agent={agent.name}, artifact_type={artifact.type}"
867
1127
  )
868
- return await self.publish(
869
- {"type": type_name, "payload": payload},
870
- visibility=visibility,
871
- correlation_id=correlation_id,
872
- partition_key=partition_key,
873
- tags=tags,
1128
+ return CollectionResult.immediate([artifact])
1129
+
1130
async def _run_before_agent_schedule(
    self, agent: Agent, artifacts: list[Artifact]
) -> list[Artifact] | None:
    """Pass the collected artifacts through each ``on_before_agent_schedule`` hook.

    Hooks are awaited in the order of ``self._components``. Each one receives
    the artifact list returned by the previous hook, so the list can be
    rewritten step by step. A hook that returns ``None`` blocks scheduling:
    ``None`` is returned at once and the remaining components are skipped.

    Args:
        agent: The agent about to be scheduled.
        artifacts: The artifacts collected for that agent.

    Returns:
        The artifact list as left by the last hook, or ``None`` if a
        component blocked scheduling.

    Raises:
        Exception: Whatever a hook raises is logged and re-raised unchanged.
    """
    pending = artifacts

    for component in self._components:
        hook_owner = component.name or component.__class__.__name__

        self._logger.debug(
            f"Running on_before_agent_schedule: component={hook_owner}, "
            f"agent={agent.name}, artifact_count={len(pending)}"
        )

        try:
            outcome = await component.on_before_agent_schedule(self, agent, pending)

            if outcome is not None:
                # Chain: the next component sees this component's output.
                pending = outcome
                continue

            self._logger.info(
                f"Agent scheduling blocked by component: component={hook_owner}, "
                f"agent={agent.name}"
            )
            return None
        except Exception as exc:
            self._logger.exception(
                f"Component hook failed: component={hook_owner}, "
                f"hook=on_before_agent_schedule, error={exc!s}"
            )
            raise

    return pending
1168
+
1169
async def _run_agent_scheduled(
    self, agent: Agent, artifacts: list[Artifact], task: Task[Any]
) -> None:
    """Tell every component that an agent task has been scheduled.

    This is a notification-only hook. Components are awaited in the order of
    ``self._components``. A failure in one hook is logged as a warning and
    swallowed, so it neither stops the remaining components nor propagates
    to the caller.

    Args:
        agent: The agent that was scheduled.
        artifacts: The artifacts the agent was scheduled with.
        task: The task handle created for the agent run.
    """
    for component in self._components:
        hook_owner = component.name or component.__class__.__name__

        self._logger.debug(
            f"Running on_agent_scheduled: component={hook_owner}, "
            f"agent={agent.name}, artifact_count={len(artifacts)}"
        )

        try:
            await component.on_agent_scheduled(self, agent, artifacts, task)
        except Exception as exc:
            # Deliberately best-effort: report the failure and move on.
            self._logger.warning(
                f"Component notification hook failed (non-critical): "
                f"component={hook_owner}, hook=on_agent_scheduled, error={exc!s}"
            )
1193
+
1194
+ async def _run_idle(self) -> None:
1195
+ """Run on_orchestrator_idle hooks when orchestrator becomes idle.
1196
+
1197
+ Components execute in priority order. Exceptions are logged but don't
1198
+ prevent other components from executing.
1199
+ """
1200
+ self._logger.debug(
1201
+ f"Running on_orchestrator_idle hooks: component_count={len(self._components)}"
874
1202
  )
875
1203
 
876
- async def _persist_and_schedule(self, artifact: Artifact) -> None:
877
- await self.store.publish(artifact)
878
- self.metrics["artifacts_published"] += 1
879
- await self._schedule_artifact(artifact)
1204
+ for component in self._components:
1205
+ comp_name = component.name or component.__class__.__name__
1206
+
1207
+ try:
1208
+ await component.on_orchestrator_idle(self)
1209
+ except Exception as e:
1210
+ self._logger.warning(
1211
+ f"Component idle hook failed (non-critical): "
1212
+ f"component={comp_name}, hook=on_orchestrator_idle, error={e!s}"
1213
+ )
1214
+
1215
async def _run_shutdown(self) -> None:
    """Invoke every component's ``on_shutdown`` hook (best-effort cleanup).

    Components are awaited in the order of ``self._components``. If a hook
    raises, the failure is logged with its traceback and the loop carries on,
    so one broken component cannot prevent the others from shutting down.
    """
    self._logger.info(f"Shutting down {len(self._components)} orchestrator components")

    for component in self._components:
        hook_owner = component.name or component.__class__.__name__
        self._logger.debug(f"Shutting down component: name={hook_owner}")

        try:
            await component.on_shutdown(self)
        except Exception as exc:
            # Swallowed on purpose: remaining components still need their turn.
            self._logger.exception(
                f"Component shutdown failed: component={hook_owner}, "
                f"hook=on_shutdown, error={exc!s}"
            )
1235
+
1236
+ # Scheduling ───────────────────────────────────────────────────
880
1237
 
881
1238
  async def _schedule_artifact(self, artifact: Artifact) -> None:
1239
+ """Schedule agents for an artifact using component hooks.
1240
+
1241
+ Refactored to use OrchestratorComponent hook system for extensibility.
1242
+ Components can modify artifact, control scheduling, and handle collection.
1243
+ """
1244
+ # Phase 3: Initialize components on first artifact
1245
+ if not self._components_initialized:
1246
+ await self._run_initialize()
1247
+
1248
+ # Phase 3: Component hook - artifact published (can transform or block)
1249
+ artifact = await self._run_artifact_published(artifact)
1250
+ if artifact is None:
1251
+ return # Artifact blocked by component
1252
+
882
1253
  for agent in self.agents:
883
1254
  identity = agent.identity
884
1255
  for subscription in agent.subscriptions:
885
1256
  if not subscription.accepts_events():
886
1257
  continue
1258
+
887
1259
  # T066: Check prevent_self_trigger
888
1260
  if agent.prevent_self_trigger and artifact.produced_by == agent.name:
889
1261
  continue # Skip - agent produced this artifact (prevents feedback loops)
890
- # T068: Circuit breaker - check iteration limit
891
- iteration_count = self._agent_iteration_count.get(agent.name, 0)
892
- if iteration_count >= self.max_agent_iterations:
893
- # Agent hit iteration limit - possible infinite loop
894
- continue
1262
+
1263
+ # Visibility check
895
1264
  if not self._check_visibility(artifact, identity):
896
1265
  continue
1266
+
1267
+ # Subscription match check
897
1268
  if not subscription.matches(artifact):
898
1269
  continue
899
- if self._seen_before(artifact, agent):
900
- continue
901
1270
 
902
- # JoinSpec CORRELATION: Check if subscription has correlated AND gate
903
- if subscription.join is not None:
904
- # Use CorrelationEngine for JoinSpec (correlated AND gates)
905
- subscription_index = agent.subscriptions.index(subscription)
906
- completed_group = self._correlation_engine.add_artifact(
907
- artifact=artifact,
908
- subscription=subscription,
909
- subscription_index=subscription_index,
910
- )
1271
+ # Phase 3: Component hook - before schedule (circuit breaker, deduplication, etc.)
1272
+ from flock.orchestrator_component import ScheduleDecision
911
1273
 
912
- if completed_group is None:
913
- # Still waiting for correlation to complete
914
- # Phase 1.2: Emit real-time correlation update event
915
- await self._emit_correlation_updated_event(
916
- agent_name=agent.name,
917
- subscription_index=subscription_index,
918
- artifact=artifact,
919
- )
920
- continue
921
-
922
- # Correlation complete! Get all correlated artifacts
923
- artifacts = completed_group.get_artifacts()
924
- else:
925
- # AND GATE LOGIC: Use artifact collector for simple AND gates (no correlation)
926
- is_complete, artifacts = self._artifact_collector.add_artifact(
927
- agent, subscription, artifact
928
- )
929
-
930
- if not is_complete:
931
- # Still waiting for more types (AND gate incomplete)
932
- continue
933
-
934
- # BatchSpec BATCHING: Check if subscription has batch accumulator
935
- if subscription.batch is not None:
936
- # Add to batch accumulator
937
- subscription_index = agent.subscriptions.index(subscription)
938
-
939
- # COMBINED FEATURES: JoinSpec + BatchSpec
940
- # If we have JoinSpec, artifacts is a correlated GROUP - treat as single batch item
941
- # If we have AND gate, artifacts is a complete set - treat as single batch item
942
- # Otherwise (single type), add each artifact individually
943
-
944
- if subscription.join is not None or len(subscription.type_models) > 1:
945
- # JoinSpec or AND gate: Treat artifact group as ONE batch item
946
- should_flush = self._batch_engine.add_artifact_group(
947
- artifacts=artifacts,
948
- subscription=subscription,
949
- subscription_index=subscription_index,
950
- )
951
- else:
952
- # Single type subscription: Add each artifact individually
953
- should_flush = False
954
- for single_artifact in artifacts:
955
- should_flush = self._batch_engine.add_artifact(
956
- artifact=single_artifact,
957
- subscription=subscription,
958
- subscription_index=subscription_index,
959
- )
960
-
961
- if should_flush:
962
- # Size threshold reached! Flush batch now
963
- break
964
-
965
- if not should_flush:
966
- # Batch not full yet - wait for more artifacts
967
- # Phase 1.2: Emit real-time batch update event
968
- await self._emit_batch_item_added_event(
969
- agent_name=agent.name,
970
- subscription_index=subscription_index,
971
- subscription=subscription,
972
- artifact=artifact,
973
- )
974
- continue
975
-
976
- # Flush the batch and get all accumulated artifacts
977
- batched_artifacts = self._batch_engine.flush_batch(
978
- agent.name, subscription_index
979
- )
1274
+ decision = await self._run_before_schedule(artifact, agent, subscription)
1275
+ if decision == ScheduleDecision.SKIP:
1276
+ continue # Skip this subscription
1277
+ if decision == ScheduleDecision.DEFER:
1278
+ continue # Defer for later (batching/correlation)
980
1279
 
981
- if batched_artifacts is None:
982
- # No batch to flush (shouldn't happen, but defensive)
983
- continue
1280
+ # Phase 3: Component hook - collect artifacts (handles AND gates, correlation, batching)
1281
+ collection = await self._run_collect_artifacts(artifact, agent, subscription)
1282
+ if not collection.complete:
1283
+ continue # Still collecting (AND gate, correlation, or batch incomplete)
984
1284
 
985
- # Replace artifacts with batched artifacts
986
- artifacts = batched_artifacts
1285
+ artifacts = collection.artifacts
987
1286
 
988
- # Complete! Schedule agent with all collected artifacts
989
- # T068: Increment iteration counter
990
- self._agent_iteration_count[agent.name] = iteration_count + 1
1287
+ # Phase 3: Component hook - before agent schedule (final validation/transformation)
1288
+ artifacts = await self._run_before_agent_schedule(agent, artifacts)
1289
+ if artifacts is None:
1290
+ continue # Scheduling blocked by component
991
1291
 
992
- # Mark all artifacts as processed (prevent duplicate triggers)
993
- for collected_artifact in artifacts:
994
- self._mark_processed(collected_artifact, agent)
1292
+ # Complete! Schedule agent with collected artifacts
1293
+ # Schedule agent task
1294
+ is_batch_execution = subscription.batch is not None
1295
+ task = self._schedule_task(agent, artifacts, is_batch=is_batch_execution)
995
1296
 
996
- # Schedule agent with ALL artifacts (batched, correlated, or AND gate complete)
997
- self._schedule_task(agent, artifacts)
1297
+ # Phase 3: Component hook - agent scheduled (notification)
1298
+ await self._run_agent_scheduled(agent, artifacts, task)
998
1299
 
999
- def _schedule_task(self, agent: Agent, artifacts: list[Artifact]) -> None:
1000
- task = asyncio.create_task(self._run_agent_task(agent, artifacts))
1300
+ def _schedule_task(
1301
+ self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
1302
+ ) -> Task[Any]:
1303
+ """Schedule agent task and return the task handle."""
1304
+ task = asyncio.create_task(self._run_agent_task(agent, artifacts, is_batch=is_batch))
1001
1305
  self._tasks.add(task)
1002
1306
  task.add_done_callback(self._tasks.discard)
1307
+ return task
1003
1308
 
1004
1309
  def _record_agent_run(self, agent: Agent) -> None:
1005
1310
  self.metrics["agent_runs"] += 1
@@ -1012,14 +1317,17 @@ class Flock(metaclass=AutoTracedMeta):
1012
1317
  key = (str(artifact.id), agent.name)
1013
1318
  return key in self._processed
1014
1319
 
1015
- async def _run_agent_task(self, agent: Agent, artifacts: list[Artifact]) -> None:
1320
+ async def _run_agent_task(
1321
+ self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
1322
+ ) -> None:
1016
1323
  correlation_id = artifacts[0].correlation_id if artifacts else uuid4()
1017
1324
 
1018
1325
  ctx = Context(
1019
1326
  board=BoardHandle(self),
1020
1327
  orchestrator=self,
1021
1328
  task_id=str(uuid4()),
1022
- correlation_id=correlation_id, # NEW!
1329
+ correlation_id=correlation_id,
1330
+ is_batch=is_batch, # NEW!
1023
1331
  )
1024
1332
  self._record_agent_run(agent)
1025
1333
  await agent.execute(ctx, artifacts)
@@ -1154,11 +1462,63 @@ class Flock(metaclass=AutoTracedMeta):
1154
1462
 
1155
1463
  # Batch Helpers --------------------------------------------------------
1156
1464
 
1465
async def _correlation_cleanup_loop(self) -> None:
    """Background loop that periodically purges expired correlation groups.

    Each round sleeps for ``self._correlation_cleanup_interval`` seconds and
    then calls ``_cleanup_expired_correlations``. The loop ends on its own
    once the correlation engine's ``correlation_groups`` mapping is empty.
    Both on normal exit and on cancellation it clears
    ``self._correlation_cleanup_task``.

    Raises:
        asyncio.CancelledError: Re-raised after the task handle is cleared,
            so the cancelling caller still observes the cancellation.
    """
    try:
        while True:
            await asyncio.sleep(self._correlation_cleanup_interval)
            self._cleanup_expired_correlations()

            if self._correlation_engine.correlation_groups:
                continue  # Groups still pending: keep watching.

            # Nothing left to watch: drop the handle and finish.
            self._correlation_cleanup_task = None
            return
    except asyncio.CancelledError:
        # Cancelled from outside: clear the handle and re-raise.
        self._correlation_cleanup_task = None
        raise
1484
+
1485
def _cleanup_expired_correlations(self) -> None:
    """Ask the correlation engine to purge expired groups for every subscription.

    Walks a snapshot of the keys of the engine's ``correlation_groups``
    mapping, each an ``(agent_name, subscription_index)`` pair, and calls
    ``cleanup_expired`` for each one. The snapshot lets the engine add or
    remove entries while the sweep is running. What counts as expired is
    decided by the engine, not here.
    """
    engine = self._correlation_engine

    # Snapshot the keys first: cleanup_expired may mutate the mapping.
    for agent_name, subscription_index in tuple(engine.correlation_groups):
        engine.cleanup_expired(agent_name, subscription_index)
1496
+
1497
async def _batch_timeout_checker_loop(self) -> None:
    """Background loop that periodically flushes batches whose timeout expired.

    Each round sleeps for ``self._batch_timeout_interval`` seconds and then
    awaits ``_check_batch_timeouts``. The loop ends on its own once the batch
    engine's ``batches`` mapping is empty. Both on normal exit and on
    cancellation it clears ``self._batch_timeout_task``.

    Raises:
        asyncio.CancelledError: Re-raised after the task handle is cleared,
            so the cancelling caller still observes the cancellation.
    """
    try:
        while True:
            await asyncio.sleep(self._batch_timeout_interval)
            await self._check_batch_timeouts()

            if self._batch_engine.batches:
                continue  # Batches still accumulating: keep watching.

            # Nothing left to watch: drop the handle and finish.
            self._batch_timeout_task = None
            return
    except asyncio.CancelledError:
        # Cancelled from outside: clear the handle and re-raise.
        self._batch_timeout_task = None
        raise
1516
+
1157
1517
  async def _check_batch_timeouts(self) -> None:
1158
1518
  """Check all batches for timeout expiry and flush expired batches.
1159
1519
 
1160
- This method is called periodically or manually (in tests) to enforce
1161
- timeout-based batching.
1520
+ This method is called periodically by the background timeout checker
1521
+ or manually (in tests) to enforce timeout-based batching.
1162
1522
  """
1163
1523
  expired_batches = self._batch_engine.check_timeouts()
1164
1524
 
@@ -1174,8 +1534,8 @@ class Flock(metaclass=AutoTracedMeta):
1174
1534
  if agent is None:
1175
1535
  continue
1176
1536
 
1177
- # Schedule agent with batched artifacts
1178
- self._schedule_task(agent, artifacts)
1537
+ # Schedule agent with batched artifacts (timeout flush)
1538
+ self._schedule_task(agent, artifacts, is_batch=True)
1179
1539
 
1180
1540
  async def _flush_all_batches(self) -> None:
1181
1541
  """Flush all partial batches (for shutdown - ensures zero data loss)."""
@@ -1187,8 +1547,8 @@ class Flock(metaclass=AutoTracedMeta):
1187
1547
  if agent is None:
1188
1548
  continue
1189
1549
 
1190
- # Schedule agent with partial batch
1191
- self._schedule_task(agent, artifacts)
1550
+ # Schedule agent with partial batch (shutdown flush)
1551
+ self._schedule_task(agent, artifacts, is_batch=True)
1192
1552
 
1193
1553
  # Wait for all scheduled tasks to complete
1194
1554
  await self.run_until_idle()