PyPI - flock-core - Versions diffs - 0.5.9__py3-none-any.whl → 0.5.11__py3-none-any.whl - Mend

flock-core 0.5.9__py3-none-any.whl → 0.5.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of flock-core might be problematic. Click here for more details.

Files changed (54) hide show

flock/agent.py +149 -62
flock/api/themes.py +6 -2
flock/api_models.py +285 -0
flock/artifact_collector.py +6 -3
flock/batch_accumulator.py +3 -1
flock/cli.py +3 -1
flock/components.py +45 -56
flock/context_provider.py +531 -0
flock/correlation_engine.py +8 -4
flock/dashboard/collector.py +48 -29
flock/dashboard/events.py +10 -4
flock/dashboard/launcher.py +3 -1
flock/dashboard/models/graph.py +9 -3
flock/dashboard/service.py +187 -93
flock/dashboard/websocket.py +17 -4
flock/engines/dspy_engine.py +174 -98
flock/engines/examples/simple_batch_engine.py +9 -3
flock/examples.py +6 -2
flock/frontend/src/services/indexeddb.test.ts +4 -4
flock/frontend/src/services/indexeddb.ts +1 -1
flock/helper/cli_helper.py +14 -1
flock/logging/auto_trace.py +6 -1
flock/logging/formatters/enum_builder.py +3 -1
flock/logging/formatters/theme_builder.py +32 -17
flock/logging/formatters/themed_formatter.py +38 -22
flock/logging/logging.py +21 -7
flock/logging/telemetry.py +9 -3
flock/logging/telemetry_exporter/duckdb_exporter.py +27 -25
flock/logging/trace_and_logged.py +14 -5
flock/mcp/__init__.py +3 -6
flock/mcp/client.py +49 -19
flock/mcp/config.py +12 -6
flock/mcp/manager.py +6 -2
flock/mcp/servers/sse/flock_sse_server.py +9 -3
flock/mcp/servers/streamable_http/flock_streamable_http_server.py +6 -2
flock/mcp/tool.py +18 -6
flock/mcp/types/handlers.py +3 -1
flock/mcp/types/types.py +9 -3
flock/orchestrator.py +449 -58
flock/orchestrator_component.py +15 -5
flock/patches/dspy_streaming_patch.py +12 -4
flock/registry.py +9 -3
flock/runtime.py +69 -18
flock/service.py +135 -64
flock/store.py +29 -10
flock/subscription.py +6 -4
flock/system_artifacts.py +33 -0
flock/utilities.py +41 -13
flock/utility/output_utility_component.py +31 -11
{flock_core-0.5.9.dist-info → flock_core-0.5.11.dist-info}/METADATA +150 -26
{flock_core-0.5.9.dist-info → flock_core-0.5.11.dist-info}/RECORD +54 -51
{flock_core-0.5.9.dist-info → flock_core-0.5.11.dist-info}/WHEEL +0 -0
{flock_core-0.5.9.dist-info → flock_core-0.5.11.dist-info}/entry_points.txt +0 -0
{flock_core-0.5.9.dist-info → flock_core-0.5.11.dist-info}/licenses/LICENSE +0 -0

flock/orchestrator.py CHANGED Viewed

@@ -11,7 +11,7 @@ from contextlib import asynccontextmanager
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
-from uuid import uuid4
+from uuid import UUID, uuid4
 from opentelemetry import trace
 from opentelemetry.trace import Status, StatusCode
@@ -95,6 +95,7 @@ class Flock(metaclass=AutoTracedMeta):
         *,
         store: BlackboardStore | None = None,
         max_agent_iterations: int = 1000,
+        context_provider: Any = None,
     ) -> None:
         """Initialize the Flock orchestrator for blackboard-based agent coordination.
@@ -104,32 +105,46 @@ class Flock(metaclass=AutoTracedMeta):
             store: Custom blackboard storage backend. Defaults to InMemoryBlackboardStore.
             max_agent_iterations: Circuit breaker limit to prevent runaway agent loops.
                 Defaults to 1000 iterations per agent before reset.
+            context_provider: Global context provider for all agents (Phase 3 security fix).
+                If None, agents use DefaultContextProvider. Can be overridden per-agent.
         Examples:
             >>> # Basic initialization with default model
             >>> flock = Flock("openai/gpt-4.1")
             >>> # Custom storage backend
-            >>> flock = Flock(
-            ...     "openai/gpt-4o",
-            ...     store=CustomBlackboardStore()
-            ... )
+            >>> flock = Flock("openai/gpt-4o", store=CustomBlackboardStore())
             >>> # Circuit breaker configuration
+            >>> flock = Flock("openai/gpt-4.1", max_agent_iterations=500)
+            >>> # Global context provider (Phase 3 security fix)
+            >>> from flock.context_provider import DefaultContextProvider
             >>> flock = Flock(
-            ...     "openai/gpt-4.1",
-            ...     max_agent_iterations=500
+            ...     "openai/gpt-4.1", context_provider=DefaultContextProvider()
             ... )
         """
         self._patch_litellm_proxy_imports()
         self._logger = logging.getLogger(__name__)
         self.model = model
+        try:
+            init_console(clear_screen=True, show_banner=True, model=self.model)
+        except (UnicodeEncodeError, UnicodeDecodeError):
+            # Skip banner on Windows consoles with encoding issues (e.g., tests, CI)
+            pass
         self.store: BlackboardStore = store or InMemoryBlackboardStore()
         self._agents: dict[str, Agent] = {}
         self._tasks: set[Task[Any]] = set()
+        self._correlation_tasks: dict[
+            UUID, set[Task[Any]]
+        ] = {}  # Track tasks by correlation_id
         self._processed: set[tuple[str, str]] = set()
         self._lock = asyncio.Lock()
         self.metrics: dict[str, float] = {"artifacts_published": 0, "agent_runs": 0}
+        # Phase 3: Global context provider (security fix)
+        self._default_context_provider = context_provider
         # MCP integration
         self._mcp_configs: dict[str, FlockMCPConfiguration] = {}
         self._mcp_manager: FlockMCPClientManager | None = None
@@ -151,9 +166,14 @@ class Flock(metaclass=AutoTracedMeta):
         self._batch_timeout_interval: float = 0.1  # Check every 100ms
         # Phase 1.2: WebSocket manager for real-time dashboard events (set by serve())
         self._websocket_manager: Any = None
+        # Dashboard server task and launcher (for non-blocking serve)
+        self._server_task: Task[None] | None = None
+        self._dashboard_launcher: Any = None
         # Unified tracing support
         self._workflow_span = None
-        self._auto_workflow_enabled = os.getenv("FLOCK_AUTO_WORKFLOW_TRACE", "false").lower() in {
+        self._auto_workflow_enabled = os.getenv(
+            "FLOCK_AUTO_WORKFLOW_TRACE", "false"
+        ).lower() in {
             "true",
             "1",
             "yes",
@@ -228,6 +248,99 @@ class Flock(metaclass=AutoTracedMeta):
     def agents(self) -> list[Agent]:
         return list(self._agents.values())
+    async def get_correlation_status(self, correlation_id: str) -> dict[str, Any]:
+        """Get the status of a workflow by correlation ID.
+        Args:
+            correlation_id: The correlation ID to check
+        Returns:
+            Dictionary containing workflow status information:
+            - state: "active" if work is pending, "completed" otherwise
+            - has_pending_work: True if orchestrator has pending work for this correlation
+            - artifact_count: Total number of artifacts with this correlation_id
+            - error_count: Number of WorkflowError artifacts
+            - started_at: Timestamp of first artifact (if any)
+            - last_activity_at: Timestamp of most recent artifact (if any)
+        """
+        from uuid import UUID
+        try:
+            correlation_uuid = UUID(correlation_id)
+        except ValueError as exc:
+            raise ValueError(
+                f"Invalid correlation_id format: {correlation_id}"
+            ) from exc
+        # Check if orchestrator has pending work for this correlation
+        # 1. Check active tasks for this correlation_id
+        has_active_tasks = correlation_uuid in self._correlation_tasks and bool(
+            self._correlation_tasks[correlation_uuid]
+        )
+        # 2. Check correlation groups (for agents with JoinSpec that haven't yielded yet)
+        has_pending_groups = False
+        for groups in self._correlation_engine.correlation_groups.values():
+            for group_key, group in groups.items():
+                # Check if this group belongs to our correlation
+                for type_name, artifacts in group.waiting_artifacts.items():
+                    if any(
+                        artifact.correlation_id == correlation_uuid
+                        for artifact in artifacts
+                    ):
+                        has_pending_groups = True
+                        break
+                if has_pending_groups:
+                    break
+            if has_pending_groups:
+                break
+        # Workflow has pending work if EITHER tasks are active OR groups are waiting
+        has_pending_work = has_active_tasks or has_pending_groups
+        # Query artifacts for this correlation
+        from flock.store import FilterConfig
+        filters = FilterConfig(correlation_id=correlation_id)
+        artifacts, total = await self.store.query_artifacts(
+            filters, limit=1000, offset=0
+        )
+        # Count errors
+        error_count = sum(
+            1
+            for artifact in artifacts
+            if artifact.type == "flock.system_artifacts.WorkflowError"
+        )
+        # Get timestamps
+        started_at = None
+        last_activity_at = None
+        if artifacts:
+            timestamps = [artifact.created_at for artifact in artifacts]
+            started_at = min(timestamps).isoformat()
+            last_activity_at = max(timestamps).isoformat()
+        # Determine state
+        if has_pending_work:
+            state = "active"
+        elif total == 0:
+            state = "not_found"
+        elif error_count > 0 and total == error_count:
+            state = "failed"  # Only error artifacts exist
+        else:
+            state = "completed"
+        return {
+            "correlation_id": correlation_id,
+            "state": state,
+            "has_pending_work": has_pending_work,
+            "artifact_count": total,
+            "error_count": error_count,
+            "started_at": started_at,
+            "last_activity_at": last_activity_at,
+        }
     # Component management -------------------------------------------------
     def add_component(self, component: OrchestratorComponent) -> Flock:
@@ -357,7 +470,11 @@ class Flock(metaclass=AutoTracedMeta):
                     path_str = str(abs_path)
                 # Extract a meaningful name (last component of path)
-                name = PathLib(path_str).name or path_str.rstrip("/").split("/")[-1] or "root"
+                name = (
+                    PathLib(path_str).name
+                    or path_str.rstrip("/").split("/")[-1]
+                    or "root"
+                )
                 mcp_roots.append(MCPRoot(uri=uri, name=name))
         # Build configuration
@@ -559,12 +676,17 @@ class Flock(metaclass=AutoTracedMeta):
         if pending_batches and (
             self._batch_timeout_task is None or self._batch_timeout_task.done()
         ):
-            self._batch_timeout_task = asyncio.create_task(self._batch_timeout_checker_loop())
+            self._batch_timeout_task = asyncio.create_task(
+                self._batch_timeout_checker_loop()
+            )
         if pending_correlations and (
-            self._correlation_cleanup_task is None or self._correlation_cleanup_task.done()
+            self._correlation_cleanup_task is None
+            or self._correlation_cleanup_task.done()
         ):
-            self._correlation_cleanup_task = asyncio.create_task(self._correlation_cleanup_loop())
+            self._correlation_cleanup_task = asyncio.create_task(
+                self._correlation_cleanup_loop()
+            )
         # If deferred work is still outstanding, consider the orchestrator quiescent for
         # now but leave watchdog tasks running to finish the job.
@@ -585,15 +707,60 @@ class Flock(metaclass=AutoTracedMeta):
     async def direct_invoke(
         self, agent: Agent, inputs: Sequence[BaseModel | Mapping[str, Any] | Artifact]
     ) -> list[Artifact]:
-        artifacts = [self._normalize_input(value, produced_by="__direct__") for value in inputs]
+        artifacts = [
+            self._normalize_input(value, produced_by="__direct__") for value in inputs
+        ]
         for artifact in artifacts:
             self._mark_processed(artifact, agent)
             await self._persist_and_schedule(artifact)
-        ctx = Context(board=BoardHandle(self), orchestrator=self, task_id=str(uuid4()))
+        # Phase 8: Evaluate context BEFORE creating Context (security fix)
+        # Provider resolution: per-agent > global > DefaultContextProvider
+        from flock.context_provider import (
+            BoundContextProvider,
+            ContextRequest,
+            DefaultContextProvider,
+        )
+        inner_provider = (
+            getattr(agent, "context_provider", None)
+            or self._default_context_provider
+            or DefaultContextProvider()
+        )
+        # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
+        provider = BoundContextProvider(inner_provider, agent.identity)
+        # Evaluate context using provider (orchestrator controls this!)
+        # Engines will receive pre-filtered artifacts via ctx.artifacts
+        correlation_id = (
+            artifacts[0].correlation_id
+            if artifacts and artifacts[0].correlation_id
+            else uuid4()
+        )
+        request = ContextRequest(
+            agent=agent,
+            correlation_id=correlation_id,
+            store=self.store,
+            agent_identity=agent.identity,
+            exclude_ids={a.id for a in artifacts},  # Exclude input artifacts
+        )
+        context_artifacts = await provider(request)
+        # Phase 8: Create Context with pre-filtered data (no capabilities!)
+        # SECURITY: Context is now just data - engines can't query anything
+        ctx = Context(
+            artifacts=context_artifacts,  # Pre-filtered conversation context
+            agent_identity=agent.identity,
+            task_id=str(uuid4()),
+            correlation_id=correlation_id,
+        )
         self._record_agent_run(agent)
         return await agent.execute(ctx, artifacts)
-    async def arun(self, agent_builder: AgentBuilder, *inputs: BaseModel) -> list[Artifact]:
+    async def arun(
+        self, agent_builder: AgentBuilder, *inputs: BaseModel
+    ) -> list[Artifact]:
         """Execute an agent with inputs and wait for all cascades to complete (async).
         Convenience method that combines direct agent invocation with run_until_idle().
@@ -614,9 +781,7 @@ class Flock(metaclass=AutoTracedMeta):
             >>> # Multiple inputs
             >>> results = await flock.arun(
-            ...     task_agent,
-            ...     Task(name="deploy"),
-            ...     Task(name="test")
+            ...     task_agent, Task(name="deploy"), Task(name="test")
             ... )
         Note:
@@ -676,6 +841,15 @@ class Flock(metaclass=AutoTracedMeta):
             except asyncio.CancelledError:
                 pass
+        # Cancel background server task if running
+        if self._server_task and not self._server_task.done():
+            self._server_task.cancel()
+            try:
+                await self._server_task
+            except asyncio.CancelledError:
+                pass
+            # Note: _cleanup_server_callback will handle launcher.stop()
         if self._mcp_manager is not None:
             await self._mcp_manager.cleanup_all()
             self._mcp_manager = None
@@ -691,14 +865,20 @@ class Flock(metaclass=AutoTracedMeta):
         dashboard_v2: bool = False,
         host: str = "127.0.0.1",
         port: int = 8344,
-    ) -> None:
-        """Start HTTP service for the orchestrator (blocking).
+        blocking: bool = True,
+    ) -> Task[None] | None:
+        """Start HTTP service for the orchestrator.
         Args:
             dashboard: Enable real-time dashboard with WebSocket support (default: False)
             dashboard_v2: Launch the new dashboard v2 frontend (implies dashboard=True)
             host: Host to bind to (default: "127.0.0.1")
             port: Port to bind to (default: 8344)
+            blocking: If True, blocks until server stops. If False, starts server
+                in background and returns task handle (default: True)
+        Returns:
+            None if blocking=True, or Task handle if blocking=False
         Examples:
             # Basic HTTP API (no dashboard) - runs until interrupted
@@ -706,7 +886,75 @@ class Flock(metaclass=AutoTracedMeta):
             # With dashboard (WebSocket + browser launch) - runs until interrupted
             await orchestrator.serve(dashboard=True)
+            # Non-blocking mode - start server in background
+            await orchestrator.serve(dashboard=True, blocking=False)
+            # Now you can publish messages and run other logic
+            await orchestrator.publish(my_message)
+            await orchestrator.run_until_idle()
         """
+        # If non-blocking, start server in background task
+        if not blocking:
+            self._server_task = asyncio.create_task(
+                self._serve_impl(
+                    dashboard=dashboard,
+                    dashboard_v2=dashboard_v2,
+                    host=host,
+                    port=port,
+                )
+            )
+            # Add cleanup callback
+            self._server_task.add_done_callback(self._cleanup_server_callback)
+            # Give server a moment to start
+            await asyncio.sleep(0.1)
+            return self._server_task
+        # Blocking mode - run server directly with cleanup
+        try:
+            await self._serve_impl(
+                dashboard=dashboard,
+                dashboard_v2=dashboard_v2,
+                host=host,
+                port=port,
+            )
+        finally:
+            # In blocking mode, manually cleanup dashboard launcher
+            if self._dashboard_launcher is not None:
+                self._dashboard_launcher.stop()
+                self._dashboard_launcher = None
+        return None
+    def _cleanup_server_callback(self, task: Task[None]) -> None:
+        """Cleanup callback when background server task completes."""
+        # Stop dashboard launcher if it was started
+        if self._dashboard_launcher is not None:
+            try:
+                self._dashboard_launcher.stop()
+            except Exception as e:
+                self._logger.warning(f"Failed to stop dashboard launcher: {e}")
+            finally:
+                self._dashboard_launcher = None
+        # Clear server task reference
+        self._server_task = None
+        # Log any exceptions from the task
+        try:
+            exc = task.exception()
+            if exc and not isinstance(exc, asyncio.CancelledError):
+                self._logger.error(f"Server task failed: {exc}", exc_info=exc)
+        except asyncio.CancelledError:
+            pass  # Normal cancellation
+    async def _serve_impl(
+        self,
+        *,
+        dashboard: bool = False,
+        dashboard_v2: bool = False,
+        host: str = "127.0.0.1",
+        port: int = 8344,
+    ) -> None:
+        """Internal implementation of serve() - actual server logic."""
         if dashboard_v2:
             dashboard = True
@@ -735,6 +983,15 @@ class Flock(metaclass=AutoTracedMeta):
         # Store websocket manager for real-time event emission (Phase 1.2)
         self._websocket_manager = websocket_manager
+        # Phase 6+7: Set class-level WebSocket broadcast wrapper (dashboard mode)
+        async def _broadcast_wrapper(event):
+            """Isolated broadcast wrapper - no reference chain to orchestrator."""
+            return await websocket_manager.broadcast(event)
+        from flock.agent import Agent
+        Agent._websocket_broadcast_global = _broadcast_wrapper
         # Inject event collector into all existing agents
         for agent in self._agents.values():
             # Add dashboard collector with priority ordering handled by agent
@@ -762,11 +1019,8 @@ class Flock(metaclass=AutoTracedMeta):
         self._dashboard_launcher = launcher
         # Run service (blocking call)
-        try:
-            await service.run_async(host=host, port=port)
-        finally:
-            # Cleanup on exit
-            launcher.stop()
+        # Note: Cleanup is handled by serve() (blocking mode) or callback (non-blocking mode)
+        await service.run_async(host=host, port=port)
     # Scheduling -----------------------------------------------------------
@@ -802,21 +1056,12 @@ class Flock(metaclass=AutoTracedMeta):
             >>> # Publish with custom visibility
             >>> await orchestrator.publish(
-            ...     task,
-            ...     visibility=PrivateVisibility(agents={"admin"})
+            ...     task, visibility=PrivateVisibility(agents={"admin"})
             ... )
             >>> # Publish with tags for channel routing
             >>> await orchestrator.publish(task, tags={"urgent", "backend"})
         """
-        self.is_dashboard = is_dashboard
-        # Only show banner in CLI mode, not dashboard mode
-        if not self.is_dashboard:
-            try:
-                init_console(clear_screen=True, show_banner=True, model=self.model)
-            except (UnicodeEncodeError, UnicodeDecodeError):
-                # Skip banner on Windows consoles with encoding issues (e.g., tests, CI)
-                pass
         # Handle different input types
         if isinstance(obj, Artifact):
             # Already an artifact - publish as-is
@@ -925,16 +1170,12 @@ class Flock(metaclass=AutoTracedMeta):
         Examples:
             >>> # Testing: Execute agent without triggering others
             >>> results = await orchestrator.invoke(
-            ...     agent,
-            ...     Task(name="test", priority=5),
-            ...     publish_outputs=False
+            ...     agent, Task(name="test", priority=5), publish_outputs=False
             ... )
             >>> # HTTP endpoint: Execute specific agent, allow cascade
             >>> results = await orchestrator.invoke(
-            ...     movie_agent,
-            ...     Idea(topic="AI", genre="comedy"),
-            ...     publish_outputs=True
+            ...     movie_agent, Idea(topic="AI", genre="comedy"), publish_outputs=True
             ... )
             >>> await orchestrator.run_until_idle()
         """
@@ -953,8 +1194,42 @@ class Flock(metaclass=AutoTracedMeta):
             visibility=PublicVisibility(),
         )
-        # Execute agent directly
-        ctx = Context(board=BoardHandle(self), orchestrator=self, task_id=str(uuid4()))
+        # Phase 8: Evaluate context BEFORE creating Context (security fix)
+        # Provider resolution: per-agent > global > DefaultContextProvider
+        from flock.context_provider import (
+            BoundContextProvider,
+            ContextRequest,
+            DefaultContextProvider,
+        )
+        inner_provider = (
+            getattr(agent_obj, "context_provider", None)
+            or self._default_context_provider
+            or DefaultContextProvider()
+        )
+        # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
+        provider = BoundContextProvider(inner_provider, agent_obj.identity)
+        # Evaluate context using provider (orchestrator controls this!)
+        correlation_id = artifact.correlation_id if artifact.correlation_id else uuid4()
+        request = ContextRequest(
+            agent=agent_obj,
+            correlation_id=correlation_id,
+            store=self.store,
+            agent_identity=agent_obj.identity,
+            exclude_ids={artifact.id},  # Exclude input artifact
+        )
+        context_artifacts = await provider(request)
+        # Phase 8: Create Context with pre-filtered data (no capabilities!)
+        # SECURITY: Context is now just data - engines can't query anything
+        ctx = Context(
+            artifacts=context_artifacts,  # Pre-filtered conversation context
+            agent_identity=agent_obj.identity,
+            task_id=str(uuid4()),
+            correlation_id=correlation_id,
+        )
         self._record_agent_run(agent_obj)
         # Execute with optional timeout
@@ -964,7 +1239,8 @@ class Flock(metaclass=AutoTracedMeta):
         else:
             outputs = await agent_obj.execute(ctx, [artifact])
-        # Optionally publish outputs to blackboard
+        # Phase 6: Orchestrator publishes outputs (security fix)
+        # Agents return artifacts, orchestrator validates and publishes
         if publish_outputs:
             for output in outputs:
                 await self._persist_and_schedule(output)
@@ -987,7 +1263,9 @@ class Flock(metaclass=AutoTracedMeta):
         if self._components_initialized:
             return
-        self._logger.info(f"Initializing {len(self._components)} orchestrator components")
+        self._logger.info(
+            f"Initializing {len(self._components)} orchestrator components"
+        )
         for component in self._components:
             comp_name = component.name or component.__class__.__name__
@@ -1061,7 +1339,9 @@ class Flock(metaclass=AutoTracedMeta):
             )
             try:
-                decision = await component.on_before_schedule(self, artifact, agent, subscription)
+                decision = await component.on_before_schedule(
+                    self, artifact, agent, subscription
+                )
                 if decision == ScheduleDecision.SKIP:
                     self._logger.info(
@@ -1105,7 +1385,9 @@ class Flock(metaclass=AutoTracedMeta):
             )
             try:
-                result = await component.on_collect_artifacts(self, artifact, agent, subscription)
+                result = await component.on_collect_artifacts(
+                    self, artifact, agent, subscription
+                )
                 if result is not None:
                     self._logger.debug(
@@ -1147,7 +1429,9 @@ class Flock(metaclass=AutoTracedMeta):
             )
             try:
-                result = await component.on_before_agent_schedule(self, agent, current_artifacts)
+                result = await component.on_before_agent_schedule(
+                    self, agent, current_artifacts
+                )
                 if result is None:
                     self._logger.info(
@@ -1218,7 +1502,9 @@ class Flock(metaclass=AutoTracedMeta):
         Components execute in priority order. Exceptions are logged but don't
         prevent shutdown of other components (best-effort cleanup).
         """
-        self._logger.info(f"Shutting down {len(self._components)} orchestrator components")
+        self._logger.info(
+            f"Shutting down {len(self._components)} orchestrator components"
+        )
         for component in self._components:
             comp_name = component.name or component.__class__.__name__
@@ -1271,14 +1557,18 @@ class Flock(metaclass=AutoTracedMeta):
                 # Phase 3: Component hook - before schedule (circuit breaker, deduplication, etc.)
                 from flock.orchestrator_component import ScheduleDecision
-                decision = await self._run_before_schedule(artifact, agent, subscription)
+                decision = await self._run_before_schedule(
+                    artifact, agent, subscription
+                )
                 if decision == ScheduleDecision.SKIP:
                     continue  # Skip this subscription
                 if decision == ScheduleDecision.DEFER:
                     continue  # Defer for later (batching/correlation)
                 # Phase 3: Component hook - collect artifacts (handles AND gates, correlation, batching)
-                collection = await self._run_collect_artifacts(artifact, agent, subscription)
+                collection = await self._run_collect_artifacts(
+                    artifact, agent, subscription
+                )
                 if not collection.complete:
                     continue  # Still collecting (AND gate, correlation, or batch incomplete)
@@ -1292,7 +1582,9 @@ class Flock(metaclass=AutoTracedMeta):
                 # Complete! Schedule agent with collected artifacts
                 # Schedule agent task
                 is_batch_execution = subscription.batch is not None
-                task = self._schedule_task(agent, artifacts, is_batch=is_batch_execution)
+                task = self._schedule_task(
+                    agent, artifacts, is_batch=is_batch_execution
+                )
                 # Phase 3: Component hook - agent scheduled (notification)
                 await self._run_agent_scheduled(agent, artifacts, task)
@@ -1301,9 +1593,29 @@ class Flock(metaclass=AutoTracedMeta):
         self, agent: Agent, artifacts: list[Artifact], is_batch: bool = False
     ) -> Task[Any]:
         """Schedule agent task and return the task handle."""
-        task = asyncio.create_task(self._run_agent_task(agent, artifacts, is_batch=is_batch))
+        task = asyncio.create_task(
+            self._run_agent_task(agent, artifacts, is_batch=is_batch)
+        )
         self._tasks.add(task)
         task.add_done_callback(self._tasks.discard)
+        # Track task by correlation_id for workflow status tracking
+        correlation_id = artifacts[0].correlation_id if artifacts else None
+        if correlation_id:
+            if correlation_id not in self._correlation_tasks:
+                self._correlation_tasks[correlation_id] = set()
+            self._correlation_tasks[correlation_id].add(task)
+            # Clean up correlation tracking when task completes
+            def cleanup_correlation(t: Task[Any]) -> None:
+                if correlation_id in self._correlation_tasks:
+                    self._correlation_tasks[correlation_id].discard(t)
+                    # Remove empty sets to prevent memory leaks
+                    if not self._correlation_tasks[correlation_id]:
+                        del self._correlation_tasks[correlation_id]
+            task.add_done_callback(cleanup_correlation)
         return task
     def _record_agent_run(self, agent: Agent) -> None:
@@ -1322,15 +1634,92 @@ class Flock(metaclass=AutoTracedMeta):
     ) -> None:
         correlation_id = artifacts[0].correlation_id if artifacts else uuid4()
+        # Phase 8: Evaluate context BEFORE creating Context (security fix)
+        # Provider resolution: per-agent > global > DefaultContextProvider
+        from flock.context_provider import (
+            BoundContextProvider,
+            ContextRequest,
+            DefaultContextProvider,
+        )
+        inner_provider = (
+            getattr(agent, "context_provider", None)
+            or self._default_context_provider
+            or DefaultContextProvider()
+        )
+        # SECURITY FIX: Wrap provider with BoundContextProvider to prevent identity spoofing
+        provider = BoundContextProvider(inner_provider, agent.identity)
+        # Evaluate context using provider (orchestrator controls this!)
+        # Engines will receive pre-filtered artifacts via ctx.artifacts
+        request = ContextRequest(
+            agent=agent,
+            correlation_id=correlation_id,
+            store=self.store,
+            agent_identity=agent.identity,
+            exclude_ids={a.id for a in artifacts},  # Exclude input artifacts
+        )
+        context_artifacts = await provider(request)
+        # Phase 8: Create Context with pre-filtered data (no capabilities!)
+        # SECURITY: Context is now just data - engines can't query anything
         ctx = Context(
-            board=BoardHandle(self),
-            orchestrator=self,
+            artifacts=context_artifacts,  # Pre-filtered conversation context
+            agent_identity=agent.identity,
             task_id=str(uuid4()),
             correlation_id=correlation_id,
-            is_batch=is_batch,  # NEW!
+            is_batch=is_batch,
         )
         self._record_agent_run(agent)
-        await agent.execute(ctx, artifacts)
+        # Phase 6: Execute agent (returns artifacts, doesn't publish)
+        # Wrap in try/catch to handle agent failures gracefully
+        try:
+            outputs = await agent.execute(ctx, artifacts)
+        except asyncio.CancelledError:
+            # Re-raise cancellations immediately (shutdown, user cancellation)
+            # Do NOT treat these as errors - they're intentional interruptions
+            self._logger.debug(
+                f"Agent '{agent.name}' task cancelled (task={ctx.task_id})"
+            )
+            raise  # Propagate cancellation so task.cancelled() == True
+        except Exception as exc:
+            # Agent already called component.on_error hooks before re-raising
+            # Now orchestrator publishes error artifact and continues workflow
+            from flock.system_artifacts import WorkflowError
+            error_artifact_data = WorkflowError(
+                failed_agent=agent.name,
+                error_type=type(exc).__name__,
+                error_message=str(exc),
+                timestamp=datetime.now(UTC),
+                task_id=ctx.task_id,
+            )
+            # Build and publish error artifact with correlation_id
+            from flock.artifacts import ArtifactSpec
+            error_spec = ArtifactSpec.from_model(WorkflowError)
+            error_artifact = error_spec.build(
+                produced_by=f"orchestrator#{agent.name}",
+                data=error_artifact_data.model_dump(),
+                correlation_id=correlation_id,
+            )
+            await self._persist_and_schedule(error_artifact)
+            # Log error but don't re-raise - workflow continues
+            self._logger.error(
+                f"Agent '{agent.name}' failed (task={ctx.task_id}): {exc}",
+                exc_info=True,
+            )
+            return  # Exit early - no outputs to publish
+        # Phase 6: Orchestrator publishes outputs (security fix)
+        # This fixes Vulnerability #2 (WRITE Bypass) - agents can't bypass validation
+        for output in outputs:
+            await self._persist_and_schedule(output)
         if artifacts:
             try:
@@ -1373,7 +1762,9 @@ class Flock(metaclass=AutoTracedMeta):
         from flock.dashboard.service import _get_correlation_groups
         # Get current correlation groups state from engine
-        groups = _get_correlation_groups(self._correlation_engine, agent_name, subscription_index)
+        groups = _get_correlation_groups(
+            self._correlation_engine, agent_name, subscription_index
+        )
         if not groups:
             return  # No groups to report (shouldn't happen, but defensive)

flock-core 0.5.9__py3-none-any.whl → 0.5.11__py3-none-any.whl

Potentially problematic release.

flock-core 0.5.9__py3-none-any.whl → 0.5.11__py3-none-any.whl