RubyGems - phronomy - Versions diffs - 0.7.0 → 0.8.0 - Mend

phronomy 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134)

checksums.yaml +4 -4
data/.mutant.yml +8 -7
data/CHANGELOG.md +151 -1
data/README.md +170 -47
data/Rakefile +33 -0
data/benchmark/baseline.json +1 -1
data/benchmark/bench_context_assembler.rb +2 -2
data/benchmark/bench_regression.rb +6 -5
data/benchmark/bench_token_estimator.rb +5 -5
data/benchmark/bench_tool_schema.rb +1 -1
data/benchmark/bench_vector_store.rb +1 -1
data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +24 -0
data/docs/decisions/006-no-built-in-guardrails.md +20 -2
data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
data/lib/phronomy/agent/base.rb +285 -137
data/lib/phronomy/agent/checkpoint.rb +118 -0
data/lib/phronomy/agent/concerns/suspendable.rb +15 -0
data/lib/phronomy/agent/context/conversation/compaction_context.rb +117 -0
data/lib/phronomy/agent/context/conversation/trigger_context.rb +43 -0
data/lib/phronomy/agent/context/conversation/trim_context.rb +82 -0
data/lib/phronomy/agent/context/instruction/prompt_template.rb +102 -0
data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +45 -0
data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +51 -0
data/lib/phronomy/agent/context/knowledge/loader/base.rb +31 -0
data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +62 -0
data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +82 -0
data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +28 -0
data/lib/phronomy/agent/context/knowledge/source/base.rb +60 -0
data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +102 -0
data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +63 -0
data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +58 -0
data/lib/phronomy/agent/context/knowledge/splitter/base.rb +53 -0
data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +57 -0
data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +111 -0
data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +116 -0
data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +95 -0
data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +109 -0
data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +133 -0
data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +198 -0
data/lib/phronomy/agent/fsm.rb +42 -65
data/lib/phronomy/agent/invocation_pipeline.rb +99 -0
data/lib/phronomy/agent/lifecycle/fsm_session.rb +251 -0
data/lib/phronomy/agent/lifecycle/phase_machine_builder.rb +249 -0
data/lib/phronomy/agent/react_agent.rb +27 -14
data/lib/phronomy/agent/runner.rb +2 -2
data/lib/phronomy/agent/tool_executor.rb +108 -0
data/lib/phronomy/concurrency/async_queue.rb +157 -0
data/lib/phronomy/concurrency/blocking_adapter_pool.rb +443 -0
data/lib/phronomy/concurrency/cancellation_scope.rb +125 -0
data/lib/phronomy/concurrency/cancellation_token.rb +140 -0
data/lib/phronomy/concurrency/concurrency_gate.rb +157 -0
data/lib/phronomy/concurrency/deadline.rb +65 -0
data/lib/phronomy/concurrency/gate_registry.rb +52 -0
data/lib/phronomy/concurrency/pool_registry.rb +57 -0
data/lib/phronomy/configuration.rb +142 -0
data/lib/phronomy/context.rb +2 -8
data/lib/phronomy/diagnostics.rb +62 -0
data/lib/phronomy/embeddings.rb +2 -2
data/lib/phronomy/eval/runner.rb +13 -9
data/lib/phronomy/eval/scorer/llm_judge.rb +12 -1
data/lib/phronomy/event_loop.rb +184 -46
data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
data/lib/phronomy/invocation_context.rb +152 -0
data/lib/phronomy/knowledge_source.rb +0 -5
data/lib/phronomy/llm_adapter/base.rb +104 -0
data/lib/phronomy/llm_adapter/ruby_llm.rb +47 -0
data/lib/phronomy/llm_adapter.rb +20 -0
data/lib/phronomy/{context → llm_context_window}/assembler.rb +18 -3
data/lib/phronomy/{context → llm_context_window}/context_version_cache.rb +1 -1
data/lib/phronomy/{context → llm_context_window}/token_budget.rb +7 -4
data/lib/phronomy/{context → llm_context_window}/token_estimator.rb +3 -3
data/lib/phronomy/loader.rb +4 -4
data/lib/phronomy/metrics.rb +38 -0
data/lib/phronomy/{agent → multi_agent}/handoff.rb +2 -2
data/lib/phronomy/{agent → multi_agent}/orchestrator.rb +151 -126
data/lib/phronomy/multi_agent/parallel_tool_chat.rb +149 -0
data/lib/phronomy/{agent → multi_agent}/team_coordinator.rb +2 -2
data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
data/lib/phronomy/runtime/scheduler.rb +98 -0
data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
data/lib/phronomy/runtime/task_registry.rb +48 -0
data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
data/lib/phronomy/runtime/timer_queue.rb +106 -0
data/lib/phronomy/runtime/timer_service.rb +42 -0
data/lib/phronomy/runtime.rb +389 -0
data/lib/phronomy/splitter.rb +3 -3
data/lib/phronomy/task/backend.rb +80 -0
data/lib/phronomy/task/fiber_backend.rb +157 -0
data/lib/phronomy/task/immediate_backend.rb +89 -0
data/lib/phronomy/task/thread_backend.rb +84 -0
data/lib/phronomy/task.rb +275 -0
data/lib/phronomy/task_group.rb +265 -0
data/lib/phronomy/testing/fake_clock.rb +109 -0
data/lib/phronomy/testing/fake_scheduler.rb +104 -0
data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
data/lib/phronomy/testing.rb +12 -0
data/lib/phronomy/tool/base.rb +156 -7
data/lib/phronomy/tool/mcp_tool.rb +47 -16
data/lib/phronomy/tool/scope_policy.rb +50 -0
data/lib/phronomy/tracing/null_tracer.rb +3 -1
data/lib/phronomy/tracing/open_telemetry_tracer.rb +34 -0
data/lib/phronomy/vector_store.rb +2 -2
data/lib/phronomy/version.rb +1 -1
data/lib/phronomy/workflow.rb +52 -5
data/lib/phronomy/workflow_context.rb +37 -2
data/lib/phronomy/workflow_runner.rb +28 -77
data/lib/phronomy.rb +43 -0
metadata +73 -33
data/lib/phronomy/agent/parallel_tool_chat.rb +0 -92
data/lib/phronomy/cancellation_token.rb +0 -92
data/lib/phronomy/context/compaction_context.rb +0 -111
data/lib/phronomy/context/trigger_context.rb +0 -39
data/lib/phronomy/context/trim_context.rb +0 -75
data/lib/phronomy/embeddings/base.rb +0 -22
data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +0 -45
data/lib/phronomy/fsm_session.rb +0 -201
data/lib/phronomy/knowledge_source/base.rb +0 -36
data/lib/phronomy/knowledge_source/entity_knowledge.rb +0 -96
data/lib/phronomy/knowledge_source/rag_knowledge.rb +0 -57
data/lib/phronomy/knowledge_source/static_knowledge.rb +0 -52
data/lib/phronomy/loader/base.rb +0 -25
data/lib/phronomy/loader/csv_loader.rb +0 -56
data/lib/phronomy/loader/markdown_loader.rb +0 -76
data/lib/phronomy/loader/plain_text_loader.rb +0 -22
data/lib/phronomy/prompt_template.rb +0 -96
data/lib/phronomy/splitter/base.rb +0 -47
data/lib/phronomy/splitter/fixed_size_splitter.rb +0 -51
data/lib/phronomy/splitter/recursive_splitter.rb +0 -105
data/lib/phronomy/vector_store/base.rb +0 -82
data/lib/phronomy/vector_store/in_memory.rb +0 -93
data/lib/phronomy/vector_store/pgvector.rb +0 -127
data/lib/phronomy/vector_store/redis_search.rb +0 -192

data/lib/phronomy/{agent → multi_agent}/orchestrator.rb RENAMED

@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 module Phronomy
-  module Agent
+  module MultiAgent
     # Base class for orchestrator agents that coordinate multiple subagents.
     # Implements the Orchestrator-Subagent multi-agent coordination pattern
     # (Anthropic blog, Pattern 2).
@@ -16,7 +16,7 @@ module Phronomy
     # - +fan_out+ for parallel invocation of the same agent across multiple inputs.
     #
     # @example Declarative DSL
-    #   class ResearchOrchestrator < Phronomy::Agent::Orchestrator
+    #   class ResearchOrchestrator < Phronomy::MultiAgent::Orchestrator
     #     model "gpt-4o"
     #     instructions "You coordinate research tasks."
     #     subagent :searcher,   SearchAgent
@@ -26,7 +26,7 @@ module Phronomy
     #   result = ResearchOrchestrator.new.invoke("Research the latest AI news.")
     #
     # @example Programmatic parallel dispatch
-    #   class MyOrchestrator < Phronomy::Agent::Orchestrator
+    #   class MyOrchestrator < Phronomy::MultiAgent::Orchestrator
     #     model "gpt-4o"
     #     instructions "Dispatch tasks in parallel."
     #
@@ -41,7 +41,7 @@ module Phronomy
     #
     # @example Fan-out (same agent, multiple inputs)
     #   results = fan_out(agent: TranslationAgent, inputs: ["Hello", "World"])
-    class Orchestrator < Base
+    class Orchestrator < Agent::Base
       # Declares a named subagent and registers it as a tool accessible to the
       # LLM during an +invoke+ call.
       #
@@ -62,15 +62,29 @@ module Phronomy
           description "Dispatch work to the #{name} subagent (#{agent_class.name})"
           param :input, type: :string, desc: "The task or question for the subagent"
+          # @_orchestrator_context is injected at call time by prepare_tool_class.
+          attr_writer :_orchestrator_context
           define_method(:execute) do |input:|
-            # Inherit the calling orchestrator's thread_id and config when
-            # available so that sub-agent spans and memory stay connected.
-            ctx = Thread.current[:phronomy_orchestrator_context] || {}
-            result = agent_class.new.invoke(
+            # Inherit the calling orchestrator's thread_id, config, and
+            # InvocationContext so that child subagent spans and memory stay
+            # connected to the parent invocation.
+            ctx = @_orchestrator_context || {}
+            parent_ic = ctx[:invocation_context]
+            task_config = ctx[:config] || {}
+            # Propagate parent InvocationContext to the child agent so that
+            # cancellation, deadline, and tracing carry through automatically.
+            if parent_ic && !task_config[:invocation_context]
+              child_ic = parent_ic.merge(parent_task_id: parent_ic.task_id)
+              task_config = task_config.merge(invocation_context: child_ic)
+            end
+            result = agent_class.new.invoke_async(
               input,
-              thread_id: ctx[:thread_id],
-              config: ctx[:config] || {}
-            )
+              thread_id: ctx[:thread_id] || parent_ic&.thread_id,
+              config: task_config
+            ).await
             result[:output]
           rescue
             raise if on_error == :raise
@@ -78,6 +92,9 @@ module Phronomy
           end
         end
+        # Track this tool class so prepare_tool_class can inject context.
+        @_subagent_tool_classes = (@_subagent_tool_classes || []) + [tool_class]
         # Append without clobbering previously registered tools or aliases.
         @tools = (@tools || []) + [tool_class]
         @tool_aliases ||= {}
@@ -85,6 +102,14 @@ module Phronomy
         registered_subagents[name] = {agent_class: agent_class, on_error: on_error}
       end
+      # Returns the subagent tool classes registered on this specific class.
+      # Used by {#prepare_tool_class} to inject context.
+      # @return [Array<Class>]
+      # @api private
+      def self._subagent_tool_classes
+        @_subagent_tool_classes || []
+      end
       # Returns the subagent registry for this specific class (not inherited).
       #
       # @return [Hash{Symbol => Hash}]
@@ -93,8 +118,8 @@ module Phronomy
         @registered_subagents ||= {}
       end
-      # Dispatches multiple heterogeneous agent tasks in parallel using Ruby
-      # threads. Each task is a Hash describing one agent invocation.
+      # Dispatches multiple heterogeneous agent tasks in parallel using
+      # cooperative {Task}s. Each task is a Hash describing one agent invocation.
       #
       # Results are returned in the same order as the input +tasks+ array.
       # Concurrency is bounded by +max_concurrency+; when nil all tasks run at
@@ -102,7 +127,7 @@ module Phronomy
       #
       # Error semantics are controlled by +on_error+:
       # - +:raise+ (default) — every task runs to completion; the first
-      #   exception in input order is then re-raised in the calling thread.
+      #   exception in input order is then re-raised in the calling task.
       # - +:skip+            — failed tasks return +nil+; no exception is raised.
       #
       # @param tasks           [Array<Hash>]
@@ -110,27 +135,27 @@ module Phronomy
       # @option task [String]  :input  input string for the agent (required)
       # @option task [Hash]    :config forwarded to +agent#invoke+ (default: +{}+)
       # @option task [String]  :thread_id forwarded to +agent#invoke+ (default: nil)
-      # @param max_concurrency    [Integer, nil] maximum number of concurrent threads;
+      # @param max_concurrency    [Integer, nil] maximum number of concurrent tasks;
       #   nil means no limit (all tasks run simultaneously)
       # @param on_error            [Symbol] +:raise+ or +:skip+
-      # @param timeout             [Numeric, nil] maximum seconds to wait for all workers;
+      # @param timeout             [Numeric, nil] maximum seconds to wait for all tasks;
       #   nil means wait indefinitely. When the deadline is exceeded,
-      #   {Phronomy::TimeoutError} is raised and all surviving worker threads are killed.
-      # @param cancellation_token [Phronomy::CancellationToken, nil] when provided, the
+      #   {Phronomy::TimeoutError} is raised and all surviving tasks are cancelled
+      #   cooperatively.
+      # @param cancellation_token [Phronomy::Concurrency::CancellationToken, nil] when provided, the
       #   token is merged into each task's config (unless the task already sets one) so
-      #   that every worker agent checks it before making LLM calls.
-      # @param force_kill [Boolean] when +true+, surviving worker threads are killed with
-      #   +Thread#kill+ after the grace period if they do not stop cooperatively. When
-      #   +false+ (default), workers are asked to stop cooperatively but are never killed;
-      #   the caller receives {Phronomy::TimeoutError} immediately and abandoned workers
-      #   discard their results when they eventually finish. +false+ is safer for
-      #   production because +Thread#kill+ can interrupt +ensure+ blocks.
+      #   that every child agent checks it before making LLM calls.
+      # @param invocation_context [Phronomy::InvocationContext, nil] when provided,
+      #   the context (cancellation_token, deadline, thread_id) is propagated to each
+      #   child agent as a child InvocationContext.
+      # @param force_kill [Boolean] deprecated — cooperative cancellation is always
+      #   used; this parameter is accepted for backwards compatibility but has no effect.
       # @return [Array<Hash, nil>] agent results in the same order as +tasks+
       # @raise [ArgumentError] if +on_error+ is not +:raise+ or +:skip+
       # @raise [ArgumentError] if +max_concurrency+ is not a positive Integer or nil
       # @raise [Phronomy::TimeoutError] if +timeout+ is exceeded
       # @api public
-      def dispatch_parallel(*tasks, max_concurrency: nil, on_error: :raise, timeout: nil, cancellation_token: nil, force_kill: false)
+      def dispatch_parallel(*tasks, max_concurrency: nil, on_error: :raise, timeout: nil, cancellation_token: nil, invocation_context: nil, force_kill: false)
         unless [:raise, :skip].include?(on_error)
           raise ArgumentError, "unknown on_error: #{on_error.inspect}"
         end
@@ -138,7 +163,7 @@ module Phronomy
           raise ArgumentError, "max_concurrency must be a positive Integer"
         end
-        bounded_map(tasks, max_concurrency: max_concurrency, on_error: on_error, timeout: timeout, cancellation_token: cancellation_token, force_kill: force_kill)
+        bounded_map(tasks, max_concurrency: max_concurrency, on_error: on_error, timeout: timeout, cancellation_token: cancellation_token, invocation_context: invocation_context, force_kill: force_kill)
       end
       # Runs the same agent against multiple inputs in parallel (fan-out pattern).
@@ -150,17 +175,20 @@ module Phronomy
       # @param inputs          [Array<String>] list of input strings
       # @param config          [Hash]          forwarded to every +agent#invoke+ call
       # @param thread_id       [String, nil]   forwarded to every +agent#invoke+ call
-      # @param max_concurrency [Integer, nil]  forwarded to {#dispatch_parallel}
-      # @param on_error        [Symbol]        forwarded to {#dispatch_parallel}
+      # @param max_concurrency    [Integer, nil]  forwarded to {#dispatch_parallel}
+      # @param on_error            [Symbol]        forwarded to {#dispatch_parallel}
+      # @param invocation_context [Phronomy::InvocationContext, nil] forwarded to
+      #   {#dispatch_parallel} for child context propagation
       # @return [Array<Hash, nil>] results in the same order as +inputs+
       # @api public
-      def fan_out(agent:, inputs:, config: {}, thread_id: nil, max_concurrency: nil, on_error: :raise, timeout: nil, cancellation_token: nil, force_kill: false)
+      def fan_out(agent:, inputs:, config: {}, thread_id: nil, max_concurrency: nil, on_error: :raise, timeout: nil, cancellation_token: nil, invocation_context: nil, force_kill: false)
         dispatch_parallel(
           *inputs.map { |input| {agent: agent, input: input, config: config, thread_id: thread_id} },
           max_concurrency: max_concurrency,
           on_error: on_error,
           timeout: timeout,
           cancellation_token: cancellation_token,
+          invocation_context: invocation_context,
           force_kill: force_kill
         )
       end
@@ -175,131 +203,128 @@ module Phronomy
       # @return [Hash]  the sub-agent's result hash (+:output+, +:messages+)
       # @api public
       def subagent(agent_class, input, config: nil, thread_id: nil)
-        ctx = Thread.current[:phronomy_orchestrator_context] || {}
-        agent_class.new.invoke(
+        ctx = @_orchestrator_context || {}
+        parent_ic = ctx[:invocation_context]
+        effective_config = config || ctx[:config] || {}
+        # Propagate parent InvocationContext to the child agent.
+        if parent_ic && !effective_config[:invocation_context]
+          child_ic = parent_ic.merge(parent_task_id: parent_ic.task_id)
+          effective_config = effective_config.merge(invocation_context: child_ic)
+        end
+        agent_class.new.invoke_async(
           input,
-          config: config || ctx[:config] || {},
-          thread_id: thread_id || ctx[:thread_id]
-        )
+          config: effective_config,
+          thread_id: thread_id || ctx[:thread_id] || parent_ic&.thread_id
+        ).await
       end
       private
-      # Override invoke_once to expose the current thread_id and config via a
-      # thread-local so that DSL-registered subagent tools can inherit them.
+      # Override invoke_once to expose the current thread_id and config via an
+      # instance variable so that DSL-registered subagent tools can inherit them
+      # without using Thread.current.
       def invoke_once(input, messages: [], thread_id: nil, config: {})
-        prev = Thread.current[:phronomy_orchestrator_context]
-        Thread.current[:phronomy_orchestrator_context] = {thread_id: thread_id, config: config}
+        prev = @_orchestrator_context
+        @_orchestrator_context = {
+          thread_id: thread_id,
+          config: config,
+          invocation_context: config[:invocation_context]
+        }
         super
       ensure
-        Thread.current[:phronomy_orchestrator_context] = prev
+        @_orchestrator_context = prev
       end
-      # Worker-pool implementation shared by {#dispatch_parallel} and {#fan_out}.
-      #
-      # Uses a +Queue+ as a work-stealing mechanism: each worker thread pops a
-      # task, executes it, and loops until the queue is empty.  The number of
-      # workers is +min(max_concurrency, tasks.length)+, capped at the task count
-      # so we never spin up idle threads.
+      # Override prepare_tool_class to inject the current orchestrator context
+      # into DSL-registered subagent tools before each call.
+      def prepare_tool_class(tool_class)
+        prepared = super
+        orch = self
+        # Only wrap subagent tools (those registered via the .subagent DSL).
+        return prepared unless self.class._subagent_tool_classes.include?(tool_class)
+        # Capture the effective tool name before building the anonymous subclass.
+        # Class-level instance variables (@tool_name) are not inherited through
+        # subclassing, so the wrapper must set it explicitly.
+        effective_name = prepared.new.name
+        Class.new(prepared) do
+          tool_name effective_name
+          define_method(:call) do |args|
+            self._orchestrator_context = orch.instance_variable_get(:@_orchestrator_context)
+            super(args)
+          end
+        end
+      end
+      # Task-based worker pool shared by {#dispatch_parallel} and {#fan_out}.
       #
-      # +errors+ is indexed by task position so that the first error in *input*
-      # order is deterministically re-raised when +on_error: :raise+ is used.
-      # A +Mutex+ guards concurrent writes to +errors+ even though Array element
-      # assignment at different indices is safe in MRI; this keeps the code
-      # correct across alternative Ruby runtimes.
+      # Spawns one {Task} per input using a {TaskGroup} so that +max_concurrency+
+      # acts as a semaphore: spare tasks block on {TaskGroup#spawn} until a slot
+      # becomes available.  Results are written back to +results+ in input order;
+      # +errors+ captures the first error per position so that the first error in
+      # *input* order is deterministically re-raised when +on_error: :raise+ is used.
       #
-      # When +timeout+ is given, workers are first asked to stop cooperatively
-      # via a cancellation flag (so they do not pick up new tasks) and then given
-      # +KILL_GRACE_SECONDS+ to finish any in-flight +ensure+ blocks.  Only
-      # workers that are still alive after the grace period are force-killed, and
-      # a warning is logged in that case.  Use a +CancellationToken+ (see #216)
-      # for full cooperative cancellation of long-running tasks.
+      # When +timeout+ is given, each spawned task is joined with the remaining
+      # deadline.  Any still-alive tasks are cancelled cooperatively via
+      # {TaskGroup#cancel_all!} before {Phronomy::TimeoutError} is raised.
+      # The +force_kill+ argument is deprecated: cooperative cancellation is always
+      # used regardless of its value.
       #
       # Deadline tracking uses +Process.clock_gettime(Process::CLOCK_MONOTONIC)+
       # to avoid sensitivity to NTP adjustments and system-clock changes.
-      KILL_GRACE_SECONDS = 0.5
-      private_constant :KILL_GRACE_SECONDS
-      def bounded_map(tasks, max_concurrency:, on_error:, timeout: nil, cancellation_token: nil, force_kill: false)
+      def bounded_map(tasks, max_concurrency:, on_error:, timeout: nil, cancellation_token: nil, invocation_context: nil, force_kill: false) # rubocop:disable Lint/UnusedMethodArgument
         return [] if tasks.empty?
         results = Array.new(tasks.length)
         errors = Array.new(tasks.length)
-        errors_mutex = Mutex.new
-        # Mutex-backed cooperative stop token; workers check before each task pick-up.
-        internal_stop_token = Phronomy::CancellationToken.new
-        queue = Queue.new
-        tasks.each_with_index { |task, i| queue << [i, task] }
-        worker_count = [max_concurrency || tasks.length, tasks.length].min
-        workers = worker_count.times.map do
-          Thread.new do
-            loop do
-              break if internal_stop_token.cancelled?
-              i, task = begin
-                queue.pop(true)
-              rescue ThreadError
-                break # queue is empty; this worker is done
-              end
-              # Merge the shared cancellation token into the task's config unless
-              # the task already supplies its own token.
-              task_config = task.fetch(:config, {})
-              if cancellation_token && !task_config[:cancellation_token]
-                task_config = task_config.merge(cancellation_token: cancellation_token)
-              end
-              begin
-                results[i] = task[:agent].new.invoke(
-                  task[:input],
-                  config: task_config,
-                  thread_id: task[:thread_id]
-                )
-              rescue => e
-                case on_error
-                when :skip
-                  results[i] = nil
-                else
-                  errors_mutex.synchronize { errors[i] = e }
-                end
-              end
+        group = Phronomy::Runtime.instance.task_group(limit: max_concurrency || tasks.length)
+        # Resolve the effective cancellation token: explicit argument wins;
+        # fall back to the one embedded in the InvocationContext if present.
+        effective_ct = cancellation_token || invocation_context&.cancellation_token
+        spawned = tasks.each_with_index.map do |task, i|
+          group.spawn do
+            task_config = task.fetch(:config, {})
+            # Merge the shared cancellation token unless the task already has one.
+            if effective_ct && !task_config[:cancellation_token]
+              task_config = task_config.merge(cancellation_token: effective_ct)
+            end
+            # Propagate parent InvocationContext to each child task so that
+            # cancellation, deadline, and tracing carry through automatically.
+            if invocation_context && !task_config[:invocation_context]
+              child_ic = invocation_context.merge(parent_task_id: invocation_context.task_id)
+              task_config = task_config.merge(invocation_context: child_ic)
             end
+            results[i] = task[:agent].new.invoke_async(
+              task[:input],
+              config: task_config,
+              thread_id: task[:thread_id] || invocation_context&.thread_id
+            ).await
+          rescue => e
+            errors[i] = e unless on_error == :skip
           end
         end
-        workers.each(&:join) if timeout.nil?
         if timeout
-          deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
-          workers.each do |w|
-            remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
-            w.join([remaining, 0].max)
-          end
+          deadline = Phronomy::Concurrency::Deadline.in(timeout)
+          spawned.each { |t| t.join([deadline.remaining_seconds, 0].max) }
-          alive = workers.select(&:alive?)
+          alive = spawned.select(&:alive?)
           unless alive.empty?
-            # Signal workers cooperatively to stop picking up new tasks.
-            internal_stop_token.cancel!
-            if force_kill
-              # Give in-flight ensure blocks a short grace period before kill.
-              alive.each { |w| w.join(KILL_GRACE_SECONDS) }
-              still_alive = alive.select(&:alive?)
-              if still_alive.any?
-                Phronomy.configuration.logger&.warn(
-                  "[Phronomy] dispatch_parallel: #{still_alive.length} worker(s) did not stop " \
-                  "within grace period; force-killing. Use CancellationToken for " \
-                  "cooperative cancellation of long-running tasks."
-                )
-                still_alive.each(&:kill)
-              end
-            end
+            group.cancel_all!
             raise Phronomy::TimeoutError,
               "dispatch_parallel timed out after #{timeout}s " \
-              "(#{alive.length} of #{workers.length} workers still running)"
+              "(#{alive.length} of #{spawned.length} tasks still running)"
           end
+        else
+          spawned.each(&:await)
         end
         first_error = errors.compact.first

data/lib/phronomy/multi_agent/parallel_tool_chat.rb ADDED

@@ -0,0 +1,149 @@
+# frozen_string_literal: true
+module Phronomy
+  module MultiAgent
+    # RubyLLM::Chat subclass that executes multiple tool calls concurrently.
+    #
+    # When the LLM returns more than one tool call in a single response, each
+    # tool is dispatched according to its +execution_mode+:
+    # - +:cooperative+ tools run via +Runtime.instance.spawn+, delegating
+    #   scheduling to the configured runtime backend.
+    # - +:blocking_io+ tools are offloaded to a +BlockingAdapterPool+ worker
+    #   thread so they do not occupy a scheduler task slot.
+    # All results are collected before being appended to the message history,
+    # preserving deterministic message order while reducing wall-clock latency
+    # when tools are IO-bound (HTTP calls, DB queries, etc.).
+    #
+    # Single-tool responses fall through to the standard sequential path via
+    # +super+, preserving all existing edge-case behaviour (Tool::Halt,
+    # forced_tool_choice, streaming, SuspendSignal, etc.).
+    #
+    # This class is used automatically when EventLoop mode is enabled
+    # ({Phronomy.configuration.event_loop}).  It is not used for direct
+    # synchronous +invoke+ calls so that the streaming callback state remains
+    # single-threaded.
+    # @api private
+    class ParallelToolChat < RubyLLM::Chat
+      # @param max_parallel_tools [Integer] maximum simultaneous tool executions
+      # @param cancellation_token [Phronomy::Concurrency::CancellationToken, nil] token observed before each batch
+      # @param opts [Hash] remaining kwargs forwarded to RubyLLM::Chat
+      # @api private
+      def initialize(max_parallel_tools: 10, cancellation_token: nil, **opts)
+        super(**opts)
+        @max_parallel_tools = max_parallel_tools
+        @cancellation_token = cancellation_token
+      end
+      # Allows the owning agent to update the token between retries.
+      # @api private
+      attr_writer :cancellation_token
+      private
+      # Overrides RubyLLM::Chat#handle_tool_calls to parallelise execution
+      # when multiple tool calls are present in a single LLM response.
+      #
+      # The method preserves the three-phase protocol of the original:
+      #   1. Pre-execution callbacks (+on_new_message+, +on_tool_call+) —
+      #      sequential so that the Suspendable concern's approval hook can
+      #      raise +SuspendSignal+ before any tool is executed.
+      #   2. Parallel tool execution — cooperative tools via Runtime.instance.spawn
+      #      (respects the configured runtime backend), blocking_io tools via BlockingAdapterPool.
+      #   3. Post-execution callbacks and message recording — sequential,
+      #      in the original tool-call order.
+      #
+      # @param response [RubyLLM::Message] the LLM response containing tool calls
+      # @yield streaming block forwarded to +complete+
+      # @api private
+      def handle_tool_calls(response, &block)
+        tool_calls = response.tool_calls.values
+        # Single tool: delegate to the parent implementation to preserve every
+        # edge case (forced_tool_choice, streaming, Halt, SuspendSignal…).
+        return super if tool_calls.size <= 1
+        # Phase 1 — pre-execution callbacks (sequential, original order).
+        # The SuspendSignal approval hook is registered via on_tool_call, so it
+        # MUST fire before execution begins.
+        tool_calls.each do |tool_call|
+          @on[:new_message]&.call
+          @on[:tool_call]&.call(tool_call)
+        end
+        # Phase 2 — parallel tool execution.
+        # :cooperative tools run inside a Task (no pool).
+        # :blocking_io/:cpu_bound/:external_process tools are submitted directly
+        # to BlockingAdapterPool when available — eliminating the extra Task
+        # Thread that previously wrapped each pool operation.
+        #
+        # Both Phronomy::Task and BlockingAdapterPool::PendingOperation support
+        # #await, so results are collected uniformly below.
+        ct = @cancellation_token
+        max = @max_parallel_tools
+        tool_results = tool_calls.each_slice(max).flat_map do |batch|
+          if ct&.cancelled?
+            raise Phronomy::CancellationError, "invocation cancelled before tool execution"
+          end
+          # Dispatch all tools in this batch via ToolExecutor (centralised routing).
+          dispatched = batch.map do |tc|
+            tool = tools[tc.name.to_sym]
+            unless tool
+              next {tool_call: tc, awaitable: nil, result: {
+                error: "Model tried to call unavailable tool `#{tc.name}`. " \
+                       "Available tools: #{tools.keys.to_json}."
+              }}
+            end
+            awaitable = Phronomy::Agent::ToolExecutor.call_async(
+              tool: tool,
+              args: tc.arguments,
+              cancellation_token: ct
+            )
+            {tool_call: tc, awaitable: awaitable, result: nil}
+          end
+          # Await all dispatched operations in original order.
+          dispatched.map do |item|
+            result = item[:awaitable] ? item[:awaitable].await : item[:result]
+            {tool_call: item[:tool_call], result: result}
+          end
+        end
+        # Phase 3 — post-execution callbacks and message recording (sequential).
+        halt_result = nil
+        tool_results.each do |item|
+          result = item[:result]
+          @on[:tool_result]&.call(result)
+          tool_payload = result.is_a?(RubyLLM::Tool::Halt) ? result.content : result
+          content = content_like?(tool_payload) ? tool_payload : tool_payload.to_s
+          message = add_message(role: :tool, content: content, tool_call_id: item[:tool_call].id)
+          @on[:end_message]&.call(message)
+          halt_result = result if result.is_a?(RubyLLM::Tool::Halt)
+        end
+        reset_tool_choice if forced_tool_choice?
+        halt_result || complete(&block)
+      end
+      # Overrides RubyLLM::Chat#execute_tool to forward the cancellation token
+      # explicitly and to route the call through {ToolExecutor} so that the
+      # execution_mode decision is made in a single place.
+      def execute_tool(tool_call)
+        tool = tools[tool_call.name.to_sym]
+        unless tool
+          return {
+            error: "Model tried to call unavailable tool `#{tool_call.name}`. " \
+                   "Available tools: #{tools.keys.to_json}."
+          }
+        end
+        Phronomy::Agent::ToolExecutor.call_async(
+          tool: tool,
+          args: tool_call.arguments,
+          cancellation_token: @cancellation_token
+        ).await
+      end
+    end
+  end
+end

data/lib/phronomy/{agent → multi_agent}/team_coordinator.rb RENAMED Viewed

@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 module Phronomy
-  module Agent
+  module MultiAgent
     # Implements the "Agent teams" coordination pattern (Anthropic blog, Pattern 3).
     #
     # @see https://claude.com/blog/multi-agent-coordination-patterns
@@ -24,7 +24,7 @@ module Phronomy
     # +invoke+ call, so the LLM retains context across multiple task assignments.
     #
     # @example Basic usage
-    #   class MigrationTeam < Phronomy::Agent::TeamCoordinator
+    #   class MigrationTeam < Phronomy::MultiAgent::TeamCoordinator
     #     coordinator_model        "claude-3-5-sonnet-20241022"
     #     coordinator_instructions <<~INST
     #       Analyze the request and enqueue one migration task per service.