RubyGems - phronomy - Versions diffs - 0.5.4 → 0.7.0 - Mend

phronomy 0.5.4 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111) hide show

checksums.yaml +4 -4
data/.mutant.yml +21 -0
data/CHANGELOG.md +379 -0
data/CONTRIBUTING.md +102 -0
data/README.md +262 -48
data/RELEASE_CHECKLIST.md +86 -0
data/SECURITY.md +80 -0
data/benchmark/baseline.json +9 -0
data/benchmark/bench_agent_invoke.rb +105 -0
data/benchmark/bench_context_assembler.rb +46 -0
data/benchmark/bench_regression.rb +171 -0
data/benchmark/bench_token_estimator.rb +44 -0
data/benchmark/bench_tool_schema.rb +69 -0
data/benchmark/bench_vector_store.rb +39 -0
data/benchmark/bench_workflow.rb +55 -0
data/benchmark/run_all.rb +118 -0
data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
data/docs/decisions/002-workflow-context-immutability.md +42 -0
data/docs/decisions/003-event-loop-singleton.md +48 -0
data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +51 -0
data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
data/docs/decisions/006-no-built-in-guardrails.md +48 -0
data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
data/docs/decisions/009-state-store-abstraction.md +141 -0
data/lib/phronomy/agent/base.rb +281 -13
data/lib/phronomy/agent/before_completion_context.rb +1 -0
data/lib/phronomy/agent/checkpoint.rb +1 -0
data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
data/lib/phronomy/agent/concerns/retryable.rb +12 -1
data/lib/phronomy/agent/concerns/suspendable.rb +4 -0
data/lib/phronomy/agent/fsm.rb +180 -0
data/lib/phronomy/agent/handoff.rb +3 -0
data/lib/phronomy/agent/orchestrator.rb +123 -11
data/lib/phronomy/agent/parallel_tool_chat.rb +92 -0
data/lib/phronomy/agent/react_agent.rb +8 -6
data/lib/phronomy/agent/runner.rb +2 -0
data/lib/phronomy/agent/shared_state.rb +11 -0
data/lib/phronomy/agent/suspend_signal.rb +2 -0
data/lib/phronomy/agent/team_coordinator.rb +17 -5
data/lib/phronomy/cancellation_token.rb +92 -0
data/lib/phronomy/configuration.rb +32 -2
data/lib/phronomy/context/assembler.rb +6 -0
data/lib/phronomy/context/compaction_context.rb +2 -0
data/lib/phronomy/context/context_version_cache.rb +2 -0
data/lib/phronomy/context/token_budget.rb +3 -0
data/lib/phronomy/context/token_estimator.rb +9 -2
data/lib/phronomy/context/trigger_context.rb +1 -0
data/lib/phronomy/context/trim_context.rb +4 -0
data/lib/phronomy/context.rb +0 -1
data/lib/phronomy/embeddings/base.rb +5 -2
data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
data/lib/phronomy/eval/comparison.rb +2 -0
data/lib/phronomy/eval/dataset.rb +4 -0
data/lib/phronomy/eval/metrics.rb +6 -0
data/lib/phronomy/eval/runner.rb +2 -0
data/lib/phronomy/eval/scorer/base.rb +1 -0
data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
data/lib/phronomy/event.rb +14 -0
data/lib/phronomy/event_loop.rb +254 -0
data/lib/phronomy/fsm_session.rb +201 -0
data/lib/phronomy/generator_verifier.rb +24 -22
data/lib/phronomy/guardrail/base.rb +3 -0
data/lib/phronomy/guardrail.rb +0 -1
data/lib/phronomy/knowledge_source/base.rb +6 -2
data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
data/lib/phronomy/loader/base.rb +1 -0
data/lib/phronomy/loader/csv_loader.rb +2 -0
data/lib/phronomy/loader/markdown_loader.rb +2 -0
data/lib/phronomy/loader/plain_text_loader.rb +1 -0
data/lib/phronomy/output_parser/base.rb +1 -0
data/lib/phronomy/output_parser/json_parser.rb +22 -3
data/lib/phronomy/output_parser/structured_parser.rb +2 -0
data/lib/phronomy/prompt_template.rb +5 -0
data/lib/phronomy/runnable.rb +20 -3
data/lib/phronomy/splitter/base.rb +2 -0
data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
data/lib/phronomy/state_store/base.rb +48 -0
data/lib/phronomy/state_store/in_memory.rb +62 -0
data/lib/phronomy/tool/agent_tool.rb +1 -0
data/lib/phronomy/tool/base.rb +189 -27
data/lib/phronomy/tool/mcp_tool.rb +68 -13
data/lib/phronomy/tracing/base.rb +3 -0
data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
data/lib/phronomy/tracing/open_telemetry_tracer.rb +2 -0
data/lib/phronomy/vector_store/base.rb +33 -7
data/lib/phronomy/vector_store/in_memory.rb +16 -7
data/lib/phronomy/vector_store/pgvector.rb +40 -9
data/lib/phronomy/vector_store/redis_search.rb +29 -8
data/lib/phronomy/version.rb +1 -1
data/lib/phronomy/workflow.rb +175 -74
data/lib/phronomy/workflow_context.rb +55 -5
data/lib/phronomy/workflow_runner.rb +197 -114
data/lib/phronomy.rb +74 -1
data/scripts/api_snapshot.rb +91 -0
data/scripts/check_api_annotations.rb +68 -0
data/scripts/check_private_enforcement.rb +93 -0
data/scripts/check_readme_runnable.rb +98 -0
data/scripts/run_mutation.sh +46 -0
metadata +50 -6
data/lib/phronomy/context/builder.rb +0 -92
data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +0 -100
data/lib/phronomy/guardrail/builtin/prompt_injection_detector.rb +0 -67
data/lib/phronomy/guardrail/builtin.rb +0 -16

data/lib/phronomy/agent/concerns/retryable.rb CHANGED Viewed

@@ -7,6 +7,7 @@ module Phronomy
       #
       # Included in {Phronomy::Agent::Base}. The retry loop wraps the full
       # #invoke_once call; {Phronomy::GuardrailError} is never retried.
+      # @api private
       module Retryable
         def self.included(base)
           base.extend(ClassMethods)
@@ -25,6 +26,7 @@ module Phronomy
           #   class MyAgent < Phronomy::Agent::Base
           #     retry_policy times: 2, wait: :exponential, base: 1.0
           #   end
+          # @api private
           def retry_policy(times: 0, wait: 0, base: 1.0)
             @_retry_policy = {times: times, wait: wait, base: base}
           end
@@ -35,6 +37,7 @@ module Phronomy
           # Injectable sleep callable for testing (shared with Tool::Base pattern).
           # @return [#call]
+          # @api private
           def _sleep_proc
             @_sleep_proc || method(:sleep)
           end
@@ -48,12 +51,19 @@ module Phronomy
         # Retry loop for #invoke. Separated so that ReactAgent can override #invoke_once.
         def _invoke_impl(input, messages: [], thread_id: nil, config: {})
+          # Fail fast when the token is already cancelled before any LLM call.
+          if (token = config[:cancellation_token]) && token.cancelled?
+            raise Phronomy::CancellationError, "invocation cancelled"
+          end
           policy = self.class._retry_policy
           attempt = 0
           begin
             invoke_once(input, messages: messages, thread_id: thread_id, config: config)
           rescue Phronomy::GuardrailError
             raise
+          rescue Phronomy::CancellationError
+            raise # Never retry after cancellation.
           rescue
             if policy && attempt < policy[:times]
               wait = compute_agent_retry_wait(policy[:wait], policy[:base], attempt)
@@ -61,7 +71,7 @@ module Phronomy
               attempt += 1
               retry
             end
-            raise
+            translate_and_reraise!($!)
           end
         end
@@ -70,6 +80,7 @@ module Phronomy
         # @param base     [Float]
         # @param attempt  [Integer]
         # @return [Float]
+        # @api private
         def compute_agent_retry_wait(strategy, base, attempt)
           case strategy
           when :exponential

data/lib/phronomy/agent/concerns/suspendable.rb CHANGED Viewed

@@ -8,6 +8,7 @@ module Phronomy
       # Included in {Phronomy::Agent::Base}. When a tool decorated with
       # +requires_approval true+ is called and no synchronous approval handler
       # has been registered, the invocation is suspended and a
       # {Phronomy::Agent::Checkpoint} is returned so the caller can resume later.
+      # @api private
       module Suspendable
         # Registers a callback that is invoked before executing any tool that has
@@ -25,6 +26,7 @@ module Phronomy
         #   agent = MyAgent.new
         #   agent.on_approval_required { |tool_name, args| prompt_user(tool_name, args) }
         # @return [self]
+        # @api private
         def on_approval_required(&block)
           @approval_handler = block
           self
@@ -43,6 +45,7 @@ module Phronomy
         # @param config     [Hash] same runtime options as #invoke
         # @return [Hash] +{ output: String, suspended: false, messages: Array, usage: Phronomy::TokenUsage }+
         # @raise [Phronomy::GuardrailError] when an output guardrail rejects the value
+        # @api private
         def resume(checkpoint, approved:, config: {})
           # Build a fresh chat with all tools registered.
           chat = build_chat
@@ -95,6 +98,7 @@ module Phronomy
         #   - none of the agent's tools have requires_approval set.
         #
         # @param chat [RubyLLM::Chat]
+        # @api private
         def _register_suspension_hook!(chat)
           return if @approval_handler
           return if self.class.tools.none? { |tc| tc.requires_approval }

data/lib/phronomy/agent/fsm.rb ADDED Viewed

@@ -0,0 +1,180 @@
+# frozen_string_literal: true
+require "securerandom"
+module Phronomy
+  module Agent
+    # EventLoop-registered execution unit for a single agent invocation.
+    #
+    # +AgentFSM+ implements the minimal interface expected by {Phronomy::EventLoop}
+    # (+#id+, +#start+, +#handle+) so it can be managed alongside
+    # {Phronomy::FSMSession} instances.  It is *not* a traditional finite-state
+    # machine; the name reflects its role in the EventLoop rather than internal
+    # state transitions.
+    #
+    # == Execution model
+    #
+    # {#start} is called by the EventLoop on the +:start+ event.  It immediately
+    # returns after spawning a background IO thread that runs the agent's full
+    # invocation pipeline (via +_invoke_impl+).  The EventLoop thread is never
+    # blocked by agent execution.
+    #
+    # Inside the IO thread, the +:phronomy_agent_parallel_tools+ thread-local
+    # flag is set to +true+ so that {Agent::Base#build_chat} returns a
+    # {ParallelToolChat} instance, enabling concurrent tool dispatch when the LLM
+    # returns multiple tool calls in one response.
+    #
+    # == Completion events
+    #
+    # On *success*:
+    #   - Posts +:finished+ to this FSM's own +#id+ so the EventLoop cleans up
+    #     its registry entry and unblocks any +completion_queue.pop+ caller.
+    #   - When +parent_id+ is set (child-FSM pattern), additionally posts
+    #     +:child_completed+ to +parent_id+, carrying the result hash as the
+    #     event payload.  The parent {FSMSession} must declare an +on:+ transition
+    #     for +:child_completed+ to advance correctly.
+    #
+    # On *error*:
+    #   - When +parent_id+ is set, first posts +:child_failed+ (exception as payload) to it.
+    #   - Posts +:error+ to this FSM's own +#id+.  The EventLoop propagates the exception
+    #     through the +completion_queue+ to the +Agent::Base#invoke+ caller (EventLoop mode), which re-raises it.
+    #
+    # == Standalone usage (blocking caller)
+    #
+    #   Phronomy.configure { |c| c.event_loop = true }
+    #   result = MyAgent.new.invoke("Hello!")   # => { output:, messages:, usage: }
+    #
+    # {Agent::Base#invoke} detects EventLoop mode, creates an +AgentFSM+, registers
+    # it via {EventLoop#register}, and blocks the *calling* thread on the returned
+    # +completion_queue+ until the agent finishes.
+    #
+    # == Child-FSM usage (non-blocking, inside a Workflow)
+    #
+    #   state :run_agent
+    #   entry :run_agent, ->(ctx) { MyAgent.new.run_as_child(ctx.query, ctx: ctx) }
+    #   transition from: :run_agent, on: :child_completed, to: :process_result
+    #
+    # {Agent::Base#run_as_child} creates an +AgentFSM+ with +parent_id+ set to
+    # +ctx.thread_id+, registers it with the EventLoop, and returns immediately.
+    # The parent {FSMSession} waits for the +:child_completed+ event.
+    # @api private
+    class FSM
+      # @return [String] unique identifier used as the EventLoop target_id
+      attr_reader :id
+      # @return [Symbol] current internal phase (:idle, :running)
+      attr_reader :current_phase
+      # @param agent     [Phronomy::Agent::Base]  agent instance to run
+      # @param input     [String, Hash]           user input passed to +invoke_once+
+      # @param messages  [Array]                  prior conversation history
+      # @param thread_id [String, nil]            conversation thread id;
+      #                                           auto-generated when nil
+      # @param config    [Hash]                   invocation config forwarded to
+      #                                           +_invoke_impl+
+      # @param parent_id    [String, nil]  EventLoop id of the parent
+      #                                     FSMSession; when set, a
+      #                                     +:child_completed+ event is posted
+      #                                     on completion
+      # @param result_writer [Proc, nil]   optional callable invoked with the
+      #                                     result hash <b>before</b>
+      #                                     +:child_completed+ is posted.
+      #                                     Use this to write the agent output
+      #                                     back into the parent WorkflowContext.
+      #                                     Thread::Queue provides the
+      #                                     happens-before guarantee.
+      #
+      # @example Writing result into context
+      #   entry :run_agent, ->(ctx) {
+      #     MyAgent.new.run_as_child(ctx.query, ctx: ctx) { |r| ctx.answer = r[:output] }
+      #   }
+      # @api private
+      def initialize(agent:, input:, messages: [], thread_id: nil, config: {}, parent_id: nil, result_writer: nil)
+        @agent = agent
+        @input = input
+        @messages = Array(messages).dup
+        @thread_id = thread_id || SecureRandom.uuid
+        @config = config
+        @parent_id = parent_id
+        @result_writer = result_writer
+        @id = @thread_id
+        @current_phase = :idle
+      end
+      # Called by {EventLoop} on the +:start+ event.
+      # Transitions to +:running+ and spawns the agent IO thread.
+      def start
+        @current_phase = :running
+        spawn_agent_thread
+      end
+      # Called by {EventLoop} for external events dispatched to this id.
+      # +AgentFSM+ is fully driven by its own IO thread and does not respond
+      # to external events after {#start}.
+      def handle(_event)
+        # No-op: AgentFSM is driven entirely by its IO thread.
+      end
+      private
+      # Spawns the background IO thread that runs the agent invocation.
+      # Copies the instance variables into locals (references, not deep copies) so the
+      # thread closure is unaffected if the FSM's ivars are reassigned (not done in practice).
+      def spawn_agent_thread
+        agent = @agent
+        input = @input
+        messages = @messages
+        thread_id = @thread_id
+        config = @config
+        fsm_id = @id
+        parent_id = @parent_id
+        result_writer = @result_writer
+        Thread.new do
+          # Enable parallel tool dispatch inside this IO thread.
+          Thread.current[:phronomy_agent_parallel_tools] = true
+          # Forward the concurrency cap to ParallelToolChat.
+          Thread.current[:phronomy_max_parallel_tools] =
+            agent.class.respond_to?(:max_parallel_tools) ? agent.class.max_parallel_tools : 10
+          begin
+            result = agent.send(:_invoke_impl,
+              input,
+              messages: messages,
+              thread_id: thread_id,
+              config: config)
+            if parent_id
+              # Let the caller write the result into the context BEFORE the
+              # parent FSMSession advances.  Thread::Queue provides the
+              # happens-before guarantee — no Mutex needed.
+              result_writer&.call(result)
+              Phronomy::EventLoop.instance.post(
+                Phronomy::Event.new(type: :child_completed, target_id: parent_id, payload: result)
+              )
+            end
+            Phronomy::EventLoop.instance.post(
+              Phronomy::Event.new(type: :finished, target_id: fsm_id, payload: result)
+            )
+          rescue => e
+            if parent_id
+              Phronomy::EventLoop.instance.post(
+                Phronomy::Event.new(type: :child_failed, target_id: parent_id, payload: e)
+              )
+            end
+            Phronomy::EventLoop.instance.post(
+              Phronomy::Event.new(type: :error, target_id: fsm_id, payload: e)
+            )
+          ensure
+            # Clear the thread-local context cache for this agent so the IO
+            # thread's cache does not grow unboundedly across invocations.
+            Thread.current[:phronomy_context_version_caches]&.delete(agent.object_id)
+          end
+        end
+      end
+    end
+  end
+end

data/lib/phronomy/agent/handoff.rb CHANGED Viewed

@@ -22,6 +22,7 @@ module Phronomy
       # @param target_agent [Phronomy::Agent::Base] the agent to hand off to
       # @param description  [String, nil] overrides the auto-generated tool description
+      # @api public
       def initialize(target_agent:, description: nil)
         @target_agent = target_agent
         klass_name = target_agent.class.name&.split("::")&.last || "Agent"
@@ -33,6 +34,7 @@ module Phronomy
       # Builds an anonymous Phronomy::Tool::Base subclass for this handoff.
       # @return [Class<Phronomy::Tool::Base>]
+      # @api public
       def to_tool_class
         sentinel_value = sentinel
         tn = tool_name
@@ -46,6 +48,7 @@ module Phronomy
       # The sentinel string embedded in the tool result.
       # @return [String]
+      # @api public
       def sentinel
         "#{SENTINEL_PREFIX}:#{target_agent.class.name}:#{@uuid}"
       end

data/lib/phronomy/agent/orchestrator.rb CHANGED Viewed

@@ -55,6 +55,7 @@ module Phronomy
       # @param on_error    [Symbol] +:raise+ (default) re-raises any exception
       #   from the subagent; +:skip+ returns +nil+ so the LLM can decide how to
       #   proceed
+      # @api public
       def self.subagent(name, agent_class, on_error: :raise)
         tool_class = Class.new(Phronomy::Tool::Base) do
           tool_name "dispatch_to_#{name}"
@@ -62,7 +63,14 @@ module Phronomy
           param :input, type: :string, desc: "The task or question for the subagent"
           define_method(:execute) do |input:|
-            result = agent_class.new.invoke(input)
+            # Inherit the calling orchestrator's thread_id and config when
+            # available so that sub-agent spans and memory stay connected.
+            ctx = Thread.current[:phronomy_orchestrator_context] || {}
+            result = agent_class.new.invoke(
+              input,
+              thread_id: ctx[:thread_id],
+              config: ctx[:config] || {}
+            )
             result[:output]
           rescue
             raise if on_error == :raise
@@ -80,6 +88,7 @@ module Phronomy
       # Returns the subagent registry for this specific class (not inherited).
       #
       # @return [Hash{Symbol => Hash}]
+      # @api public
       def self.registered_subagents
         @registered_subagents ||= {}
       end
@@ -100,13 +109,28 @@ module Phronomy
       # @option task [Class]   :agent  agent class to invoke (required)
       # @option task [String]  :input  input string for the agent (required)
       # @option task [Hash]    :config forwarded to +agent#invoke+ (default: +{}+)
-      # @param max_concurrency [Integer, nil] maximum number of concurrent threads;
+      # @option task [String]  :thread_id forwarded to +agent#invoke+ (default: nil)
+      # @param max_concurrency    [Integer, nil] maximum number of concurrent threads;
       #   nil means no limit (all tasks run simultaneously)
-      # @param on_error        [Symbol] +:raise+ or +:skip+
+      # @param on_error            [Symbol] +:raise+ or +:skip+
+      # @param timeout             [Numeric, nil] maximum seconds to wait for all workers;
+      #   nil means wait indefinitely. When the deadline is exceeded, {Phronomy::TimeoutError}
+      #   is raised; surviving worker threads are killed only when +force_kill+ is +true+.
+      # @param cancellation_token [Phronomy::CancellationToken, nil] when provided, the
+      #   token is merged into each task's config (unless the task already sets one) so
+      #   that every worker agent checks it before making LLM calls.
+      # @param force_kill [Boolean] when +true+, surviving worker threads are killed with
+      #   +Thread#kill+ after the grace period if they do not stop cooperatively. When
+      #   +false+ (default), workers are asked to stop cooperatively but are never killed;
+      #   the caller receives {Phronomy::TimeoutError} immediately and abandoned workers
+      #   discard their results when they eventually finish. +false+ is safer for
+      #   production because +Thread#kill+ can interrupt +ensure+ blocks.
       # @return [Array<Hash, nil>] agent results in the same order as +tasks+
       # @raise [ArgumentError] if +on_error+ is not +:raise+ or +:skip+
       # @raise [ArgumentError] if +max_concurrency+ is not a positive Integer or nil
-      def dispatch_parallel(*tasks, max_concurrency: nil, on_error: :raise)
+      # @raise [Phronomy::TimeoutError] if +timeout+ is exceeded
+      # @api public
+      def dispatch_parallel(*tasks, max_concurrency: nil, on_error: :raise, timeout: nil, cancellation_token: nil, force_kill: false)
         unless [:raise, :skip].include?(on_error)
           raise ArgumentError, "unknown on_error: #{on_error.inspect}"
         end
@@ -114,7 +138,7 @@ module Phronomy
           raise ArgumentError, "max_concurrency must be a positive Integer"
         end
-        bounded_map(tasks, max_concurrency: max_concurrency, on_error: on_error)
+        bounded_map(tasks, max_concurrency: max_concurrency, on_error: on_error, timeout: timeout, cancellation_token: cancellation_token, force_kill: force_kill)
       end
       # Runs the same agent against multiple inputs in parallel (fan-out pattern).
@@ -125,19 +149,52 @@ module Phronomy
       # @param agent           [Class]         agent class to invoke for every input
       # @param inputs          [Array<String>] list of input strings
       # @param config          [Hash]          forwarded to every +agent#invoke+ call
+      # @param thread_id       [String, nil]   forwarded to every +agent#invoke+ call
       # @param max_concurrency [Integer, nil]  forwarded to {#dispatch_parallel}
       # @param on_error        [Symbol]        forwarded to {#dispatch_parallel}
       # @return [Array<Hash, nil>] results in the same order as +inputs+
-      def fan_out(agent:, inputs:, config: {}, max_concurrency: nil, on_error: :raise)
+      # @api public
+      def fan_out(agent:, inputs:, config: {}, thread_id: nil, max_concurrency: nil, on_error: :raise, timeout: nil, cancellation_token: nil, force_kill: false)
         dispatch_parallel(
-          *inputs.map { |input| {agent: agent, input: input, config: config} },
+          *inputs.map { |input| {agent: agent, input: input, config: config, thread_id: thread_id} },
           max_concurrency: max_concurrency,
-          on_error: on_error
+          on_error: on_error,
+          timeout: timeout,
+          cancellation_token: cancellation_token,
+          force_kill: force_kill
+        )
+      end
+      # Programmatically dispatches a single sub-agent from inside an orchestrator
+      # instance, inheriting the parent's +thread_id+ and +config+ by default.
+      #
+      # @param agent_class [Class]         subclass of {Phronomy::Agent::Base}
+      # @param input       [String]        task or question for the sub-agent
+      # @param config      [Hash, nil]     override config (falls back to parent's)
+      # @param thread_id   [String, nil]   override thread_id (falls back to parent's)
+      # @return [Hash]  the sub-agent's result hash (+:output+, +:messages+)
+      # @api public
+      def subagent(agent_class, input, config: nil, thread_id: nil)
+        ctx = Thread.current[:phronomy_orchestrator_context] || {}
+        agent_class.new.invoke(
+          input,
+          config: config || ctx[:config] || {},
+          thread_id: thread_id || ctx[:thread_id]
         )
       end
       private
+      # Override invoke_once to expose the current thread_id and config via a
+      # thread-local so that DSL-registered subagent tools can inherit them.
+      def invoke_once(input, messages: [], thread_id: nil, config: {})
+        prev = Thread.current[:phronomy_orchestrator_context]
+        Thread.current[:phronomy_orchestrator_context] = {thread_id: thread_id, config: config}
+        super
+      ensure
+        Thread.current[:phronomy_orchestrator_context] = prev
+      end
       # Worker-pool implementation shared by {#dispatch_parallel} and {#fan_out}.
       #
       # Uses a +Queue+ as a work-stealing mechanism: each worker thread pops a
@@ -150,12 +207,27 @@ module Phronomy
       # A +Mutex+ guards concurrent writes to +errors+ even though Array element
       # assignment at different indices is safe in MRI; this keeps the code
       # correct across alternative Ruby runtimes.
-      def bounded_map(tasks, max_concurrency:, on_error:)
+      #
+      # When +timeout+ expires with workers still alive, they are asked to stop
+      # cooperatively via a cancellation flag (so they do not pick up new tasks).
+      # Only when +force_kill+ is +true+ are they then given +KILL_GRACE_SECONDS+
+      # to finish in-flight +ensure+ blocks; workers still alive afterwards are
+      # force-killed and a warning is logged.  Use a +CancellationToken+ (see #216)
+      # for full cooperative cancellation of long-running tasks.
+      #
+      # Deadline tracking uses +Process.clock_gettime(Process::CLOCK_MONOTONIC)+
+      # to avoid sensitivity to NTP adjustments and system-clock changes.
+      KILL_GRACE_SECONDS = 0.5
+      private_constant :KILL_GRACE_SECONDS
+      def bounded_map(tasks, max_concurrency:, on_error:, timeout: nil, cancellation_token: nil, force_kill: false)
         return [] if tasks.empty?
         results = Array.new(tasks.length)
         errors = Array.new(tasks.length)
         errors_mutex = Mutex.new
+        # Mutex-backed cooperative stop token; workers check before each task pick-up.
+        internal_stop_token = Phronomy::CancellationToken.new
         queue = Queue.new
         tasks.each_with_index { |task, i| queue << [i, task] }
@@ -165,16 +237,26 @@ module Phronomy
         workers = worker_count.times.map do
           Thread.new do
             loop do
+              break if internal_stop_token.cancelled?
               i, task = begin
                 queue.pop(true)
               rescue ThreadError
                 break # queue is empty; this worker is done
               end
+              # Merge the shared cancellation token into the task's config unless
+              # the task already supplies its own token.
+              task_config = task.fetch(:config, {})
+              if cancellation_token && !task_config[:cancellation_token]
+                task_config = task_config.merge(cancellation_token: cancellation_token)
+              end
               begin
                 results[i] = task[:agent].new.invoke(
                   task[:input],
-                  config: task.fetch(:config, {})
+                  config: task_config,
+                  thread_id: task[:thread_id]
                 )
               rescue => e
                 case on_error
@@ -188,7 +270,37 @@ module Phronomy
           end
         end
-        workers.each(&:join)
+        workers.each(&:join) if timeout.nil?
+        if timeout
+          deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
+          workers.each do |w|
+            remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
+            w.join([remaining, 0].max)
+          end
+          alive = workers.select(&:alive?)
+          unless alive.empty?
+            # Signal workers cooperatively to stop picking up new tasks.
+            internal_stop_token.cancel!
+            if force_kill
+              # Give in-flight ensure blocks a short grace period before kill.
+              alive.each { |w| w.join(KILL_GRACE_SECONDS) }
+              still_alive = alive.select(&:alive?)
+              if still_alive.any?
+                Phronomy.configuration.logger&.warn(
+                  "[Phronomy] dispatch_parallel: #{still_alive.length} worker(s) did not stop " \
+                  "within grace period; force-killing. Use CancellationToken for " \
+                  "cooperative cancellation of long-running tasks."
+                )
+                still_alive.each(&:kill)
+              end
+            end
+            raise Phronomy::TimeoutError,
+              "dispatch_parallel timed out after #{timeout}s " \
+              "(#{alive.length} of #{workers.length} workers still running)"
+          end
+        end
         first_error = errors.compact.first
         raise first_error if first_error

data/lib/phronomy/agent/parallel_tool_chat.rb ADDED Viewed

@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+module Phronomy
+  module Agent
+    # RubyLLM::Chat subclass that executes multiple tool calls concurrently.
+    #
+    # When the LLM returns more than one tool call in a single response, each
+    # tool is dispatched in a dedicated IO thread and all results are collected
+    # before being appended to the message history. This preserves a
+    # deterministic message order while reducing wall-clock latency when tools
+    # are IO-bound (HTTP calls, DB queries, etc.).
+    #
+    # Single-tool responses fall through to the standard sequential path via
+    # +super+, preserving all existing edge-case behaviour (Tool::Halt,
+    # forced_tool_choice, streaming, SuspendSignal, etc.).
+    #
+    # This class is used automatically when the agent is running inside an
+    # {AgentFSM} IO thread (i.e. when the +:phronomy_agent_parallel_tools+
+    # thread-local flag is +true+).  It is not used for direct synchronous
+    # +invoke+ calls so that the streaming callback state remains single-threaded.
+    # @api private
+    class ParallelToolChat < RubyLLM::Chat
+      private
+      # Overrides RubyLLM::Chat#handle_tool_calls to parallelise execution
+      # when multiple tool calls are present in a single LLM response.
+      #
+      # The method preserves the three-phase protocol of the original:
+      #   1. Pre-execution callbacks (+on_new_message+, +on_tool_call+) —
+      #      sequential so that the Suspendable concern's approval hook can
+      #      raise +SuspendSignal+ before any tool is executed.
+      #   2. Parallel tool execution — one IO thread per tool call.
+      #   3. Post-execution callbacks and message recording — sequential,
+      #      in the original tool-call order.
+      #
+      # @param response [RubyLLM::Message] the LLM response containing tool calls
+      # @yield streaming block forwarded to +complete+
+      # @api private
+      def handle_tool_calls(response, &block)
+        tool_calls = response.tool_calls.values
+        # Single tool: delegate to the parent implementation to preserve every
+        # edge case (forced_tool_choice, streaming, Halt, SuspendSignal…).
+        return super if tool_calls.size <= 1
+        # Phase 1 — pre-execution callbacks (sequential, original order).
+        # The SuspendSignal approval hook is registered via on_tool_call, so it
+        # MUST fire before execution begins.
+        tool_calls.each do |tool_call|
+          @on[:new_message]&.call
+          @on[:tool_call]&.call(tool_call)
+        end
+        # Phase 2 — parallel tool execution.
+        # Honour the per-agent concurrency cap (max_parallel_tools DSL).
+        # Tool calls are processed in batches of at most `max` threads;
+        # batches run sequentially so the total in-flight thread count never
+        # exceeds the limit.
+        #
+        # Check for cancellation before dispatching each batch so that
+        # already-cancelled tokens do not start new LLM/tool-round-trips.
+        ct = Thread.current[:phronomy_cancellation_token]
+        max = Thread.current[:phronomy_max_parallel_tools] || 10
+        thread_results = tool_calls.each_slice(max).flat_map do |batch|
+          if ct&.cancelled?
+            raise Phronomy::CancellationError, "invocation cancelled before tool execution"
+          end
+          threads = batch.map do |tool_call|
+            Thread.new { {tool_call: tool_call, result: execute_tool(tool_call)} }
+          end
+          threads.map(&:value)
+        end
+        # Phase 3 — post-execution callbacks and message recording (sequential).
+        halt_result = nil
+        thread_results.each do |item|
+          result = item[:result]
+          @on[:tool_result]&.call(result)
+          tool_payload = result.is_a?(RubyLLM::Tool::Halt) ? result.content : result
+          content = content_like?(tool_payload) ? tool_payload : tool_payload.to_s
+          message = add_message(role: :tool, content: content, tool_call_id: item[:tool_call].id)
+          @on[:end_message]&.call(message)
+          halt_result = result if result.is_a?(RubyLLM::Tool::Halt)
+        end
+        reset_tool_choice if forced_tool_choice?
+        halt_result || complete(&block)
+      end
+    end
+  end
+end

data/lib/phronomy/agent/react_agent.rb CHANGED Viewed

@@ -37,9 +37,10 @@ module Phronomy
             end
           end
-          # Fall back to the last message
-          # guards against the case where the final message is a tool-call or
-          output = messages.reverse.find { |m| m.content && !m.content.empty? }&.content
+          # Select the last assistant-produced content as the output, skipping
+          # raw tool result messages (role: :tool) to avoid returning tool JSON
+          # or status strings as the agent's answer when iterations are exhausted.
+          output = messages.reverse.find { |m| m.content && !m.content.empty? && m.role != :tool }&.content
           # Run output guardrails before returning to the caller.
           run_output_guardrails!(output)
@@ -60,6 +61,7 @@ module Phronomy
       # @param config    [Hash]
       # @yield [Phronomy::Agent::StreamEvent]
       # @return [Hash] { output:, messages:, usage: }
+      # @api public
       def stream(input, messages: [], thread_id: nil, config: {}, &block)
         return invoke(input, messages: messages, thread_id: thread_id, config: config) unless block
@@ -88,9 +90,9 @@ module Phronomy
             end
           end
-          # Fall back to the last message that carries non-nil content (same as
-          # the non-streaming path above).
-          output = messages.reverse.find { |m| m.content && !m.content.empty? }&.content
+          # Select the last assistant-produced content as the output, skipping
+          # raw tool result messages (role: :tool) — same as the non-streaming path.
+          output = messages.reverse.find { |m| m.content && !m.content.empty? && m.role != :tool }&.content
           run_output_guardrails!(output)
           result = {output: output, messages: messages, usage: total_usage, iterations_exhausted: iterations_exhausted}

data/lib/phronomy/agent/runner.rb CHANGED Viewed

@@ -30,6 +30,7 @@ module Phronomy
       # @param routes [Hash{Phronomy::Agent::Base => Array<Phronomy::Agent::Base>}]
       #   declares which target agents each source agent may hand off to;
       #   when omitted no handoffs are configured and the entry agent handles everything
+      # @api public
       def initialize(agents:, routes: {})
         @agents = Array(agents)
         raise ArgumentError, "At least one agent is required" if @agents.empty?
@@ -47,6 +48,7 @@ module Phronomy
       # @param config [Hash] forwarded to each agent's #invoke
       # @return [Hash] { output:, messages:, usage:, agent: }
       # @raise [Phronomy::HandoffError] if more than MAX_HANDOFFS handoffs occur
+      # @api public
       def invoke(input, config: {})
         current = @entry_agent
         handoffs_taken = 0