RubyGems - phronomy - Versions diffs - 0.5.4 → 0.7.0 - Mend

phronomy 0.5.4 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111)

checksums.yaml +4 -4
data/.mutant.yml +21 -0
data/CHANGELOG.md +379 -0
data/CONTRIBUTING.md +102 -0
data/README.md +262 -48
data/RELEASE_CHECKLIST.md +86 -0
data/SECURITY.md +80 -0
data/benchmark/baseline.json +9 -0
data/benchmark/bench_agent_invoke.rb +105 -0
data/benchmark/bench_context_assembler.rb +46 -0
data/benchmark/bench_regression.rb +171 -0
data/benchmark/bench_token_estimator.rb +44 -0
data/benchmark/bench_tool_schema.rb +69 -0
data/benchmark/bench_vector_store.rb +39 -0
data/benchmark/bench_workflow.rb +55 -0
data/benchmark/run_all.rb +118 -0
data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
data/docs/decisions/002-workflow-context-immutability.md +42 -0
data/docs/decisions/003-event-loop-singleton.md +48 -0
data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +51 -0
data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
data/docs/decisions/006-no-built-in-guardrails.md +48 -0
data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
data/docs/decisions/009-state-store-abstraction.md +141 -0
data/lib/phronomy/agent/base.rb +281 -13
data/lib/phronomy/agent/before_completion_context.rb +1 -0
data/lib/phronomy/agent/checkpoint.rb +1 -0
data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
data/lib/phronomy/agent/concerns/retryable.rb +12 -1
data/lib/phronomy/agent/concerns/suspendable.rb +4 -0
data/lib/phronomy/agent/fsm.rb +180 -0
data/lib/phronomy/agent/handoff.rb +3 -0
data/lib/phronomy/agent/orchestrator.rb +123 -11
data/lib/phronomy/agent/parallel_tool_chat.rb +92 -0
data/lib/phronomy/agent/react_agent.rb +8 -6
data/lib/phronomy/agent/runner.rb +2 -0
data/lib/phronomy/agent/shared_state.rb +11 -0
data/lib/phronomy/agent/suspend_signal.rb +2 -0
data/lib/phronomy/agent/team_coordinator.rb +17 -5
data/lib/phronomy/cancellation_token.rb +92 -0
data/lib/phronomy/configuration.rb +32 -2
data/lib/phronomy/context/assembler.rb +6 -0
data/lib/phronomy/context/compaction_context.rb +2 -0
data/lib/phronomy/context/context_version_cache.rb +2 -0
data/lib/phronomy/context/token_budget.rb +3 -0
data/lib/phronomy/context/token_estimator.rb +9 -2
data/lib/phronomy/context/trigger_context.rb +1 -0
data/lib/phronomy/context/trim_context.rb +4 -0
data/lib/phronomy/context.rb +0 -1
data/lib/phronomy/embeddings/base.rb +5 -2
data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
data/lib/phronomy/eval/comparison.rb +2 -0
data/lib/phronomy/eval/dataset.rb +4 -0
data/lib/phronomy/eval/metrics.rb +6 -0
data/lib/phronomy/eval/runner.rb +2 -0
data/lib/phronomy/eval/scorer/base.rb +1 -0
data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
data/lib/phronomy/event.rb +14 -0
data/lib/phronomy/event_loop.rb +254 -0
data/lib/phronomy/fsm_session.rb +201 -0
data/lib/phronomy/generator_verifier.rb +24 -22
data/lib/phronomy/guardrail/base.rb +3 -0
data/lib/phronomy/guardrail.rb +0 -1
data/lib/phronomy/knowledge_source/base.rb +6 -2
data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
data/lib/phronomy/loader/base.rb +1 -0
data/lib/phronomy/loader/csv_loader.rb +2 -0
data/lib/phronomy/loader/markdown_loader.rb +2 -0
data/lib/phronomy/loader/plain_text_loader.rb +1 -0
data/lib/phronomy/output_parser/base.rb +1 -0
data/lib/phronomy/output_parser/json_parser.rb +22 -3
data/lib/phronomy/output_parser/structured_parser.rb +2 -0
data/lib/phronomy/prompt_template.rb +5 -0
data/lib/phronomy/runnable.rb +20 -3
data/lib/phronomy/splitter/base.rb +2 -0
data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
data/lib/phronomy/state_store/base.rb +48 -0
data/lib/phronomy/state_store/in_memory.rb +62 -0
data/lib/phronomy/tool/agent_tool.rb +1 -0
data/lib/phronomy/tool/base.rb +189 -27
data/lib/phronomy/tool/mcp_tool.rb +68 -13
data/lib/phronomy/tracing/base.rb +3 -0
data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
data/lib/phronomy/tracing/open_telemetry_tracer.rb +2 -0
data/lib/phronomy/vector_store/base.rb +33 -7
data/lib/phronomy/vector_store/in_memory.rb +16 -7
data/lib/phronomy/vector_store/pgvector.rb +40 -9
data/lib/phronomy/vector_store/redis_search.rb +29 -8
data/lib/phronomy/version.rb +1 -1
data/lib/phronomy/workflow.rb +175 -74
data/lib/phronomy/workflow_context.rb +55 -5
data/lib/phronomy/workflow_runner.rb +197 -114
data/lib/phronomy.rb +74 -1
data/scripts/api_snapshot.rb +91 -0
data/scripts/check_api_annotations.rb +68 -0
data/scripts/check_private_enforcement.rb +93 -0
data/scripts/check_readme_runnable.rb +98 -0
data/scripts/run_mutation.sh +46 -0
metadata +50 -6
data/lib/phronomy/context/builder.rb +0 -92
data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +0 -100
data/lib/phronomy/guardrail/builtin/prompt_injection_detector.rb +0 -67
data/lib/phronomy/guardrail/builtin.rb +0 -16

data/lib/phronomy/agent/base.rb CHANGED Viewed

@@ -1,10 +1,13 @@
 # frozen_string_literal: true
 require "digest"
+require "securerandom"
+require "timeout"
 require_relative "concerns/retryable"
 require_relative "concerns/guardrailable"
 require_relative "concerns/before_completion"
 require_relative "concerns/suspendable"
+require_relative "concerns/error_translation"
 module Phronomy
   module Agent
@@ -35,6 +38,7 @@ module Phronomy
       include Concerns::Guardrailable
       include Concerns::BeforeCompletion
       include Concerns::Suspendable
+      include Concerns::ErrorTranslation
       class << self
         # Sets or reads the LLM model identifier for this agent.
@@ -47,6 +51,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     model "gpt-4o"
         #   end
+        # @api public
         def model(name = nil)
           if name
             @model = name
@@ -70,6 +75,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     instructions { |input| "Answer in #{input[:lang]}." }
         #   end
+        # @api public
         def instructions(text = nil, &block)
           if text || block_given?
             @instructions = text || block
@@ -95,6 +101,7 @@ module Phronomy
         #     Places::SearchTool  => "places_search",
         #     CurrentTimeTool     => nil
         #   )
+        # @api public
         def tools(*args)
           if args.empty?
             if instance_variable_defined?(:@tools)
@@ -114,9 +121,17 @@ module Phronomy
         end
         # Returns the alias map registered via the hash form of .tools.
+        # Merges parent class aliases so subclasses inherit their parent's mappings.
+        # Subclass-specific aliases take precedence over parent aliases.
         # @return [Hash{Class => String}]
+        # @api public
         def tool_aliases
-          @tool_aliases ||= {}
+          own = @tool_aliases || {}
+          if superclass.respond_to?(:tool_aliases)
+            superclass.tool_aliases.merge(own)
+          else
+            own
+          end
         end
         # Sets or reads the LLM provider for this agent.
@@ -130,6 +145,7 @@ module Phronomy
         #     model "openai/gpt-oss-20b"
         #     provider :openai
         #   end
+        # @api public
         def provider(name = nil)
           if name
             @provider = name
@@ -148,6 +164,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     temperature 0.2
         #   end
+        # @api public
         def temperature(val = nil)
           if val
             @temperature = val
@@ -165,6 +182,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     max_iterations 5
         #   end
+        # @api public
         def max_iterations(val = nil)
           if val
             @max_iterations = val
@@ -173,27 +191,121 @@ module Phronomy
           end
         end
+        # Sets or reads the maximum number of tool calls executed concurrently
+        # when the LLM returns multiple tool calls in a single response
+        # (ParallelToolChat mode, active inside an AgentFSM IO thread).
+        #
+        # Defaults to 10. Set to 1 to force sequential execution.
+        # Inherited by subclasses; the most-specific definition wins.
+        #
+        # @param val [Integer, nil]
+        # @return [Integer]
+        # @example
+        #   class MyAgent < Phronomy::Agent::Base
+        #     max_parallel_tools 4
+        #   end
+        # @api public
+        def max_parallel_tools(val = nil)
+          if val.nil?
+            @max_parallel_tools ||
+              (superclass.respond_to?(:max_parallel_tools) ? superclass.max_parallel_tools : 10)
+          else
+            unless val.is_a?(Integer) && val >= 1
+              raise ArgumentError,
+                "max_parallel_tools must be a positive Integer (>= 1), got #{val.inspect}"
+            end
+            @max_parallel_tools = val
+          end
+        end
+        # Sets or reads the per-invocation timeout (in seconds) for EventLoop-mode
+        # agent calls.  When set, +invoke+ raises {Phronomy::TimeoutError} if the
+        # agent does not finish within the given number of seconds.
+        #
+        # Has no effect when EventLoop mode is disabled (direct invoke path).
+        # Defaults to +nil+ (no timeout).
+        # Inherited by subclasses; the most-specific definition wins.
+        #
+        # **Note**: +invoke_timeout+ is a *wait timeout*, not a cancellation.
+        # When the timeout fires, +Phronomy::TimeoutError+ is raised to the
+        # caller, but the background agent thread and any in-flight LLM or tool
+        # calls are **not** interrupted — they continue running until they
+        # complete naturally.  The agent therefore keeps consuming threads,
+        # memory, and external API credits after the caller has already received
+        # the error.  True cancellation is not yet supported.
+        #
+        # @param val [Numeric, nil]
+        # @return [Numeric, nil]
+        # @example
+        #   class MyAgent < Phronomy::Agent::Base
+        #     invoke_timeout 30
+        #   end
+        # @api public
+        def invoke_timeout(val = nil)
+          if val.nil?
+            return @invoke_timeout if defined?(@invoke_timeout)
+            superclass.respond_to?(:invoke_timeout) ? superclass.invoke_timeout : nil
+          else
+            unless val.is_a?(Numeric) && val > 0
+              raise ArgumentError,
+                "invoke_timeout must be a positive number, got #{val.inspect}"
+            end
+            @invoke_timeout = val
+          end
+        end
         # Registers one or more static knowledge sources on the agent class.
-        # Static sources are fetched once per agent instance and their content
-        # is cached in ContextVersionCache keyed by a fingerprint of the
-        # instruction text + source content. The cache is invalidated automatically
-        # when the fingerprint changes (e.g. because a source was updated).
+        # Static source content is fetched and memoized at the **class** level
+        # the first time +invoke+ is called. The cache persists for the lifetime
+        # of the process; call {.static_knowledge_refresh!} to force a reload.
         #
         # @param sources [Array<Phronomy::KnowledgeSource::Base>]
         # @example
         #   class PolicyAgent < Phronomy::Agent::Base
         #     static_knowledge Phronomy::KnowledgeSource::StaticKnowledge.new(POLICY_TEXT)
         #   end
+        # @api public
         def static_knowledge(*sources)
           @static_knowledge_sources = sources.flatten
+          # Invalidate the cached chunks so the new sources are fetched on
+          # the next call to static_knowledge_chunks.
+          @static_knowledge_chunks = nil
         end
         # Returns the registered static knowledge sources.
         # @return [Array<Phronomy::KnowledgeSource::Base>]
+        # @api public
         def static_knowledge_sources
           @static_knowledge_sources || []
         end
+        # Returns the fetched content from all static knowledge sources.
+        # Results are cached at the class level so that each source is fetched
+        # only once regardless of how many times the agent is invoked.
+        # @return [Array<Hash>]
+        # @api public
+        def static_knowledge_chunks
+          @static_knowledge_chunks ||= static_knowledge_sources.flat_map { |ks|
+            ks.fetch(query: nil)
+          }
+        end
+        # Clears the class-level knowledge cache so that the next +invoke+ call
+        # re-fetches content from all registered static knowledge sources.
+        #
+        # Call this method when the underlying knowledge source has been updated
+        # at runtime (e.g. a file was rewritten, a DB record changed) and you
+        # want the agent to pick up the new content without restarting the
+        # process.
+        #
+        # @return [nil]
+        # @example Refresh after updating a knowledge file
+        #   MyAgent.static_knowledge_refresh!
+        # @api public
+        def static_knowledge_refresh!
+          @static_knowledge_chunks = nil
+        end
         # Registers a callback that is invoked before every LLM call so the
         # application can remove stale or irrelevant messages from the
         # conversation history.
@@ -208,11 +320,13 @@ module Phronomy
         #     limit = ctx.budget&.available(used: 0) || Float::INFINITY
         #     ctx.remove(ctx.message_elements.first[:seq]) if ctx.total_tokens > limit * 0.8
         #   end
+        # @api public
         def on_trim(&block)
           @on_trim_callback = block
         end
         # @return [Proc, nil]
+        # @api private
         def _on_trim_callback
           @on_trim_callback
         end
@@ -231,11 +345,13 @@ module Phronomy
         #     limit = ctx.budget&.available(used: 0) || Float::INFINITY
         #     ctx.total_tokens > limit * 0.7
         #   end
+        # @api public
         def on_compaction_trigger(&block)
           @on_compaction_trigger_callback = block
         end
         # @return [Proc, nil]
+        # @api private
         def _on_compaction_trigger_callback
           @on_compaction_trigger_callback
         end
@@ -253,11 +369,13 @@ module Phronomy
         #       "Earlier conversation summary: #{texts}"
         #     end
         #   end
+        # @api public
         def on_compact(&block)
           @on_compact_callback = block
         end
         # @return [Proc, nil]
+        # @api private
         def _on_compact_callback
           @on_compact_callback
         end
@@ -277,6 +395,7 @@ module Phronomy
         #     provider :anthropic
         #     cache_instructions true
         #   end
+        # @api public
         def cache_instructions(enabled = nil)
           if enabled.nil?
             @cache_instructions
@@ -292,6 +411,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     max_output_tokens 4096
         #   end
+        # @api public
         def max_output_tokens(val = nil)
           if val.nil?
             @max_output_tokens
@@ -309,6 +429,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     context_window 4096
         #   end
+        # @api public
         def context_window(val = nil)
           if val.nil?
             @context_window
@@ -324,6 +445,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     context_overhead 500
         #   end
+        # @api public
         def context_overhead(val = nil)
           if val.nil?
             @context_overhead || 0
@@ -337,6 +459,7 @@ module Phronomy
       # Called by Runner during construction when routes are configured.
       # @param tool_class [Class<Phronomy::Tool::Base>]
       # @return [self]
+      # @api private
       def _add_handoff_tool(tool_class)
         @_handoff_tools ||= []
         @_handoff_tools << tool_class
@@ -345,6 +468,7 @@ module Phronomy
       # Returns handoff tool classes registered on this instance by Runner.
       # @return [Array<Class>]
+      # @api private
       def _handoff_tools
         @_handoff_tools || []
       end
@@ -381,8 +505,100 @@ module Phronomy
       #     result = agent.resume(result[:checkpoint], approved: true)
       #   end
       #   puts result[:output]
+      # @api public
       def invoke(input, messages: [], thread_id: nil, config: {})
-        _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
+        if Phronomy.configuration.event_loop
+          # Protect against blocking the EventLoop thread itself.
+          if Thread.current[:phronomy_event_loop_thread]
+            raise Phronomy::Error,
+              "Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
+              "entry action. Use agent.run_as_child(input, ctx: ctx) instead."
+          end
+          fsm = Agent::FSM.new(
+            agent: self,
+            input: input,
+            messages: messages,
+            thread_id: thread_id || SecureRandom.uuid,
+            config: config
+          )
+          completion_queue = Phronomy::EventLoop.instance.register(fsm)
+          timeout_sec = self.class.invoke_timeout
+          result = if timeout_sec
+            begin
+              Timeout.timeout(timeout_sec) { completion_queue.pop }
+            rescue Timeout::Error
+              raise Phronomy::TimeoutError,
+                "Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
+            end
+          else
+            completion_queue.pop
+          end
+          raise result if result.is_a?(Exception)
+          result
+        else
+          _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
+        end
+      ensure
+        # Remove this agent's context cache entry from the current thread to
+        # prevent unbounded growth of the thread-local hash in long-lived
+        # processes (e.g. Rails servers).
+        Thread.current[:phronomy_context_version_caches]&.delete(object_id)
+      end
+      # Registers this agent as a child {AgentFSM} inside the given Workflow context.
+      #
+      # Use this method from a Workflow entry action (running on the EventLoop thread)
+      # instead of {#invoke}, which would raise a deadlock error because +invoke+ blocks
+      # on a +Thread::Queue+ when EventLoop mode is active.
+      #
+      # The agent runs asynchronously in a background IO thread.  When it finishes, the
+      # parent {FSMSession} receives a +:child_completed+ event whose payload is the
+      # result hash +{ output:, messages:, usage: }+.  Declare an +on: :child_completed+
+      # transition in your Workflow to advance to the next state.
+      #
+      # An optional block may be provided to write the result back into the parent
+      # WorkflowContext <b>before</b> the +:child_completed+ event is dispatched.
+      # +Thread::Queue+ provides the happens-before guarantee — no Mutex is needed.
+      #
+      # @example Without block (result available only as event payload)
+      #   entry :run_agent, ->(ctx) { MyAgent.new.run_as_child(ctx.query, ctx: ctx) }
+      #   transition from: :run_agent, on: :child_completed, to: :process_result
+      #
+      # @example With block (writes result into context)
+      #   entry :run_agent, ->(ctx) {
+      #     MyAgent.new.run_as_child(ctx.query, ctx: ctx) { |r| ctx.answer = r[:output] }
+      #   }
+      #   transition from: :run_agent, on: :child_completed, to: :process_result
+      #
+      # @param input     [String, Hash]  user input passed to the agent
+      # @param ctx       [Object]        a WorkflowContext that responds to +#thread_id+
+      # @param messages  [Array]         prior conversation history
+      # @param config    [Hash]          invocation config (forwarded to +_invoke_impl+)
+      # @yield [Hash]  result hash +{ output:, messages:, usage: }+ — called from the
+      #                agent IO thread before +:child_completed+ is posted
+      # @return [nil]  the caller must not wait on any return value;
+      #                the result arrives as a +:child_completed+ event
+      # @raise [Phronomy::Error] when EventLoop mode is not enabled
+      # @api public
+      def run_as_child(input, ctx:, messages: [], config: {}, &result_writer)
+        unless Phronomy.configuration.event_loop
+          raise Phronomy::Error,
+            "run_as_child requires EventLoop mode. " \
+            "Enable with: Phronomy.configure { |c| c.event_loop = true }"
+        end
+        fsm = Agent::FSM.new(
+          agent: self,
+          input: input,
+          messages: messages,
+          thread_id: "#{ctx.thread_id}_agent_#{SecureRandom.uuid}",
+          config: config,
+          parent_id: ctx.thread_id,
+          result_writer: result_writer
+        )
+        Phronomy::EventLoop.instance.enqueue_child(fsm)
+        nil
       end
       # Streaming version of #invoke. Yields {Phronomy::Agent::StreamEvent} objects
@@ -401,6 +617,7 @@ module Phronomy
       # @param config    [Hash]        same as #invoke
       # @yield [Phronomy::Agent::StreamEvent]
       # @return [Hash] { output:, messages:, usage: } — same as #invoke
+      # @api public
       def stream(input, messages: [], thread_id: nil, config: {}, &block)
         return invoke(input, messages: messages, thread_id: thread_id, config: config) unless block
@@ -410,10 +627,19 @@ module Phronomy
         raise
       end
-      # Returns the {Context::ContextVersionCache} for the current thread.
+      # Returns the {Context::ContextVersionCache} built during the most recent
+      # {#invoke} call on this agent instance.  The thread-local cache entry is
+      # cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
+      # in +@last_context_version_cache+ so callers can inspect it after invoke
+      # returns.
+      #
+      # NOTE: Not thread-safe.  When the same Agent instance is used concurrently,
+      # +@last_context_version_cache+ reflects the most recent +invoke+ on *any*
+      # thread.  For per-invocation isolation, use a separate Agent instance per
+      # thread.
       # @api private
       def context_version_cache
-        (Thread.current[:phronomy_context_version_caches] ||= {})[object_id]
+        @last_context_version_cache
       end
       private
@@ -455,6 +681,7 @@ module Phronomy
           response = chat.ask(user_message) do |chunk|
             block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
+            check_cancellation!(config, "invocation cancelled during streaming")
           end
           output = response.content
@@ -478,6 +705,7 @@ module Phronomy
       # @param thread_id [String, nil] conversation thread identifier
       # @param config    [Hash] the invocation config (see #invoke)
       # @return [Hash] { system: String|nil, messages: Array }
+      # @api public
       def build_context(input, messages: [], thread_id: nil, config: {})
         history = prepare_history(messages: messages, thread_id: thread_id, config: config)
         budget = build_token_budget
@@ -488,7 +716,8 @@ module Phronomy
         assembler.add_instruction(system_text) if system_text
         Array(config[:knowledge_sources]).each do |ks|
-          ks.fetch(query: user_message).each do |chunk|
+          check_cancellation!(config, "invocation cancelled during RAG fetch")
+          ks.fetch(query: user_message, cancellation_token: config[:cancellation_token]).each do |chunk|
             assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
           end
         end
@@ -509,6 +738,7 @@ module Phronomy
       # @param thread_id [String, nil] conversation thread identifier
       # @param config    [Hash] additional invocation options
       # @return [Array] filtered and/or compacted message objects
+      # @api public
       def prepare_history(messages: [], thread_id: nil, config: {})
         budget = build_token_budget
         elements = build_message_elements(Array(messages))
@@ -565,6 +795,15 @@ module Phronomy
           # synchronous on_approval_required handler is already registered).
           _register_suspension_hook!(chat)
+          # Check for cancellation immediately before the LLM call.
+          check_cancellation!(config, "invocation cancelled before LLM call")
+          # Forward the cancellation token to ParallelToolChat via a thread-local
+          # so that tool dispatch batches can observe cancellation without needing
+          # direct access to config.
+          prev_ct = Thread.current[:phronomy_cancellation_token]
+          Thread.current[:phronomy_cancellation_token] = config[:cancellation_token]
           begin
             response = chat.ask(user_message)
           rescue SuspendSignal => signal
@@ -578,6 +817,8 @@ module Phronomy
             )
             suspended_result = {output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}
             next [suspended_result, nil]
+          ensure
+            Thread.current[:phronomy_cancellation_token] = prev_ct
           end
           output = response.content
@@ -623,6 +864,7 @@ module Phronomy
       #
       # @param messages [Array] message-like objects with #role and #content
       # @return [Array<Hash>]
+      # @api public
       def build_message_elements(messages)
         Array(messages).each_with_index.map do |msg, idx|
           tokens = Context::TokenEstimator.estimate(msg.content.to_s)
@@ -638,12 +880,11 @@ module Phronomy
       #
       # @param input [String, Hash] the agent's current input (used for template evaluation)
       # @return [String, nil] assembled system text, or nil when empty
+      # @api public
       def build_cached_system_text(input)
         instruction = build_instructions(input)
-        static_chunks = self.class.static_knowledge_sources.flat_map { |ks|
-          ks.fetch(query: nil)
-        }
+        static_chunks = self.class.static_knowledge_chunks
         fingerprint = Digest::SHA256.hexdigest(
           [instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
@@ -660,11 +901,25 @@ module Phronomy
           cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
         end
+        # Persist a reference on the instance so that context_version_cache
+        # remains accessible after invoke's ensure block cleans up the
+        # thread-local entry.
+        @last_context_version_cache = cache
         cache.system_text.empty? ? nil : cache.system_text
       end
       # Load messages from a ConversationManager.
       #
+      # Returns the chat class to instantiate for this invocation.
+      # When the +:phronomy_agent_parallel_tools+ thread-local flag is set
+      # (i.e. inside an {AgentFSM} IO thread), returns {ParallelToolChat} so
+      # that concurrent tool dispatch is enabled.  Falls back to +nil+ otherwise,
+      # signalling {#build_chat} to use the standard +RubyLLM.chat+ factory.
+      def build_chat_class
+        Thread.current[:phronomy_agent_parallel_tools] ? Agent::ParallelToolChat : nil
+      end
       def build_chat
         opts = {}
         m = self.class.model
@@ -675,7 +930,8 @@ module Phronomy
           opts[:assume_model_exists] = true
         end
         t = self.class.temperature
-        chat = RubyLLM.chat(**opts)
+        parallel_class = build_chat_class
+        chat = parallel_class ? parallel_class.new(**opts) : RubyLLM.chat(**opts)
         chat.with_temperature(t) if t
         self.class.tools.each do |tool_class|
           chat.with_tool(prepare_tool_class(tool_class))
@@ -725,6 +981,18 @@ module Phronomy
         end
       end
+      # Raises CancellationError if the cancellation_token in config is cancelled.
+      # No-op when config has no cancellation_token or the token is not cancelled.
+      #
+      # @param config [Hash] the invocation config hash
+      # @param message [String] the message for the CancellationError
+      # @raise [Phronomy::CancellationError]
+      # @api public
+      def check_cancellation!(config, message = "invocation cancelled")
+        ct = config[:cancellation_token]
+        raise Phronomy::CancellationError, message if ct&.cancelled?
+      end
       # Builds the final tool class to register with the chat.
       #
       # Two transformations are applied in order:

data/lib/phronomy/agent/before_completion_context.rb CHANGED Viewed

@@ -35,6 +35,7 @@ module Phronomy
       # @param messages [Array]
       # @param config   [Hash]
       # @param params   [Hash] initial params (model, temperature already set on chat)
+      # @api public
       def initialize(agent:, messages:, config:, params: {})
         @agent = agent
         @messages = messages.dup.freeze

data/lib/phronomy/agent/checkpoint.rb CHANGED Viewed

@@ -47,6 +47,7 @@ module Phronomy
       # @param pending_tool_name    [String]
       # @param pending_tool_args    [Hash]
       # @param pending_tool_call_id [String]
+      # @api public
       def initialize(thread_id:, original_input:, messages:, pending_tool_name:, pending_tool_args:, pending_tool_call_id:)
         @thread_id = thread_id
         @original_input = original_input

data/lib/phronomy/agent/concerns/before_completion.rb CHANGED Viewed

@@ -8,6 +8,7 @@ module Phronomy
       # Included in {Phronomy::Agent::Base}. Hooks are executed just before every
       # LLM call (global → class → instance order) and may inject or override
       # LLM parameters such as temperature or model.
+      # @api private
       module BeforeCompletion
         def self.included(base)
           base.extend(ClassMethods)
@@ -26,6 +27,7 @@ module Phronomy
           #   class MyAgent < Phronomy::Agent::Base
           #     before_completion ->(ctx) { { temperature: 0.2 } }
           #   end
+          # @api private
           def before_completion(callable = nil)
             if callable.nil? && !block_given?
               @before_completion
@@ -35,6 +37,7 @@ module Phronomy
           end
           # @return [#call, nil]
+          # @api private
           def _before_completion
             @before_completion
           end
@@ -53,6 +56,7 @@ module Phronomy
         # @param chat   [RubyLLM::Chat] the assembled chat object
         # @param config [Hash] the invocation config hash
         # @return [Hash] the merged params applied to the chat
+        # @api private
         def run_before_completion_hooks!(chat, config)
           hooks = [
             Phronomy.configuration.before_completion,
@@ -72,6 +76,7 @@ module Phronomy
           merged = {}
           hooks.each do |hook|
             result = hook.call(ctx)
+            check_cancellation!(config, "invocation cancelled during before_completion hook")
             merged.merge!(result) if result.is_a?(Hash)
           end
@@ -86,6 +91,7 @@ module Phronomy
         #
         # @param chat   [RubyLLM::Chat]
         # @param params [Hash]
+        # @api private
         def apply_before_completion_params!(chat, params)
           params.each do |key, value|
             case key

data/lib/phronomy/agent/concerns/error_translation.rb ADDED Viewed

@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+module Phronomy
+  module Agent
+    module Concerns
+      # Translates RubyLLM transport errors into the corresponding Phronomy error
+      # classes so that callers can rescue Phronomy-namespaced exceptions rather
+      # than coupling themselves to the underlying provider library.
+      #
+      # Included in {Phronomy::Agent::Base}.
+      module ErrorTranslation
+        private
+        # Re-raises +error+ as the most specific Phronomy error class that
+        # corresponds to it.  Non-RubyLLM errors are re-raised unchanged.
+        # The original exception is available as +#cause+ on the translated error.
+        #
+        # Must be called from within an active +rescue+ block so that Ruby
+        # automatically sets +#cause+ on the new exception.
+        #
+        # @param error [Exception]
+        # @raise [Phronomy::RateLimitError] for provider HTTP 429
+        # @raise [Phronomy::AuthenticationError] for provider HTTP 401 / 403
+        # @raise [Phronomy::ContextLengthError] for context window overflow
+        # @raise [Phronomy::TransportError] for all other +RubyLLM::Error+ subclasses
+        # @raise re-raises +error+ unchanged for non-RubyLLM exceptions
+        # @api private
+        def translate_and_reraise!(error)
+          case error
+          when RubyLLM::RateLimitError
+            raise Phronomy::RateLimitError, error.message
+          when RubyLLM::UnauthorizedError, RubyLLM::ForbiddenError
+            raise Phronomy::AuthenticationError, error.message
+          when RubyLLM::ContextLengthExceededError
+            raise Phronomy::ContextLengthError, error.message
+          when RubyLLM::Error
+            raise Phronomy::TransportError, error.message
+          else
+            raise # bare re-raise preserves $! and its backtrace unchanged
+          end
+        end
+      end
+    end
+  end
+end

data/lib/phronomy/agent/concerns/guardrailable.rb CHANGED Viewed

@@ -8,10 +8,12 @@ module Phronomy
       # Included in {Phronomy::Agent::Base}. Guardrails are run on the raw
       # input string before the LLM is called, and on the raw output string
       # before the result is returned to the caller.
+      # @api private
       module Guardrailable
         # Attach a guardrail that validates input before every #invoke call.
         # @param guardrail [Phronomy::Guardrail::InputGuardrail]
         # @return [self]
+        # @api private
         def add_input_guardrail(guardrail)
           @input_guardrails ||= []
           @input_guardrails << guardrail
@@ -21,6 +23,7 @@ module Phronomy
         # Attach a guardrail that validates output before it is returned.
         # @param guardrail [Phronomy::Guardrail::OutputGuardrail]
         # @return [self]
+        # @api private
         def add_output_guardrail(guardrail)
           @output_guardrails ||= []
           @output_guardrails << guardrail