RubyGems - phronomy - Versions diffs - 0.6.0 → 0.7.1 - Mend

phronomy 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (143)

checksums.yaml +4 -4
data/.mutant.yml +22 -0
data/CHANGELOG.md +488 -0
data/CONTRIBUTING.md +102 -0
data/README.md +374 -36
data/RELEASE_CHECKLIST.md +86 -0
data/Rakefile +33 -0
data/SECURITY.md +80 -0
data/benchmark/baseline.json +9 -0
data/benchmark/bench_agent_invoke.rb +105 -0
data/benchmark/bench_context_assembler.rb +46 -0
data/benchmark/bench_regression.rb +172 -0
data/benchmark/bench_token_estimator.rb +44 -0
data/benchmark/bench_tool_schema.rb +69 -0
data/benchmark/bench_vector_store.rb +39 -0
data/benchmark/bench_workflow.rb +55 -0
data/benchmark/run_all.rb +118 -0
data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
data/docs/decisions/002-workflow-context-immutability.md +42 -0
data/docs/decisions/003-event-loop-singleton.md +48 -0
data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +75 -0
data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
data/docs/decisions/006-no-built-in-guardrails.md +66 -0
data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
data/docs/decisions/009-state-store-abstraction.md +141 -0
data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
data/lib/phronomy/agent/base.rb +416 -49
data/lib/phronomy/agent/before_completion_context.rb +1 -0
data/lib/phronomy/agent/checkpoint.rb +1 -0
data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
data/lib/phronomy/agent/concerns/retryable.rb +12 -1
data/lib/phronomy/agent/concerns/suspendable.rb +19 -0
data/lib/phronomy/agent/fsm.rb +44 -52
data/lib/phronomy/agent/handoff.rb +3 -0
data/lib/phronomy/agent/orchestrator.rb +191 -54
data/lib/phronomy/agent/parallel_tool_chat.rb +87 -13
data/lib/phronomy/agent/react_agent.rb +16 -6
data/lib/phronomy/agent/runner.rb +2 -0
data/lib/phronomy/agent/shared_state.rb +11 -0
data/lib/phronomy/agent/suspend_signal.rb +2 -0
data/lib/phronomy/agent/team_coordinator.rb +17 -5
data/lib/phronomy/async_queue.rb +155 -0
data/lib/phronomy/blocking_adapter_pool.rb +435 -0
data/lib/phronomy/cancellation_scope.rb +123 -0
data/lib/phronomy/cancellation_token.rb +133 -0
data/lib/phronomy/concurrency_gate.rb +155 -0
data/lib/phronomy/configuration.rb +168 -2
data/lib/phronomy/context/assembler.rb +6 -0
data/lib/phronomy/context/compaction_context.rb +2 -0
data/lib/phronomy/context/context_version_cache.rb +2 -0
data/lib/phronomy/context/token_budget.rb +3 -0
data/lib/phronomy/context/token_estimator.rb +9 -2
data/lib/phronomy/context/trigger_context.rb +1 -0
data/lib/phronomy/context/trim_context.rb +4 -0
data/lib/phronomy/deadline.rb +63 -0
data/lib/phronomy/diagnostics.rb +62 -0
data/lib/phronomy/embeddings/base.rb +22 -2
data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
data/lib/phronomy/eval/comparison.rb +2 -0
data/lib/phronomy/eval/dataset.rb +4 -0
data/lib/phronomy/eval/metrics.rb +6 -0
data/lib/phronomy/eval/runner.rb +11 -9
data/lib/phronomy/eval/scorer/base.rb +1 -0
data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
data/lib/phronomy/event_loop.rb +275 -30
data/lib/phronomy/fsm_session.rb +57 -4
data/lib/phronomy/generator_verifier.rb +2 -0
data/lib/phronomy/guardrail/base.rb +3 -0
data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
data/lib/phronomy/invocation_context.rb +152 -0
data/lib/phronomy/knowledge_source/base.rb +24 -2
data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
data/lib/phronomy/llm_adapter/base.rb +104 -0
data/lib/phronomy/llm_adapter/ruby_llm.rb +41 -0
data/lib/phronomy/llm_adapter.rb +20 -0
data/lib/phronomy/loader/base.rb +1 -0
data/lib/phronomy/loader/csv_loader.rb +2 -0
data/lib/phronomy/loader/markdown_loader.rb +2 -0
data/lib/phronomy/loader/plain_text_loader.rb +1 -0
data/lib/phronomy/metrics.rb +38 -0
data/lib/phronomy/output_parser/base.rb +1 -0
data/lib/phronomy/output_parser/json_parser.rb +22 -3
data/lib/phronomy/output_parser/structured_parser.rb +2 -0
data/lib/phronomy/prompt_template.rb +5 -0
data/lib/phronomy/runnable.rb +20 -3
data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
data/lib/phronomy/runtime/gate_registry.rb +52 -0
data/lib/phronomy/runtime/pool_registry.rb +57 -0
data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
data/lib/phronomy/runtime/scheduler.rb +98 -0
data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
data/lib/phronomy/runtime/task_registry.rb +48 -0
data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
data/lib/phronomy/runtime/timer_queue.rb +106 -0
data/lib/phronomy/runtime/timer_service.rb +42 -0
data/lib/phronomy/runtime.rb +374 -0
data/lib/phronomy/splitter/base.rb +2 -0
data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
data/lib/phronomy/state_store/base.rb +48 -0
data/lib/phronomy/state_store/in_memory.rb +62 -0
data/lib/phronomy/task/backend.rb +80 -0
data/lib/phronomy/task/fiber_backend.rb +157 -0
data/lib/phronomy/task/immediate_backend.rb +89 -0
data/lib/phronomy/task/thread_backend.rb +84 -0
data/lib/phronomy/task.rb +275 -0
data/lib/phronomy/task_group.rb +265 -0
data/lib/phronomy/testing/fake_clock.rb +109 -0
data/lib/phronomy/testing/fake_scheduler.rb +104 -0
data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
data/lib/phronomy/testing.rb +12 -0
data/lib/phronomy/tool/agent_tool.rb +1 -0
data/lib/phronomy/tool/base.rb +298 -28
data/lib/phronomy/tool/mcp_tool.rb +103 -17
data/lib/phronomy/tool/scope_policy.rb +50 -0
data/lib/phronomy/tool_executor.rb +106 -0
data/lib/phronomy/tracing/base.rb +3 -0
data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
data/lib/phronomy/tracing/open_telemetry_tracer.rb +36 -0
data/lib/phronomy/vector_store/async_backend.rb +110 -0
data/lib/phronomy/vector_store/base.rb +40 -7
data/lib/phronomy/vector_store/in_memory.rb +16 -7
data/lib/phronomy/vector_store/pgvector.rb +40 -9
data/lib/phronomy/vector_store/redis_search.rb +29 -8
data/lib/phronomy/version.rb +1 -1
data/lib/phronomy/workflow.rb +147 -11
data/lib/phronomy/workflow_context.rb +83 -6
data/lib/phronomy/workflow_runner.rb +106 -7
data/lib/phronomy.rb +112 -1
data/scripts/api_snapshot.rb +91 -0
data/scripts/check_api_annotations.rb +68 -0
data/scripts/check_private_enforcement.rb +93 -0
data/scripts/check_readme_runnable.rb +98 -0
data/scripts/run_mutation.sh +46 -0
metadata +83 -2

data/lib/phronomy/agent/base.rb CHANGED

@@ -6,6 +6,7 @@ require_relative "concerns/retryable"
 require_relative "concerns/guardrailable"
 require_relative "concerns/before_completion"
 require_relative "concerns/suspendable"
+require_relative "concerns/error_translation"
 module Phronomy
   module Agent
@@ -36,6 +37,7 @@ module Phronomy
       include Concerns::Guardrailable
       include Concerns::BeforeCompletion
       include Concerns::Suspendable
+      include Concerns::ErrorTranslation
       class << self
         # Sets or reads the LLM model identifier for this agent.
@@ -48,6 +50,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     model "gpt-4o"
         #   end
+        # @api public
         def model(name = nil)
           if name
             @model = name
@@ -71,6 +74,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     instructions { |input| "Answer in #{input[:lang]}." }
         #   end
+        # @api public
         def instructions(text = nil, &block)
           if text || block_given?
             @instructions = text || block
@@ -96,6 +100,7 @@ module Phronomy
         #     Places::SearchTool  => "places_search",
         #     CurrentTimeTool     => nil
         #   )
+        # @api public
         def tools(*args)
           if args.empty?
             if instance_variable_defined?(:@tools)
@@ -115,9 +120,17 @@ module Phronomy
         end
         # Returns the alias map registered via the hash form of .tools.
+        # Merges parent class aliases so subclasses inherit their parent's mappings.
+        # Subclass-specific aliases take precedence over parent aliases.
         # @return [Hash{Class => String}]
+        # @api public
         def tool_aliases
-          @tool_aliases ||= {}
+          own = @tool_aliases || {}
+          if superclass.respond_to?(:tool_aliases)
+            superclass.tool_aliases.merge(own)
+          else
+            own
+          end
         end
         # Sets or reads the LLM provider for this agent.
@@ -131,6 +144,7 @@ module Phronomy
         #     model "openai/gpt-oss-20b"
         #     provider :openai
         #   end
+        # @api public
         def provider(name = nil)
           if name
             @provider = name
@@ -149,6 +163,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     temperature 0.2
         #   end
+        # @api public
         def temperature(val = nil)
           if val
             @temperature = val
@@ -166,6 +181,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     max_iterations 5
         #   end
+        # @api public
         def max_iterations(val = nil)
           if val
             @max_iterations = val
@@ -174,27 +190,118 @@ module Phronomy
           end
         end
+        # Sets or reads the maximum number of tool calls executed concurrently
+        # when the LLM returns multiple tool calls in a single response
+        # (ParallelToolChat mode, active inside an AgentFSM IO thread).
+        #
+        # Defaults to 10. Set to 1 to force sequential execution.
+        # Inherited by subclasses; the most-specific definition wins.
+        #
+        # @param val [Integer, nil]
+        # @return [Integer]
+        # @example
+        #   class MyAgent < Phronomy::Agent::Base
+        #     max_parallel_tools 4
+        #   end
+        # @api public
+        def max_parallel_tools(val = nil)
+          if val.nil?
+            @max_parallel_tools ||
+              (superclass.respond_to?(:max_parallel_tools) ? superclass.max_parallel_tools : 10)
+          else
+            unless val.is_a?(Integer) && val >= 1
+              raise ArgumentError,
+                "max_parallel_tools must be a positive Integer (>= 1), got #{val.inspect}"
+            end
+            @max_parallel_tools = val
+          end
+        end
+        # Sets or reads the per-invocation timeout (in seconds) for EventLoop-mode
+        # agent calls.  When set, +invoke+ raises {Phronomy::TimeoutError} if the
+        # agent does not finish within the given number of seconds.
+        #
+        # Has no effect when EventLoop mode is disabled (direct invoke path).
+        # Defaults to +nil+ (no timeout).
+        # Inherited by subclasses; the most-specific definition wins.
+        #
+        # When the timeout fires, a {Phronomy::CancellationScope} is cancelled
+        # and its token is propagated to the FSM config so that in-flight LLM,
+        # tool, and RAG calls observe cancellation via their +cancellation_token:+
+        # keyword argument.  +Phronomy::TimeoutError+ is raised to the caller.
+        #
+        # @param val [Numeric, nil]
+        # @return [Numeric, nil]
+        # @example
+        #   class MyAgent < Phronomy::Agent::Base
+        #     invoke_timeout 30
+        #   end
+        # @api public
+        def invoke_timeout(val = nil)
+          if val.nil?
+            return @invoke_timeout if defined?(@invoke_timeout)
+            superclass.respond_to?(:invoke_timeout) ? superclass.invoke_timeout : nil
+          else
+            unless val.is_a?(Numeric) && val > 0
+              raise ArgumentError,
+                "invoke_timeout must be a positive number, got #{val.inspect}"
+            end
+            @invoke_timeout = val
+          end
+        end
         # Registers one or more static knowledge sources on the agent class.
-        # Static sources are fetched once per agent instance and their content
-        # is cached in ContextVersionCache keyed by a fingerprint of the
-        # instruction text + source content. The cache is invalidated automatically
-        # when the fingerprint changes (e.g. because a source was updated).
+        # Static source content is fetched and memoized at the **class** level
+        # the first time +invoke+ is called. The cache persists for the lifetime
+        # of the process; call {.static_knowledge_refresh!} to force a reload.
         #
         # @param sources [Array<Phronomy::KnowledgeSource::Base>]
         # @example
         #   class PolicyAgent < Phronomy::Agent::Base
         #     static_knowledge Phronomy::KnowledgeSource::StaticKnowledge.new(POLICY_TEXT)
         #   end
+        # @api public
         def static_knowledge(*sources)
           @static_knowledge_sources = sources.flatten
+          # Invalidate the cached chunks so the new sources are fetched on
+          # the next call to static_knowledge_chunks.
+          @static_knowledge_chunks = nil
         end
         # Returns the registered static knowledge sources.
         # @return [Array<Phronomy::KnowledgeSource::Base>]
+        # @api public
         def static_knowledge_sources
           @static_knowledge_sources || []
         end
+        # Returns the fetched content from all static knowledge sources.
+        # Results are cached at the class level so that each source is fetched
+        # only once regardless of how many times the agent is invoked.
+        # @return [Array<Hash>]
+        # @api public
+        def static_knowledge_chunks
+          @static_knowledge_chunks ||= static_knowledge_sources.flat_map { |ks|
+            ks.fetch(query: nil)
+          }
+        end
+        # Clears the class-level knowledge cache so that the next +invoke+ call
+        # re-fetches content from all registered static knowledge sources.
+        #
+        # Call this method when the underlying knowledge source has been updated
+        # at runtime (e.g. a file was rewritten, a DB record changed) and you
+        # want the agent to pick up the new content without restarting the
+        # process.
+        #
+        # @return [nil]
+        # @example Refresh after updating a knowledge file
+        #   MyAgent.static_knowledge_refresh!
+        # @api public
+        def static_knowledge_refresh!
+          @static_knowledge_chunks = nil
+        end
         # Registers a callback that is invoked before every LLM call so the
         # application can remove stale or irrelevant messages from the
         # conversation history.
@@ -209,11 +316,13 @@ module Phronomy
         #     limit = ctx.budget&.available(used: 0) || Float::INFINITY
         #     ctx.remove(ctx.message_elements.first[:seq]) if ctx.total_tokens > limit * 0.8
         #   end
+        # @api public
         def on_trim(&block)
           @on_trim_callback = block
         end
         # @return [Proc, nil]
+        # @api private
         def _on_trim_callback
           @on_trim_callback
         end
@@ -232,11 +341,13 @@ module Phronomy
         #     limit = ctx.budget&.available(used: 0) || Float::INFINITY
         #     ctx.total_tokens > limit * 0.7
         #   end
+        # @api public
         def on_compaction_trigger(&block)
           @on_compaction_trigger_callback = block
         end
         # @return [Proc, nil]
+        # @api private
         def _on_compaction_trigger_callback
           @on_compaction_trigger_callback
         end
@@ -254,11 +365,13 @@ module Phronomy
         #       "Earlier conversation summary: #{texts}"
         #     end
         #   end
+        # @api public
         def on_compact(&block)
           @on_compact_callback = block
         end
         # @return [Proc, nil]
+        # @api private
         def _on_compact_callback
           @on_compact_callback
         end
@@ -278,6 +391,7 @@ module Phronomy
         #     provider :anthropic
         #     cache_instructions true
         #   end
+        # @api public
         def cache_instructions(enabled = nil)
           if enabled.nil?
             @cache_instructions
@@ -293,6 +407,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     max_output_tokens 4096
         #   end
+        # @api public
         def max_output_tokens(val = nil)
           if val.nil?
             @max_output_tokens
@@ -310,6 +425,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     context_window 4096
         #   end
+        # @api public
         def context_window(val = nil)
           if val.nil?
             @context_window
@@ -325,6 +441,7 @@ module Phronomy
         #   class MyAgent < Phronomy::Agent::Base
         #     context_overhead 500
         #   end
+        # @api public
         def context_overhead(val = nil)
           if val.nil?
             @context_overhead || 0
@@ -338,6 +455,7 @@ module Phronomy
       # Called by Runner during construction when routes are configured.
       # @param tool_class [Class<Phronomy::Tool::Base>]
       # @return [self]
+      # @api private
       def _add_handoff_tool(tool_class)
         @_handoff_tools ||= []
         @_handoff_tools << tool_class
@@ -346,6 +464,7 @@ module Phronomy
       # Returns handoff tool classes registered on this instance by Runner.
       # @return [Array<Class>]
+      # @api private
       def _handoff_tools
         @_handoff_tools || []
       end
@@ -366,6 +485,11 @@ module Phronomy
       #   +:knowledge_sources+ (Array) — dynamic knowledge sources for this turn
       #   +:user_id+    (+String+, optional) — caller identity forwarded to the tracer
       #   +:session_id+ (+String+, optional) — session identity forwarded to the tracer
+      # @param invocation_context [Phronomy::InvocationContext, nil] optional first-class context
+      #   object.  When present, +thread_id+, +cancellation_token+, and +deadline+ are
+      #   derived from it (existing +config:+ keys take precedence as backward-compat
+      #   aliases).  The object is also stored in +config[:invocation_context]+ so that
+      #   +task_id+ / +parent_task_id+ appear in trace spans automatically.
       # @return [Hash] +{ output: String, messages: Array, usage: Phronomy::TokenUsage }+,
       #   or +{ output: nil, suspended: true, checkpoint: Phronomy::Agent::Checkpoint,
       #   messages: Array }+ when the invocation was suspended awaiting tool approval.
@@ -382,28 +506,111 @@ module Phronomy
       #     result = agent.resume(result[:checkpoint], approved: true)
       #   end
       #   puts result[:output]
-      def invoke(input, messages: [], thread_id: nil, config: {})
+      # @example With InvocationContext (deadline-based timeout)
+      #   ctx = Phronomy::InvocationContext.new(
+      #     thread_id: "conv-123",
+      #     deadline: Phronomy::Deadline.in(30),
+      #     task_id: SecureRandom.uuid
+      #   )
+      #   result = MyAgent.new.invoke("Hello", invocation_context: ctx)
+      # @api public
+      def invoke(input, messages: [], thread_id: nil, config: {}, invocation_context: nil)
+        if invocation_context
+          thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
+        end
         if Phronomy.configuration.event_loop
           # Protect against blocking the EventLoop thread itself.
-          if Thread.current[:phronomy_event_loop_thread]
+          if Phronomy::EventLoop.current?
             raise Phronomy::Error,
               "Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
               "entry action. Use agent.run_as_child(input, ctx: ctx) instead."
           end
+          # Build an effective config that includes the invoke_timeout scope's
+          # CancellationToken before constructing the FSM.  This ensures that
+          # every LLM, tool, and RAG call made inside _invoke_impl observes
+          # cancellation when the deadline fires.
+          timeout_sec = self.class.invoke_timeout
+          effective_config, scope = if timeout_sec
+            s = Phronomy::CancellationScope.new(parent_token: config[:cancellation_token])
+            s.deadline_in(timeout_sec)
+            [config.merge(cancellation_token: s.token), s]
+          else
+            [config, nil]
+          end
           fsm = Agent::FSM.new(
             agent: self,
             input: input,
             messages: messages,
             thread_id: thread_id || SecureRandom.uuid,
-            config: config
+            config: effective_config
           )
           completion_queue = Phronomy::EventLoop.instance.register(fsm)
-          result = completion_queue.pop
+          result = if scope
+            scope.pop_queue(completion_queue) do
+              raise Phronomy::TimeoutError,
+                "Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
+            end
+          else
+            completion_queue.pop
+          end
           raise result if result.is_a?(Exception)
           result
         else
-          _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
+          # Guard: calling invoke from inside a scheduler task would block the task
+          # against itself when using a cooperative backend.  Use invoke_async
+          # instead to compose agents without introducing a blocking wait.
+          if Phronomy::Task.current
+            msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
+              "This blocks the scheduler until the inner invocation completes, preventing " \
+              "other tasks from making progress. Use invoke_async + await instead."
+            if Phronomy.configuration.strict_runtime_guards
+              raise Phronomy::SchedulerReentrancyError, msg
+            elsif Phronomy.configuration.logger
+              Phronomy.configuration.logger.warn(msg)
+            else
+              Kernel.warn("[phronomy] WARNING: #{msg}")
+            end
+          end
+          invoke_async(input, messages: messages, thread_id: thread_id, config: config).await
+        end
+      end
+      # Invokes this agent asynchronously and returns a {Phronomy::Task}.
+      #
+      # This is the primary async entry point.  {#invoke} is a synchronous wrapper
+      # that calls this method and blocks the caller until the task completes.
+      # When EventLoop mode is disabled, calling {#invoke} from inside an active
+      # scheduler task logs a warning, or raises {Phronomy::SchedulerReentrancyError}
+      # when +strict_runtime_guards+ is enabled; use +invoke_async+ directly in that
+      # context.
+      #
+      # The task is registered with the Runtime task registry so {Runtime#shutdown}
+      # drains in-flight invocations before process exit.
+      #
+      # @example
+      #   task = agent.invoke_async("Hello!")
+      #   result = task.await   # => { output: "...", messages: [...], usage: ... }
+      #
+      # @param input    [String, Hash]
+      # @param messages [Array]
+      # @param thread_id [String, nil]
+      # @param config   [Hash]
+      # @param invocation_context [Phronomy::InvocationContext, nil]
+      # @return [Phronomy::Task]
+      # @api public
+      def invoke_async(input, messages: [], thread_id: nil, config: {}, invocation_context: nil)
+        if invocation_context
+          thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
+        end
+        bp = Phronomy.configuration.backpressure
+        on_full = (bp == :raise) ? :reject : (bp || :wait)
+        bp_timeout = Phronomy.configuration.backpressure_timeout
+        gate = Phronomy::Runtime.instance.gate(:agent)
+        Phronomy::Runtime.instance.spawn(name: "agent-#{(self.class.name || "anonymous").downcase}-async") do
+          gate.acquire(on_full: on_full, timeout: bp_timeout) do
+            _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
+          end
         end
       end
@@ -418,30 +625,24 @@ module Phronomy
       # result hash +{ output:, messages:, usage: }+.  Declare an +on: :child_completed+
       # transition in your Workflow to advance to the next state.
       #
-      # An optional block may be provided to write the result back into the parent
-      # WorkflowContext <b>before</b> the +:child_completed+ event is dispatched.
-      # +Thread::Queue+ provides the happens-before guarantee — no Mutex is needed.
+      # The result is delivered exclusively as the +:child_completed+ event payload.
+      # The parent Workflow task is the sole owner of the parent +WorkflowContext+ and
+      # applies the result after receiving the event — no background thread writes to
+      # the parent context directly.
       #
-      # @example Without block (result available only as event payload)
+      # @example
       #   entry :run_agent, ->(ctx) { MyAgent.new.run_as_child(ctx.query, ctx: ctx) }
       #   transition from: :run_agent, on: :child_completed, to: :process_result
       #
-      # @example With block (writes result into context)
-      #   entry :run_agent, ->(ctx) {
-      #     MyAgent.new.run_as_child(ctx.query, ctx: ctx) { |r| ctx.answer = r[:output] }
-      #   }
-      #   transition from: :run_agent, on: :child_completed, to: :process_result
-      #
       # @param input     [String, Hash]  user input passed to the agent
       # @param ctx       [Object]        a WorkflowContext that responds to +#thread_id+
       # @param messages  [Array]         prior conversation history
       # @param config    [Hash]          invocation config (forwarded to +_invoke_impl+)
-      # @yield [Hash]  result hash +{ output:, messages:, usage: }+ — called from the
-      #                agent IO thread before +:child_completed+ is posted
       # @return [nil]  the caller must not wait on any return value;
       #                the result arrives as a +:child_completed+ event
       # @raise [Phronomy::Error] when EventLoop mode is not enabled
-      def run_as_child(input, ctx:, messages: [], config: {}, &result_writer)
+      # @api public
+      def run_as_child(input, ctx:, messages: [], config: {})
         unless Phronomy.configuration.event_loop
           raise Phronomy::Error,
             "run_as_child requires EventLoop mode. " \
@@ -454,8 +655,7 @@ module Phronomy
           messages: messages,
           thread_id: "#{ctx.thread_id}_agent_#{SecureRandom.uuid}",
           config: config,
-          parent_id: ctx.thread_id,
-          result_writer: result_writer
+          parent_id: ctx.thread_id
         )
         Phronomy::EventLoop.instance.enqueue_child(fsm)
         nil
@@ -477,6 +677,7 @@ module Phronomy
       # @param config    [Hash]        same as #invoke
       # @yield [Phronomy::Agent::StreamEvent]
       # @return [Hash] { output:, messages:, usage: } — same as #invoke
+      # @api public
       def stream(input, messages: [], thread_id: nil, config: {}, &block)
         return invoke(input, messages: messages, thread_id: thread_id, config: config) unless block
@@ -486,19 +687,50 @@ module Phronomy
         raise
       end
-      # Returns the {Context::ContextVersionCache} for the current thread.
+      # Returns the {Context::ContextVersionCache} built during the most recent
+      # {#invoke} call on this agent instance.  The thread-local cache entry is
+      # cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
+      # in +@last_context_version_cache+ so callers can inspect it after invoke
+      # returns.
+      #
+      # NOTE: Not thread-safe.  When the same Agent instance is used concurrently,
+      # +@last_context_version_cache+ reflects the most recent +invoke+ on *any*
+      # thread.  For per-invocation isolation, use a separate Agent instance per
+      # thread.
       # @api private
       def context_version_cache
-        (Thread.current[:phronomy_context_version_caches] ||= {})[object_id]
+        @last_context_version_cache
       end
       private
+      # Merges an {InvocationContext} into the +thread_id+ / +config+ pair.
+      # Returns +[effective_thread_id, effective_config]+.
+      #
+      # Precedence rules (existing explicit values always win):
+      # - +thread_id+ argument > +ic.thread_id+
+      # - +config[:cancellation_token]+ > +ic.cancellation_token+ > token derived from +ic.deadline+
+      # - +ic+ is stored in +config[:invocation_context]+ (overwriting any previous value)
+      def _apply_invocation_context(thread_id, config, ic)
+        effective_thread_id = thread_id || ic.thread_id
+        effective_config = config.merge(invocation_context: ic)
+        if effective_config[:cancellation_token].nil?
+          if (tok = ic.effective_timeout_token)
+            effective_config = effective_config.merge(cancellation_token: tok)
+          end
+        end
+        [effective_thread_id, effective_config]
+      end
       # Streaming implementation for #stream.
       def _stream_impl(input, messages: [], thread_id: nil, config: {}, &block)
         caller_meta = {}
         caller_meta[:user_id] = config[:user_id] if config[:user_id]
         caller_meta[:session_id] = config[:session_id] if config[:session_id]
+        if (ic = config[:invocation_context])
+          caller_meta[:task_id] = ic.task_id if ic.task_id
+          caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
+        end
         trace("agent.invoke", input: input, **caller_meta) do |_span|
           run_input_guardrails!(input)
@@ -529,10 +761,26 @@ module Phronomy
           # Run before_completion hooks (global → class → instance) before the LLM call.
           run_before_completion_hooks!(chat, config)
-          response = chat.ask(user_message) do |chunk|
+          # Route the LLM streaming call through the configured LLMAdapter.
+          # Chunks are pushed into a token queue by the pool worker thread and
+          # drained here (on the caller's side) so that the user block is never
+          # executed on a BlockingAdapterPool worker thread.
+          # The queue capacity is bounded by Configuration#stream_queue_max_size
+          # (nil = unbounded) to provide backpressure against a fast LLM producer.
+          adapter = Phronomy.configuration.llm_adapter
+          chunk_queue = Phronomy::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
+          pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
+          # Drain the chunk queue on this side (scheduler task / caller thread).
+          loop do
+            chunk = chunk_queue.pop
+            break if chunk.nil? # queue closed — LLM streaming complete
             block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
+            check_cancellation!(config, "invocation cancelled during streaming")
           end
+          response = pending.await
           output = response.content
           usage = Phronomy::TokenUsage.from_tokens(response.tokens)
@@ -554,6 +802,7 @@ module Phronomy
       # @param thread_id [String, nil] conversation thread identifier
       # @param config    [Hash] the invocation config (see #invoke)
       # @return [Hash] { system: String|nil, messages: Array }
+      # @api public
       def build_context(input, messages: [], thread_id: nil, config: {})
         history = prepare_history(messages: messages, thread_id: thread_id, config: config)
         budget = build_token_budget
@@ -563,9 +812,49 @@ module Phronomy
         assembler = Context::Assembler.new(budget: budget)
         assembler.add_instruction(system_text) if system_text
-        Array(config[:knowledge_sources]).each do |ks|
-          ks.fetch(query: user_message).each do |chunk|
-            assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
+        sources = Array(config[:knowledge_sources])
+        unless sources.empty?
+          check_cancellation!(config, "invocation cancelled before RAG fetch")
+          # Determine TaskGroup failure policy: :skip (default) ignores per-source
+          # failures so the agent can still answer with partial context; :fail
+          # surfaces the first error immediately via :fail_fast.
+          failure_policy =
+            case config[:rag_failure_policy]
+            when :fail then :fail_fast
+            else :skip_failed
+            end
+          group = Phronomy::Runtime.instance.task_group(failure_policy: failure_policy)
+          bp = Phronomy.configuration.backpressure
+          rag_on_full = (bp == :raise) ? :reject : (bp || :wait)
+          rag_bp_timeout = Phronomy.configuration.backpressure_timeout
+          # Spawn all fetches concurrently. Results are returned in spawn order
+          # (i.e. registration order of knowledge sources) by TaskGroup#await_all.
+          sources.each do |ks|
+            group.spawn do
+              Phronomy::Runtime.instance.gate(:rag).acquire(on_full: rag_on_full, timeout: rag_bp_timeout) do
+                t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+                result = ks.fetch_async(
+                  query: user_message,
+                  cancellation_token: config[:cancellation_token],
+                  timeout: config[:rag_timeout]
+                ).await
+                elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0
+                Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{(elapsed * 1000).round}ms" }
+                result
+              end
+            end
+          end
+          # await_all returns results in spawn order; nil entries indicate
+          # skipped failures when using :skip_failed.
+          per_source_chunks = group.await_all
+          per_source_chunks.each do |chunks|
+            Array(chunks).each do |chunk|
+              assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
+            end
           end
         end
@@ -585,6 +874,7 @@ module Phronomy
       # @param thread_id [String, nil] conversation thread identifier
       # @param config    [Hash] additional invocation options
       # @return [Array] filtered and/or compacted message objects
+      # @api public
       def prepare_history(messages: [], thread_id: nil, config: {})
         budget = build_token_budget
         elements = build_message_elements(Array(messages))
@@ -620,6 +910,10 @@ module Phronomy
         caller_meta = {}
         caller_meta[:user_id] = config[:user_id] if config[:user_id]
         caller_meta[:session_id] = config[:session_id] if config[:session_id]
+        if (ic = config[:invocation_context])
+          caller_meta[:task_id] = ic.task_id if ic.task_id
+          caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
+        end
         trace("agent.invoke", input: input, **caller_meta) do |_span|
           # Run input guardrails before touching the LLM.
@@ -641,8 +935,20 @@ module Phronomy
           # synchronous on_approval_required handler is already registered).
           _register_suspension_hook!(chat)
+          # Check for cancellation immediately before the LLM call.
+          check_cancellation!(config, "invocation cancelled before LLM call")
+          # Forward the cancellation token to ParallelToolChat explicitly
+          # via the chat instance so that tool dispatch batches can observe
+          # cancellation without needing Thread.current.
+          chat.cancellation_token = config[:cancellation_token] if chat.respond_to?(:cancellation_token=)
           begin
-            response = chat.ask(user_message)
+            # Route the LLM call through the configured LLMAdapter so that the
+            # blocking HTTP request runs inside BlockingAdapterPool and the
+            # adapter can be swapped without changing agent code.
+            adapter = Phronomy.configuration.llm_adapter
+            response = adapter.complete_async(chat, user_message, config: config).await
           rescue SuspendSignal => signal
             checkpoint = Checkpoint.new(
               thread_id: thread_id,
@@ -654,6 +960,9 @@ module Phronomy
             )
             suspended_result = {output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}
             next [suspended_result, nil]
+          ensure
+            # Clear the chat's cancellation token reference after each LLM call.
+            chat.cancellation_token = nil if chat.respond_to?(:cancellation_token=)
           end
           output = response.content
@@ -699,6 +1008,7 @@ module Phronomy
       #
       # @param messages [Array] message-like objects with #role and #content
       # @return [Array<Hash>]
+      # @api public
       def build_message_elements(messages)
         Array(messages).each_with_index.map do |msg, idx|
           tokens = Context::TokenEstimator.estimate(msg.content.to_s)
@@ -714,20 +1024,17 @@ module Phronomy
       #
       # @param input [String, Hash] the agent's current input (used for template evaluation)
       # @return [String, nil] assembled system text, or nil when empty
+      # @api public
       def build_cached_system_text(input)
         instruction = build_instructions(input)
-        static_chunks = self.class.static_knowledge_sources.flat_map { |ks|
-          ks.fetch(query: nil)
-        }
+        static_chunks = self.class.static_knowledge_chunks
         fingerprint = Digest::SHA256.hexdigest(
           [instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
         )
-        agent_id = object_id
-        cache = (Thread.current[:phronomy_context_version_caches] ||= {})[agent_id] ||=
-          Context::ContextVersionCache.new
+        cache = (@context_version_cache ||= Context::ContextVersionCache.new)
         unless cache.valid?(fingerprint)
           parts = [instruction]
           static_chunks.each do |chunk|
@@ -736,18 +1043,20 @@ module Phronomy
           cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
         end
+        # Persist a reference on the instance so that context_version_cache
+        # remains accessible after invoke completes.
+        @last_context_version_cache = cache
         cache.system_text.empty? ? nil : cache.system_text
       end
-      # Load messages from a ConversationManager.
-      #
       # Returns the chat class to instantiate for this invocation.
-      # When the +:phronomy_agent_parallel_tools+ thread-local flag is set
-      # (i.e. inside an {AgentFSM} IO thread), returns {ParallelToolChat} so
-      # that concurrent tool dispatch is enabled.  Falls back to +nil+ otherwise,
-      # signalling {#build_chat} to use the standard +RubyLLM.chat+ factory.
+      # When EventLoop mode is enabled ({Phronomy.configuration.event_loop}),
+      # returns {ParallelToolChat} so that concurrent tool dispatch is enabled.
+      # Falls back to +nil+ otherwise, signalling {#build_chat} to use the
+      # standard +RubyLLM.chat+ factory.
       def build_chat_class
-        Thread.current[:phronomy_agent_parallel_tools] ? Agent::ParallelToolChat : nil
+        Phronomy.configuration.event_loop ? Agent::ParallelToolChat : nil
       end
       def build_chat
@@ -761,7 +1070,11 @@ module Phronomy
         end
         t = self.class.temperature
         parallel_class = build_chat_class
-        chat = parallel_class ? parallel_class.new(**opts) : RubyLLM.chat(**opts)
+        chat = if parallel_class
+          parallel_class.new(max_parallel_tools: self.class.max_parallel_tools, **opts)
+        else
+          RubyLLM.chat(**opts)
+        end
         chat.with_temperature(t) if t
         self.class.tools.each do |tool_class|
           chat.with_tool(prepare_tool_class(tool_class))
@@ -811,17 +1124,44 @@ module Phronomy
         end
       end
+      # Raises CancellationError if the cancellation_token in config is cancelled.
+      # No-op when config has no cancellation_token or the token is not cancelled.
+      #
+      # @param config [Hash] the invocation config hash
+      # @param message [String] the message for the CancellationError
+      # @raise [Phronomy::CancellationError]
+      # @api public
+      def check_cancellation!(config, message = "invocation cancelled")
+        ct = config[:cancellation_token]
+        raise Phronomy::CancellationError, message if ct&.cancelled?
+      end
       # Builds the final tool class to register with the chat.
       #
-      # Two transformations are applied in order:
+      # When an already-instantiated tool object is passed (e.g. a
+      # {Phronomy::Tool::McpTool} returned by +McpTool.from_server+), it is
+      # returned as-is.  RubyLLM's +with_tool+ accepts both classes and
+      # instances, so no wrapping is needed.
+      #
+      # For tool classes, three transformations are applied in order:
       #   1. Alias override — when the Hash form of .tools maps this class to an
       #      explicit name, an anonymous subclass with that tool_name is returned.
-      #   2. Approval gate  — when the tool class has +requires_approval+ set AND
+      #   2. Scope policy   — when a scope is declared on the tool, the configured
+      #      {Phronomy::Tool::ScopePolicy} (or the default) is evaluated.
+      #      +:reject+ wraps the tool to return a denial message without executing.
+      #      +:approve+ behaves like requiring approval (same as step 3 when the
+      #      tool does not already have +requires_approval+).
+      #   3. Approval gate  — when the tool class has +requires_approval+ set AND
       #      an approval handler has been registered via #on_approval_required,
       #      the tool's #call method is wrapped: the handler is invoked with
       #      (tool_name, args) and, if it returns falsy, the tool returns a denial
       #      message instead of executing.
       def prepare_tool_class(tool_class)
+        # When an instantiated tool object is passed (e.g. McpTool.from_server
+        # returns an instance, not a class), skip class-level processing and
+        # return it directly. RubyLLM#with_tool handles both forms.
+        return tool_class unless tool_class.is_a?(Class)
         # Step 1: apply alias if needed.
         resolved = if (alias_name = self.class.tool_aliases[tool_class])
           parent_description = tool_class.description
@@ -833,7 +1173,34 @@ module Phronomy
           tool_class
         end
-        # Step 2: wrap with approval gate when handler is registered.
+        # Step 2: evaluate scope policy.
+        scope = resolved.scope
+        if scope
+          policy = @scope_policy || Phronomy::Tool::ScopePolicy::DEFAULT
+          decision = policy.call(resolved, scope, self)
+          case decision
+          when :reject
+            effective_name = resolved.new.name
+            rejected_class = Class.new(resolved) do
+              tool_name effective_name
+              define_method(:call) do |_args|
+                "Tool execution denied: scope :#{scope} is not permitted."
+              end
+            end
+            return rejected_class
+          when :approve
+            # Treat as requires_approval unless the tool already has that flag.
+            unless resolved.requires_approval
+              effective_name = resolved.new.name
+              resolved = Class.new(resolved) do
+                tool_name effective_name
+                requires_approval true
+              end
+            end
+          end
+        end
+        # Step 3: wrap with approval gate when handler is registered.
         return resolved unless resolved.requires_approval && @approval_handler
         handler = @approval_handler