RubyGems - phronomy - Versions diffs - 0.7.0 → 0.8.0 - Mend

phronomy 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134) hide show

checksums.yaml +4 -4
data/.mutant.yml +8 -7
data/CHANGELOG.md +151 -1
data/README.md +170 -47
data/Rakefile +33 -0
data/benchmark/baseline.json +1 -1
data/benchmark/bench_context_assembler.rb +2 -2
data/benchmark/bench_regression.rb +6 -5
data/benchmark/bench_token_estimator.rb +5 -5
data/benchmark/bench_tool_schema.rb +1 -1
data/benchmark/bench_vector_store.rb +1 -1
data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +24 -0
data/docs/decisions/006-no-built-in-guardrails.md +20 -2
data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
data/lib/phronomy/agent/base.rb +285 -137
data/lib/phronomy/agent/checkpoint.rb +118 -0
data/lib/phronomy/agent/concerns/suspendable.rb +15 -0
data/lib/phronomy/agent/context/conversation/compaction_context.rb +117 -0
data/lib/phronomy/agent/context/conversation/trigger_context.rb +43 -0
data/lib/phronomy/agent/context/conversation/trim_context.rb +82 -0
data/lib/phronomy/agent/context/instruction/prompt_template.rb +102 -0
data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +45 -0
data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +51 -0
data/lib/phronomy/agent/context/knowledge/loader/base.rb +31 -0
data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +62 -0
data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +82 -0
data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +28 -0
data/lib/phronomy/agent/context/knowledge/source/base.rb +60 -0
data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +102 -0
data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +63 -0
data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +58 -0
data/lib/phronomy/agent/context/knowledge/splitter/base.rb +53 -0
data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +57 -0
data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +111 -0
data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +116 -0
data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +95 -0
data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +109 -0
data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +133 -0
data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +198 -0
data/lib/phronomy/agent/fsm.rb +42 -65
data/lib/phronomy/agent/invocation_pipeline.rb +99 -0
data/lib/phronomy/agent/lifecycle/fsm_session.rb +251 -0
data/lib/phronomy/agent/lifecycle/phase_machine_builder.rb +249 -0
data/lib/phronomy/agent/react_agent.rb +27 -14
data/lib/phronomy/agent/runner.rb +2 -2
data/lib/phronomy/agent/tool_executor.rb +108 -0
data/lib/phronomy/concurrency/async_queue.rb +157 -0
data/lib/phronomy/concurrency/blocking_adapter_pool.rb +443 -0
data/lib/phronomy/concurrency/cancellation_scope.rb +125 -0
data/lib/phronomy/concurrency/cancellation_token.rb +140 -0
data/lib/phronomy/concurrency/concurrency_gate.rb +157 -0
data/lib/phronomy/concurrency/deadline.rb +65 -0
data/lib/phronomy/concurrency/gate_registry.rb +52 -0
data/lib/phronomy/concurrency/pool_registry.rb +57 -0
data/lib/phronomy/configuration.rb +142 -0
data/lib/phronomy/context.rb +2 -8
data/lib/phronomy/diagnostics.rb +62 -0
data/lib/phronomy/embeddings.rb +2 -2
data/lib/phronomy/eval/runner.rb +13 -9
data/lib/phronomy/eval/scorer/llm_judge.rb +12 -1
data/lib/phronomy/event_loop.rb +184 -46
data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
data/lib/phronomy/invocation_context.rb +152 -0
data/lib/phronomy/knowledge_source.rb +0 -5
data/lib/phronomy/llm_adapter/base.rb +104 -0
data/lib/phronomy/llm_adapter/ruby_llm.rb +47 -0
data/lib/phronomy/llm_adapter.rb +20 -0
data/lib/phronomy/{context → llm_context_window}/assembler.rb +18 -3
data/lib/phronomy/{context → llm_context_window}/context_version_cache.rb +1 -1
data/lib/phronomy/{context → llm_context_window}/token_budget.rb +7 -4
data/lib/phronomy/{context → llm_context_window}/token_estimator.rb +3 -3
data/lib/phronomy/loader.rb +4 -4
data/lib/phronomy/metrics.rb +38 -0
data/lib/phronomy/{agent → multi_agent}/handoff.rb +2 -2
data/lib/phronomy/{agent → multi_agent}/orchestrator.rb +151 -126
data/lib/phronomy/multi_agent/parallel_tool_chat.rb +149 -0
data/lib/phronomy/{agent → multi_agent}/team_coordinator.rb +2 -2
data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
data/lib/phronomy/runtime/scheduler.rb +98 -0
data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
data/lib/phronomy/runtime/task_registry.rb +48 -0
data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
data/lib/phronomy/runtime/timer_queue.rb +106 -0
data/lib/phronomy/runtime/timer_service.rb +42 -0
data/lib/phronomy/runtime.rb +389 -0
data/lib/phronomy/splitter.rb +3 -3
data/lib/phronomy/task/backend.rb +80 -0
data/lib/phronomy/task/fiber_backend.rb +157 -0
data/lib/phronomy/task/immediate_backend.rb +89 -0
data/lib/phronomy/task/thread_backend.rb +84 -0
data/lib/phronomy/task.rb +275 -0
data/lib/phronomy/task_group.rb +265 -0
data/lib/phronomy/testing/fake_clock.rb +109 -0
data/lib/phronomy/testing/fake_scheduler.rb +104 -0
data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
data/lib/phronomy/testing.rb +12 -0
data/lib/phronomy/tool/base.rb +156 -7
data/lib/phronomy/tool/mcp_tool.rb +47 -16
data/lib/phronomy/tool/scope_policy.rb +50 -0
data/lib/phronomy/tracing/null_tracer.rb +3 -1
data/lib/phronomy/tracing/open_telemetry_tracer.rb +34 -0
data/lib/phronomy/vector_store.rb +2 -2
data/lib/phronomy/version.rb +1 -1
data/lib/phronomy/workflow.rb +52 -5
data/lib/phronomy/workflow_context.rb +37 -2
data/lib/phronomy/workflow_runner.rb +28 -77
data/lib/phronomy.rb +43 -0
metadata +73 -33
data/lib/phronomy/agent/parallel_tool_chat.rb +0 -92
data/lib/phronomy/cancellation_token.rb +0 -92
data/lib/phronomy/context/compaction_context.rb +0 -111
data/lib/phronomy/context/trigger_context.rb +0 -39
data/lib/phronomy/context/trim_context.rb +0 -75
data/lib/phronomy/embeddings/base.rb +0 -22
data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +0 -45
data/lib/phronomy/fsm_session.rb +0 -201
data/lib/phronomy/knowledge_source/base.rb +0 -36
data/lib/phronomy/knowledge_source/entity_knowledge.rb +0 -96
data/lib/phronomy/knowledge_source/rag_knowledge.rb +0 -57
data/lib/phronomy/knowledge_source/static_knowledge.rb +0 -52
data/lib/phronomy/loader/base.rb +0 -25
data/lib/phronomy/loader/csv_loader.rb +0 -56
data/lib/phronomy/loader/markdown_loader.rb +0 -76
data/lib/phronomy/loader/plain_text_loader.rb +0 -22
data/lib/phronomy/prompt_template.rb +0 -96
data/lib/phronomy/splitter/base.rb +0 -47
data/lib/phronomy/splitter/fixed_size_splitter.rb +0 -51
data/lib/phronomy/splitter/recursive_splitter.rb +0 -105
data/lib/phronomy/vector_store/base.rb +0 -82
data/lib/phronomy/vector_store/in_memory.rb +0 -93
data/lib/phronomy/vector_store/pgvector.rb +0 -127
data/lib/phronomy/vector_store/redis_search.rb +0 -192

data/lib/phronomy/agent/base.rb CHANGED Viewed

@@ -2,7 +2,6 @@
 require "digest"
 require "securerandom"
-require "timeout"
 require_relative "concerns/retryable"
 require_relative "concerns/guardrailable"
 require_relative "concerns/before_completion"
@@ -61,12 +60,12 @@ module Phronomy
         end
         # Sets or reads the system instructions for this agent.
-        # Accepts a String, a {Phronomy::PromptTemplate}, or a block (Proc).
+        # Accepts a String, a {Phronomy::Agent::Context::Instruction::PromptTemplate}, or a block (Proc).
         # When used as a reader (no argument, no block), returns the stored value.
         #
-        # @param text [String, Phronomy::PromptTemplate, nil]
+        # @param text [String, Phronomy::Agent::Context::Instruction::PromptTemplate, nil]
         # @yield optionally provide instructions as a block
-        # @return [String, Phronomy::PromptTemplate, Proc, nil]
+        # @return [String, Phronomy::Agent::Context::Instruction::PromptTemplate, Proc, nil]
         # @example String instructions
         #   class MyAgent < Phronomy::Agent::Base
         #     instructions "You are a helpful assistant."
@@ -226,13 +225,10 @@ module Phronomy
         # Defaults to +nil+ (no timeout).
         # Inherited by subclasses; the most-specific definition wins.
         #
-        # **Note**: +invoke_timeout+ is a *wait timeout*, not a cancellation.
-        # When the timeout fires, +Phronomy::TimeoutError+ is raised to the
-        # caller, but the background agent thread and any in-flight LLM or tool
-        # calls are **not** interrupted — they continue running until they
-        # complete naturally.  The agent therefore keeps consuming threads,
-        # memory, and external API credits after the caller has already received
-        # the error.  True cancellation is not yet supported.
+        # When the timeout fires, a {Phronomy::Concurrency::CancellationScope} is cancelled
+        # and its token is propagated to the FSM config so that in-flight LLM,
+        # tool, and RAG calls observe cancellation via their +cancellation_token:+
+        # keyword argument.  +Phronomy::TimeoutError+ is raised to the caller.
         #
         # @param val [Numeric, nil]
         # @return [Numeric, nil]
@@ -259,10 +255,10 @@ module Phronomy
         # the first time +invoke+ is called. The cache persists for the lifetime
         # of the process; call {.static_knowledge_refresh!} to force a reload.
         #
-        # @param sources [Array<Phronomy::KnowledgeSource::Base>]
+        # @param sources [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
         # @example
         #   class PolicyAgent < Phronomy::Agent::Base
-        #     static_knowledge Phronomy::KnowledgeSource::StaticKnowledge.new(POLICY_TEXT)
+        #     static_knowledge Phronomy::Agent::Context::Knowledge::Source::StaticKnowledge.new(POLICY_TEXT)
         #   end
         # @api public
         def static_knowledge(*sources)
@@ -273,7 +269,7 @@ module Phronomy
         end
         # Returns the registered static knowledge sources.
-        # @return [Array<Phronomy::KnowledgeSource::Base>]
+        # @return [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
         # @api public
         def static_knowledge_sources
           @static_knowledge_sources || []
@@ -310,11 +306,11 @@ module Phronomy
         # application can remove stale or irrelevant messages from the
         # conversation history.
         #
-        # The block receives a {Phronomy::Context::TrimContext} and may call
+        # The block receives a {Phronomy::Agent::Context::Conversation::TrimContext} and may call
         # +ctx.remove(seqs)+ to drop messages by seq number. Changes affect
         # only the current invocation; the underlying memory store is unchanged.
         #
-        # @yield [ctx] Phronomy::Context::TrimContext
+        # @yield [ctx] Phronomy::Agent::Context::Conversation::TrimContext
         # @example Drop the oldest message when over 80% of budget is used
         #   on_trim do |ctx|
         #     limit = ctx.budget&.available(used: 0) || Float::INFINITY
@@ -336,9 +332,9 @@ module Phronomy
         # truthy AND an +on_compact+ callback is also registered, the compact
         # pipeline is executed.
         #
-        # The block receives a read-only {Phronomy::Context::TriggerContext}.
+        # The block receives a read-only {Phronomy::Agent::Context::Conversation::TriggerContext}.
         #
-        # @yield [ctx] Phronomy::Context::TriggerContext
+        # @yield [ctx] Phronomy::Agent::Context::Conversation::TriggerContext
         # @return [Boolean] truthy → run on_compact; falsy → skip
         # @example Trigger when messages exceed 70% of token budget
         #   on_compaction_trigger do |ctx|
@@ -358,10 +354,10 @@ module Phronomy
         # Registers a callback that performs the actual compaction when the
         # +on_compaction_trigger+ callback fires. The block receives a
-        # {Phronomy::Context::CompactionContext} and should call +ctx.compact+
+        # {Phronomy::Agent::Context::Conversation::CompactionContext} and should call +ctx.compact+
         # to specify which messages to summarise.
         #
-        # @yield [ctx] Phronomy::Context::CompactionContext
+        # @yield [ctx] Phronomy::Agent::Context::Conversation::CompactionContext
         # @example Replace the first 4 messages with a short summary
         #   on_compact do |ctx|
         #     ctx.compact(0..3) do |elements|
@@ -489,6 +485,11 @@ module Phronomy
       #   +:knowledge_sources+ (Array) — dynamic knowledge sources for this turn
       #   +:user_id+    (+String+, optional) — caller identity forwarded to the tracer
       #   +:session_id+ (+String+, optional) — session identity forwarded to the tracer
+      # @param invocation_context [Phronomy::InvocationContext, nil] optional first-class context
+      #   object.  When present, +thread_id+, +cancellation_token+, and +deadline+ are
+      #   derived from it (existing +config:+ keys take precedence as backward-compat
+      #   aliases).  The object is also stored in +config[:invocation_context]+ so that
+      #   +task_id+ / +parent_task_id+ appear in trace spans automatically.
       # @return [Hash] +{ output: String, messages: Array, usage: Phronomy::TokenUsage }+,
       #   or +{ output: nil, suspended: true, checkpoint: Phronomy::Agent::Checkpoint,
       #   messages: Array }+ when the invocation was suspended awaiting tool approval.
@@ -505,29 +506,49 @@ module Phronomy
       #     result = agent.resume(result[:checkpoint], approved: true)
       #   end
       #   puts result[:output]
+      # @example With InvocationContext (deadline-based timeout)
+      #   ctx = Phronomy::InvocationContext.new(
+      #     thread_id: "conv-123",
+      #     deadline: Phronomy::Concurrency::Deadline.in(30),
+      #     task_id: SecureRandom.uuid
+      #   )
+      #   result = MyAgent.new.invoke("Hello", invocation_context: ctx)
       # @api public
-      def invoke(input, messages: [], thread_id: nil, config: {})
+      def invoke(input, messages: [], thread_id: nil, config: {}, invocation_context: nil)
+        if invocation_context
+          thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
+        end
         if Phronomy.configuration.event_loop
           # Protect against blocking the EventLoop thread itself.
-          if Thread.current[:phronomy_event_loop_thread]
+          if Phronomy::EventLoop.current?
             raise Phronomy::Error,
               "Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
               "entry action. Use agent.run_as_child(input, ctx: ctx) instead."
           end
+          # Build an effective config that includes the invoke_timeout scope's
+          # CancellationToken before constructing the FSM.  This ensures that
+          # every LLM, tool, and RAG call made inside _invoke_impl observes
+          # cancellation when the deadline fires.
+          timeout_sec = self.class.invoke_timeout
+          effective_config, scope = if timeout_sec
+            s = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
+            s.deadline_in(timeout_sec)
+            [config.merge(cancellation_token: s.token), s]
+          else
+            [config, nil]
+          end
           fsm = Agent::FSM.new(
             agent: self,
             input: input,
             messages: messages,
             thread_id: thread_id || SecureRandom.uuid,
-            config: config
+            config: effective_config
           )
           completion_queue = Phronomy::EventLoop.instance.register(fsm)
-          timeout_sec = self.class.invoke_timeout
-          result = if timeout_sec
-            begin
-              Timeout.timeout(timeout_sec) { completion_queue.pop }
-            rescue Timeout::Error
+          result = if scope
+            scope.pop_queue(completion_queue) do
               raise Phronomy::TimeoutError,
                 "Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
             end
@@ -537,13 +558,60 @@ module Phronomy
           raise result if result.is_a?(Exception)
           result
         else
-          _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
+          # Guard: calling invoke from inside a scheduler task would block the task
+          # against itself when using a cooperative backend.  Use invoke_async
+          # instead to compose agents without introducing a blocking wait.
+          if Phronomy::Task.current
+            msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
+              "This blocks the scheduler until the inner invocation completes, preventing " \
+              "other tasks from making progress. Use invoke_async + await instead."
+            if Phronomy.configuration.strict_runtime_guards
+              raise Phronomy::SchedulerReentrancyError, msg
+            elsif Phronomy.configuration.logger
+              Phronomy.configuration.logger.warn(msg)
+            else
+              Kernel.warn("[phronomy] WARNING: #{msg}")
+            end
+          end
+          invoke_async(input, messages: messages, thread_id: thread_id, config: config).await
+        end
+      end
+      # Invokes this agent asynchronously and returns a {Phronomy::Task}.
+      #
+      # This is the primary async entry point.  {#invoke} is a synchronous wrapper
+      # that calls this method and blocks the caller until the task completes.
+      # Calling {#invoke} from inside an active scheduler task emits a warning, or
+      # raises {Phronomy::SchedulerReentrancyError} when +strict_runtime_guards+ is
+      # enabled; use +invoke_async+ directly in that context.
+      #
+      # The task is registered with the Runtime task registry so {Runtime#shutdown}
+      # drains in-flight invocations before process exit.
+      #
+      # @example
+      #   task = agent.invoke_async("Hello!")
+      #   result = task.await   # => { output: "...", messages: [...], usage: ... }
+      #
+      # @param input    [String, Hash]
+      # @param messages [Array]
+      # @param thread_id [String, nil]
+      # @param config   [Hash]
+      # @param invocation_context [Phronomy::InvocationContext, nil]
+      # @return [Phronomy::Task]
+      # @api public
+      def invoke_async(input, messages: [], thread_id: nil, config: {}, invocation_context: nil)
+        if invocation_context
+          thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
+        end
+        bp = Phronomy.configuration.backpressure
+        on_full = (bp == :raise) ? :reject : (bp || :wait)
+        bp_timeout = Phronomy.configuration.backpressure_timeout
+        gate = Phronomy::Runtime.instance.gate(:agent)
+        Phronomy::Runtime.instance.spawn(name: "agent-#{(self.class.name || "anonymous").downcase}-async") do
+          gate.acquire(on_full: on_full, timeout: bp_timeout) do
+            _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
+          end
         end
-      ensure
-        # Remove this agent's context cache entry from the current thread to
-        # prevent unbounded growth of the thread-local hash in long-lived
-        # processes (e.g. Rails servers).
-        Thread.current[:phronomy_context_version_caches]&.delete(object_id)
       end
       # Registers this agent as a child {AgentFSM} inside the given Workflow context.
@@ -557,31 +625,24 @@ module Phronomy
       # result hash +{ output:, messages:, usage: }+.  Declare an +on: :child_completed+
       # transition in your Workflow to advance to the next state.
       #
-      # An optional block may be provided to write the result back into the parent
-      # WorkflowContext <b>before</b> the +:child_completed+ event is dispatched.
-        # +Thread::Queue+ provides the happens-before guarantee — no Mutex is needed.
+      # The result is delivered exclusively as the +:child_completed+ event payload.
+      # The parent Workflow task is the sole owner of the parent +WorkflowContext+ and
+      # applies the result after receiving the event — no background thread writes to
+      # the parent context directly.
       #
-      # @example Without block (result available only as event payload)
+      # @example
       #   entry :run_agent, ->(ctx) { MyAgent.new.run_as_child(ctx.query, ctx: ctx) }
       #   transition from: :run_agent, on: :child_completed, to: :process_result
       #
-      # @example With block (writes result into context)
-      #   entry :run_agent, ->(ctx) {
-      #     MyAgent.new.run_as_child(ctx.query, ctx: ctx) { |r| ctx.answer = r[:output] }
-      #   }
-      #   transition from: :run_agent, on: :child_completed, to: :process_result
-      #
       # @param input     [String, Hash]  user input passed to the agent
       # @param ctx       [Object]        a WorkflowContext that responds to +#thread_id+
       # @param messages  [Array]         prior conversation history
       # @param config    [Hash]          invocation config (forwarded to +_invoke_impl+)
-      # @yield [Hash]  result hash +{ output:, messages:, usage: }+ — called from the
-      #                agent IO thread before +:child_completed+ is posted
       # @return [nil]  the caller must not wait on any return value;
       #                the result arrives as a +:child_completed+ event
       # @raise [Phronomy::Error] when EventLoop mode is not enabled
       # @api public
-      def run_as_child(input, ctx:, messages: [], config: {}, &result_writer)
+      def run_as_child(input, ctx:, messages: [], config: {})
         unless Phronomy.configuration.event_loop
           raise Phronomy::Error,
             "run_as_child requires EventLoop mode. " \
@@ -594,8 +655,7 @@ module Phronomy
           messages: messages,
           thread_id: "#{ctx.thread_id}_agent_#{SecureRandom.uuid}",
           config: config,
-          parent_id: ctx.thread_id,
-          result_writer: result_writer
+          parent_id: ctx.thread_id
         )
         Phronomy::EventLoop.instance.enqueue_child(fsm)
         nil
@@ -627,7 +687,7 @@ module Phronomy
         raise
       end
-      # Returns the {Context::ContextVersionCache} built during the most recent
+      # Returns the {LlmContextWindow::ContextVersionCache} built during the most recent
       # {#invoke} call on this agent instance.  The thread-local cache entry is
       # cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
       # in +@last_context_version_cache+ so callers can inspect it after invoke
@@ -644,11 +704,33 @@ module Phronomy
       private
+      # Merges an {InvocationContext} into the +thread_id+ / +config+ pair.
+      # Returns +[effective_thread_id, effective_config]+.
+      #
+      # Precedence rules (existing explicit values always win):
+      # - +thread_id+ argument > +ic.thread_id+
+      # - +config[:cancellation_token]+ > +ic.cancellation_token+ > token derived from +ic.deadline+
+      # - +ic+ is stored in +config[:invocation_context]+ (overwriting any previous value)
+      def _apply_invocation_context(thread_id, config, ic)
+        effective_thread_id = thread_id || ic.thread_id
+        effective_config = config.merge(invocation_context: ic)
+        if effective_config[:cancellation_token].nil?
+          if (tok = ic.effective_timeout_token)
+            effective_config = effective_config.merge(cancellation_token: tok)
+          end
+        end
+        [effective_thread_id, effective_config]
+      end
       # Streaming implementation for #stream.
       def _stream_impl(input, messages: [], thread_id: nil, config: {}, &block)
         caller_meta = {}
         caller_meta[:user_id] = config[:user_id] if config[:user_id]
         caller_meta[:session_id] = config[:session_id] if config[:session_id]
+        if (ic = config[:invocation_context])
+          caller_meta[:task_id] = ic.task_id if ic.task_id
+          caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
+        end
         trace("agent.invoke", input: input, **caller_meta) do |_span|
           run_input_guardrails!(input)
@@ -679,11 +761,26 @@ module Phronomy
           # Run before_completion hooks (global → class → instance) before the LLM call.
           run_before_completion_hooks!(chat, config)
-          response = chat.ask(user_message) do |chunk|
+          # Route the LLM streaming call through the configured LLMAdapter.
+          # Chunks are pushed into a token queue by the pool worker thread and
+          # drained here (on the caller's side) so that the user block is never
+          # executed on a BlockingAdapterPool worker thread.
+          # The queue capacity is bounded by Configuration#stream_queue_max_size
+          # (nil = unbounded) to provide backpressure against a fast LLM producer.
+          adapter = Phronomy.configuration.llm_adapter
+          chunk_queue = Phronomy::Concurrency::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
+          pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
+          # Drain the chunk queue on this side (scheduler task / caller thread).
+          loop do
+            chunk = chunk_queue.pop
+            break if chunk.nil? # queue closed — LLM streaming complete
             block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
             check_cancellation!(config, "invocation cancelled during streaming")
           end
+          response = pending.await
           output = response.content
           usage = Phronomy::TokenUsage.from_tokens(response.tokens)
@@ -712,21 +809,74 @@ module Phronomy
         system_text = build_cached_system_text(input)
         user_message = extract_message(input)
-        assembler = Context::Assembler.new(budget: budget)
+        assembler = LlmContextWindow::Assembler.new(budget: budget)
         assembler.add_instruction(system_text) if system_text
-        Array(config[:knowledge_sources]).each do |ks|
-          check_cancellation!(config, "invocation cancelled during RAG fetch")
-          ks.fetch(query: user_message, cancellation_token: config[:cancellation_token]).each do |chunk|
-            assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
-          end
+        fetch_knowledge_chunks(user_message, config).each do |chunk|
+          assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
         end
         assembler.add_messages(history)
         assembler.build
       end
       protected :build_context
+      # Fetches knowledge chunks from all registered sources concurrently.
+      #
+      # Each source is spawned as a separate task within a {Phronomy::TaskGroup};
+      # the RAG concurrency gate enforces the +max_concurrent_rag_fetches+ cap.
+      # Results are returned in registration order (spawn order) as a flat array.
+      #
+      # This method is available to subclasses as a building block when
+      # overriding {#build_context}. Pass a custom +query+ to implement
+      # multi-hop RAG or other retrieval strategies.
+      #
+      # @param query  [String] RAG query string (typically the current user message)
+      # @param config [Hash]   invocation config; relevant keys:
+      #   +:knowledge_sources+, +:rag_failure_policy+, +:cancellation_token+, +:rag_timeout+
+      # @return [Array<Hash>] flat list of chunk hashes with +:content+, +:type+, +:source+
+      # @api private
+      def fetch_knowledge_chunks(query, config)
+        sources = Array(config[:knowledge_sources])
+        return [] if sources.empty?
+        check_cancellation!(config, "invocation cancelled before RAG fetch")
+        # :skip (default) — ignore per-source failures so the agent can still
+        # answer with partial context. :fail surfaces the first error immediately.
+        failure_policy =
+          case config[:rag_failure_policy]
+          when :fail then :fail_fast
+          else :skip_failed
+          end
+        group = Phronomy::Runtime.instance.task_group(failure_policy: failure_policy)
+        bp = Phronomy.configuration.backpressure
+        rag_on_full = (bp == :raise) ? :reject : (bp || :wait)
+        rag_bp_timeout = Phronomy.configuration.backpressure_timeout
+        # Spawn all fetches concurrently. Results are returned in spawn order
+        # (i.e. registration order of knowledge sources) by TaskGroup#await_all.
+        sources.each do |ks|
+          group.spawn do
+            Phronomy::Runtime.instance.gate(:rag).acquire(on_full: rag_on_full, timeout: rag_bp_timeout) do
+              result, elapsed_ms = Phronomy::Runtime.measure_ms do
+                ks.fetch_async(
+                  query: query,
+                  cancellation_token: config[:cancellation_token],
+                  timeout: config[:rag_timeout]
+                ).await
+              end
+              Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{elapsed_ms}ms" }
+              result
+            end
+          end
+        end
+        # await_all returns results in spawn order; nil entries indicate
+        # skipped failures when using :skip_failed.
+        group.await_all.flat_map { |chunks| Array(chunks) }
+      end
+      protected :fetch_knowledge_chunks
       # Runs the on_trim / on_compaction_trigger / on_compact pipeline on the
       # supplied message array and returns the final Array of message objects
       # ready to pass to the Assembler.
@@ -744,16 +894,16 @@ module Phronomy
         elements = build_message_elements(Array(messages))
         if (trim_cb = self.class._on_trim_callback)
-          trim_ctx = Context::TrimContext.new(message_elements: elements, budget: budget)
+          trim_ctx = Context::Conversation::TrimContext.new(message_elements: elements, budget: budget)
           trim_cb.call(trim_ctx)
           elements = trim_ctx.message_elements
         end
         if (trigger_cb = self.class._on_compaction_trigger_callback)
-          trigger_ctx = Context::TriggerContext.new(message_elements: elements, budget: budget)
+          trigger_ctx = Context::Conversation::TriggerContext.new(message_elements: elements, budget: budget)
           if trigger_cb.call(trigger_ctx)
             if (compact_cb = self.class._on_compact_callback)
-              compact_ctx = Context::CompactionContext.new(
+              compact_ctx = Context::Conversation::CompactionContext.new(
                 message_elements: elements,
                 budget: budget,
                 thread_id: thread_id
@@ -774,61 +924,18 @@ module Phronomy
         caller_meta = {}
         caller_meta[:user_id] = config[:user_id] if config[:user_id]
         caller_meta[:session_id] = config[:session_id] if config[:session_id]
+        if (ic = config[:invocation_context])
+          caller_meta[:task_id] = ic.task_id if ic.task_id
+          caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
+        end
         trace("agent.invoke", input: input, **caller_meta) do |_span|
-          # Run input guardrails before touching the LLM.
-          run_input_guardrails!(input)
-          user_message = extract_message(input)
-          chat = build_chat
-          # Assemble context (system prompt + history). Override #build_context to
-          # inject custom context editing logic at the Agent subclass level.
-          context = build_context(input, messages: messages, thread_id: thread_id, config: config)
-          apply_instructions(chat, context[:system]) if context[:system]
-          context[:messages].each { |msg| chat.messages << msg }
-          # Run before_completion hooks (global → class → instance) before the LLM call.
-          run_before_completion_hooks!(chat, config)
-          # Register suspension hook for approval-required tools (no-op when a
-          # synchronous on_approval_required handler is already registered).
-          _register_suspension_hook!(chat)
-          # Check for cancellation immediately before the LLM call.
-          check_cancellation!(config, "invocation cancelled before LLM call")
-          # Forward the cancellation token to ParallelToolChat via a thread-local
-          # so that tool dispatch batches can observe cancellation without needing
-          # direct access to config.
-          prev_ct = Thread.current[:phronomy_cancellation_token]
-          Thread.current[:phronomy_cancellation_token] = config[:cancellation_token]
-          begin
-            response = chat.ask(user_message)
-          rescue SuspendSignal => signal
-            checkpoint = Checkpoint.new(
-              thread_id: thread_id,
-              original_input: input,
-              messages: chat.messages.dup,
-              pending_tool_name: signal.tool_name,
-              pending_tool_args: signal.args,
-              pending_tool_call_id: signal.tool_call_id
-            )
-            suspended_result = {output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}
-            next [suspended_result, nil]
-          ensure
-            Thread.current[:phronomy_cancellation_token] = prev_ct
-          end
-          output = response.content
-          usage = Phronomy::TokenUsage.from_tokens(response.tokens)
-          # Run output guardrails before returning to the caller.
-          run_output_guardrails!(output)
-          result = {output: output, messages: chat.messages, usage: usage}
-          [result, usage]
+          Agent::InvocationPipeline.new(self).run(
+            input,
+            messages: messages,
+            thread_id: thread_id,
+            config: config
+          )
         end
       end
@@ -842,19 +949,19 @@ module Phronomy
         return nil unless model_name
         if (cw = self.class.context_window)
-          Phronomy::Context::TokenBudget.new(
+          Phronomy::LlmContextWindow::TokenBudget.new(
             context_window: cw,
             max_output_tokens: self.class.max_output_tokens || 0,
             overhead: self.class.context_overhead
           )
         else
-          Phronomy::Context::TokenBudget.new(
+          Phronomy::LlmContextWindow::TokenBudget.new(
             model: model_name,
             max_output_tokens: self.class.max_output_tokens,
             overhead: self.class.context_overhead
           )
         end
-      rescue Phronomy::Context::UnknownModelError, RubyLLM::ModelNotFoundError
+      rescue Phronomy::LlmContextWindow::UnknownModelError, RubyLLM::ModelNotFoundError
         nil
       end
@@ -867,7 +974,7 @@ module Phronomy
       # @api public
       def build_message_elements(messages)
         Array(messages).each_with_index.map do |msg, idx|
-          tokens = Context::TokenEstimator.estimate(msg.content.to_s)
+          tokens = LlmContextWindow::TokenEstimator.estimate(msg.content.to_s)
           {seq: idx, message: msg, tokens: tokens, role: msg.role}
         end
       end
@@ -890,34 +997,29 @@ module Phronomy
           [instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
         )
-        agent_id = object_id
-        cache = (Thread.current[:phronomy_context_version_caches] ||= {})[agent_id] ||=
-          Context::ContextVersionCache.new
+        cache = (@context_version_cache ||= LlmContextWindow::ContextVersionCache.new)
         unless cache.valid?(fingerprint)
           parts = [instruction]
           static_chunks.each do |chunk|
-            parts << Context::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
+            parts << LlmContextWindow::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
           end
           cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
         end
         # Persist a reference on the instance so that context_version_cache
-        # remains accessible after invoke's ensure block cleans up the
-        # thread-local entry.
+        # remains accessible after invoke completes.
         @last_context_version_cache = cache
         cache.system_text.empty? ? nil : cache.system_text
       end
-      # Load messages from a ConversationManager.
-      #
       # Returns the chat class to instantiate for this invocation.
-      # When the +:phronomy_agent_parallel_tools+ thread-local flag is set
-      # (i.e. inside an {AgentFSM} IO thread), returns {ParallelToolChat} so
-      # that concurrent tool dispatch is enabled.  Falls back to +nil+ otherwise,
-      # signalling {#build_chat} to use the standard +RubyLLM.chat+ factory.
+      # When EventLoop mode is enabled ({Phronomy.configuration.event_loop}),
+      # returns {ParallelToolChat} so that concurrent tool dispatch is enabled.
+      # Falls back to +nil+ otherwise, signalling {#build_chat} to use the
+      # standard +RubyLLM.chat+ factory.
       def build_chat_class
-        Thread.current[:phronomy_agent_parallel_tools] ? Agent::ParallelToolChat : nil
+        Phronomy.configuration.event_loop ? Phronomy::MultiAgent::ParallelToolChat : nil
       end
       def build_chat
@@ -931,7 +1033,11 @@ module Phronomy
         end
         t = self.class.temperature
         parallel_class = build_chat_class
-        chat = parallel_class ? parallel_class.new(**opts) : RubyLLM.chat(**opts)
+        chat = if parallel_class
+          parallel_class.new(max_parallel_tools: self.class.max_parallel_tools, **opts)
+        else
+          RubyLLM.chat(**opts)
+        end
         chat.with_temperature(t) if t
         self.class.tools.each do |tool_class|
           chat.with_tool(prepare_tool_class(tool_class))
@@ -943,7 +1049,7 @@ module Phronomy
       def build_instructions(input)
         instr = self.class.instructions
         case instr
-        when Phronomy::PromptTemplate
+        when Phronomy::Agent::Context::Instruction::PromptTemplate
           vars = input.is_a?(Hash) ? input : {input: input}
           instr.format_system(**vars) || instr.format(**vars)
         when String then instr
@@ -995,15 +1101,30 @@ module Phronomy
       # Builds the final tool class to register with the chat.
       #
-      # Two transformations are applied in order:
+      # When an already-instantiated tool object is passed (e.g. a
+      # {Phronomy::Tool::McpTool} returned by +McpTool.from_server+), it is
+      # returned as-is.  RubyLLM's +with_tool+ accepts both classes and
+      # instances, so no wrapping is needed.
+      #
+      # For tool classes, three transformations are applied in order:
       #   1. Alias override — when the Hash form of .tools maps this class to an
       #      explicit name, an anonymous subclass with that tool_name is returned.
-      #   2. Approval gate  — when the tool class has +requires_approval+ set AND
+      #   2. Scope policy   — when a scope is declared on the tool, the configured
+      #      {Phronomy::Tool::ScopePolicy} (or the default) is evaluated.
+      #      +:reject+ wraps the tool to return a denial message without executing.
+      #      +:approve+ behaves like requiring approval (same as step 3 when the
+      #      tool does not already have +requires_approval+).
+      #   3. Approval gate  — when the tool class has +requires_approval+ set AND
       #      an approval handler has been registered via #on_approval_required,
       #      the tool's #call method is wrapped: the handler is invoked with
       #      (tool_name, args) and, if it returns falsy, the tool returns a denial
       #      message instead of executing.
       def prepare_tool_class(tool_class)
+        # When an instantiated tool object is passed (e.g. McpTool.from_server
+        # returns an instance, not a class), skip class-level processing and
+        # return it directly. RubyLLM#with_tool handles both forms.
+        return tool_class unless tool_class.is_a?(Class)
         # Step 1: apply alias if needed.
         resolved = if (alias_name = self.class.tool_aliases[tool_class])
           parent_description = tool_class.description
@@ -1015,7 +1136,34 @@ module Phronomy
           tool_class
         end
-        # Step 2: wrap with approval gate when handler is registered.
+        # Step 2: evaluate scope policy.
+        scope = resolved.scope
+        if scope
+          policy = @scope_policy || Phronomy::Tool::ScopePolicy::DEFAULT
+          decision = policy.call(resolved, scope, self)
+          case decision
+          when :reject
+            effective_name = resolved.new.name
+            rejected_class = Class.new(resolved) do
+              tool_name effective_name
+              define_method(:call) do |_args|
+                "Tool execution denied: scope :#{scope} is not permitted."
+              end
+            end
+            return rejected_class
+          when :approve
+            # Treat as requires_approval unless the tool already has that flag.
+            unless resolved.requires_approval
+              effective_name = resolved.new.name
+              resolved = Class.new(resolved) do
+                tool_name effective_name
+                requires_approval true
+              end
+            end
+          end
+        end
+        # Step 3: wrap with approval gate when handler is registered.
         return resolved unless resolved.requires_approval && @approval_handler
         handler = @approval_handler