RubyGems - phronomy - Versions diffs - 0.7.1 → 0.8.0 - Mend

phronomy 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104)

checksums.yaml +4 -4
data/README.md +16 -16
data/benchmark/bench_context_assembler.rb +2 -2
data/benchmark/bench_regression.rb +5 -5
data/benchmark/bench_token_estimator.rb +5 -5
data/benchmark/bench_tool_schema.rb +1 -1
data/benchmark/bench_vector_store.rb +1 -1
data/lib/phronomy/agent/base.rb +86 -123
data/lib/phronomy/agent/checkpoint.rb +118 -0
data/lib/phronomy/agent/context/conversation/compaction_context.rb +117 -0
data/lib/phronomy/agent/context/conversation/trigger_context.rb +43 -0
data/lib/phronomy/agent/context/conversation/trim_context.rb +82 -0
data/lib/phronomy/agent/context/instruction/prompt_template.rb +102 -0
data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +45 -0
data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +51 -0
data/lib/phronomy/agent/context/knowledge/loader/base.rb +31 -0
data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +62 -0
data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +82 -0
data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +28 -0
data/lib/phronomy/agent/context/knowledge/source/base.rb +60 -0
data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +102 -0
data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +63 -0
data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +58 -0
data/lib/phronomy/agent/context/knowledge/splitter/base.rb +53 -0
data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +57 -0
data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +111 -0
data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +116 -0
data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +95 -0
data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +109 -0
data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +133 -0
data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +198 -0
data/lib/phronomy/agent/fsm.rb +1 -1
data/lib/phronomy/agent/invocation_pipeline.rb +99 -0
data/lib/phronomy/agent/lifecycle/fsm_session.rb +251 -0
data/lib/phronomy/agent/lifecycle/phase_machine_builder.rb +249 -0
data/lib/phronomy/agent/react_agent.rb +19 -14
data/lib/phronomy/agent/runner.rb +2 -2
data/lib/phronomy/agent/tool_executor.rb +108 -0
data/lib/phronomy/concurrency/async_queue.rb +157 -0
data/lib/phronomy/concurrency/blocking_adapter_pool.rb +443 -0
data/lib/phronomy/concurrency/cancellation_scope.rb +125 -0
data/lib/phronomy/concurrency/cancellation_token.rb +140 -0
data/lib/phronomy/concurrency/concurrency_gate.rb +157 -0
data/lib/phronomy/concurrency/deadline.rb +65 -0
data/lib/phronomy/{runtime → concurrency}/gate_registry.rb +1 -1
data/lib/phronomy/{runtime → concurrency}/pool_registry.rb +1 -1
data/lib/phronomy/context.rb +2 -8
data/lib/phronomy/embeddings.rb +2 -2
data/lib/phronomy/eval/runner.rb +4 -0
data/lib/phronomy/eval/scorer/llm_judge.rb +12 -1
data/lib/phronomy/event_loop.rb +7 -7
data/lib/phronomy/invocation_context.rb +3 -3
data/lib/phronomy/knowledge_source.rb +0 -5
data/lib/phronomy/llm_adapter/ruby_llm.rb +17 -11
data/lib/phronomy/{context → llm_context_window}/assembler.rb +18 -3
data/lib/phronomy/{context → llm_context_window}/context_version_cache.rb +1 -1
data/lib/phronomy/{context → llm_context_window}/token_budget.rb +7 -4
data/lib/phronomy/{context → llm_context_window}/token_estimator.rb +3 -3
data/lib/phronomy/loader.rb +4 -4
data/lib/phronomy/{agent → multi_agent}/handoff.rb +2 -2
data/lib/phronomy/{agent → multi_agent}/orchestrator.rb +6 -6
data/lib/phronomy/{agent → multi_agent}/parallel_tool_chat.rb +4 -4
data/lib/phronomy/{agent → multi_agent}/team_coordinator.rb +2 -2
data/lib/phronomy/runtime.rb +19 -4
data/lib/phronomy/splitter.rb +3 -3
data/lib/phronomy/task_group.rb +1 -1
data/lib/phronomy/tool/base.rb +50 -9
data/lib/phronomy/tracing/null_tracer.rb +3 -1
data/lib/phronomy/vector_store.rb +2 -2
data/lib/phronomy/version.rb +1 -1
data/lib/phronomy/workflow_context.rb +8 -0
data/lib/phronomy/workflow_runner.rb +11 -131
data/lib/phronomy.rb +1 -0
metadata +44 -42
data/lib/phronomy/async_queue.rb +0 -155
data/lib/phronomy/blocking_adapter_pool.rb +0 -435
data/lib/phronomy/cancellation_scope.rb +0 -123
data/lib/phronomy/cancellation_token.rb +0 -133
data/lib/phronomy/concurrency_gate.rb +0 -155
data/lib/phronomy/context/compaction_context.rb +0 -111
data/lib/phronomy/context/trigger_context.rb +0 -39
data/lib/phronomy/context/trim_context.rb +0 -75
data/lib/phronomy/deadline.rb +0 -63
data/lib/phronomy/embeddings/base.rb +0 -39
data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +0 -45
data/lib/phronomy/fsm_session.rb +0 -247
data/lib/phronomy/knowledge_source/base.rb +0 -54
data/lib/phronomy/knowledge_source/entity_knowledge.rb +0 -96
data/lib/phronomy/knowledge_source/rag_knowledge.rb +0 -57
data/lib/phronomy/knowledge_source/static_knowledge.rb +0 -52
data/lib/phronomy/loader/base.rb +0 -25
data/lib/phronomy/loader/csv_loader.rb +0 -56
data/lib/phronomy/loader/markdown_loader.rb +0 -76
data/lib/phronomy/loader/plain_text_loader.rb +0 -22
data/lib/phronomy/prompt_template.rb +0 -96
data/lib/phronomy/splitter/base.rb +0 -47
data/lib/phronomy/splitter/fixed_size_splitter.rb +0 -51
data/lib/phronomy/splitter/recursive_splitter.rb +0 -105
data/lib/phronomy/tool_executor.rb +0 -106
data/lib/phronomy/vector_store/async_backend.rb +0 -110
data/lib/phronomy/vector_store/base.rb +0 -89
data/lib/phronomy/vector_store/in_memory.rb +0 -93
data/lib/phronomy/vector_store/pgvector.rb +0 -127
data/lib/phronomy/vector_store/redis_search.rb +0 -192

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d9ae370d656048e38f700b6bced931fe249f731cea819ab94691eb4bcf6ef43c
-  data.tar.gz: 97d01ca3475f547a41397d1dad2ddb8ccaa10f6466d5a75c3f79e6875a7af0c6
+  metadata.gz: d4410424efcdcdf0ab529106ba2c872bddae9decc322995f37065f426255a05b
+  data.tar.gz: c9ae0dff7f184244b92fc91c585536f767aded5ff6ea8ecaafe86221863738e8
 SHA512:
-  metadata.gz: d3ab9ebd145e1ed706ad1741a2e3184c412aa8fd0eac32c95eb0b4a1ef87af38ae73eb5b4205b7f2894dd228929130c9a7569d24a1d7a571a5aa3ec5a68a4172
-  data.tar.gz: efa88afdbaa2f3d8fc38ee7cbc7044711479490546a888d44540f3b6bae6da60a3a3e64cfbbef455d65f78bab64dd9a68056e4c9f7ac7a360d512179364c8b23
+  metadata.gz: e7afa1749dc1431e27e225dfe7a8eafebb2e781c0e6a6ca6e0bdda9712c22b4b5b68d3a9897bc92b466026c698070045774d79a0197ad0463da3f81ff103b36c
+  data.tar.gz: be93a29c2b98b2069ef912815e847f0e0115de07bb435e36fbcb834433757fc72442d7c5db129c9f97dd891113ecf75c46606a6295b2e9f3357831c24246d974

data/README.md CHANGED Viewed

@@ -45,7 +45,7 @@ It provides composable building blocks — Workflows, Agents, Tools, Guardrails,
 |---|---|
 | **Knowledge/RAG** — Retrieval sources with pluggable loaders, splitters, and vector stores; `static_knowledge_refresh!` for runtime cache invalidation | Beta |
 | **`VectorStore#size`** — Returns document count for all three backends (InMemory, RedisSearch, Pgvector) | Beta |
-| **`VectorStore::AsyncBackend` mixin** — Pluggable async interface for `VectorStore`; default pool-backed implementations for `search_async`, `add_async`, `remove_async`, `clear_async`; backends with native async drivers override individual methods to bypass `BlockingAdapterPool` entirely; all existing backends remain unchanged | Beta |
+| **`Agent::Context::Knowledge::VectorStore::AsyncBackend` mixin** — Pluggable async interface for `VectorStore`; default pool-backed implementations for `search_async`, `add_async`, `remove_async`, `clear_async`; backends with native async drivers override individual methods to bypass `BlockingAdapterPool` entirely; all existing backends remain unchanged | Beta |
 | **Parallel RAG multi-source fetch** — `Agent#build_context` fetches all `knowledge_sources` concurrently via `TaskGroup`; `config[:rag_failure_policy]` `:skip` (default) silently ignores failed sources so the agent answers with partial context, `:fail` surfaces the first error; per-source latency is emitted to `Phronomy.configuration.logger` at debug level | Beta |
 | **MCP Tool** — Model Context Protocol server integration | Beta |
@@ -131,8 +131,8 @@ Install additional gems only for the features you use:
 | Gem | Required for |
 |-----|-------------|
-| `pgvector` | `Phronomy::VectorStore::Pgvector` |
-| `redis` | `Phronomy::VectorStore::RedisSearch` |
+| `pgvector` | `Phronomy::Agent::Context::Knowledge::VectorStore::Pgvector` |
+| `redis` | `Phronomy::Agent::Context::Knowledge::VectorStore::RedisSearch` |
 | `opentelemetry-api` | `Phronomy::Tracing::OpenTelemetryTracer` |
 ## Quick Start
@@ -284,15 +284,15 @@ end
 ```ruby
 # Static knowledge (policy files, reference docs)
-policy = Phronomy::KnowledgeSource::StaticKnowledge.new(
+policy = Phronomy::Agent::Context::Knowledge::Source::StaticKnowledge.new(
   File.read("policy.md"),
   type:   :policy,
   source: "policy.md"   # exposed to LLM for citation
 )
 # RAG retrieval from a vector store
-store      = Phronomy::VectorStore::InMemory.new
-embeddings = Phronomy::Embeddings::RubyLLMEmbeddings.new(model: "text-embedding-3-small")
+store      = Phronomy::Agent::Context::Knowledge::VectorStore::InMemory.new
+embeddings = Phronomy::Agent::Context::Knowledge::Embeddings::RubyLLMEmbeddings.new(model: "text-embedding-3-small")
 # Add documents before querying
 text1 = "Refunds are processed within 5 business days."
@@ -300,7 +300,7 @@ text2 = "Contact support@example.com for refund requests."
 store.add(id: "doc-1", embedding: embeddings.embed(text1), metadata: { content: text1, source: "policy.md" })
 store.add(id: "doc-2", embedding: embeddings.embed(text2), metadata: { content: text2, source: "policy.md" })
-rag = Phronomy::KnowledgeSource::RAGKnowledge.new(store: store, embeddings: embeddings, k: 5)
+rag = Phronomy::Agent::Context::Knowledge::Source::RAGKnowledge.new(store: store, embeddings: embeddings, k: 5)
 # Inject at invocation time
 result = MyAgent.new.invoke("What is the refund policy?",
@@ -319,8 +319,8 @@ MyAgent.static_knowledge_refresh!
 Load and split documents with built-in loaders:
 ```ruby
-chunks = Phronomy::Loader::MarkdownLoader.new.load("docs/guide.md")
-         .then { |docs| Phronomy::Splitter::RecursiveSplitter.new(chunk_size: 512).split(docs) }
+chunks = Phronomy::Agent::Context::Knowledge::Loader::MarkdownLoader.new.load("docs/guide.md")
+         .then { |docs| Phronomy::Agent::Context::Knowledge::Splitter::RecursiveSplitter.new(chunk_size: 512).split(docs) }
 ```
 ### Multi-Agent Handoff — Hub-and-spoke routing
@@ -674,7 +674,7 @@ agents automatically stay within the configured token limit.
 Derives the effective token budget from RubyLLM's model registry:
 ```ruby
-budget = Phronomy::Context::TokenBudget.new(
+budget = Phronomy::LlmContextWindow::TokenBudget.new(
   model:    "claude-3-5-sonnet-20241022",  # looks up context_window + max_output_tokens
   overhead: 500                            # extra reservation for tool definitions
 )
@@ -686,7 +686,7 @@ budget.effective_input_limit # => 191_308
 Or supply explicit values (useful for local / unregistered models):
 ```ruby
-budget = Phronomy::Context::TokenBudget.new(
+budget = Phronomy::LlmContextWindow::TokenBudget.new(
   context_window:    32_768,
   max_output_tokens: 4_096
 )
@@ -716,7 +716,7 @@ registry the budget is silently skipped.
 > ```ruby
 > require "tiktoken_ruby"
 > enc = Tiktoken.encoding_for_model("gpt-4o")
-> Phronomy::Context::TokenEstimator.tokenizer = ->(text) { enc.encode(text).length }
+> Phronomy::LlmContextWindow::TokenEstimator.tokenizer = ->(text) { enc.encode(text).length }
 > ```
@@ -750,7 +750,7 @@ blocks always execute.
 > risky interruption.
 ```ruby
-token = Phronomy::CancellationToken.new
+token = Phronomy::Concurrency::CancellationToken.new
 # Cancel from another thread after 5 s
 Thread.new { sleep 5; token.cancel! }
@@ -762,15 +762,15 @@ rescue Phronomy::CancellationError
 end
 # Hard deadline via monotonic clock (recommended — immune to NTP/DST changes)
-token = Phronomy::CancellationToken.timeout_after(30)
+token = Phronomy::Concurrency::CancellationToken.timeout_after(30)
 result = MyAgent.new.invoke("...", config: { cancellation_token: token })
 # Hard deadline via wall-clock (legacy — still supported)
-token = Phronomy::CancellationToken.new(deadline: Time.now + 30)
+token = Phronomy::Concurrency::CancellationToken.new(deadline: Time.now + 30)
 result = MyAgent.new.invoke("...", config: { cancellation_token: token })
 # Propagate to all parallel workers via dispatch_parallel / fan_out
-token = Phronomy::CancellationToken.new
+token = Phronomy::Concurrency::CancellationToken.new
 Thread.new { sleep 10; token.cancel! }
 orchestrator.dispatch_parallel(

data/benchmark/bench_context_assembler.rb CHANGED Viewed

@@ -12,9 +12,9 @@ BenchAsmMessage = Struct.new(:content)
 def make_assembler(n_messages:, n_chunks:, with_budget: false)
   budget = if with_budget
-    Phronomy::Context::TokenBudget.new(context_window: 4096, max_output_tokens: 512)
+    Phronomy::LlmContextWindow::TokenBudget.new(context_window: 4096, max_output_tokens: 512)
   end
-  asm = Phronomy::Context::Assembler.new(budget: budget)
+  asm = Phronomy::LlmContextWindow::Assembler.new(budget: budget)
   asm.add_instruction("You are a helpful assistant. Answer the user's question.")
   n_chunks.times do |i|
     asm.add_knowledge("Fact #{i}: The capital of country #{i} is City #{i}.", type: :entity, trusted: true)

data/benchmark/bench_regression.rb CHANGED Viewed

@@ -94,7 +94,7 @@ stub_agent_class = Class.new(Phronomy::Agent::Base) do
   define_method(:invoke_async) { |input, **_kw| Phronomy::Runtime.instance.spawn(name: "bench-stub") { invoke(input) } }
 end
-orchestrator_class = Class.new(Phronomy::Agent::Orchestrator)
+orchestrator_class = Class.new(Phronomy::MultiAgent::Orchestrator)
 orchestrator = orchestrator_class.new
 PARALLEL_ITERATIONS = 200
@@ -109,7 +109,7 @@ end
 # ---------------------------------------------------------------------------
 # Target 5: CancellationToken#cancelled? throughput (8 threads)
 # ---------------------------------------------------------------------------
-CANCEL_TOKEN = Phronomy::CancellationToken.new
+CANCEL_TOKEN = Phronomy::Concurrency::CancellationToken.new
 CANCEL_ITERATIONS = 10_000
 t5 = Benchmark.measure("CancellationToken#cancelled? (8 threads)") do
@@ -122,7 +122,7 @@ end
 # ---------------------------------------------------------------------------
 # Target 6: CancellationToken#raise_if_cancelled! hot path (no-op, single thread)
 # ---------------------------------------------------------------------------
-RAISE_TOKEN = Phronomy::CancellationToken.new  # not cancelled — no-op path
+RAISE_TOKEN = Phronomy::Concurrency::CancellationToken.new  # not cancelled — no-op path
 RAISE_ITERATIONS = 200_000
 t6 = Benchmark.measure("CancellationToken#raise_if_cancelled! (no-op)") do
@@ -135,12 +135,12 @@ end
 BenchMsg = Struct.new(:content) unless defined?(BenchMsg)
 TRIM_ELEMENTS = Array.new(2_000) { |i| {seq: i, message: BenchMsg.new("msg #{i}"), tokens: 10, role: :user} }
-TRIM_BUDGET = Phronomy::Context::TokenBudget.new(context_window: 4096, max_output_tokens: 512)
+TRIM_BUDGET = Phronomy::LlmContextWindow::TokenBudget.new(context_window: 4096, max_output_tokens: 512)
 TRIM_ITERATIONS = 500
 t7 = Benchmark.measure("TrimContext#remove (2000-element history)") do
   TRIM_ITERATIONS.times do
-    tc = Phronomy::Context::TrimContext.new(message_elements: TRIM_ELEMENTS, budget: TRIM_BUDGET)
+    tc = Phronomy::Agent::Context::Conversation::TrimContext.new(message_elements: TRIM_ELEMENTS, budget: TRIM_BUDGET)
     tc.remove((0...200).to_a)  # remove 200 oldest messages
   end
 end

data/benchmark/bench_token_estimator.rb CHANGED Viewed

@@ -23,22 +23,22 @@ BENCH_TOKEN_ITERATIONS = 10_000
 puts "=== bench_token_estimator ==="
 Benchmark.bm(30) do |x|
   x.report("estimate(short text)") do
-    BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(SHORT_TEXT) }
+    BENCH_TOKEN_ITERATIONS.times { Phronomy::LlmContextWindow::TokenEstimator.estimate(SHORT_TEXT) }
   end
   x.report("estimate(medium text 500c)") do
-    BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(MEDIUM_TEXT) }
+    BENCH_TOKEN_ITERATIONS.times { Phronomy::LlmContextWindow::TokenEstimator.estimate(MEDIUM_TEXT) }
   end
   x.report("estimate(long text 10k c)") do
-    BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(LONG_TEXT) }
+    BENCH_TOKEN_ITERATIONS.times { Phronomy::LlmContextWindow::TokenEstimator.estimate(LONG_TEXT) }
   end
   x.report("estimate(100 messages)") do
-    BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(MESSAGES_100) }
+    BENCH_TOKEN_ITERATIONS.times { Phronomy::LlmContextWindow::TokenEstimator.estimate(MESSAGES_100) }
   end
   x.report("estimate(1000 messages)") do
-    (BENCH_TOKEN_ITERATIONS / 10).times { Phronomy::Context::TokenEstimator.estimate(MESSAGES_1000) }
+    (BENCH_TOKEN_ITERATIONS / 10).times { Phronomy::LlmContextWindow::TokenEstimator.estimate(MESSAGES_1000) }
   end
 end

data/benchmark/bench_tool_schema.rb CHANGED Viewed

@@ -43,7 +43,7 @@ end
 # --- static_knowledge_chunks cache ---
-class BenchKnowledgeSource < Phronomy::KnowledgeSource::Base
+class BenchKnowledgeSource < Phronomy::Agent::Context::Knowledge::Source::Base
   def fetch(query: nil)
     [{content: "Cached knowledge fact.", type: :static}]
   end

data/benchmark/bench_vector_store.rb CHANGED Viewed

@@ -28,7 +28,7 @@ BENCH_VS_ITERS = {100 => 100, 1_000 => 20, 10_000 => 5}.freeze
 puts "=== bench_vector_store_inmemory ==="
 Benchmark.bm(35) do |x|
   [100, 1_000, 10_000].each do |n|
-    store = Phronomy::VectorStore::InMemory.new(dimension: DIM)
+    store = Phronomy::Agent::Context::Knowledge::VectorStore::InMemory.new(dimension: DIM)
     populate(store, n)
     iters = BENCH_VS_ITERS[n]

data/lib/phronomy/agent/base.rb CHANGED Viewed

@@ -60,12 +60,12 @@ module Phronomy
         end
         # Sets or reads the system instructions for this agent.
-        # Accepts a String, a {Phronomy::PromptTemplate}, or a block (Proc).
+        # Accepts a String, a {Phronomy::Agent::Context::Instruction::PromptTemplate}, or a block (Proc).
         # When used as a reader (no argument, no block), returns the stored value.
         #
-        # @param text [String, Phronomy::PromptTemplate, nil]
+        # @param text [String, Phronomy::Agent::Context::Instruction::PromptTemplate, nil]
         # @yield optionally provide instructions as a block
-        # @return [String, Phronomy::PromptTemplate, Proc, nil]
+        # @return [String, Phronomy::Agent::Context::Instruction::PromptTemplate, Proc, nil]
         # @example String instructions
         #   class MyAgent < Phronomy::Agent::Base
         #     instructions "You are a helpful assistant."
@@ -225,7 +225,7 @@ module Phronomy
         # Defaults to +nil+ (no timeout).
         # Inherited by subclasses; the most-specific definition wins.
         #
-        # When the timeout fires, a {Phronomy::CancellationScope} is cancelled
+        # When the timeout fires, a {Phronomy::Concurrency::CancellationScope} is cancelled
         # and its token is propagated to the FSM config so that in-flight LLM,
         # tool, and RAG calls observe cancellation via their +cancellation_token:+
         # keyword argument.  +Phronomy::TimeoutError+ is raised to the caller.
@@ -255,10 +255,10 @@ module Phronomy
         # the first time +invoke+ is called. The cache persists for the lifetime
         # of the process; call {.static_knowledge_refresh!} to force a reload.
         #
-        # @param sources [Array<Phronomy::KnowledgeSource::Base>]
+        # @param sources [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
         # @example
         #   class PolicyAgent < Phronomy::Agent::Base
-        #     static_knowledge Phronomy::KnowledgeSource::StaticKnowledge.new(POLICY_TEXT)
+        #     static_knowledge Phronomy::Agent::Context::Knowledge::Source::StaticKnowledge.new(POLICY_TEXT)
         #   end
         # @api public
         def static_knowledge(*sources)
@@ -269,7 +269,7 @@ module Phronomy
         end
         # Returns the registered static knowledge sources.
-        # @return [Array<Phronomy::KnowledgeSource::Base>]
+        # @return [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
         # @api public
         def static_knowledge_sources
           @static_knowledge_sources || []
@@ -306,11 +306,11 @@ module Phronomy
         # application can remove stale or irrelevant messages from the
         # conversation history.
         #
-        # The block receives a {Phronomy::Context::TrimContext} and may call
+        # The block receives a {Phronomy::Agent::Context::Conversation::TrimContext} and may call
         # +ctx.remove(seqs)+ to drop messages by seq number. Changes affect
         # only the current invocation; the underlying memory store is unchanged.
         #
-        # @yield [ctx] Phronomy::Context::TrimContext
+        # @yield [ctx] Phronomy::Agent::Context::Conversation::TrimContext
         # @example Drop the oldest message when over 80% of budget is used
         #   on_trim do |ctx|
         #     limit = ctx.budget&.available(used: 0) || Float::INFINITY
@@ -332,9 +332,9 @@ module Phronomy
         # truthy AND an +on_compact+ callback is also registered, the compact
         # pipeline is executed.
         #
-        # The block receives a read-only {Phronomy::Context::TriggerContext}.
+        # The block receives a read-only {Phronomy::Agent::Context::Conversation::TriggerContext}.
         #
-        # @yield [ctx] Phronomy::Context::TriggerContext
+        # @yield [ctx] Phronomy::Agent::Context::Conversation::TriggerContext
         # @return [Boolean] truthy → run on_compact; falsy → skip
         # @example Trigger when messages exceed 70% of token budget
         #   on_compaction_trigger do |ctx|
@@ -354,10 +354,10 @@ module Phronomy
         # Registers a callback that performs the actual compaction when the
         # +on_compaction_trigger+ callback fires. The block receives a
-        # {Phronomy::Context::CompactionContext} and should call +ctx.compact+
+        # {Phronomy::Agent::Context::Conversation::CompactionContext} and should call +ctx.compact+
         # to specify which messages to summarise.
         #
-        # @yield [ctx] Phronomy::Context::CompactionContext
+        # @yield [ctx] Phronomy::Agent::Context::Conversation::CompactionContext
         # @example Replace the first 4 messages with a short summary
         #   on_compact do |ctx|
         #     ctx.compact(0..3) do |elements|
@@ -509,7 +509,7 @@ module Phronomy
       # @example With InvocationContext (deadline-based timeout)
       #   ctx = Phronomy::InvocationContext.new(
       #     thread_id: "conv-123",
-      #     deadline: Phronomy::Deadline.in(30),
+      #     deadline: Phronomy::Concurrency::Deadline.in(30),
       #     task_id: SecureRandom.uuid
       #   )
       #   result = MyAgent.new.invoke("Hello", invocation_context: ctx)
@@ -532,7 +532,7 @@ module Phronomy
           # cancellation when the deadline fires.
           timeout_sec = self.class.invoke_timeout
           effective_config, scope = if timeout_sec
-            s = Phronomy::CancellationScope.new(parent_token: config[:cancellation_token])
+            s = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
             s.deadline_in(timeout_sec)
             [config.merge(cancellation_token: s.token), s]
           else
@@ -687,7 +687,7 @@ module Phronomy
         raise
       end
-      # Returns the {Context::ContextVersionCache} built during the most recent
+      # Returns the {LlmContextWindow::ContextVersionCache} built during the most recent
       # {#invoke} call on this agent instance.  The thread-local cache entry is
       # cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
       # in +@last_context_version_cache+ so callers can inspect it after invoke
@@ -768,7 +768,7 @@ module Phronomy
           # The queue capacity is bounded by Configuration#stream_queue_max_size
           # (nil = unbounded) to provide backpressure against a fast LLM producer.
           adapter = Phronomy.configuration.llm_adapter
-          chunk_queue = Phronomy::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
+          chunk_queue = Phronomy::Concurrency::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
           pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
           # Drain the chunk queue on this side (scheduler task / caller thread).
@@ -809,59 +809,73 @@ module Phronomy
         system_text = build_cached_system_text(input)
         user_message = extract_message(input)
-        assembler = Context::Assembler.new(budget: budget)
+        assembler = LlmContextWindow::Assembler.new(budget: budget)
         assembler.add_instruction(system_text) if system_text
+        fetch_knowledge_chunks(user_message, config).each do |chunk|
+          assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
+        end
+        assembler.add_messages(history)
+        assembler.build
+      end
+      protected :build_context
+      # Fetches knowledge chunks from all registered sources concurrently.
+      #
+      # Each source is spawned as a separate task within a {Phronomy::TaskGroup};
+      # the RAG concurrency gate enforces the +max_concurrent_rag_fetches+ cap.
+      # Results are returned in registration order (spawn order) as a flat array.
+      #
+      # This method is available to subclasses as a building block when
+      # overriding {#build_context}. Pass a custom +query+ to implement
+      # multi-hop RAG or other retrieval strategies.
+      #
+      # @param query  [String] RAG query string (typically the current user message)
+      # @param config [Hash]   invocation config; relevant keys:
+      #   +:knowledge_sources+, +:rag_failure_policy+, +:cancellation_token+, +:rag_timeout+
+      # @return [Array<Hash>] flat list of chunk hashes with +:content+, +:type+, +:source+
+      # @api private
+      def fetch_knowledge_chunks(query, config)
         sources = Array(config[:knowledge_sources])
-        unless sources.empty?
-          check_cancellation!(config, "invocation cancelled before RAG fetch")
-          # Determine TaskGroup failure policy: :skip (default) ignores per-source
-          # failures so the agent can still answer with partial context; :fail
-          # surfaces the first error immediately via :fail_fast.
-          failure_policy =
-            case config[:rag_failure_policy]
-            when :fail then :fail_fast
-            else :skip_failed
-            end
+        return [] if sources.empty?
-          group = Phronomy::Runtime.instance.task_group(failure_policy: failure_policy)
+        check_cancellation!(config, "invocation cancelled before RAG fetch")
-          bp = Phronomy.configuration.backpressure
-          rag_on_full = (bp == :raise) ? :reject : (bp || :wait)
-          rag_bp_timeout = Phronomy.configuration.backpressure_timeout
+        # :skip (default) — ignore per-source failures so the agent can still
+        # answer with partial context. :fail surfaces the first error immediately.
+        failure_policy =
+          case config[:rag_failure_policy]
+          when :fail then :fail_fast
+          else :skip_failed
+          end
-          # Spawn all fetches concurrently. Results are returned in spawn order
-          # (i.e. registration order of knowledge sources) by TaskGroup#await_all.
-          sources.each do |ks|
-            group.spawn do
-              Phronomy::Runtime.instance.gate(:rag).acquire(on_full: rag_on_full, timeout: rag_bp_timeout) do
-                t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
-                result = ks.fetch_async(
-                  query: user_message,
+        group = Phronomy::Runtime.instance.task_group(failure_policy: failure_policy)
+        bp = Phronomy.configuration.backpressure
+        rag_on_full = (bp == :raise) ? :reject : (bp || :wait)
+        rag_bp_timeout = Phronomy.configuration.backpressure_timeout
+        # Spawn all fetches concurrently. Results are returned in spawn order
+        # (i.e. registration order of knowledge sources) by TaskGroup#await_all.
+        sources.each do |ks|
+          group.spawn do
+            Phronomy::Runtime.instance.gate(:rag).acquire(on_full: rag_on_full, timeout: rag_bp_timeout) do
+              result, elapsed_ms = Phronomy::Runtime.measure_ms do
+                ks.fetch_async(
+                  query: query,
                   cancellation_token: config[:cancellation_token],
                   timeout: config[:rag_timeout]
                 ).await
-                elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0
-                Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{(elapsed * 1000).round}ms" }
-                result
               end
-            end
-          end
-          # await_all returns results in spawn order; nil entries indicate
-          # skipped failures when using :skip_failed.
-          per_source_chunks = group.await_all
-          per_source_chunks.each do |chunks|
-            Array(chunks).each do |chunk|
-              assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
+              Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{elapsed_ms}ms" }
+              result
             end
           end
         end
-        assembler.add_messages(history)
-        assembler.build
+        # await_all returns results in spawn order; nil entries indicate
+        # skipped failures when using :skip_failed.
+        group.await_all.flat_map { |chunks| Array(chunks) }
       end
-      protected :build_context
+      protected :fetch_knowledge_chunks
       # Runs the on_trim / on_compaction_trigger / on_compact pipeline on the
       # supplied message array and returns the final Array of message objects
@@ -880,16 +894,16 @@ module Phronomy
         elements = build_message_elements(Array(messages))
         if (trim_cb = self.class._on_trim_callback)
-          trim_ctx = Context::TrimContext.new(message_elements: elements, budget: budget)
+          trim_ctx = Context::Conversation::TrimContext.new(message_elements: elements, budget: budget)
           trim_cb.call(trim_ctx)
           elements = trim_ctx.message_elements
         end
         if (trigger_cb = self.class._on_compaction_trigger_callback)
-          trigger_ctx = Context::TriggerContext.new(message_elements: elements, budget: budget)
+          trigger_ctx = Context::Conversation::TriggerContext.new(message_elements: elements, budget: budget)
           if trigger_cb.call(trigger_ctx)
             if (compact_cb = self.class._on_compact_callback)
-              compact_ctx = Context::CompactionContext.new(
+              compact_ctx = Context::Conversation::CompactionContext.new(
                 message_elements: elements,
                 budget: budget,
                 thread_id: thread_id
@@ -916,63 +930,12 @@ module Phronomy
         end
         trace("agent.invoke", input: input, **caller_meta) do |_span|
-          # Run input guardrails before touching the LLM.
-          run_input_guardrails!(input)
-          user_message = extract_message(input)
-          chat = build_chat
-          # Assemble context (system prompt + history). Override #build_context to
-          # inject custom context editing logic at the Agent subclass level.
-          context = build_context(input, messages: messages, thread_id: thread_id, config: config)
-          apply_instructions(chat, context[:system]) if context[:system]
-          context[:messages].each { |msg| chat.messages << msg }
-          # Run before_completion hooks (global → class → instance) before the LLM call.
-          run_before_completion_hooks!(chat, config)
-          # Register suspension hook for approval-required tools (no-op when a
-          # synchronous on_approval_required handler is already registered).
-          _register_suspension_hook!(chat)
-          # Check for cancellation immediately before the LLM call.
-          check_cancellation!(config, "invocation cancelled before LLM call")
-          # Forward the cancellation token to ParallelToolChat explicitly
-          # via the chat instance so that tool dispatch batches can observe
-          # cancellation without needing Thread.current.
-          chat.cancellation_token = config[:cancellation_token] if chat.respond_to?(:cancellation_token=)
-          begin
-            # Route the LLM call through the configured LLMAdapter so that the
-            # blocking HTTP request runs inside BlockingAdapterPool and the
-            # adapter can be swapped without changing agent code.
-            adapter = Phronomy.configuration.llm_adapter
-            response = adapter.complete_async(chat, user_message, config: config).await
-          rescue SuspendSignal => signal
-            checkpoint = Checkpoint.new(
-              thread_id: thread_id,
-              original_input: input,
-              messages: chat.messages.dup,
-              pending_tool_name: signal.tool_name,
-              pending_tool_args: signal.args,
-              pending_tool_call_id: signal.tool_call_id
-            )
-            suspended_result = {output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}
-            next [suspended_result, nil]
-          ensure
-            # Clear the chat's cancellation token reference after each LLM call.
-            chat.cancellation_token = nil if chat.respond_to?(:cancellation_token=)
-          end
-          output = response.content
-          usage = Phronomy::TokenUsage.from_tokens(response.tokens)
-          # Run output guardrails before returning to the caller.
-          run_output_guardrails!(output)
-          result = {output: output, messages: chat.messages, usage: usage}
-          [result, usage]
+          Agent::InvocationPipeline.new(self).run(
+            input,
+            messages: messages,
+            thread_id: thread_id,
+            config: config
+          )
         end
       end
@@ -986,19 +949,19 @@ module Phronomy
         return nil unless model_name
         if (cw = self.class.context_window)
-          Phronomy::Context::TokenBudget.new(
+          Phronomy::LlmContextWindow::TokenBudget.new(
             context_window: cw,
             max_output_tokens: self.class.max_output_tokens || 0,
             overhead: self.class.context_overhead
           )
         else
-          Phronomy::Context::TokenBudget.new(
+          Phronomy::LlmContextWindow::TokenBudget.new(
             model: model_name,
             max_output_tokens: self.class.max_output_tokens,
             overhead: self.class.context_overhead
           )
         end
-      rescue Phronomy::Context::UnknownModelError, RubyLLM::ModelNotFoundError
+      rescue Phronomy::LlmContextWindow::UnknownModelError, RubyLLM::ModelNotFoundError
         nil
       end
@@ -1011,7 +974,7 @@ module Phronomy
       # @api public
       def build_message_elements(messages)
         Array(messages).each_with_index.map do |msg, idx|
-          tokens = Context::TokenEstimator.estimate(msg.content.to_s)
+          tokens = LlmContextWindow::TokenEstimator.estimate(msg.content.to_s)
           {seq: idx, message: msg, tokens: tokens, role: msg.role}
         end
       end
@@ -1034,11 +997,11 @@ module Phronomy
           [instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
         )
-        cache = (@context_version_cache ||= Context::ContextVersionCache.new)
+        cache = (@context_version_cache ||= LlmContextWindow::ContextVersionCache.new)
         unless cache.valid?(fingerprint)
           parts = [instruction]
           static_chunks.each do |chunk|
-            parts << Context::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
+            parts << LlmContextWindow::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
           end
           cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
         end
@@ -1056,7 +1019,7 @@ module Phronomy
       # Falls back to +nil+ otherwise, signalling {#build_chat} to use the
       # standard +RubyLLM.chat+ factory.
       def build_chat_class
-        Phronomy.configuration.event_loop ? Agent::ParallelToolChat : nil
+        Phronomy.configuration.event_loop ? Phronomy::MultiAgent::ParallelToolChat : nil
       end
       def build_chat
@@ -1086,7 +1049,7 @@ module Phronomy
       def build_instructions(input)
         instr = self.class.instructions
         case instr
-        when Phronomy::PromptTemplate
+        when Phronomy::Agent::Context::Instruction::PromptTemplate
           vars = input.is_a?(Hash) ? input : {input: input}
           instr.format_system(**vars) || instr.format(**vars)
         when String then instr