phronomy 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. checksums.yaml +4 -4
  2. data/README.md +16 -16
  3. data/benchmark/bench_context_assembler.rb +2 -2
  4. data/benchmark/bench_regression.rb +5 -5
  5. data/benchmark/bench_token_estimator.rb +5 -5
  6. data/benchmark/bench_tool_schema.rb +1 -1
  7. data/benchmark/bench_vector_store.rb +1 -1
  8. data/lib/phronomy/agent/base.rb +86 -123
  9. data/lib/phronomy/agent/checkpoint.rb +118 -0
  10. data/lib/phronomy/agent/context/conversation/compaction_context.rb +117 -0
  11. data/lib/phronomy/agent/context/conversation/trigger_context.rb +43 -0
  12. data/lib/phronomy/agent/context/conversation/trim_context.rb +82 -0
  13. data/lib/phronomy/agent/context/instruction/prompt_template.rb +102 -0
  14. data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +45 -0
  15. data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +51 -0
  16. data/lib/phronomy/agent/context/knowledge/loader/base.rb +31 -0
  17. data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +62 -0
  18. data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +82 -0
  19. data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +28 -0
  20. data/lib/phronomy/agent/context/knowledge/source/base.rb +60 -0
  21. data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +102 -0
  22. data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +63 -0
  23. data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +58 -0
  24. data/lib/phronomy/agent/context/knowledge/splitter/base.rb +53 -0
  25. data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +57 -0
  26. data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +111 -0
  27. data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +116 -0
  28. data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +95 -0
  29. data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +109 -0
  30. data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +133 -0
  31. data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +198 -0
  32. data/lib/phronomy/agent/fsm.rb +1 -1
  33. data/lib/phronomy/agent/invocation_pipeline.rb +99 -0
  34. data/lib/phronomy/agent/lifecycle/fsm_session.rb +251 -0
  35. data/lib/phronomy/agent/lifecycle/phase_machine_builder.rb +249 -0
  36. data/lib/phronomy/agent/react_agent.rb +19 -14
  37. data/lib/phronomy/agent/runner.rb +2 -2
  38. data/lib/phronomy/agent/tool_executor.rb +108 -0
  39. data/lib/phronomy/concurrency/async_queue.rb +157 -0
  40. data/lib/phronomy/concurrency/blocking_adapter_pool.rb +443 -0
  41. data/lib/phronomy/concurrency/cancellation_scope.rb +125 -0
  42. data/lib/phronomy/concurrency/cancellation_token.rb +140 -0
  43. data/lib/phronomy/concurrency/concurrency_gate.rb +157 -0
  44. data/lib/phronomy/concurrency/deadline.rb +65 -0
  45. data/lib/phronomy/{runtime → concurrency}/gate_registry.rb +1 -1
  46. data/lib/phronomy/{runtime → concurrency}/pool_registry.rb +1 -1
  47. data/lib/phronomy/context.rb +2 -8
  48. data/lib/phronomy/embeddings.rb +2 -2
  49. data/lib/phronomy/eval/runner.rb +4 -0
  50. data/lib/phronomy/eval/scorer/llm_judge.rb +12 -1
  51. data/lib/phronomy/event_loop.rb +7 -7
  52. data/lib/phronomy/invocation_context.rb +3 -3
  53. data/lib/phronomy/knowledge_source.rb +0 -5
  54. data/lib/phronomy/llm_adapter/ruby_llm.rb +17 -11
  55. data/lib/phronomy/{context → llm_context_window}/assembler.rb +18 -3
  56. data/lib/phronomy/{context → llm_context_window}/context_version_cache.rb +1 -1
  57. data/lib/phronomy/{context → llm_context_window}/token_budget.rb +7 -4
  58. data/lib/phronomy/{context → llm_context_window}/token_estimator.rb +3 -3
  59. data/lib/phronomy/loader.rb +4 -4
  60. data/lib/phronomy/{agent → multi_agent}/handoff.rb +2 -2
  61. data/lib/phronomy/{agent → multi_agent}/orchestrator.rb +6 -6
  62. data/lib/phronomy/{agent → multi_agent}/parallel_tool_chat.rb +4 -4
  63. data/lib/phronomy/{agent → multi_agent}/team_coordinator.rb +2 -2
  64. data/lib/phronomy/runtime.rb +19 -4
  65. data/lib/phronomy/splitter.rb +3 -3
  66. data/lib/phronomy/task_group.rb +1 -1
  67. data/lib/phronomy/tool/base.rb +50 -9
  68. data/lib/phronomy/tracing/null_tracer.rb +3 -1
  69. data/lib/phronomy/vector_store.rb +2 -2
  70. data/lib/phronomy/version.rb +1 -1
  71. data/lib/phronomy/workflow_context.rb +8 -0
  72. data/lib/phronomy/workflow_runner.rb +11 -131
  73. data/lib/phronomy.rb +1 -0
  74. metadata +44 -42
  75. data/lib/phronomy/async_queue.rb +0 -155
  76. data/lib/phronomy/blocking_adapter_pool.rb +0 -435
  77. data/lib/phronomy/cancellation_scope.rb +0 -123
  78. data/lib/phronomy/cancellation_token.rb +0 -133
  79. data/lib/phronomy/concurrency_gate.rb +0 -155
  80. data/lib/phronomy/context/compaction_context.rb +0 -111
  81. data/lib/phronomy/context/trigger_context.rb +0 -39
  82. data/lib/phronomy/context/trim_context.rb +0 -75
  83. data/lib/phronomy/deadline.rb +0 -63
  84. data/lib/phronomy/embeddings/base.rb +0 -39
  85. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +0 -45
  86. data/lib/phronomy/fsm_session.rb +0 -247
  87. data/lib/phronomy/knowledge_source/base.rb +0 -54
  88. data/lib/phronomy/knowledge_source/entity_knowledge.rb +0 -96
  89. data/lib/phronomy/knowledge_source/rag_knowledge.rb +0 -57
  90. data/lib/phronomy/knowledge_source/static_knowledge.rb +0 -52
  91. data/lib/phronomy/loader/base.rb +0 -25
  92. data/lib/phronomy/loader/csv_loader.rb +0 -56
  93. data/lib/phronomy/loader/markdown_loader.rb +0 -76
  94. data/lib/phronomy/loader/plain_text_loader.rb +0 -22
  95. data/lib/phronomy/prompt_template.rb +0 -96
  96. data/lib/phronomy/splitter/base.rb +0 -47
  97. data/lib/phronomy/splitter/fixed_size_splitter.rb +0 -51
  98. data/lib/phronomy/splitter/recursive_splitter.rb +0 -105
  99. data/lib/phronomy/tool_executor.rb +0 -106
  100. data/lib/phronomy/vector_store/async_backend.rb +0 -110
  101. data/lib/phronomy/vector_store/base.rb +0 -89
  102. data/lib/phronomy/vector_store/in_memory.rb +0 -93
  103. data/lib/phronomy/vector_store/pgvector.rb +0 -127
  104. data/lib/phronomy/vector_store/redis_search.rb +0 -192
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d9ae370d656048e38f700b6bced931fe249f731cea819ab94691eb4bcf6ef43c
4
- data.tar.gz: 97d01ca3475f547a41397d1dad2ddb8ccaa10f6466d5a75c3f79e6875a7af0c6
3
+ metadata.gz: d4410424efcdcdf0ab529106ba2c872bddae9decc322995f37065f426255a05b
4
+ data.tar.gz: c9ae0dff7f184244b92fc91c585536f767aded5ff6ea8ecaafe86221863738e8
5
5
  SHA512:
6
- metadata.gz: d3ab9ebd145e1ed706ad1741a2e3184c412aa8fd0eac32c95eb0b4a1ef87af38ae73eb5b4205b7f2894dd228929130c9a7569d24a1d7a571a5aa3ec5a68a4172
7
- data.tar.gz: efa88afdbaa2f3d8fc38ee7cbc7044711479490546a888d44540f3b6bae6da60a3a3e64cfbbef455d65f78bab64dd9a68056e4c9f7ac7a360d512179364c8b23
6
+ metadata.gz: e7afa1749dc1431e27e225dfe7a8eafebb2e781c0e6a6ca6e0bdda9712c22b4b5b68d3a9897bc92b466026c698070045774d79a0197ad0463da3f81ff103b36c
7
+ data.tar.gz: be93a29c2b98b2069ef912815e847f0e0115de07bb435e36fbcb834433757fc72442d7c5db129c9f97dd891113ecf75c46606a6295b2e9f3357831c24246d974
data/README.md CHANGED
@@ -45,7 +45,7 @@ It provides composable building blocks — Workflows, Agents, Tools, Guardrails,
45
45
  |---|---|
46
46
  | **Knowledge/RAG** — Retrieval sources with pluggable loaders, splitters, and vector stores; `static_knowledge_refresh!` for runtime cache invalidation | Beta |
47
47
  | **`VectorStore#size`** — Returns document count for all three backends (InMemory, RedisSearch, Pgvector) | Beta |
48
- | **`VectorStore::AsyncBackend` mixin** — Pluggable async interface for `VectorStore`; default pool-backed implementations for `search_async`, `add_async`, `remove_async`, `clear_async`; backends with native async drivers override individual methods to bypass `BlockingAdapterPool` entirely; all existing backends remain unchanged | Beta |
48
+ | **`Agent::Context::Knowledge::VectorStore::AsyncBackend` mixin** — Pluggable async interface for `VectorStore`; default pool-backed implementations for `search_async`, `add_async`, `remove_async`, `clear_async`; backends with native async drivers override individual methods to bypass `BlockingAdapterPool` entirely; all existing backends remain unchanged | Beta |
49
49
  | **Parallel RAG multi-source fetch** — `Agent#build_context` fetches all `knowledge_sources` concurrently via `TaskGroup`; `config[:rag_failure_policy]` `:skip` (default) silently ignores failed sources so the agent answers with partial context, `:fail` surfaces the first error; per-source latency is emitted to `Phronomy.configuration.logger` at debug level | Beta |
50
50
  | **MCP Tool** — Model Context Protocol server integration | Beta |
51
51
 
@@ -131,8 +131,8 @@ Install additional gems only for the features you use:
131
131
 
132
132
  | Gem | Required for |
133
133
  |-----|-------------|
134
- | `pgvector` | `Phronomy::VectorStore::Pgvector` |
135
- | `redis` | `Phronomy::VectorStore::RedisSearch` |
134
+ | `pgvector` | `Phronomy::Agent::Context::Knowledge::VectorStore::Pgvector` |
135
+ | `redis` | `Phronomy::Agent::Context::Knowledge::VectorStore::RedisSearch` |
136
136
  | `opentelemetry-api` | `Phronomy::Tracing::OpenTelemetryTracer` |
137
137
 
138
138
  ## Quick Start
@@ -284,15 +284,15 @@ end
284
284
 
285
285
  ```ruby
286
286
  # Static knowledge (policy files, reference docs)
287
- policy = Phronomy::KnowledgeSource::StaticKnowledge.new(
287
+ policy = Phronomy::Agent::Context::Knowledge::Source::StaticKnowledge.new(
288
288
  File.read("policy.md"),
289
289
  type: :policy,
290
290
  source: "policy.md" # exposed to LLM for citation
291
291
  )
292
292
 
293
293
  # RAG retrieval from a vector store
294
- store = Phronomy::VectorStore::InMemory.new
295
- embeddings = Phronomy::Embeddings::RubyLLMEmbeddings.new(model: "text-embedding-3-small")
294
+ store = Phronomy::Agent::Context::Knowledge::VectorStore::InMemory.new
295
+ embeddings = Phronomy::Agent::Context::Knowledge::Embeddings::RubyLLMEmbeddings.new(model: "text-embedding-3-small")
296
296
 
297
297
  # Add documents before querying
298
298
  text1 = "Refunds are processed within 5 business days."
@@ -300,7 +300,7 @@ text2 = "Contact support@example.com for refund requests."
300
300
  store.add(id: "doc-1", embedding: embeddings.embed(text1), metadata: { content: text1, source: "policy.md" })
301
301
  store.add(id: "doc-2", embedding: embeddings.embed(text2), metadata: { content: text2, source: "policy.md" })
302
302
 
303
- rag = Phronomy::KnowledgeSource::RAGKnowledge.new(store: store, embeddings: embeddings, k: 5)
303
+ rag = Phronomy::Agent::Context::Knowledge::Source::RAGKnowledge.new(store: store, embeddings: embeddings, k: 5)
304
304
 
305
305
  # Inject at invocation time
306
306
  result = MyAgent.new.invoke("What is the refund policy?",
@@ -319,8 +319,8 @@ MyAgent.static_knowledge_refresh!
319
319
  Load and split documents with built-in loaders:
320
320
 
321
321
  ```ruby
322
- chunks = Phronomy::Loader::MarkdownLoader.new.load("docs/guide.md")
323
- .then { |docs| Phronomy::Splitter::RecursiveSplitter.new(chunk_size: 512).split(docs) }
322
+ chunks = Phronomy::Agent::Context::Knowledge::Loader::MarkdownLoader.new.load("docs/guide.md")
323
+ .then { |docs| Phronomy::Agent::Context::Knowledge::Splitter::RecursiveSplitter.new(chunk_size: 512).split(docs) }
324
324
  ```
325
325
 
326
326
  ### Multi-Agent Handoff — Hub-and-spoke routing
@@ -674,7 +674,7 @@ agents automatically stay within the configured token limit.
674
674
  Derives the effective token budget from RubyLLM's model registry:
675
675
 
676
676
  ```ruby
677
- budget = Phronomy::Context::TokenBudget.new(
677
+ budget = Phronomy::LlmContextWindow::TokenBudget.new(
678
678
  model: "claude-3-5-sonnet-20241022", # looks up context_window + max_output_tokens
679
679
  overhead: 500 # extra reservation for tool definitions
680
680
  )
@@ -686,7 +686,7 @@ budget.effective_input_limit # => 191_308
686
686
  Or supply explicit values (useful for local / unregistered models):
687
687
 
688
688
  ```ruby
689
- budget = Phronomy::Context::TokenBudget.new(
689
+ budget = Phronomy::LlmContextWindow::TokenBudget.new(
690
690
  context_window: 32_768,
691
691
  max_output_tokens: 4_096
692
692
  )
@@ -716,7 +716,7 @@ registry the budget is silently skipped.
716
716
  > ```ruby
717
717
  > require "tiktoken_ruby"
718
718
  > enc = Tiktoken.encoding_for_model("gpt-4o")
719
- > Phronomy::Context::TokenEstimator.tokenizer = ->(text) { enc.encode(text).length }
719
+ > Phronomy::LlmContextWindow::TokenEstimator.tokenizer = ->(text) { enc.encode(text).length }
720
720
  > ```
721
721
 
722
722
 
@@ -750,7 +750,7 @@ blocks always execute.
750
750
  > risky interruption.
751
751
 
752
752
  ```ruby
753
- token = Phronomy::CancellationToken.new
753
+ token = Phronomy::Concurrency::CancellationToken.new
754
754
 
755
755
  # Cancel from another thread after 5 s
756
756
  Thread.new { sleep 5; token.cancel! }
@@ -762,15 +762,15 @@ rescue Phronomy::CancellationError
762
762
  end
763
763
 
764
764
  # Hard deadline via monotonic clock (recommended — immune to NTP/DST changes)
765
- token = Phronomy::CancellationToken.timeout_after(30)
765
+ token = Phronomy::Concurrency::CancellationToken.timeout_after(30)
766
766
  result = MyAgent.new.invoke("...", config: { cancellation_token: token })
767
767
 
768
768
  # Hard deadline via wall-clock (legacy — still supported)
769
- token = Phronomy::CancellationToken.new(deadline: Time.now + 30)
769
+ token = Phronomy::Concurrency::CancellationToken.new(deadline: Time.now + 30)
770
770
  result = MyAgent.new.invoke("...", config: { cancellation_token: token })
771
771
 
772
772
  # Propagate to all parallel workers via dispatch_parallel / fan_out
773
- token = Phronomy::CancellationToken.new
773
+ token = Phronomy::Concurrency::CancellationToken.new
774
774
  Thread.new { sleep 10; token.cancel! }
775
775
 
776
776
  orchestrator.dispatch_parallel(
data/benchmark/bench_context_assembler.rb CHANGED
@@ -12,9 +12,9 @@ BenchAsmMessage = Struct.new(:content)
12
12
 
13
13
  def make_assembler(n_messages:, n_chunks:, with_budget: false)
14
14
  budget = if with_budget
15
- Phronomy::Context::TokenBudget.new(context_window: 4096, max_output_tokens: 512)
15
+ Phronomy::LlmContextWindow::TokenBudget.new(context_window: 4096, max_output_tokens: 512)
16
16
  end
17
- asm = Phronomy::Context::Assembler.new(budget: budget)
17
+ asm = Phronomy::LlmContextWindow::Assembler.new(budget: budget)
18
18
  asm.add_instruction("You are a helpful assistant. Answer the user's question.")
19
19
  n_chunks.times do |i|
20
20
  asm.add_knowledge("Fact #{i}: The capital of country #{i} is City #{i}.", type: :entity, trusted: true)
data/benchmark/bench_regression.rb CHANGED
@@ -94,7 +94,7 @@ stub_agent_class = Class.new(Phronomy::Agent::Base) do
94
94
  define_method(:invoke_async) { |input, **_kw| Phronomy::Runtime.instance.spawn(name: "bench-stub") { invoke(input) } }
95
95
  end
96
96
 
97
- orchestrator_class = Class.new(Phronomy::Agent::Orchestrator)
97
+ orchestrator_class = Class.new(Phronomy::MultiAgent::Orchestrator)
98
98
  orchestrator = orchestrator_class.new
99
99
 
100
100
  PARALLEL_ITERATIONS = 200
@@ -109,7 +109,7 @@ end
109
109
  # ---------------------------------------------------------------------------
110
110
  # Target 5: CancellationToken#cancelled? throughput (8 threads)
111
111
  # ---------------------------------------------------------------------------
112
- CANCEL_TOKEN = Phronomy::CancellationToken.new
112
+ CANCEL_TOKEN = Phronomy::Concurrency::CancellationToken.new
113
113
  CANCEL_ITERATIONS = 10_000
114
114
 
115
115
  t5 = Benchmark.measure("CancellationToken#cancelled? (8 threads)") do
@@ -122,7 +122,7 @@ end
122
122
  # ---------------------------------------------------------------------------
123
123
  # Target 6: CancellationToken#raise_if_cancelled! hot path (no-op, single thread)
124
124
  # ---------------------------------------------------------------------------
125
- RAISE_TOKEN = Phronomy::CancellationToken.new # not cancelled — no-op path
125
+ RAISE_TOKEN = Phronomy::Concurrency::CancellationToken.new # not cancelled — no-op path
126
126
  RAISE_ITERATIONS = 200_000
127
127
 
128
128
  t6 = Benchmark.measure("CancellationToken#raise_if_cancelled! (no-op)") do
@@ -135,12 +135,12 @@ end
135
135
  BenchMsg = Struct.new(:content) unless defined?(BenchMsg)
136
136
 
137
137
  TRIM_ELEMENTS = Array.new(2_000) { |i| {seq: i, message: BenchMsg.new("msg #{i}"), tokens: 10, role: :user} }
138
- TRIM_BUDGET = Phronomy::Context::TokenBudget.new(context_window: 4096, max_output_tokens: 512)
138
+ TRIM_BUDGET = Phronomy::LlmContextWindow::TokenBudget.new(context_window: 4096, max_output_tokens: 512)
139
139
  TRIM_ITERATIONS = 500
140
140
 
141
141
  t7 = Benchmark.measure("TrimContext#remove (2000-element history)") do
142
142
  TRIM_ITERATIONS.times do
143
- tc = Phronomy::Context::TrimContext.new(message_elements: TRIM_ELEMENTS, budget: TRIM_BUDGET)
143
+ tc = Phronomy::Agent::Context::Conversation::TrimContext.new(message_elements: TRIM_ELEMENTS, budget: TRIM_BUDGET)
144
144
  tc.remove((0...200).to_a) # remove 200 oldest messages
145
145
  end
146
146
  end
data/benchmark/bench_token_estimator.rb CHANGED
@@ -23,22 +23,22 @@ BENCH_TOKEN_ITERATIONS = 10_000
23
23
  puts "=== bench_token_estimator ==="
24
24
  Benchmark.bm(30) do |x|
25
25
  x.report("estimate(short text)") do
26
- BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(SHORT_TEXT) }
26
+ BENCH_TOKEN_ITERATIONS.times { Phronomy::LlmContextWindow::TokenEstimator.estimate(SHORT_TEXT) }
27
27
  end
28
28
 
29
29
  x.report("estimate(medium text 500c)") do
30
- BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(MEDIUM_TEXT) }
30
+ BENCH_TOKEN_ITERATIONS.times { Phronomy::LlmContextWindow::TokenEstimator.estimate(MEDIUM_TEXT) }
31
31
  end
32
32
 
33
33
  x.report("estimate(long text 10k c)") do
34
- BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(LONG_TEXT) }
34
+ BENCH_TOKEN_ITERATIONS.times { Phronomy::LlmContextWindow::TokenEstimator.estimate(LONG_TEXT) }
35
35
  end
36
36
 
37
37
  x.report("estimate(100 messages)") do
38
- BENCH_TOKEN_ITERATIONS.times { Phronomy::Context::TokenEstimator.estimate(MESSAGES_100) }
38
+ BENCH_TOKEN_ITERATIONS.times { Phronomy::LlmContextWindow::TokenEstimator.estimate(MESSAGES_100) }
39
39
  end
40
40
 
41
41
  x.report("estimate(1000 messages)") do
42
- (BENCH_TOKEN_ITERATIONS / 10).times { Phronomy::Context::TokenEstimator.estimate(MESSAGES_1000) }
42
+ (BENCH_TOKEN_ITERATIONS / 10).times { Phronomy::LlmContextWindow::TokenEstimator.estimate(MESSAGES_1000) }
43
43
  end
44
44
  end
data/benchmark/bench_tool_schema.rb CHANGED
@@ -43,7 +43,7 @@ end
43
43
 
44
44
  # --- static_knowledge_chunks cache ---
45
45
 
46
- class BenchKnowledgeSource < Phronomy::KnowledgeSource::Base
46
+ class BenchKnowledgeSource < Phronomy::Agent::Context::Knowledge::Source::Base
47
47
  def fetch(query: nil)
48
48
  [{content: "Cached knowledge fact.", type: :static}]
49
49
  end
data/benchmark/bench_vector_store.rb CHANGED
@@ -28,7 +28,7 @@ BENCH_VS_ITERS = {100 => 100, 1_000 => 20, 10_000 => 5}.freeze
28
28
  puts "=== bench_vector_store_inmemory ==="
29
29
  Benchmark.bm(35) do |x|
30
30
  [100, 1_000, 10_000].each do |n|
31
- store = Phronomy::VectorStore::InMemory.new(dimension: DIM)
31
+ store = Phronomy::Agent::Context::Knowledge::VectorStore::InMemory.new(dimension: DIM)
32
32
  populate(store, n)
33
33
  iters = BENCH_VS_ITERS[n]
34
34
 
data/lib/phronomy/agent/base.rb CHANGED
@@ -60,12 +60,12 @@ module Phronomy
60
60
  end
61
61
 
62
62
  # Sets or reads the system instructions for this agent.
63
- # Accepts a String, a {Phronomy::PromptTemplate}, or a block (Proc).
63
+ # Accepts a String, a {Phronomy::Agent::Context::Instruction::PromptTemplate}, or a block (Proc).
64
64
  # When used as a reader (no argument, no block), returns the stored value.
65
65
  #
66
- # @param text [String, Phronomy::PromptTemplate, nil]
66
+ # @param text [String, Phronomy::Agent::Context::Instruction::PromptTemplate, nil]
67
67
  # @yield optionally provide instructions as a block
68
- # @return [String, Phronomy::PromptTemplate, Proc, nil]
68
+ # @return [String, Phronomy::Agent::Context::Instruction::PromptTemplate, Proc, nil]
69
69
  # @example String instructions
70
70
  # class MyAgent < Phronomy::Agent::Base
71
71
  # instructions "You are a helpful assistant."
@@ -225,7 +225,7 @@ module Phronomy
225
225
  # Defaults to +nil+ (no timeout).
226
226
  # Inherited by subclasses; the most-specific definition wins.
227
227
  #
228
- # When the timeout fires, a {Phronomy::CancellationScope} is cancelled
228
+ # When the timeout fires, a {Phronomy::Concurrency::CancellationScope} is cancelled
229
229
  # and its token is propagated to the FSM config so that in-flight LLM,
230
230
  # tool, and RAG calls observe cancellation via their +cancellation_token:+
231
231
  # keyword argument. +Phronomy::TimeoutError+ is raised to the caller.
@@ -255,10 +255,10 @@ module Phronomy
255
255
  # the first time +invoke+ is called. The cache persists for the lifetime
256
256
  # of the process; call {.static_knowledge_refresh!} to force a reload.
257
257
  #
258
- # @param sources [Array<Phronomy::KnowledgeSource::Base>]
258
+ # @param sources [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
259
259
  # @example
260
260
  # class PolicyAgent < Phronomy::Agent::Base
261
- # static_knowledge Phronomy::KnowledgeSource::StaticKnowledge.new(POLICY_TEXT)
261
+ # static_knowledge Phronomy::Agent::Context::Knowledge::Source::StaticKnowledge.new(POLICY_TEXT)
262
262
  # end
263
263
  # @api public
264
264
  def static_knowledge(*sources)
@@ -269,7 +269,7 @@ module Phronomy
269
269
  end
270
270
 
271
271
  # Returns the registered static knowledge sources.
272
- # @return [Array<Phronomy::KnowledgeSource::Base>]
272
+ # @return [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
273
273
  # @api public
274
274
  def static_knowledge_sources
275
275
  @static_knowledge_sources || []
@@ -306,11 +306,11 @@ module Phronomy
306
306
  # application can remove stale or irrelevant messages from the
307
307
  # conversation history.
308
308
  #
309
- # The block receives a {Phronomy::Context::TrimContext} and may call
309
+ # The block receives a {Phronomy::Agent::Context::Conversation::TrimContext} and may call
310
310
  # +ctx.remove(seqs)+ to drop messages by seq number. Changes affect
311
311
  # only the current invocation; the underlying memory store is unchanged.
312
312
  #
313
- # @yield [ctx] Phronomy::Context::TrimContext
313
+ # @yield [ctx] Phronomy::Agent::Context::Conversation::TrimContext
314
314
  # @example Drop the oldest message when over 80% of budget is used
315
315
  # on_trim do |ctx|
316
316
  # limit = ctx.budget&.available(used: 0) || Float::INFINITY
@@ -332,9 +332,9 @@ module Phronomy
332
332
  # truthy AND an +on_compact+ callback is also registered, the compact
333
333
  # pipeline is executed.
334
334
  #
335
- # The block receives a read-only {Phronomy::Context::TriggerContext}.
335
+ # The block receives a read-only {Phronomy::Agent::Context::Conversation::TriggerContext}.
336
336
  #
337
- # @yield [ctx] Phronomy::Context::TriggerContext
337
+ # @yield [ctx] Phronomy::Agent::Context::Conversation::TriggerContext
338
338
  # @return [Boolean] truthy → run on_compact; falsy → skip
339
339
  # @example Trigger when messages exceed 70% of token budget
340
340
  # on_compaction_trigger do |ctx|
@@ -354,10 +354,10 @@ module Phronomy
354
354
 
355
355
  # Registers a callback that performs the actual compaction when the
356
356
  # +on_compaction_trigger+ callback fires. The block receives a
357
- # {Phronomy::Context::CompactionContext} and should call +ctx.compact+
357
+ # {Phronomy::Agent::Context::Conversation::CompactionContext} and should call +ctx.compact+
358
358
  # to specify which messages to summarise.
359
359
  #
360
- # @yield [ctx] Phronomy::Context::CompactionContext
360
+ # @yield [ctx] Phronomy::Agent::Context::Conversation::CompactionContext
361
361
  # @example Replace the first 4 messages with a short summary
362
362
  # on_compact do |ctx|
363
363
  # ctx.compact(0..3) do |elements|
@@ -509,7 +509,7 @@ module Phronomy
509
509
  # @example With InvocationContext (deadline-based timeout)
510
510
  # ctx = Phronomy::InvocationContext.new(
511
511
  # thread_id: "conv-123",
512
- # deadline: Phronomy::Deadline.in(30),
512
+ # deadline: Phronomy::Concurrency::Deadline.in(30),
513
513
  # task_id: SecureRandom.uuid
514
514
  # )
515
515
  # result = MyAgent.new.invoke("Hello", invocation_context: ctx)
@@ -532,7 +532,7 @@ module Phronomy
532
532
  # cancellation when the deadline fires.
533
533
  timeout_sec = self.class.invoke_timeout
534
534
  effective_config, scope = if timeout_sec
535
- s = Phronomy::CancellationScope.new(parent_token: config[:cancellation_token])
535
+ s = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
536
536
  s.deadline_in(timeout_sec)
537
537
  [config.merge(cancellation_token: s.token), s]
538
538
  else
@@ -687,7 +687,7 @@ module Phronomy
687
687
  raise
688
688
  end
689
689
 
690
- # Returns the {Context::ContextVersionCache} built during the most recent
690
+ # Returns the {LlmContextWindow::ContextVersionCache} built during the most recent
691
691
  # {#invoke} call on this agent instance. The thread-local cache entry is
692
692
  # cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
693
693
  # in +@last_context_version_cache+ so callers can inspect it after invoke
@@ -768,7 +768,7 @@ module Phronomy
768
768
  # The queue capacity is bounded by Configuration#stream_queue_max_size
769
769
  # (nil = unbounded) to provide backpressure against a fast LLM producer.
770
770
  adapter = Phronomy.configuration.llm_adapter
771
- chunk_queue = Phronomy::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
771
+ chunk_queue = Phronomy::Concurrency::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
772
772
  pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
773
773
 
774
774
  # Drain the chunk queue on this side (scheduler task / caller thread).
@@ -809,59 +809,73 @@ module Phronomy
809
809
  system_text = build_cached_system_text(input)
810
810
  user_message = extract_message(input)
811
811
 
812
- assembler = Context::Assembler.new(budget: budget)
812
+ assembler = LlmContextWindow::Assembler.new(budget: budget)
813
813
  assembler.add_instruction(system_text) if system_text
814
+ fetch_knowledge_chunks(user_message, config).each do |chunk|
815
+ assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
816
+ end
817
+ assembler.add_messages(history)
818
+ assembler.build
819
+ end
820
+ protected :build_context
814
821
 
822
+ # Fetches knowledge chunks from all registered sources concurrently.
823
+ #
824
+ # Each source is spawned as a separate task within a {Phronomy::TaskGroup};
825
+ # the RAG concurrency gate enforces the +max_concurrent_rag_fetches+ cap.
826
+ # Results are returned in registration order (spawn order) as a flat array.
827
+ #
828
+ # This method is available to subclasses as a building block when
829
+ # overriding {#build_context}. Pass a custom +query+ to implement
830
+ # multi-hop RAG or other retrieval strategies.
831
+ #
832
+ # @param query [String] RAG query string (typically the current user message)
833
+ # @param config [Hash] invocation config; relevant keys:
834
+ # +:knowledge_sources+, +:rag_failure_policy+, +:cancellation_token+, +:rag_timeout+
835
+ # @return [Array<Hash>] flat list of chunk hashes with +:content+, +:type+, +:source+
836
+ # @api private
837
+ def fetch_knowledge_chunks(query, config)
815
838
  sources = Array(config[:knowledge_sources])
816
- unless sources.empty?
817
- check_cancellation!(config, "invocation cancelled before RAG fetch")
818
- # Determine TaskGroup failure policy: :skip (default) ignores per-source
819
- # failures so the agent can still answer with partial context; :fail
820
- # surfaces the first error immediately via :fail_fast.
821
- failure_policy =
822
- case config[:rag_failure_policy]
823
- when :fail then :fail_fast
824
- else :skip_failed
825
- end
839
+ return [] if sources.empty?
826
840
 
827
- group = Phronomy::Runtime.instance.task_group(failure_policy: failure_policy)
841
+ check_cancellation!(config, "invocation cancelled before RAG fetch")
828
842
 
829
- bp = Phronomy.configuration.backpressure
830
- rag_on_full = (bp == :raise) ? :reject : (bp || :wait)
831
- rag_bp_timeout = Phronomy.configuration.backpressure_timeout
843
+ # :skip (default) — ignore per-source failures so the agent can still
844
+ # answer with partial context. :fail surfaces the first error immediately.
845
+ failure_policy =
846
+ case config[:rag_failure_policy]
847
+ when :fail then :fail_fast
848
+ else :skip_failed
849
+ end
832
850
 
833
- # Spawn all fetches concurrently. Results are returned in spawn order
834
- # (i.e. registration order of knowledge sources) by TaskGroup#await_all.
835
- sources.each do |ks|
836
- group.spawn do
837
- Phronomy::Runtime.instance.gate(:rag).acquire(on_full: rag_on_full, timeout: rag_bp_timeout) do
838
- t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
839
- result = ks.fetch_async(
840
- query: user_message,
851
+ group = Phronomy::Runtime.instance.task_group(failure_policy: failure_policy)
852
+ bp = Phronomy.configuration.backpressure
853
+ rag_on_full = (bp == :raise) ? :reject : (bp || :wait)
854
+ rag_bp_timeout = Phronomy.configuration.backpressure_timeout
855
+
856
+ # Spawn all fetches concurrently. Results are returned in spawn order
857
+ # (i.e. registration order of knowledge sources) by TaskGroup#await_all.
858
+ sources.each do |ks|
859
+ group.spawn do
860
+ Phronomy::Runtime.instance.gate(:rag).acquire(on_full: rag_on_full, timeout: rag_bp_timeout) do
861
+ result, elapsed_ms = Phronomy::Runtime.measure_ms do
862
+ ks.fetch_async(
863
+ query: query,
841
864
  cancellation_token: config[:cancellation_token],
842
865
  timeout: config[:rag_timeout]
843
866
  ).await
844
- elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0
845
- Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{(elapsed * 1000).round}ms" }
846
- result
847
867
  end
848
- end
849
- end
850
-
851
- # await_all returns results in spawn order; nil entries indicate
852
- # skipped failures when using :skip_failed.
853
- per_source_chunks = group.await_all
854
- per_source_chunks.each do |chunks|
855
- Array(chunks).each do |chunk|
856
- assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
868
+ Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{elapsed_ms}ms" }
869
+ result
857
870
  end
858
871
  end
859
872
  end
860
873
 
861
- assembler.add_messages(history)
862
- assembler.build
874
+ # await_all returns results in spawn order; nil entries indicate
875
+ # skipped failures when using :skip_failed.
876
+ group.await_all.flat_map { |chunks| Array(chunks) }
863
877
  end
864
- protected :build_context
878
+ protected :fetch_knowledge_chunks
865
879
 
866
880
  # Runs the on_trim / on_compaction_trigger / on_compact pipeline on the
867
881
  # supplied message array and returns the final Array of message objects
@@ -880,16 +894,16 @@ module Phronomy
880
894
  elements = build_message_elements(Array(messages))
881
895
 
882
896
  if (trim_cb = self.class._on_trim_callback)
883
- trim_ctx = Context::TrimContext.new(message_elements: elements, budget: budget)
897
+ trim_ctx = Context::Conversation::TrimContext.new(message_elements: elements, budget: budget)
884
898
  trim_cb.call(trim_ctx)
885
899
  elements = trim_ctx.message_elements
886
900
  end
887
901
 
888
902
  if (trigger_cb = self.class._on_compaction_trigger_callback)
889
- trigger_ctx = Context::TriggerContext.new(message_elements: elements, budget: budget)
903
+ trigger_ctx = Context::Conversation::TriggerContext.new(message_elements: elements, budget: budget)
890
904
  if trigger_cb.call(trigger_ctx)
891
905
  if (compact_cb = self.class._on_compact_callback)
892
- compact_ctx = Context::CompactionContext.new(
906
+ compact_ctx = Context::Conversation::CompactionContext.new(
893
907
  message_elements: elements,
894
908
  budget: budget,
895
909
  thread_id: thread_id
@@ -916,63 +930,12 @@ module Phronomy
916
930
  end
917
931
 
918
932
  trace("agent.invoke", input: input, **caller_meta) do |_span|
919
- # Run input guardrails before touching the LLM.
920
- run_input_guardrails!(input)
921
-
922
- user_message = extract_message(input)
923
- chat = build_chat
924
-
925
- # Assemble context (system prompt + history). Override #build_context to
926
- # inject custom context editing logic at the Agent subclass level.
927
- context = build_context(input, messages: messages, thread_id: thread_id, config: config)
928
- apply_instructions(chat, context[:system]) if context[:system]
929
- context[:messages].each { |msg| chat.messages << msg }
930
-
931
- # Run before_completion hooks (global → class → instance) before the LLM call.
932
- run_before_completion_hooks!(chat, config)
933
-
934
- # Register suspension hook for approval-required tools (no-op when a
935
- # synchronous on_approval_required handler is already registered).
936
- _register_suspension_hook!(chat)
937
-
938
- # Check for cancellation immediately before the LLM call.
939
- check_cancellation!(config, "invocation cancelled before LLM call")
940
-
941
- # Forward the cancellation token to ParallelToolChat explicitly
942
- # via the chat instance so that tool dispatch batches can observe
943
- # cancellation without needing Thread.current.
944
- chat.cancellation_token = config[:cancellation_token] if chat.respond_to?(:cancellation_token=)
945
-
946
- begin
947
- # Route the LLM call through the configured LLMAdapter so that the
948
- # blocking HTTP request runs inside BlockingAdapterPool and the
949
- # adapter can be swapped without changing agent code.
950
- adapter = Phronomy.configuration.llm_adapter
951
- response = adapter.complete_async(chat, user_message, config: config).await
952
- rescue SuspendSignal => signal
953
- checkpoint = Checkpoint.new(
954
- thread_id: thread_id,
955
- original_input: input,
956
- messages: chat.messages.dup,
957
- pending_tool_name: signal.tool_name,
958
- pending_tool_args: signal.args,
959
- pending_tool_call_id: signal.tool_call_id
960
- )
961
- suspended_result = {output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}
962
- next [suspended_result, nil]
963
- ensure
964
- # Clear the chat's cancellation token reference after each LLM call.
965
- chat.cancellation_token = nil if chat.respond_to?(:cancellation_token=)
966
- end
967
-
968
- output = response.content
969
- usage = Phronomy::TokenUsage.from_tokens(response.tokens)
970
-
971
- # Run output guardrails before returning to the caller.
972
- run_output_guardrails!(output)
973
-
974
- result = {output: output, messages: chat.messages, usage: usage}
975
- [result, usage]
933
+ Agent::InvocationPipeline.new(self).run(
934
+ input,
935
+ messages: messages,
936
+ thread_id: thread_id,
937
+ config: config
938
+ )
976
939
  end
977
940
  end
978
941
 
@@ -986,19 +949,19 @@ module Phronomy
986
949
  return nil unless model_name
987
950
 
988
951
  if (cw = self.class.context_window)
989
- Phronomy::Context::TokenBudget.new(
952
+ Phronomy::LlmContextWindow::TokenBudget.new(
990
953
  context_window: cw,
991
954
  max_output_tokens: self.class.max_output_tokens || 0,
992
955
  overhead: self.class.context_overhead
993
956
  )
994
957
  else
995
- Phronomy::Context::TokenBudget.new(
958
+ Phronomy::LlmContextWindow::TokenBudget.new(
996
959
  model: model_name,
997
960
  max_output_tokens: self.class.max_output_tokens,
998
961
  overhead: self.class.context_overhead
999
962
  )
1000
963
  end
1001
- rescue Phronomy::Context::UnknownModelError, RubyLLM::ModelNotFoundError
964
+ rescue Phronomy::LlmContextWindow::UnknownModelError, RubyLLM::ModelNotFoundError
1002
965
  nil
1003
966
  end
1004
967
 
@@ -1011,7 +974,7 @@ module Phronomy
1011
974
  # @api public
1012
975
  def build_message_elements(messages)
1013
976
  Array(messages).each_with_index.map do |msg, idx|
1014
- tokens = Context::TokenEstimator.estimate(msg.content.to_s)
977
+ tokens = LlmContextWindow::TokenEstimator.estimate(msg.content.to_s)
1015
978
  {seq: idx, message: msg, tokens: tokens, role: msg.role}
1016
979
  end
1017
980
  end
@@ -1034,11 +997,11 @@ module Phronomy
1034
997
  [instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
1035
998
  )
1036
999
 
1037
- cache = (@context_version_cache ||= Context::ContextVersionCache.new)
1000
+ cache = (@context_version_cache ||= LlmContextWindow::ContextVersionCache.new)
1038
1001
  unless cache.valid?(fingerprint)
1039
1002
  parts = [instruction]
1040
1003
  static_chunks.each do |chunk|
1041
- parts << Context::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
1004
+ parts << LlmContextWindow::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
1042
1005
  end
1043
1006
  cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
1044
1007
  end
@@ -1056,7 +1019,7 @@ module Phronomy
1056
1019
  # Falls back to +nil+ otherwise, signalling {#build_chat} to use the
1057
1020
  # standard +RubyLLM.chat+ factory.
1058
1021
  def build_chat_class
1059
- Phronomy.configuration.event_loop ? Agent::ParallelToolChat : nil
1022
+ Phronomy.configuration.event_loop ? Phronomy::MultiAgent::ParallelToolChat : nil
1060
1023
  end
1061
1024
 
1062
1025
  def build_chat
@@ -1086,7 +1049,7 @@ module Phronomy
1086
1049
  def build_instructions(input)
1087
1050
  instr = self.class.instructions
1088
1051
  case instr
1089
- when Phronomy::PromptTemplate
1052
+ when Phronomy::Agent::Context::Instruction::PromptTemplate
1090
1053
  vars = input.is_a?(Hash) ? input : {input: input}
1091
1054
  instr.format_system(**vars) || instr.format(**vars)
1092
1055
  when String then instr