phronomy 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. checksums.yaml +4 -4
  2. data/README.md +31 -41
  3. data/benchmark/baseline.json +1 -1
  4. data/benchmark/bench_agent_invoke.rb +1 -1
  5. data/benchmark/bench_context_assembler.rb +9 -1
  6. data/benchmark/bench_regression.rb +8 -8
  7. data/benchmark/bench_tool_schema.rb +2 -2
  8. data/benchmark/bench_vector_store.rb +1 -1
  9. data/docs/decisions/011-build-context-as-single-llm-input-authority.md +224 -0
  10. data/lib/phronomy/agent/base.rb +253 -351
  11. data/lib/phronomy/agent/concerns/suspendable.rb +6 -6
  12. data/lib/phronomy/agent/context/capability/base.rb +689 -0
  13. data/lib/phronomy/agent/context/capability/scope_policy.rb +54 -0
  14. data/lib/phronomy/agent/context/knowledge/base.rb +58 -0
  15. data/lib/phronomy/agent/context/knowledge/entity_knowledge.rb +102 -0
  16. data/lib/phronomy/agent/context/knowledge/static_knowledge.rb +58 -0
  17. data/lib/phronomy/agent/invocation_pipeline.rb +10 -1
  18. data/lib/phronomy/agent/react_agent.rb +24 -23
  19. data/lib/phronomy/agent/shared_state.rb +2 -2
  20. data/lib/phronomy/agent/tool_executor.rb +1 -1
  21. data/lib/phronomy/concurrency/gate_registry.rb +0 -1
  22. data/lib/phronomy/configuration.rb +0 -6
  23. data/lib/phronomy/llm_context_window/assembler.rb +77 -44
  24. data/lib/phronomy/multi_agent/handoff.rb +4 -4
  25. data/lib/phronomy/multi_agent/orchestrator.rb +1 -1
  26. data/lib/phronomy/multi_agent/team_coordinator.rb +2 -2
  27. data/lib/phronomy/runtime/runtime_metrics.rb +0 -1
  28. data/lib/phronomy/runtime.rb +1 -2
  29. data/lib/phronomy/tool.rb +3 -4
  30. data/lib/phronomy/{tool/agent_tool.rb → tools/agent.rb} +6 -6
  31. data/lib/phronomy/{tool/mcp_tool.rb → tools/mcp.rb} +9 -9
  32. data/lib/phronomy/tools/vector_search.rb +70 -0
  33. data/lib/phronomy/vector_store/async_backend.rb +110 -0
  34. data/lib/phronomy/vector_store/base.rb +89 -0
  35. data/lib/phronomy/vector_store/embeddings/base.rb +41 -0
  36. data/lib/phronomy/vector_store/embeddings/ruby_llm_embeddings.rb +47 -0
  37. data/lib/phronomy/vector_store/in_memory.rb +103 -0
  38. data/lib/phronomy/vector_store/loader/base.rb +27 -0
  39. data/lib/phronomy/vector_store/loader/csv_loader.rb +58 -0
  40. data/lib/phronomy/vector_store/loader/markdown_loader.rb +78 -0
  41. data/lib/phronomy/vector_store/loader/plain_text_loader.rb +24 -0
  42. data/lib/phronomy/vector_store/pgvector.rb +127 -0
  43. data/lib/phronomy/vector_store/redis_search.rb +192 -0
  44. data/lib/phronomy/vector_store/splitter/base.rb +49 -0
  45. data/lib/phronomy/vector_store/splitter/fixed_size_splitter.rb +53 -0
  46. data/lib/phronomy/vector_store/splitter/recursive_splitter.rb +107 -0
  47. data/lib/phronomy/vector_store.rb +16 -4
  48. data/lib/phronomy/version.rb +1 -1
  49. data/lib/phronomy.rb +2 -1
  50. data/scripts/api_snapshot.rb +11 -9
  51. metadata +28 -32
  52. data/lib/phronomy/agent/context/conversation/compaction_context.rb +0 -117
  53. data/lib/phronomy/agent/context/conversation/trigger_context.rb +0 -43
  54. data/lib/phronomy/agent/context/conversation/trim_context.rb +0 -82
  55. data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +0 -45
  56. data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +0 -51
  57. data/lib/phronomy/agent/context/knowledge/loader/base.rb +0 -31
  58. data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +0 -62
  59. data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +0 -82
  60. data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +0 -28
  61. data/lib/phronomy/agent/context/knowledge/source/base.rb +0 -60
  62. data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +0 -102
  63. data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +0 -63
  64. data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +0 -58
  65. data/lib/phronomy/agent/context/knowledge/splitter/base.rb +0 -53
  66. data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +0 -57
  67. data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +0 -111
  68. data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +0 -116
  69. data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +0 -95
  70. data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +0 -109
  71. data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +0 -133
  72. data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +0 -198
  73. data/lib/phronomy/embeddings.rb +0 -11
  74. data/lib/phronomy/loader.rb +0 -13
  75. data/lib/phronomy/splitter.rb +0 -12
  76. data/lib/phronomy/tool/base.rb +0 -685
  77. data/lib/phronomy/tool/scope_policy.rb +0 -50
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d4410424efcdcdf0ab529106ba2c872bddae9decc322995f37065f426255a05b
4
- data.tar.gz: c9ae0dff7f184244b92fc91c585536f767aded5ff6ea8ecaafe86221863738e8
3
+ metadata.gz: d91e0fb85732153a69d268b41bdfe865791dd8f007e8bed983269284478af002
4
+ data.tar.gz: c334678280139ac7934b6804b06e282051218472985c022823d26913a3f64905
5
5
  SHA512:
6
- metadata.gz: e7afa1749dc1431e27e225dfe7a8eafebb2e781c0e6a6ca6e0bdda9712c22b4b5b68d3a9897bc92b466026c698070045774d79a0197ad0463da3f81ff103b36c
7
- data.tar.gz: be93a29c2b98b2069ef912815e847f0e0115de07bb435e36fbcb834433757fc72442d7c5db129c9f97dd891113ecf75c46606a6295b2e9f3357831c24246d974
6
+ metadata.gz: 393567f7c01633ea20160101705b0fde21ddd009a4950f1cb44a106285500b90a3bec88d4c9681cebb7656d0529c09c9e7c52da42e3e12f103231423921b43aa
7
+ data.tar.gz: 03f5d2e764df9d3becb782ecdec0bf42f03b0f3fc7414efaad2334fe1d047443ef3180e1993244cad92c305607113d0afe2915caa6ff53d14c05c779a61f6b4b
data/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
  > We apologise for the instability this may cause.
8
8
 
9
9
  **Phronomy** is a Ruby AI agent framework inspired by open-source AI agent frameworks.
10
- It provides composable building blocks — Workflows, Agents, Tools, Guardrails, RAG, and Tracing — all powered by [RubyLLM](https://github.com/crmne/ruby_llm) for LLM abstraction.
10
+ It provides composable building blocks — Workflows, Agents, Tools, Guardrails, and Tracing — all powered by [RubyLLM](https://github.com/crmne/ruby_llm) for LLM abstraction.
11
11
 
12
12
  ## Features
13
13
 
@@ -30,10 +30,10 @@ It provides composable building blocks — Workflows, Agents, Tools, Guardrails,
30
30
  | **Workflow action_timeout** — Per-state `action_timeout:` keyword on `state` DSL; cancels Task-returning entry actions that exceed the limit and raises `Phronomy::ActionTimeoutError` | Beta |
31
31
  | **Agent** — ReAct-style tool-calling agents with guardrails and conversation history | Stable |
32
32
  | **Before-Completion Hook** — Three-tier LLM parameter injection | Stable |
33
- | **Context Management** — Token budget calculation, estimation, and pruning | Stable |
33
+ | **Context Management** — Token budget calculation, estimation, and pruning; `Agent::Base` protected hooks: `build_context` (overridable), `trim_messages`, `trim_to_budget`, `compact_messages`, `budget_exceeded?`, `drop_messages_over` | Stable |
34
34
  | **Guardrails** — Input/output validation with custom `InputGuardrail`/`OutputGuardrail` | Beta |
35
35
  | **`PromptInjectionGuardrail`** — Built-in `InputGuardrail` subclass that detects prompt-injection patterns; usable standalone or as part of a guardrail chain | Beta |
36
- | **`Tool::Base.redact_params` / `.max_result_size`** — Class-level DSL: `redact_params` masks parameter values in log/trace output; `max_result_size` truncates oversized tool results before they reach the LLM | Beta |
36
+ | **`Agent::Context::Capability::Base.redact_params` / `.max_result_size`** — Class-level DSL: `redact_params` masks parameter values in log/trace output; `max_result_size` truncates oversized tool results before they reach the LLM | Beta |
37
37
  | **Output Parser** — JSON and Struct-mapped parsers for structured LLM responses | Stable |
38
38
  | **Eval Framework** — Dataset-driven evaluation with multiple scorer types | Beta |
39
39
  | **Tracing** — Pluggable span-based observability | Stable |
@@ -43,11 +43,11 @@ It provides composable building blocks — Workflows, Agents, Tools, Guardrails,
43
43
 
44
44
  | Feature | Stability |
45
45
  |---|---|
46
- | **Knowledge/RAG** — Retrieval sources with pluggable loaders, splitters, and vector stores; `static_knowledge_refresh!` for runtime cache invalidation | Beta |
46
+ | **Knowledge** — Static context injection with pluggable loaders, splitters, and vector stores; `static_knowledge_refresh!` for runtime cache invalidation | Beta |
47
47
  | **`VectorStore#size`** — Returns document count for all three backends (InMemory, RedisSearch, Pgvector) | Beta |
48
- | **`Agent::Context::Knowledge::VectorStore::AsyncBackend` mixin** — Pluggable async interface for `VectorStore`; default pool-backed implementations for `search_async`, `add_async`, `remove_async`, `clear_async`; backends with native async drivers override individual methods to bypass `BlockingAdapterPool` entirely; all existing backends remain unchanged | Beta |
49
- | **Parallel RAG multi-source fetch** — `Agent#build_context` fetches all `knowledge_sources` concurrently via `TaskGroup`; `config[:rag_failure_policy]` `:skip` (default) silently ignores failed sources so the agent answers with partial context, `:fail` surfaces the first error; per-source latency is emitted to `Phronomy.configuration.logger` at debug level | Beta |
50
- | **MCP Tool** — Model Context Protocol server integration | Beta |
48
+ | **`VectorStore::AsyncBackend` mixin** — Pluggable async interface for `VectorStore`; default pool-backed implementations for `search_async`, `add_async`, `remove_async`, `clear_async`; backends with native async drivers override individual methods to bypass `BlockingAdapterPool` entirely; all existing backends remain unchanged | Beta |
49
+ | **MCP Tool** — `Phronomy::Tools::Mcp`: Model Context Protocol server integration; `Phronomy::Tools::Agent`: wraps an agent class as a callable tool via `from_agent` | Beta |
50
+ | **Vector Search Tool** — `Phronomy::Tools::VectorSearch`: wraps a `VectorStore` and `Embeddings` adapter as a callable agent tool via `from_store` | Beta |
51
51
 
52
52
  **Execution and reliability**
53
53
 
@@ -55,14 +55,14 @@ It provides composable building blocks — Workflows, Agents, Tools, Guardrails,
55
55
  |---|---|
56
56
  | **Workflow EventLoop Mode** — Opt-in event-driven execution: `Phronomy.configure { \|c\| c.event_loop = true }` | Experimental |
57
57
  | **Agent EventLoop Mode** — `Agent#invoke` (non-blocking via EventLoop), `Agent#run_as_child` (child-FSM pattern for Workflow integration), parallel tool dispatch via `ParallelToolChat` | Experimental |
58
- | **`invoke_async` / `call_async`** — `Agent::Base#invoke_async` and `Workflow#invoke_async` return a `Task`; `Tool::Base#call_async` similarly; compatible with EventLoop and standalone contexts | Experimental |
58
+ | **`invoke_async` / `call_async`** — `Agent::Base#invoke_async` and `Workflow#invoke_async` return a `Task`; `Agent::Context::Capability::Base#call_async` similarly; compatible with EventLoop and standalone contexts | Experimental |
59
59
  | **CancellationToken** — Cooperative cancellation via `cancel!`/`cancelled?`/`raise_if_cancelled!`; `timeout_after(seconds)` for monotonic-clock deadlines; optional `deadline:` (wall-clock) for backward compatibility; passed as `config: { cancellation_token: token }` to agents and `dispatch_parallel`; injected into `tool.execute` when the method declares a `cancellation_token:` keyword | Experimental |
60
60
  | **`dispatch_parallel` / `fan_out` `force_kill:` option** — `force_kill: false` (default) leaves timed-out workers running and raises `TimeoutError` immediately; `force_kill: true` restores the old `Thread#kill` behaviour with a `logger.warn` | Beta |
61
- | **`execution_mode` DSL on `Tool::Base`** — Declares how a tool's `execute` should be dispatched: `:cooperative` (same scheduler thread), `:blocking_io` (default; offloaded to `BlockingAdapterPool`), `:cpu_bound`, `:external_process` | Experimental |
61
+ | **`execution_mode` DSL on `Agent::Context::Capability::Base`** — Declares how a tool's `execute` should be dispatched: `:cooperative` (same scheduler thread), `:blocking_io` (default; offloaded to `BlockingAdapterPool`), `:cpu_bound`, `:external_process` | Experimental |
62
62
  | **`invocation_context:` keyword on `Agent#invoke` / `Workflow#invoke`** — Pass a `Phronomy::InvocationContext` directly; `thread_id`, `cancellation_token`, and `deadline`-based timeout are derived from it; `task_id` / `parent_task_id` appear in trace spans automatically; `config:` keys remain supported as backward-compat aliases | Beta |
63
- | **`ConcurrencyGate` — unified backpressure** — Counting semaphore that enforces per-resource concurrency caps (`max_concurrent_agent_tasks`, `max_concurrent_tool_tasks`, `max_concurrent_workflow_tasks`, `max_concurrent_llm_calls`, `max_concurrent_rag_fetches`, `max_concurrent_vector_searches`); configured via `Phronomy.configure`; backpressure behaviour follows the global `backpressure` setting (`:wait`, `:raise`/`:reject`, `:timeout`); `nil` cap = unlimited (default) | Beta |
63
+ | **`ConcurrencyGate` — unified backpressure** — Counting semaphore that enforces per-resource concurrency caps (`max_concurrent_agent_tasks`, `max_concurrent_tool_tasks`, `max_concurrent_workflow_tasks`, `max_concurrent_llm_calls`, `max_concurrent_vector_searches`); configured via `Phronomy.configure`; backpressure behaviour follows the global `backpressure` setting (`:wait`, `:raise`/`:reject`, `:timeout`); `nil` cap = unlimited (default) | Beta |
64
64
  | **Cooperative scheduler yield points** — `Runtime#yield` (cooperative yield; yields the current task's time slice); `Runtime#yield_if_needed(every: N)` (thread-local counter, yields every N calls); CPU-bound detection when `blocking_detect_threshold_ms` is set (warns and increments `non_yield_threshold_violation_count` when a task runs longer than the threshold without yielding); `starvation_threshold_ms` configuration field (default: 50ms) | Beta |
65
- | **`Phronomy::Metrics`** — `Phronomy::Metrics.snapshot` returns task-tree and pool counters; task-centric keys: `active_agent_tasks`, `active_tool_tasks`, `active_workflow_tasks`, `active_rag_tasks`, `active_llm_tasks`, `task_wait_time_p50_ms`, `task_wait_time_p95_ms`, `task_run_time_p50_ms`, `task_run_time_p95_ms`, `cancelled_tasks`, `failed_tasks`, `non_yield_threshold_violation_count`; pool/event-loop keys remain for backward compatibility; `Runtime#task_snapshot` exposes task-centric metrics directly | Beta |
65
+ | **`Phronomy::Metrics`** — `Phronomy::Metrics.snapshot` returns task-tree and pool counters; task-centric keys: `active_agent_tasks`, `active_tool_tasks`, `active_workflow_tasks`, `active_llm_tasks`, `task_wait_time_p50_ms`, `task_wait_time_p95_ms`, `task_run_time_p50_ms`, `task_run_time_p95_ms`, `cancelled_tasks`, `failed_tasks`, `non_yield_threshold_violation_count`; pool/event-loop keys remain for backward compatibility; `Runtime#task_snapshot` exposes task-centric metrics directly | Beta |
66
66
  | **`Phronomy.with_configuration` / `Phronomy.reset_runtime!`** — Scoped configuration override and full runtime reset for test isolation | Beta |
67
67
 
68
68
  **Agent patterns**
@@ -131,8 +131,8 @@ Install additional gems only for the features you use:
131
131
 
132
132
  | Gem | Required for |
133
133
  |-----|-------------|
134
- | `pgvector` | `Phronomy::Agent::Context::Knowledge::VectorStore::Pgvector` |
135
- | `redis` | `Phronomy::Agent::Context::Knowledge::VectorStore::RedisSearch` |
134
+ | `pgvector` | `Phronomy::VectorStore::Pgvector` |
135
+ | `redis` | `Phronomy::VectorStore::RedisSearch` |
136
136
  | `opentelemetry-api` | `Phronomy::Tracing::OpenTelemetryTracer` |
137
137
 
138
138
  ## Quick Start
@@ -140,7 +140,7 @@ Install additional gems only for the features you use:
140
140
  ### Agent — ReAct tool-calling agent
141
141
 
142
142
  ```ruby runnable
143
- class WebSearch < Phronomy::Tool::Base
143
+ class WebSearch < Phronomy::Agent::Context::Capability::Base
144
144
  description "Search the web"
145
145
  param :query, type: :string, desc: "Search query"
146
146
 
@@ -216,10 +216,10 @@ transition from: :run_agent, on: :child_failed, to: :handle_error
216
216
 
217
217
  ### Multi-Agent — Agent-as-Tool pattern
218
218
 
219
- Wrap sub-agents as `Tool::Base` subclasses so the orchestrator LLM can call them on demand.
219
+ Wrap sub-agents as `Agent::Context::Capability::Base` subclasses so the orchestrator LLM can call them on demand.
220
220
 
221
221
  ```ruby
222
- class ResearchTool < Phronomy::Tool::Base
222
+ class ResearchTool < Phronomy::Agent::Context::Capability::Base
223
223
  description "Research a topic and return key findings as bullet points."
224
224
  param :topic, type: :string, desc: "The topic to research"
225
225
 
@@ -233,7 +233,7 @@ class WriterAgent < Phronomy::Agent::Base
233
233
  instructions "You are a professional technical writer."
234
234
  end
235
235
 
236
- class WriteTool < Phronomy::Tool::Base
236
+ class WriteTool < Phronomy::Agent::Context::Capability::Base
237
237
  description "Write a technical blog post given research notes and a writing brief."
238
238
  param :instructions, type: :string, desc: "Writing brief including research notes"
239
239
 
@@ -280,35 +280,25 @@ end
280
280
  > that logic must be implemented by the application. Reference implementations for
281
281
  > common patterns are available in `phronomy-examples` (example 06).
282
282
 
283
- ### Knowledge/RAG — Context injection and vector retrieval
283
+ ### Knowledge — Static context injection
284
284
 
285
285
  ```ruby
286
286
  # Static knowledge (policy files, reference docs)
287
- policy = Phronomy::Agent::Context::Knowledge::Source::StaticKnowledge.new(
287
+ policy = Phronomy::Agent::Context::Knowledge::StaticKnowledge.new(
288
288
  File.read("policy.md"),
289
289
  type: :policy,
290
290
  source: "policy.md" # exposed to LLM for citation
291
291
  )
292
292
 
293
- # RAG retrieval from a vector store
294
- store = Phronomy::Agent::Context::Knowledge::VectorStore::InMemory.new
295
- embeddings = Phronomy::Agent::Context::Knowledge::Embeddings::RubyLLMEmbeddings.new(model: "text-embedding-3-small")
296
-
297
- # Add documents before querying
298
- text1 = "Refunds are processed within 5 business days."
299
- text2 = "Contact support@example.com for refund requests."
300
- store.add(id: "doc-1", embedding: embeddings.embed(text1), metadata: { content: text1, source: "policy.md" })
301
- store.add(id: "doc-2", embedding: embeddings.embed(text2), metadata: { content: text2, source: "policy.md" })
302
-
303
- rag = Phronomy::Agent::Context::Knowledge::Source::RAGKnowledge.new(store: store, embeddings: embeddings, k: 5)
304
-
305
- # Inject at invocation time
306
- result = MyAgent.new.invoke("What is the refund policy?",
307
- config: { knowledge_sources: [policy, rag] })
293
+ # Inject at invocation time via the agent DSL
294
+ class MyAgent < Phronomy::Agent::Base
295
+ model "gpt-4o-mini"
296
+ knowledge policy
297
+ end
308
298
  ```
309
299
 
310
- `static_knowledge_refresh!` invalidates the class-level cache of *static* knowledge sources
311
- (not RAG stores). Call it when the underlying file or content has changed:
300
+ `static_knowledge_refresh!` invalidates the class-level cache of static knowledge sources.
301
+ Call it when the underlying file or content has changed:
312
302
 
313
303
  ```ruby
314
304
  # Static knowledge sources are cached at the class level after the first fetch.
@@ -319,8 +309,8 @@ MyAgent.static_knowledge_refresh!
319
309
  Load and split documents with built-in loaders:
320
310
 
321
311
  ```ruby
322
- chunks = Phronomy::Agent::Context::Knowledge::Loader::MarkdownLoader.new.load("docs/guide.md")
323
- .then { |docs| Phronomy::Agent::Context::Knowledge::Splitter::RecursiveSplitter.new(chunk_size: 512).split(docs) }
312
+ chunks = Phronomy::VectorStore::Loader::MarkdownLoader.new.load("docs/guide.md")
313
+ .then { |docs| Phronomy::VectorStore::Splitter::RecursiveSplitter.new(chunk_size: 512).split(docs) }
324
314
  ```
325
315
 
326
316
  ### Multi-Agent Handoff — Hub-and-spoke routing
@@ -539,7 +529,7 @@ end
539
529
  ### MCP Tool — External tool servers
540
530
 
541
531
  ```ruby
542
- search_tool = Phronomy::Tool::McpTool.from_server(
532
+ search_tool = Phronomy::Tools::Mcp.from_server(
543
533
  "stdio://./mcp-server",
544
534
  tool_name: "web_search"
545
535
  )
@@ -723,8 +713,8 @@ registry the budget is silently skipped.
723
713
  ### CancellationToken — Cooperative cancellation
724
714
 
725
715
  Pass a `CancellationToken` to any agent via `config: { cancellation_token: token }`.
726
- Cancellation is checked at multiple granular checkpoints: before the LLM call, before
727
- each RAG knowledge-source fetch, after each streaming chunk, before each parallel
716
+ Cancellation is checked at multiple granular checkpoints: before the LLM call,
717
+ after each streaming chunk, before each parallel
728
718
  tool-call batch, and after each `before_completion` hook. `CancellationError` is
729
719
  raised immediately and is never retried. No threads are force-killed — `ensure`
730
720
  blocks always execute.
@@ -5,5 +5,5 @@
5
5
  "dispatch_parallel_10": 886.0,
6
6
  "cancellation_token_cancelled": 4335060.97443425,
7
7
  "cancellation_token_raise_if_cancelled_noop": 3566903.189098373,
8
- "trim_context_remove_2000": 1761.5700678986254
8
+ "trim_messages_2000": 2896552.0
9
9
  }
@@ -53,7 +53,7 @@ class BenchStubChat
53
53
  end
54
54
 
55
55
  # A stub tool that does nothing but conforms to the Agent::Context::Capability::Base interface.
56
- class BenchNullTool < Phronomy::Tool::Base
56
+ class BenchNullTool < Phronomy::Agent::Context::Capability::Base
57
57
  description "No-op benchmark tool"
58
58
  param :x, type: :string, desc: "input"
59
59
 
@@ -41,6 +41,14 @@ Benchmark.bm(40) do |x|
41
41
  end
42
42
 
43
43
  x.report("build(1000 msgs, 10 chunks, budgeted)") do
44
- (BENCH_ASM_ITERATIONS / 10).times { make_assembler(n_messages: 1000, n_chunks: 10, with_budget: true).build }
44
+ (BENCH_ASM_ITERATIONS / 10).times do
45
+ # Assembler raises ContextLengthError when messages exceed the budget;
46
+ # callers (e.g. Agent::Base#build_context) are expected to pre-trim via
47
+ # trim_to_budget before calling build. The rescue here swallows that expected
48
+ # error so the benchmark keeps running when the budget is exceeded instead of aborting.
49
+ make_assembler(n_messages: 1000, n_chunks: 10, with_budget: true).build
50
+ rescue Phronomy::ContextLengthError
51
+ # expected — budget exceeded
52
+ end
45
53
  end
46
54
  end
@@ -62,7 +62,7 @@ end
62
62
  # ---------------------------------------------------------------------------
63
63
  # Target 3: Agent::Context::Capability::Base#params_schema generation (10 params)
64
64
  # ---------------------------------------------------------------------------
65
- tool_class = Class.new(Phronomy::Tool::Base) do
65
+ tool_class = Class.new(Phronomy::Agent::Context::Capability::Base) do
66
66
  description "Test tool with 10 params"
67
67
  param :p1, type: :string, desc: "param 1"
68
68
  param :p2, type: :string, desc: "param 2"
@@ -130,18 +130,18 @@ t6 = Benchmark.measure("CancellationToken#raise_if_cancelled! (no-op)") do
130
130
  end
131
131
 
132
132
  # ---------------------------------------------------------------------------
133
- # Target 7: Context::TrimContext#remove on a 2000-element history
133
+ # Target 7: Agent::Base#trim_messages on a 2000-message history
134
134
  # ---------------------------------------------------------------------------
135
135
  BenchMsg = Struct.new(:content) unless defined?(BenchMsg)
136
136
 
137
- TRIM_ELEMENTS = Array.new(2_000) { |i| {seq: i, message: BenchMsg.new("msg #{i}"), tokens: 10, role: :user} }
138
- TRIM_BUDGET = Phronomy::LlmContextWindow::TokenBudget.new(context_window: 4096, max_output_tokens: 512)
137
+ TRIM_MESSAGES = Array.new(2_000) { |i| BenchMsg.new("msg #{i}") }
139
138
  TRIM_ITERATIONS = 500
140
139
 
141
- t7 = Benchmark.measure("TrimContext#remove (2000-element history)") do
140
+ bench_trim_agent = Class.new(Phronomy::Agent::Base).new
141
+
142
+ t7 = Benchmark.measure("Agent::Base#trim_messages (2000-msg history)") do
142
143
  TRIM_ITERATIONS.times do
143
- tc = Phronomy::Agent::Context::Conversation::TrimContext.new(message_elements: TRIM_ELEMENTS, budget: TRIM_BUDGET)
144
- tc.remove((0...200).to_a) # remove 200 oldest messages
144
+ bench_trim_agent.send(:trim_messages, TRIM_MESSAGES, keep: 1_800)
145
145
  end
146
146
  end
147
147
 
@@ -159,7 +159,7 @@ metrics = {
159
159
  "dispatch_parallel_10" => [t4, PARALLEL_ITERATIONS],
160
160
  "cancellation_token_cancelled" => [t5, 8 * CANCEL_ITERATIONS],
161
161
  "cancellation_token_raise_if_cancelled_noop" => [t6, RAISE_ITERATIONS],
162
- "trim_context_remove_2000" => [t7, TRIM_ITERATIONS]
162
+ "trim_messages_2000" => [t7, TRIM_ITERATIONS]
163
163
  }
164
164
 
165
165
  REGRESSION_RESULTS = {} # rubocop:disable Style/MutableConstant
@@ -11,7 +11,7 @@ require_relative "../lib/phronomy"
11
11
 
12
12
  # --- Tool schema ---
13
13
 
14
- class BenchTool10Params < Phronomy::Tool::Base
14
+ class BenchTool10Params < Phronomy::Agent::Context::Capability::Base
15
15
  description "A tool with 10 parameters for benchmarking purposes"
16
16
  param :param1, type: :string, desc: "First parameter"
17
17
  param :param2, type: :integer, desc: "Second parameter"
@@ -43,7 +43,7 @@ end
43
43
 
44
44
  # --- static_knowledge_chunks cache ---
45
45
 
46
- class BenchKnowledgeSource < Phronomy::Agent::Context::Knowledge::Source::Base
46
+ class BenchKnowledgeSource < Phronomy::Agent::Context::Knowledge::Base
47
47
  def fetch(query: nil)
48
48
  [{content: "Cached knowledge fact.", type: :static}]
49
49
  end
@@ -28,7 +28,7 @@ BENCH_VS_ITERS = {100 => 100, 1_000 => 20, 10_000 => 5}.freeze
28
28
  puts "=== bench_vector_store_inmemory ==="
29
29
  Benchmark.bm(35) do |x|
30
30
  [100, 1_000, 10_000].each do |n|
31
- store = Phronomy::Agent::Context::Knowledge::VectorStore::InMemory.new(dimension: DIM)
31
+ store = Phronomy::VectorStore::InMemory.new(dimension: DIM)
32
32
  populate(store, n)
33
33
  iters = BENCH_VS_ITERS[n]
34
34
 
@@ -0,0 +1,224 @@
1
+ # ADR-011: build_context as the Single Authority for LLM Input
2
+
3
+ ## Status
4
+
5
+ Proposed — 2026-05-31
6
+
7
+ ## Context
8
+
9
+ ### Background
10
+
11
+ `Agent::Base#build_context` was introduced as a hook for subclasses to customise
12
+ the system prompt and conversation history passed to the LLM. Its original return
13
+ value was `{ system: String|nil, messages: Array }`, covering only two of the four
14
+ conceptual regions of an LLM context window.
15
+
16
+ `LlmContextWindow::Assembler` documents the four regions explicitly:
17
+
18
+ ```
19
+ 1. Instruction — system prompt text
20
+ 2. Capability — tool definitions
21
+ 3. Knowledge — external facts (XML context tags)
22
+ 4. Conversation — conversation messages
23
+ ```
24
+
25
+ However, the Assembler itself states Region 2 is "handled by RubyLLM, not here",
26
+ leaving tool registration entirely outside the `build_context` path.
27
+
28
+ ### Problems identified
29
+
30
+ **P1 — Tool definitions are not part of `build_context` output**
31
+
32
+ Tools were registered with `chat.with_tool(tc)` *after* `build_context` returned,
33
+ directly in `InvocationPipeline`, `_stream_impl`, and `ReactAgent#step`.
34
+ This means a subclass that overrides `build_context` cannot control which tools
35
+ are actually sent to the LLM; tools are always added behind its back.
36
+
37
+ **P2 — `_handoff_tools` bypass `build_context` entirely**
38
+
39
+ `Runner` adds handoff tools via `_add_handoff_tool` onto the agent instance.
40
+ These were registered with `chat.with_tool(tc)` at every call site, separately
41
+ from `context[:tool_classes]`, without going through `build_context` at all.
42
+ Even if a subclass override returned a modified tool list, handoff tools would
43
+ still be added unconditionally.
44
+
45
+ **P3 — Tool token cost excluded from budget calculation**
46
+
47
+ LLM providers (OpenAI, Anthropic, Gemini) count tool schema tokens against the
48
+ context window. The `TokenBudget` / `Assembler` pipeline never subtracted tool
49
+ tokens from the available budget before trimming conversation messages. This
50
+ caused the budget calculation to be consistently optimistic: the `effective_input_limit`
51
+ was always larger than the tokens actually available for messages, risking context
52
+ window overflow on long conversations with many or complex tools.
53
+
54
+ The existing `context_overhead` DSL was a manual workaround:
55
+
56
+ ```ruby
57
+ class MyAgent < Phronomy::Agent::Base
58
+ context_overhead 800 # developer guesses tool token cost
59
+ end
60
+ ```
61
+
62
+ This is inaccurate by design and should not be necessary.
63
+
64
+ **P4 — RAG fetch called inside `build_context` on every invocation**
65
+
66
+ `build_context` called `fetch_knowledge_chunks` dynamically. In a ReAct loop
67
+ with N iterations, RAG was fetched N times for the same query. More importantly,
68
+ dynamic per-call RAG fetch is architecturally misplaced:
69
+
70
+ - Knowledge fetched by RAG and injected as Region 3 context belongs to the
71
+ *agent's knowledge*, not to the per-invocation message flow.
72
+ - If the LLM needs to retrieve information dynamically, the correct mechanism is
73
+ **function calling**: the LLM calls a retrieval tool, and the result appears in
74
+ the conversation log as a tool result message (Region 4).
75
+ - Static knowledge that the agent always needs should be registered once at
76
+ agent initialisation time, not re-fetched on every `build_context` call.
77
+
78
+ **P5 — `build_capability_tool_classes` is redundant indirection**
79
+
80
+ `build_capability_tool_classes` was introduced as a narrower override hook to
81
+ avoid requiring subclasses to copy `build_context` just to change tool selection.
82
+ However, it has no documentation, no usage examples, and provides no capability
83
+ that overriding `build_context` itself does not already provide. Its existence
84
+ adds a public API surface and conceptual overhead without commensurate value.
85
+
86
+ **P6 — No access to previous context**
87
+
88
+ `build_context` builds from scratch every call with no knowledge of what was sent
89
+ to the LLM in the previous call. This prevents:
90
+ - Token cache hit optimisations (OpenAI prompt caching, Anthropic `cache_control`)
91
+ which require a stable prompt prefix
92
+ - Incremental context strategies that avoid recomputing unchanged regions
93
+
94
+ ## Decision
95
+
96
+ ### D1 — `build_context` is the single authority for all LLM input
97
+
98
+ **Nothing may be added to or removed from the LLM request outside of
99
+ `build_context`.** Every call site (`InvocationPipeline`, `_stream_impl`,
100
+ `ReactAgent#step`, `ReactAgent#stream_step`) must:
101
+
102
+ 1. Call `build_context` to obtain `{ system:, messages:, tool_classes: }`.
103
+ 2. Apply the result to `chat` — and *only* the result.
104
+ 3. Not register any additional tools, messages, or instructions independently.
105
+
106
+ ### D2 — Assembler handles all four regions including Capability
107
+
108
+ `LlmContextWindow::Assembler` gains `add_capability(tool_classes)`:
109
+
110
+ ```ruby
111
+ assembler.add_capability(tools) # Region 2
112
+ ```
113
+
114
+ Responsibilities of `add_capability`:
115
+
116
+ 1. Store `tool_classes` for pass-through in `build` return value.
117
+ 2. Serialise each tool's schema (via RubyLLM's provider-specific `tool_for` /
118
+ `function_declaration_for`) and estimate its token cost.
119
+ 3. Add that cost to the `used` token count before conversation message trimming.
120
+
121
+ `build` return value expands to:
122
+
123
+ ```ruby
124
+ { system: String|nil, messages: Array, tool_classes: Array }
125
+ ```
126
+
127
+ ### D3 — `build_context` includes all tools (user tools + handoff tools)
128
+
129
+ `build_context` passes `self.class.tools + _handoff_tools` to
130
+ `assembler.add_capability`. `_handoff_tools` are framework-managed routing tools;
131
+ they are always included and are not subject to user-level filtering.
132
+
133
+ Subclasses that need dynamic tool selection override `build_context` and call
134
+ `assembler.add_capability` with their own selection logic.
135
+
136
+ `build_capability_tool_classes` is **removed** (P5 resolution).
137
+
138
+ ### D4 — `fetch_knowledge_chunks` is removed from `build_context`
139
+
140
+ Knowledge enters Region 3 through exactly two paths:
141
+
142
+ **Path A — Agent initialisation (static knowledge)**
143
+
144
+ ```ruby
145
+ class MyAgent < Phronomy::Agent::Base
146
+ knowledge "The capital of Japan is Tokyo.", type: :entity
147
+ end
148
+ ```
149
+
150
+ Registered once; the Assembler always includes it.
151
+
152
+ **Path B — Per-invocation dynamic knowledge via `config[:knowledge_sources]`**
153
+
154
+ The caller passes knowledge sources in the invocation config:
155
+
156
+ ```ruby
157
+ agent.invoke(input, config: { knowledge_sources: [my_rag_source] })
158
+ ```
159
+
160
+ `fetch_knowledge_chunks` is called exactly **once per `invoke`** (outside `build_context`),
161
+ not once per LLM call within a ReAct loop. The result is cached on the agent
162
+ instance for the duration of that invocation.
163
+
164
+ This is a caller responsibility: if the caller needs fresh knowledge on every
165
+ `invoke`, it passes new sources. Within a single `invoke`, knowledge is stable.
166
+
167
+ ### D5 — Previous context stored as instance variable
168
+
169
+ After each `build_context` call, the result is stored:
170
+
171
+ ```ruby
172
+ @last_context = { system: ..., messages: ..., tool_classes: ... }
173
+ ```
174
+
175
+ `build_context` may reference `@last_context` for optimisations such as:
176
+
177
+ - Detecting that `system` and `tool_classes` are unchanged → skip regeneration
178
+ of the stable prefix to improve LLM provider token cache hit rate.
179
+ - Skipping Assembler work when the context is provably identical to the last call.
180
+
181
+ `@last_context` is **not** passed as a method parameter; it is read from the
182
+ instance. This avoids changing call-site signatures.
183
+
184
+ Note: `Agent` instances are not thread-safe (already documented). `@last_context`
185
+ inherits this constraint — concurrent invocations on the same instance are not
186
+ supported.
187
+
188
+ ## Consequences
189
+
190
+ ### Token budget accuracy
191
+
192
+ With D2, `effective_input_limit` correctly reflects the tokens actually available
193
+ for conversation messages after system prompt, tool schemas, and knowledge are
194
+ accounted for. `context_overhead` becomes unnecessary for tool costs; it may
195
+ still be used as a manual reserve for provider-specific overhead not captured by
196
+ schema serialisation.
197
+
198
+ ### `build_context` as the integration surface
199
+
200
+ Subclasses override `build_context` for all customisation: tool selection,
201
+ knowledge injection, system prompt variants, context compression strategies.
202
+ There is one integration point, not several.
203
+
204
+ ### RAG fetch frequency
205
+
206
+ `fetch_knowledge_chunks` runs at most once per `invoke` call (P4 resolution).
207
+ In ReAct loops with N iterations, RAG is fetched once, not N times.
208
+
209
+ ### Removed API
210
+
211
+ `build_capability_tool_classes` is removed. It was never documented or used
212
+ outside of internal framework code, so no documented public API is broken.
213
+
214
+ ## Migration notes
215
+
216
+ - All call sites (`InvocationPipeline`, `_stream_impl`, `ReactAgent#step`, `ReactAgent#stream_step`)
217
+ must be updated to remove the separate `_handoff_tools` registration lines and
218
+ rely solely on `context[:tool_classes]`.
219
+ - `Assembler#add_capability` and the token estimation for tool schemas must be
220
+ implemented.
221
+ - `build_context` must be updated to pass all tools to `assembler.add_capability`
222
+ and to cache `@last_context`.
223
+ - `fetch_knowledge_chunks` must be lifted out of `build_context` into the
224
+ invocation-scoped cache described in D4.