phronomy 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +40 -4
  3. data/README.md +32 -41
  4. data/benchmark/baseline.json +1 -1
  5. data/benchmark/bench_agent_invoke.rb +1 -1
  6. data/benchmark/bench_context_assembler.rb +9 -1
  7. data/benchmark/bench_regression.rb +8 -8
  8. data/benchmark/bench_tool_schema.rb +2 -2
  9. data/benchmark/bench_vector_store.rb +1 -1
  10. data/docs/decisions/011-build-context-as-single-llm-input-authority.md +224 -0
  11. data/lib/phronomy/agent/base.rb +328 -366
  12. data/lib/phronomy/agent/checkpoint.rb +30 -1
  13. data/lib/phronomy/agent/checkpoint_store.rb +97 -0
  14. data/lib/phronomy/agent/concerns/retryable.rb +1 -1
  15. data/lib/phronomy/agent/concerns/suspendable.rb +63 -8
  16. data/lib/phronomy/agent/context/capability/base.rb +689 -0
  17. data/lib/phronomy/agent/context/capability/scope_policy.rb +54 -0
  18. data/lib/phronomy/agent/context/knowledge/base.rb +58 -0
  19. data/lib/phronomy/agent/context/knowledge/entity_knowledge.rb +102 -0
  20. data/lib/phronomy/agent/context/knowledge/static_knowledge.rb +58 -0
  21. data/lib/phronomy/agent/shared_state.rb +2 -2
  22. data/lib/phronomy/agent/tool_executor.rb +1 -1
  23. data/lib/phronomy/concurrency/gate_registry.rb +0 -1
  24. data/lib/phronomy/configuration.rb +13 -6
  25. data/lib/phronomy/event_loop.rb +1 -18
  26. data/lib/phronomy/llm_context_window/assembler.rb +77 -44
  27. data/lib/phronomy/multi_agent/handoff.rb +4 -4
  28. data/lib/phronomy/multi_agent/orchestrator.rb +1 -1
  29. data/lib/phronomy/multi_agent/team_coordinator.rb +2 -2
  30. data/lib/phronomy/runtime/runtime_metrics.rb +0 -1
  31. data/lib/phronomy/runtime.rb +1 -2
  32. data/lib/phronomy/tool.rb +3 -4
  33. data/lib/phronomy/{tool/agent_tool.rb → tools/agent.rb} +8 -9
  34. data/lib/phronomy/{tool/mcp_tool.rb → tools/mcp.rb} +9 -9
  35. data/lib/phronomy/tools/vector_search.rb +70 -0
  36. data/lib/phronomy/vector_store/async_backend.rb +110 -0
  37. data/lib/phronomy/vector_store/base.rb +89 -0
  38. data/lib/phronomy/vector_store/embeddings/base.rb +41 -0
  39. data/lib/phronomy/vector_store/embeddings/ruby_llm_embeddings.rb +47 -0
  40. data/lib/phronomy/vector_store/in_memory.rb +103 -0
  41. data/lib/phronomy/vector_store/loader/base.rb +27 -0
  42. data/lib/phronomy/vector_store/loader/csv_loader.rb +58 -0
  43. data/lib/phronomy/vector_store/loader/markdown_loader.rb +78 -0
  44. data/lib/phronomy/vector_store/loader/plain_text_loader.rb +24 -0
  45. data/lib/phronomy/vector_store/pgvector.rb +127 -0
  46. data/lib/phronomy/vector_store/redis_search.rb +192 -0
  47. data/lib/phronomy/vector_store/splitter/base.rb +49 -0
  48. data/lib/phronomy/vector_store/splitter/fixed_size_splitter.rb +53 -0
  49. data/lib/phronomy/vector_store/splitter/recursive_splitter.rb +107 -0
  50. data/lib/phronomy/vector_store.rb +16 -4
  51. data/lib/phronomy/version.rb +1 -1
  52. data/lib/phronomy/workflow/fsm_session.rb +249 -0
  53. data/lib/phronomy/workflow/phase_machine_builder.rb +247 -0
  54. data/lib/phronomy/workflow_runner.rb +2 -2
  55. data/lib/phronomy.rb +10 -3
  56. data/scripts/api_snapshot.rb +11 -10
  57. metadata +31 -37
  58. data/lib/phronomy/agent/context/conversation/compaction_context.rb +0 -117
  59. data/lib/phronomy/agent/context/conversation/trigger_context.rb +0 -43
  60. data/lib/phronomy/agent/context/conversation/trim_context.rb +0 -82
  61. data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +0 -45
  62. data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +0 -51
  63. data/lib/phronomy/agent/context/knowledge/loader/base.rb +0 -31
  64. data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +0 -62
  65. data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +0 -82
  66. data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +0 -28
  67. data/lib/phronomy/agent/context/knowledge/source/base.rb +0 -60
  68. data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +0 -102
  69. data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +0 -63
  70. data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +0 -58
  71. data/lib/phronomy/agent/context/knowledge/splitter/base.rb +0 -53
  72. data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +0 -57
  73. data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +0 -111
  74. data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +0 -116
  75. data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +0 -95
  76. data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +0 -109
  77. data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +0 -133
  78. data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +0 -198
  79. data/lib/phronomy/agent/fsm.rb +0 -157
  80. data/lib/phronomy/agent/invocation_pipeline.rb +0 -99
  81. data/lib/phronomy/agent/lifecycle/fsm_session.rb +0 -251
  82. data/lib/phronomy/agent/lifecycle/phase_machine_builder.rb +0 -249
  83. data/lib/phronomy/agent/react_agent.rb +0 -204
  84. data/lib/phronomy/embeddings.rb +0 -11
  85. data/lib/phronomy/loader.rb +0 -13
  86. data/lib/phronomy/splitter.rb +0 -12
  87. data/lib/phronomy/tool/base.rb +0 -685
  88. data/lib/phronomy/tool/scope_policy.rb +0 -50
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d4410424efcdcdf0ab529106ba2c872bddae9decc322995f37065f426255a05b
4
- data.tar.gz: c9ae0dff7f184244b92fc91c585536f767aded5ff6ea8ecaafe86221863738e8
3
+ metadata.gz: 7e84ccabf84c48e16cdb968c1f7b69f2348b24a70e477aa39bbbe1244d34edfc
4
+ data.tar.gz: f31dc2d1c4ed4bb7717e88278f1ced3debd0177f1f7a8042b170421a5d8e7493
5
5
  SHA512:
6
- metadata.gz: e7afa1749dc1431e27e225dfe7a8eafebb2e781c0e6a6ca6e0bdda9712c22b4b5b68d3a9897bc92b466026c698070045774d79a0197ad0463da3f81ff103b36c
7
- data.tar.gz: be93a29c2b98b2069ef912815e847f0e0115de07bb435e36fbcb834433757fc72442d7c5db129c9f97dd891113ecf75c46606a6295b2e9f3357831c24246d974
6
+ metadata.gz: 1c1ab4d05c27930b84abbad09f5c59027f9bfcddf9a89aa485608afdcd22ba50fcf971c2185a815206edfc37b29abb0fa99b7f80a8fa3f436c1d6a97b5ad38e4
7
+ data.tar.gz: 04016a561705ff24c4a6b9f8bb3d6918c303071f7bf97d94d70313b95f796ae561fee29fad9e7e620928655bf7e2007751cfa217bd973d83d2ad4d26d9754e3e
data/CHANGELOG.md CHANGED
@@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9
9
 
10
10
  ## [Unreleased]
11
11
 
12
+ ---
13
+
14
+ ## [0.9.1] - 2026-06-06
15
+
12
16
  ### Added
13
17
 
14
18
  - **`Phronomy::Diagnostics` and `SchedulerReentrancyError`** (#278, #279):
@@ -174,10 +178,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
174
178
  tasks are treated the same as errors and follow the existing `on_error:` policy (`:raise`
175
179
  or `:skip`).
176
180
 
177
- - **MCP `HttpTransport` custom authentication headers** (#144): `McpTool.from_server` now
178
- accepts `headers: {}`, forwarded all the way to `HttpTransport#initialize`. Arbitrary
179
- headers (e.g. `Authorization: Bearer …`) are injected into every JSON-RPC request,
180
- enabling use of MCP servers that require bearer tokens or API keys.
181
+ - **MCP `HttpTransport` custom authentication headers** (#144): `Phronomy::Tools::Mcp::HttpTransport#initialize`
182
+ now accepts `headers: {}`. Arbitrary headers (e.g. `Authorization: Bearer …`) are injected
183
+ into every JSON-RPC request, enabling use of MCP servers that require bearer tokens or
184
+ API keys. Threading `headers:` through `Mcp.from_server` is tracked in issue #144 and
185
+ pending in PR #151.
181
186
 
182
187
  - **`StdioTransport` — `env:`, `cwd:`, and `startup_timeout:` options** (#145):
183
188
  Three new keyword arguments are now accepted when constructing a `StdioTransport` (and
@@ -226,8 +231,39 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
226
231
  `dispatch_parallel` and `fan_out` accept `cancellation_token:` and automatically
227
232
  inject it into every worker task's config unless the task already supplies its own.
228
233
 
234
+ ### Added (post-v0.9.0)
235
+
236
+ - **`Phronomy::Agent::CheckpointStore` — idempotency store for HITL resume** (post-v0.9.0):
237
+ New in-memory store tracks consumed checkpoint IDs. Calling `Agent::Base#resume` twice
238
+ with the same checkpoint raises `Phronomy::CheckpointAlreadyResumedError` instead of
239
+ silently re-executing the approved tool. Custom stores can be injected via
240
+ `agent.checkpoint_store = MyRedis::CheckpointStore.new`. Duck-type contract:
241
+ `consumed?(id)`, `consume!(id)`, and optionally `cleanup!(id)` / `clear!`.
242
+
243
+ - **`checkpoint_id`, `agent_class`, `requested_at` on `Checkpoint`; `Agent::Base.resume` class method** (post-v0.9.0):
244
+ `Checkpoint` now carries a UUID `checkpoint_id` (idempotency key), `agent_class`
245
+ (fully-qualified class name), and `requested_at` (UTC timestamp). The new class-level
246
+ `Agent::Base.resume(checkpoint, approved:)` method instantiates the correct agent class
247
+ automatically and delegates to `#resume`, simplifying job-queue resume flows.
248
+
249
+ - **`CheckpointStore#cleanup!` and `#clear!`** (post-v0.9.0):
250
+ Optional methods on the `CheckpointStore` duck-type contract. `cleanup!(checkpoint_id)`
251
+ removes a single checkpoint entry; `clear!` wipes all tracking state.
252
+
229
253
  ### Removed
230
254
 
255
+ - **`Phronomy::ReactAgent` class removed** (post-v0.9.0):
256
+ Use `Phronomy::Agent::Base` directly. `ReactAgent` had no distinct public API beyond
257
+ `Agent::Base` and was not listed in the stability table.
258
+
259
+ - **`Phronomy::Agent::FSM` class removed** (post-v0.9.0, internal):
260
+ The agent invocation path is now unified through `Agent::Base#invoke` with inline logic.
261
+ No public API impact.
262
+
263
+ - **`Phronomy::Agent::Lifecycle::FSMSession` and `::PhaseMachineBuilder` moved to `Workflow` namespace** (post-v0.9.0, internal):
264
+ These internal classes now live at `Phronomy::Workflow::FSMSession` and
265
+ `Phronomy::Workflow::PhaseMachineBuilder`. No public API impact.
266
+
231
267
  - **BREAKING: `Agent::Base#run_as_child` drops `&result_writer` block parameter** (#265):
232
268
  The optional block form `run_as_child(input, ctx: ctx) { |r| ctx.answer = r[:output] }`
233
269
  is no longer supported. The result is now delivered **exclusively** as the
data/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
  > We apologise for the instability this may cause.
8
8
 
9
9
  **Phronomy** is a Ruby AI agent framework inspired by open-source AI agent frameworks.
10
- It provides composable building blocks — Workflows, Agents, Tools, Guardrails, RAG, and Tracing — all powered by [RubyLLM](https://github.com/crmne/ruby_llm) for LLM abstraction.
10
+ It provides composable building blocks — Workflows, Agents, Tools, Guardrails, and Tracing — all powered by [RubyLLM](https://github.com/crmne/ruby_llm) for LLM abstraction.
11
11
 
12
12
  ## Features
13
13
 
@@ -30,10 +30,10 @@ It provides composable building blocks — Workflows, Agents, Tools, Guardrails,
30
30
  | **Workflow action_timeout** — Per-state `action_timeout:` keyword on `state` DSL; cancels Task-returning entry actions that exceed the limit and raises `Phronomy::ActionTimeoutError` | Beta |
31
31
  | **Agent** — ReAct-style tool-calling agents with guardrails and conversation history | Stable |
32
32
  | **Before-Completion Hook** — Three-tier LLM parameter injection | Stable |
33
- | **Context Management** — Token budget calculation, estimation, and pruning | Stable |
33
+ | **Context Management** — Token budget calculation, estimation, and pruning; `Agent::Base` protected hooks: `build_context` (overridable), `trim_messages`, `trim_to_budget`, `compact_messages`, `budget_exceeded?`, `drop_messages_over` | Stable |
34
34
  | **Guardrails** — Input/output validation with custom `InputGuardrail`/`OutputGuardrail` | Beta |
35
35
  | **`PromptInjectionGuardrail`** — Built-in `InputGuardrail` subclass that detects prompt-injection patterns; usable standalone or as part of a guardrail chain | Beta |
36
- | **`Tool::Base.redact_params` / `.max_result_size`** — Class-level DSL: `redact_params` masks parameter values in log/trace output; `max_result_size` truncates oversized tool results before they reach the LLM | Beta |
36
+ | **`Agent::Context::Capability::Base.redact_params` / `.max_result_size`** — Class-level DSL: `redact_params` masks parameter values in log/trace output; `max_result_size` truncates oversized tool results before they reach the LLM | Beta |
37
37
  | **Output Parser** — JSON and Struct-mapped parsers for structured LLM responses | Stable |
38
38
  | **Eval Framework** — Dataset-driven evaluation with multiple scorer types | Beta |
39
39
  | **Tracing** — Pluggable span-based observability | Stable |
@@ -43,11 +43,11 @@ It provides composable building blocks — Workflows, Agents, Tools, Guardrails,
43
43
 
44
44
  | Feature | Stability |
45
45
  |---|---|
46
- | **Knowledge/RAG** — Retrieval sources with pluggable loaders, splitters, and vector stores; `static_knowledge_refresh!` for runtime cache invalidation | Beta |
46
+ | **Knowledge** — Static context injection with pluggable loaders, splitters, and vector stores; `static_knowledge_refresh!` for runtime cache invalidation | Beta |
47
47
  | **`VectorStore#size`** — Returns document count for all three backends (InMemory, RedisSearch, Pgvector) | Beta |
48
- | **`Agent::Context::Knowledge::VectorStore::AsyncBackend` mixin** — Pluggable async interface for `VectorStore`; default pool-backed implementations for `search_async`, `add_async`, `remove_async`, `clear_async`; backends with native async drivers override individual methods to bypass `BlockingAdapterPool` entirely; all existing backends remain unchanged | Beta |
49
- | **Parallel RAG multi-source fetch** — `Agent#build_context` fetches all `knowledge_sources` concurrently via `TaskGroup`; `config[:rag_failure_policy]` `:skip` (default) silently ignores failed sources so the agent answers with partial context, `:fail` surfaces the first error; per-source latency is emitted to `Phronomy.configuration.logger` at debug level | Beta |
50
- | **MCP Tool** — Model Context Protocol server integration | Beta |
48
+ | **`VectorStore::AsyncBackend` mixin** — Pluggable async interface for `VectorStore`; default pool-backed implementations for `search_async`, `add_async`, `remove_async`, `clear_async`; backends with native async drivers override individual methods to bypass `BlockingAdapterPool` entirely; all existing backends remain unchanged | Beta |
49
+ | **MCP Tool** — `Phronomy::Tools::Mcp`: Model Context Protocol server integration; `Phronomy::Tools::Agent`: wraps an agent class as a callable tool via `from_agent` | Beta |
50
+ | **Vector Search Tool** — `Phronomy::Tools::VectorSearch`: wraps a `VectorStore` and `Embeddings` adapter as a callable agent tool via `from_store` | Beta |
51
51
 
52
52
  **Execution and reliability**
53
53
 
@@ -55,14 +55,14 @@ It provides composable building blocks — Workflows, Agents, Tools, Guardrails,
55
55
  |---|---|
56
56
  | **Workflow EventLoop Mode** — Opt-in event-driven execution: `Phronomy.configure { \|c\| c.event_loop = true }` | Experimental |
57
57
  | **Agent EventLoop Mode** — `Agent#invoke` (non-blocking via EventLoop), `Agent#run_as_child` (child-FSM pattern for Workflow integration), parallel tool dispatch via `ParallelToolChat` | Experimental |
58
- | **`invoke_async` / `call_async`** — `Agent::Base#invoke_async` and `Workflow#invoke_async` return a `Task`; `Tool::Base#call_async` similarly; compatible with EventLoop and standalone contexts | Experimental |
58
+ | **`invoke_async` / `call_async`** — `Agent::Base#invoke_async` and `Workflow#invoke_async` return a `Task`; `Agent::Context::Capability::Base#call_async` similarly; compatible with EventLoop and standalone contexts | Experimental |
59
59
  | **CancellationToken** — Cooperative cancellation via `cancel!`/`cancelled?`/`raise_if_cancelled!`; `timeout_after(seconds)` for monotonic-clock deadlines; optional `deadline:` (wall-clock) for backward compatibility; passed as `config: { cancellation_token: token }` to agents and `dispatch_parallel`; injected into `tool.execute` when the method declares a `cancellation_token:` keyword | Experimental |
60
60
  | **`dispatch_parallel` / `fan_out` `force_kill:` option** — `force_kill: false` (default) leaves timed-out workers running and raises `TimeoutError` immediately; `force_kill: true` restores the old `Thread#kill` behaviour with a `logger.warn` | Beta |
61
- | **`execution_mode` DSL on `Tool::Base`** — Declares how a tool's `execute` should be dispatched: `:cooperative` (same scheduler thread), `:blocking_io` (default; offloaded to `BlockingAdapterPool`), `:cpu_bound`, `:external_process` | Experimental |
61
+ | **`execution_mode` DSL on `Agent::Context::Capability::Base`** — Declares how a tool's `execute` should be dispatched: `:cooperative` (same scheduler thread), `:blocking_io` (default; offloaded to `BlockingAdapterPool`), `:cpu_bound`, `:external_process` | Experimental |
62
62
  | **`invocation_context:` keyword on `Agent#invoke` / `Workflow#invoke`** — Pass a `Phronomy::InvocationContext` directly; `thread_id`, `cancellation_token`, and `deadline`-based timeout are derived from it; `task_id` / `parent_task_id` appear in trace spans automatically; `config:` keys remain supported as backward-compat aliases | Beta |
63
- | **`ConcurrencyGate` — unified backpressure** — Counting semaphore that enforces per-resource concurrency caps (`max_concurrent_agent_tasks`, `max_concurrent_tool_tasks`, `max_concurrent_workflow_tasks`, `max_concurrent_llm_calls`, `max_concurrent_rag_fetches`, `max_concurrent_vector_searches`); configured via `Phronomy.configure`; backpressure behaviour follows the global `backpressure` setting (`:wait`, `:raise`/`:reject`, `:timeout`); `nil` cap = unlimited (default) | Beta |
63
+ | **`ConcurrencyGate` — unified backpressure** — Counting semaphore that enforces per-resource concurrency caps (`max_concurrent_agent_tasks`, `max_concurrent_tool_tasks`, `max_concurrent_workflow_tasks`, `max_concurrent_llm_calls`, `max_concurrent_vector_searches`); configured via `Phronomy.configure`; backpressure behaviour follows the global `backpressure` setting (`:wait`, `:raise`/`:reject`, `:timeout`); `nil` cap = unlimited (default) | Beta |
64
64
  | **Cooperative scheduler yield points** — `Runtime#yield` (cooperative yield; yields the current task's time slice); `Runtime#yield_if_needed(every: N)` (thread-local counter, yields every N calls); CPU-bound detection when `blocking_detect_threshold_ms` is set (warns and increments `non_yield_threshold_violation_count` when a task runs longer than the threshold without yielding); `starvation_threshold_ms` configuration field (default: 50ms) | Beta |
65
- | **`Phronomy::Metrics`** — `Phronomy::Metrics.snapshot` returns task-tree and pool counters; task-centric keys: `active_agent_tasks`, `active_tool_tasks`, `active_workflow_tasks`, `active_rag_tasks`, `active_llm_tasks`, `task_wait_time_p50_ms`, `task_wait_time_p95_ms`, `task_run_time_p50_ms`, `task_run_time_p95_ms`, `cancelled_tasks`, `failed_tasks`, `non_yield_threshold_violation_count`; pool/event-loop keys remain for backward compatibility; `Runtime#task_snapshot` exposes task-centric metrics directly | Beta |
65
+ | **`Phronomy::Metrics`** — `Phronomy::Metrics.snapshot` returns task-tree and pool counters; task-centric keys: `active_agent_tasks`, `active_tool_tasks`, `active_workflow_tasks`, `active_llm_tasks`, `task_wait_time_p50_ms`, `task_wait_time_p95_ms`, `task_run_time_p50_ms`, `task_run_time_p95_ms`, `cancelled_tasks`, `failed_tasks`, `non_yield_threshold_violation_count`; pool/event-loop keys remain for backward compatibility; `Runtime#task_snapshot` exposes task-centric metrics directly | Beta |
66
66
  | **`Phronomy.with_configuration` / `Phronomy.reset_runtime!`** — Scoped configuration override and full runtime reset for test isolation | Beta |
67
67
 
68
68
  **Agent patterns**
@@ -76,6 +76,7 @@ It provides composable building blocks — Workflows, Agents, Tools, Guardrails,
76
76
  | **Agent::TeamCoordinator** — Agent teams pattern: LLM coordinator + stateful workers with sequential task assignment (worker-local message history persisted across tasks) | Beta |
77
77
  | **Agent::SharedState** — Shared state pattern: peer agents collaborate via a shared KnowledgeStore; `member` DSL with per-agent instructions and `coordination` team protocol | Experimental |
78
78
  | **`ScopePolicy`** — Configurable policy callable that maps (tool, scope, agent) to `:allow`/`:approve`/`:reject`; default policy auto-routes high-risk scopes through the approval gate | Experimental |
79
+ | **HITL Checkpoint/Resume** — `Agent::Base#invoke` returns `{ suspended: true, checkpoint: Checkpoint }` when an approval-required tool is encountered without a synchronous handler; `Agent::Base#resume(checkpoint, approved:)` resumes execution; `Agent::Base.resume(checkpoint, approved:)` (class-level) resolves the agent class automatically; `Checkpoint#to_h` / `Checkpoint.from_h` for serialization; `Agent::Base#checkpoint_store=` for custom idempotency backends; `CheckpointAlreadyResumedError` raised on duplicate resume | Experimental |
79
80
 
80
81
  > **Public API boundary**: The tables above are the complete list of classes, modules, and features
81
82
  > intended for gem consumers. Every entry has an associated stability label.
@@ -131,8 +132,8 @@ Install additional gems only for the features you use:
131
132
 
132
133
  | Gem | Required for |
133
134
  |-----|-------------|
134
- | `pgvector` | `Phronomy::Agent::Context::Knowledge::VectorStore::Pgvector` |
135
- | `redis` | `Phronomy::Agent::Context::Knowledge::VectorStore::RedisSearch` |
135
+ | `pgvector` | `Phronomy::VectorStore::Pgvector` |
136
+ | `redis` | `Phronomy::VectorStore::RedisSearch` |
136
137
  | `opentelemetry-api` | `Phronomy::Tracing::OpenTelemetryTracer` |
137
138
 
138
139
  ## Quick Start
@@ -140,7 +141,7 @@ Install additional gems only for the features you use:
140
141
  ### Agent — ReAct tool-calling agent
141
142
 
142
143
  ```ruby runnable
143
- class WebSearch < Phronomy::Tool::Base
144
+ class WebSearch < Phronomy::Agent::Context::Capability::Base
144
145
  description "Search the web"
145
146
  param :query, type: :string, desc: "Search query"
146
147
 
@@ -216,10 +217,10 @@ transition from: :run_agent, on: :child_failed, to: :handle_error
216
217
 
217
218
  ### Multi-Agent — Agent-as-Tool pattern
218
219
 
219
- Wrap sub-agents as `Tool::Base` subclasses so the orchestrator LLM can call them on demand.
220
+ Wrap sub-agents as `Agent::Context::Capability::Base` subclasses so the orchestrator LLM can call them on demand.
220
221
 
221
222
  ```ruby
222
- class ResearchTool < Phronomy::Tool::Base
223
+ class ResearchTool < Phronomy::Agent::Context::Capability::Base
223
224
  description "Research a topic and return key findings as bullet points."
224
225
  param :topic, type: :string, desc: "The topic to research"
225
226
 
@@ -233,7 +234,7 @@ class WriterAgent < Phronomy::Agent::Base
233
234
  instructions "You are a professional technical writer."
234
235
  end
235
236
 
236
- class WriteTool < Phronomy::Tool::Base
237
+ class WriteTool < Phronomy::Agent::Context::Capability::Base
237
238
  description "Write a technical blog post given research notes and a writing brief."
238
239
  param :instructions, type: :string, desc: "Writing brief including research notes"
239
240
 
@@ -280,35 +281,25 @@ end
280
281
  > that logic must be implemented by the application. Reference implementations for
281
282
  > common patterns are available in `phronomy-examples` (example 06).
282
283
 
283
- ### Knowledge/RAG — Context injection and vector retrieval
284
+ ### Knowledge — Static context injection
284
285
 
285
286
  ```ruby
286
287
  # Static knowledge (policy files, reference docs)
287
- policy = Phronomy::Agent::Context::Knowledge::Source::StaticKnowledge.new(
288
+ policy = Phronomy::Agent::Context::Knowledge::StaticKnowledge.new(
288
289
  File.read("policy.md"),
289
290
  type: :policy,
290
291
  source: "policy.md" # exposed to LLM for citation
291
292
  )
292
293
 
293
- # RAG retrieval from a vector store
294
- store = Phronomy::Agent::Context::Knowledge::VectorStore::InMemory.new
295
- embeddings = Phronomy::Agent::Context::Knowledge::Embeddings::RubyLLMEmbeddings.new(model: "text-embedding-3-small")
296
-
297
- # Add documents before querying
298
- text1 = "Refunds are processed within 5 business days."
299
- text2 = "Contact support@example.com for refund requests."
300
- store.add(id: "doc-1", embedding: embeddings.embed(text1), metadata: { content: text1, source: "policy.md" })
301
- store.add(id: "doc-2", embedding: embeddings.embed(text2), metadata: { content: text2, source: "policy.md" })
302
-
303
- rag = Phronomy::Agent::Context::Knowledge::Source::RAGKnowledge.new(store: store, embeddings: embeddings, k: 5)
304
-
305
- # Inject at invocation time
306
- result = MyAgent.new.invoke("What is the refund policy?",
307
- config: { knowledge_sources: [policy, rag] })
294
+ # Inject at invocation time via the agent DSL
295
+ class MyAgent < Phronomy::Agent::Base
296
+ model "gpt-4o-mini"
297
+ knowledge policy
298
+ end
308
299
  ```
309
300
 
310
- `static_knowledge_refresh!` invalidates the class-level cache of *static* knowledge sources
311
- (not RAG stores). Call it when the underlying file or content has changed:
301
+ `static_knowledge_refresh!` invalidates the class-level cache of static knowledge sources.
302
+ Call it when the underlying file or content has changed:
312
303
 
313
304
  ```ruby
314
305
  # Static knowledge sources are cached at the class level after the first fetch.
@@ -319,8 +310,8 @@ MyAgent.static_knowledge_refresh!
319
310
  Load and split documents with built-in loaders:
320
311
 
321
312
  ```ruby
322
- chunks = Phronomy::Agent::Context::Knowledge::Loader::MarkdownLoader.new.load("docs/guide.md")
323
- .then { |docs| Phronomy::Agent::Context::Knowledge::Splitter::RecursiveSplitter.new(chunk_size: 512).split(docs) }
313
+ chunks = Phronomy::VectorStore::Loader::MarkdownLoader.new.load("docs/guide.md")
314
+ .then { |docs| Phronomy::VectorStore::Splitter::RecursiveSplitter.new(chunk_size: 512).split(docs) }
324
315
  ```
325
316
 
326
317
  ### Multi-Agent Handoff — Hub-and-spoke routing
@@ -539,7 +530,7 @@ end
539
530
  ### MCP Tool — External tool servers
540
531
 
541
532
  ```ruby
542
- search_tool = Phronomy::Tool::McpTool.from_server(
533
+ search_tool = Phronomy::Tools::Mcp.from_server(
543
534
  "stdio://./mcp-server",
544
535
  tool_name: "web_search"
545
536
  )
@@ -723,8 +714,8 @@ registry the budget is silently skipped.
723
714
  ### CancellationToken — Cooperative cancellation
724
715
 
725
716
  Pass a `CancellationToken` to any agent via `config: { cancellation_token: token }`.
726
- Cancellation is checked at multiple granular checkpoints: before the LLM call, before
727
- each RAG knowledge-source fetch, after each streaming chunk, before each parallel
717
+ Cancellation is checked at multiple granular checkpoints: before the LLM call,
718
+ after each streaming chunk, before each parallel
728
719
  tool-call batch, and after each `before_completion` hook. `CancellationError` is
729
720
  raised immediately and is never retried. No threads are force-killed — `ensure`
730
721
  blocks always execute.
@@ -5,5 +5,5 @@
5
5
  "dispatch_parallel_10": 886.0,
6
6
  "cancellation_token_cancelled": 4335060.97443425,
7
7
  "cancellation_token_raise_if_cancelled_noop": 3566903.189098373,
8
- "trim_context_remove_2000": 1761.5700678986254
8
+ "trim_messages_2000": 2896552.0
9
9
  }
@@ -53,7 +53,7 @@ class BenchStubChat
53
53
  end
54
54
 
55
55
  # A stub tool that does nothing but conforms to the Tool::Base interface.
56
- class BenchNullTool < Phronomy::Tool::Base
56
+ class BenchNullTool < Phronomy::Agent::Context::Capability::Base
57
57
  description "No-op benchmark tool"
58
58
  param :x, type: :string, desc: "input"
59
59
 
@@ -41,6 +41,14 @@ Benchmark.bm(40) do |x|
41
41
  end
42
42
 
43
43
  x.report("build(1000 msgs, 10 chunks, budgeted)") do
44
- (BENCH_ASM_ITERATIONS / 10).times { make_assembler(n_messages: 1000, n_chunks: 10, with_budget: true).build }
44
+ (BENCH_ASM_ITERATIONS / 10).times do
45
+ # Assembler raises ContextLengthError when messages exceed the budget;
46
+ # callers (e.g. Agent::Base#build_context) are expected to pre-trim via
47
+ # trim_to_budget before calling build. The rescue here swallows that expected
48
+ # error so the benchmark loop keeps running instead of aborting.
49
+ make_assembler(n_messages: 1000, n_chunks: 10, with_budget: true).build
50
+ rescue Phronomy::ContextLengthError
51
+ # expected — budget exceeded
52
+ end
45
53
  end
46
54
  end
@@ -62,7 +62,7 @@ end
62
62
  # ---------------------------------------------------------------------------
63
63
  # Target 3: Tool::Base#params_schema generation (10 params)
64
64
  # ---------------------------------------------------------------------------
65
- tool_class = Class.new(Phronomy::Tool::Base) do
65
+ tool_class = Class.new(Phronomy::Agent::Context::Capability::Base) do
66
66
  description "Test tool with 10 params"
67
67
  param :p1, type: :string, desc: "param 1"
68
68
  param :p2, type: :string, desc: "param 2"
@@ -130,18 +130,18 @@ t6 = Benchmark.measure("CancellationToken#raise_if_cancelled! (no-op)") do
130
130
  end
131
131
 
132
132
  # ---------------------------------------------------------------------------
133
- # Target 7: Context::TrimContext#remove on a 2000-element history
133
+ # Target 7: Agent::Base#trim_messages on a 2000-message history
134
134
  # ---------------------------------------------------------------------------
135
135
  BenchMsg = Struct.new(:content) unless defined?(BenchMsg)
136
136
 
137
- TRIM_ELEMENTS = Array.new(2_000) { |i| {seq: i, message: BenchMsg.new("msg #{i}"), tokens: 10, role: :user} }
138
- TRIM_BUDGET = Phronomy::LlmContextWindow::TokenBudget.new(context_window: 4096, max_output_tokens: 512)
137
+ TRIM_MESSAGES = Array.new(2_000) { |i| BenchMsg.new("msg #{i}") }
139
138
  TRIM_ITERATIONS = 500
140
139
 
141
- t7 = Benchmark.measure("TrimContext#remove (2000-element history)") do
140
+ bench_trim_agent = Class.new(Phronomy::Agent::Base).new
141
+
142
+ t7 = Benchmark.measure("Agent::Base#trim_messages (2000-msg history)") do
142
143
  TRIM_ITERATIONS.times do
143
- tc = Phronomy::Agent::Context::Conversation::TrimContext.new(message_elements: TRIM_ELEMENTS, budget: TRIM_BUDGET)
144
- tc.remove((0...200).to_a) # remove 200 oldest messages
144
+ bench_trim_agent.send(:trim_messages, TRIM_MESSAGES, keep: 1_800)
145
145
  end
146
146
  end
147
147
 
@@ -159,7 +159,7 @@ metrics = {
159
159
  "dispatch_parallel_10" => [t4, PARALLEL_ITERATIONS],
160
160
  "cancellation_token_cancelled" => [t5, 8 * CANCEL_ITERATIONS],
161
161
  "cancellation_token_raise_if_cancelled_noop" => [t6, RAISE_ITERATIONS],
162
- "trim_context_remove_2000" => [t7, TRIM_ITERATIONS]
162
+ "trim_messages_2000" => [t7, TRIM_ITERATIONS]
163
163
  }
164
164
 
165
165
  REGRESSION_RESULTS = {} # rubocop:disable Style/MutableConstant
@@ -11,7 +11,7 @@ require_relative "../lib/phronomy"
11
11
 
12
12
  # --- Tool schema ---
13
13
 
14
- class BenchTool10Params < Phronomy::Tool::Base
14
+ class BenchTool10Params < Phronomy::Agent::Context::Capability::Base
15
15
  description "A tool with 10 parameters for benchmarking purposes"
16
16
  param :param1, type: :string, desc: "First parameter"
17
17
  param :param2, type: :integer, desc: "Second parameter"
@@ -43,7 +43,7 @@ end
43
43
 
44
44
  # --- static_knowledge_chunks cache ---
45
45
 
46
- class BenchKnowledgeSource < Phronomy::Agent::Context::Knowledge::Source::Base
46
+ class BenchKnowledgeSource < Phronomy::Agent::Context::Knowledge::Base
47
47
  def fetch(query: nil)
48
48
  [{content: "Cached knowledge fact.", type: :static}]
49
49
  end
@@ -28,7 +28,7 @@ BENCH_VS_ITERS = {100 => 100, 1_000 => 20, 10_000 => 5}.freeze
28
28
  puts "=== bench_vector_store_inmemory ==="
29
29
  Benchmark.bm(35) do |x|
30
30
  [100, 1_000, 10_000].each do |n|
31
- store = Phronomy::Agent::Context::Knowledge::VectorStore::InMemory.new(dimension: DIM)
31
+ store = Phronomy::VectorStore::InMemory.new(dimension: DIM)
32
32
  populate(store, n)
33
33
  iters = BENCH_VS_ITERS[n]
34
34
 
@@ -0,0 +1,224 @@
1
+ # ADR-011: build_context as the Single Authority for LLM Input
2
+
3
+ ## Status
4
+
5
+ Proposed — 2026-05-31
6
+
7
+ ## Context
8
+
9
+ ### Background
10
+
11
+ `Agent::Base#build_context` was introduced as a hook for subclasses to customise
12
+ the system prompt and conversation history passed to the LLM. Its original return
13
+ value was `{ system: String|nil, messages: Array }`, covering only two of the four
14
+ conceptual regions of an LLM context window.
15
+
16
+ `LlmContextWindow::Assembler` documents the four regions explicitly:
17
+
18
+ ```
19
+ 1. Instruction — system prompt text
20
+ 2. Capability — tool definitions
21
+ 3. Knowledge — external facts (XML context tags)
22
+ 4. Conversation — conversation messages
23
+ ```
24
+
25
+ However, the Assembler itself states Region 2 is "handled by RubyLLM, not here",
26
+ leaving tool registration entirely outside the `build_context` path.
27
+
28
+ ### Problems identified
29
+
30
+ **P1 — Tool definitions are not part of `build_context` output**
31
+
32
+ Tools were registered with `chat.with_tool(tc)` *after* `build_context` returned,
33
+ directly in `InvocationPipeline`, `_stream_impl`, and `ReactAgent#step`.
34
+ This means a subclass that overrides `build_context` cannot control which tools
35
+ are actually sent to the LLM; tools are always added behind its back.
36
+
37
+ **P2 — `_handoff_tools` bypass `build_context` entirely**
38
+
39
+ `Runner` adds handoff tools via `_add_handoff_tool` onto the agent instance.
40
+ These were registered with `chat.with_tool(tc)` at every call site, separately
41
+ from `context[:tool_classes]`, without going through `build_context` at all.
42
+ Even if a subclass override returned a modified tool list, handoff tools would
43
+ still be added unconditionally.
44
+
45
+ **P3 — Tool token cost excluded from budget calculation**
46
+
47
+ LLM providers (OpenAI, Anthropic, Gemini) count tool schema tokens against the
48
+ context window. The `TokenBudget` / `Assembler` pipeline never subtracted tool
49
+ tokens from the available budget before trimming conversation messages. This
50
+ caused the budget calculation to be consistently optimistic: the `effective_input_limit`
51
+ was always larger than the tokens actually available for messages, risking context
52
+ window overflow on long conversations with many or complex tools.
53
+
54
+ The existing `context_overhead` DSL was a manual workaround:
55
+
56
+ ```ruby
57
+ class MyAgent < Phronomy::Agent::Base
58
+ context_overhead 800 # developer guesses tool token cost
59
+ end
60
+ ```
61
+
62
+ This is inaccurate by design and should not be necessary.
63
+
64
+ **P4 — RAG fetch called inside `build_context` on every invocation**
65
+
66
+ `build_context` called `fetch_knowledge_chunks` dynamically. In a ReAct loop
67
+ with N iterations, RAG was fetched N times for the same query. More importantly,
68
+ dynamic per-call RAG fetch is architecturally misplaced:
69
+
70
+ - Knowledge fetched by RAG and injected as Region 3 context belongs to the
71
+ *agent's knowledge*, not to the per-invocation message flow.
72
+ - If the LLM needs to retrieve information dynamically, the correct mechanism is
73
+ **function calling**: the LLM calls a retrieval tool, and the result appears in
74
+ the conversation log as a tool result message (Region 4).
75
+ - Static knowledge that the agent always needs should be registered once at
76
+ agent initialisation time, not re-fetched on every `build_context` call.
77
+
78
+ **P5 — `build_capability_tool_classes` is redundant indirection**
79
+
80
+ `build_capability_tool_classes` was introduced as a narrower override hook to
81
+ avoid requiring subclasses to copy `build_context` just to change tool selection.
82
+ However, it has no documentation, no usage examples, and provides no capability
83
+ that overriding `build_context` itself does not already provide. Its existence
84
+ adds a public API surface and conceptual overhead without commensurate value.
85
+
86
+ **P6 — No access to previous context**
87
+
88
+ `build_context` builds from scratch every call with no knowledge of what was sent
89
+ to the LLM in the previous call. This prevents:
90
+ - Token cache hit optimisations (OpenAI prompt caching, Anthropic `cache_control`)
91
+ which require a stable prompt prefix
92
+ - Incremental context strategies that avoid recomputing unchanged regions
93
+
94
+ ## Decision
95
+
96
+ ### D1 — `build_context` is the single authority for all LLM input
97
+
98
+ **Nothing may be added to or removed from the LLM request outside of
99
+ `build_context`.** Every call site (`InvocationPipeline`, `_stream_impl`,
100
+ `ReactAgent#step`, `ReactAgent#stream_step`) must:
101
+
102
+ 1. Call `build_context` to obtain `{ system:, messages:, tool_classes: }`.
103
+ 2. Apply the result to `chat` — and *only* the result.
104
+ 3. Not register any additional tools, messages, or instructions independently.
105
+
106
+ ### D2 — Assembler handles all four regions including Capability
107
+
108
+ `LlmContextWindow::Assembler` gains `add_capability(tool_classes)`:
109
+
110
+ ```ruby
111
+ assembler.add_capability(tools) # Region 2
112
+ ```
113
+
114
+ Responsibilities of `add_capability`:
115
+
116
+ 1. Store `tool_classes` for pass-through in `build` return value.
117
+ 2. Serialise each tool's schema (via RubyLLM's provider-specific `tool_for` /
118
+ `function_declaration_for`) and estimate its token cost.
119
+ 3. Add that cost to the `used` token count before conversation message trimming.
120
+
121
+ `build` return value expands to:
122
+
123
+ ```ruby
124
+ { system: String|nil, messages: Array, tool_classes: Array }
125
+ ```
126
+
127
+ ### D3 — `build_context` includes all tools (user tools + handoff tools)
128
+
129
+ `build_context` passes `self.class.tools + _handoff_tools` to
130
+ `assembler.add_capability`. `_handoff_tools` are framework-managed routing tools;
131
+ they are always included and are not subject to user-level filtering.
132
+
133
+ Subclasses that need dynamic tool selection override `build_context` and call
134
+ `assembler.add_capability` with their own selection logic.
135
+
136
+ `build_capability_tool_classes` is **removed** (P5 resolution).
137
+
138
+ ### D4 — `fetch_knowledge_chunks` is removed from `build_context`
139
+
140
+ Knowledge enters Region 3 through exactly two paths:
141
+
142
+ **Path A — Agent initialisation (static knowledge)**
143
+
144
+ ```ruby
145
+ class MyAgent < Phronomy::Agent::Base
146
+ knowledge "The capital of Japan is Tokyo.", type: :entity
147
+ end
148
+ ```
149
+
150
+ Registered once; the Assembler always includes it.
151
+
152
+ **Path B — Per-invocation dynamic knowledge via `config[:knowledge_sources]`**
153
+
154
+ The caller passes knowledge sources in the invocation config:
155
+
156
+ ```ruby
157
+ agent.invoke(input, config: { knowledge_sources: [my_rag_source] })
158
+ ```
159
+
160
+ `fetch_knowledge_chunks` is called exactly **once per `invoke`**,
161
+ not once per LLM call within a ReAct loop. The result is cached on the agent
162
+ instance for the duration of that invocation.
163
+
164
+ This is a caller responsibility: if the caller needs fresh knowledge on every
165
+ `invoke`, it passes new sources. Within a single `invoke`, knowledge is stable.
166
+
167
+ ### D5 — Previous context stored as instance variable
168
+
169
+ After each `build_context` call, the result is stored:
170
+
171
+ ```ruby
172
+ @last_context = { system: ..., messages: ..., tool_classes: ... }
173
+ ```
174
+
175
+ `build_context` may reference `@last_context` for optimisations such as:
176
+
177
+ - Detecting that `system` and `tool_classes` are unchanged → skip regeneration
178
+ of the stable prefix to improve LLM provider token cache hit rate.
179
+ - Skipping Assembler work when the context is provably identical to the last call.
180
+
181
+ `@last_context` is **not** passed as a method parameter; it is read from the
182
+ instance. This avoids changing call-site signatures.
183
+
184
+ Note: `Agent` instances are not thread-safe (already documented). `@last_context`
185
+ inherits this constraint — concurrent invocations on the same instance are not
186
+ supported.
187
+
188
+ ## Consequences
189
+
190
+ ### Token budget accuracy
191
+
192
+ With D2, `effective_input_limit` correctly reflects the tokens actually available
193
+ for conversation messages after system prompt, tool schemas, and knowledge are
194
+ accounted for. `context_overhead` becomes unnecessary for tool costs; it may
195
+ still be used as a manual reserve for provider-specific overhead not captured by
196
+ schema serialisation.
197
+
198
+ ### `build_context` as the integration surface
199
+
200
+ Subclasses override `build_context` for all customisation: tool selection,
201
+ knowledge injection, system prompt variants, context compression strategies.
202
+ There is one integration point, not several.
203
+
204
+ ### RAG fetch frequency
205
+
206
+ `fetch_knowledge_chunks` runs at most once per `invoke` call (P4 resolution).
207
+ In ReAct loops with N iterations, RAG is fetched once, not N times.
208
+
209
+ ### Removed API
210
+
211
+ `build_capability_tool_classes` is removed. It was never documented or used
212
+ outside of internal framework code, so no breakage is expected; a subclass that did override it must move that logic into `build_context`.
213
+
214
+ ## Migration notes
215
+
216
+ - All call sites (`InvocationPipeline`, `_stream_impl`, `ReactAgent#step/stream_step`)
217
+ must be updated to remove the separate `_handoff_tools` registration lines and
218
+ rely solely on `context[:tool_classes]`.
219
+ - `Assembler#add_capability` and the token estimation for tool schemas must be
220
+ implemented.
221
+ - `build_context` must be updated to pass all tools to `assembler.add_capability`
222
+ and to cache `@last_context`.
223
+ - `fetch_knowledge_chunks` must be lifted out of `build_context` into the
224
+ invocation-scoped cache described in D4.