phronomy 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +21 -0
  3. data/CHANGELOG.md +338 -0
  4. data/CONTRIBUTING.md +102 -0
  5. data/README.md +242 -27
  6. data/RELEASE_CHECKLIST.md +86 -0
  7. data/SECURITY.md +80 -0
  8. data/benchmark/baseline.json +9 -0
  9. data/benchmark/bench_agent_invoke.rb +105 -0
  10. data/benchmark/bench_context_assembler.rb +46 -0
  11. data/benchmark/bench_regression.rb +171 -0
  12. data/benchmark/bench_token_estimator.rb +44 -0
  13. data/benchmark/bench_tool_schema.rb +69 -0
  14. data/benchmark/bench_vector_store.rb +39 -0
  15. data/benchmark/bench_workflow.rb +55 -0
  16. data/benchmark/run_all.rb +118 -0
  17. data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
  18. data/docs/decisions/002-workflow-context-immutability.md +42 -0
  19. data/docs/decisions/003-event-loop-singleton.md +48 -0
  20. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +51 -0
  21. data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
  22. data/docs/decisions/006-no-built-in-guardrails.md +48 -0
  23. data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
  24. data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
  25. data/docs/decisions/009-state-store-abstraction.md +141 -0
  26. data/lib/phronomy/agent/base.rb +194 -12
  27. data/lib/phronomy/agent/before_completion_context.rb +1 -0
  28. data/lib/phronomy/agent/checkpoint.rb +1 -0
  29. data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
  30. data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
  31. data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
  32. data/lib/phronomy/agent/concerns/retryable.rb +12 -1
  33. data/lib/phronomy/agent/concerns/suspendable.rb +4 -0
  34. data/lib/phronomy/agent/fsm.rb +15 -0
  35. data/lib/phronomy/agent/handoff.rb +3 -0
  36. data/lib/phronomy/agent/orchestrator.rb +123 -11
  37. data/lib/phronomy/agent/parallel_tool_chat.rb +21 -4
  38. data/lib/phronomy/agent/react_agent.rb +8 -6
  39. data/lib/phronomy/agent/runner.rb +2 -0
  40. data/lib/phronomy/agent/shared_state.rb +11 -0
  41. data/lib/phronomy/agent/suspend_signal.rb +2 -0
  42. data/lib/phronomy/agent/team_coordinator.rb +17 -5
  43. data/lib/phronomy/cancellation_token.rb +92 -0
  44. data/lib/phronomy/configuration.rb +26 -2
  45. data/lib/phronomy/context/assembler.rb +6 -0
  46. data/lib/phronomy/context/compaction_context.rb +2 -0
  47. data/lib/phronomy/context/context_version_cache.rb +2 -0
  48. data/lib/phronomy/context/token_budget.rb +3 -0
  49. data/lib/phronomy/context/token_estimator.rb +9 -2
  50. data/lib/phronomy/context/trigger_context.rb +1 -0
  51. data/lib/phronomy/context/trim_context.rb +4 -0
  52. data/lib/phronomy/embeddings/base.rb +5 -2
  53. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
  54. data/lib/phronomy/eval/comparison.rb +2 -0
  55. data/lib/phronomy/eval/dataset.rb +4 -0
  56. data/lib/phronomy/eval/metrics.rb +6 -0
  57. data/lib/phronomy/eval/runner.rb +2 -0
  58. data/lib/phronomy/eval/scorer/base.rb +1 -0
  59. data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
  60. data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
  61. data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
  62. data/lib/phronomy/event_loop.rb +114 -7
  63. data/lib/phronomy/fsm_session.rb +8 -1
  64. data/lib/phronomy/generator_verifier.rb +2 -0
  65. data/lib/phronomy/guardrail/base.rb +3 -0
  66. data/lib/phronomy/knowledge_source/base.rb +6 -2
  67. data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
  68. data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
  69. data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
  70. data/lib/phronomy/loader/base.rb +1 -0
  71. data/lib/phronomy/loader/csv_loader.rb +2 -0
  72. data/lib/phronomy/loader/markdown_loader.rb +2 -0
  73. data/lib/phronomy/loader/plain_text_loader.rb +1 -0
  74. data/lib/phronomy/output_parser/base.rb +1 -0
  75. data/lib/phronomy/output_parser/json_parser.rb +22 -3
  76. data/lib/phronomy/output_parser/structured_parser.rb +2 -0
  77. data/lib/phronomy/prompt_template.rb +5 -0
  78. data/lib/phronomy/runnable.rb +20 -3
  79. data/lib/phronomy/splitter/base.rb +2 -0
  80. data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
  81. data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
  82. data/lib/phronomy/state_store/base.rb +48 -0
  83. data/lib/phronomy/state_store/in_memory.rb +62 -0
  84. data/lib/phronomy/tool/agent_tool.rb +1 -0
  85. data/lib/phronomy/tool/base.rb +189 -27
  86. data/lib/phronomy/tool/mcp_tool.rb +68 -13
  87. data/lib/phronomy/tracing/base.rb +3 -0
  88. data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
  89. data/lib/phronomy/tracing/open_telemetry_tracer.rb +2 -0
  90. data/lib/phronomy/vector_store/base.rb +33 -7
  91. data/lib/phronomy/vector_store/in_memory.rb +16 -7
  92. data/lib/phronomy/vector_store/pgvector.rb +40 -9
  93. data/lib/phronomy/vector_store/redis_search.rb +29 -8
  94. data/lib/phronomy/version.rb +1 -1
  95. data/lib/phronomy/workflow.rb +96 -7
  96. data/lib/phronomy/workflow_context.rb +54 -4
  97. data/lib/phronomy/workflow_runner.rb +35 -7
  98. data/lib/phronomy.rb +70 -1
  99. data/scripts/api_snapshot.rb +91 -0
  100. data/scripts/check_api_annotations.rb +68 -0
  101. data/scripts/check_private_enforcement.rb +93 -0
  102. data/scripts/check_readme_runnable.rb +98 -0
  103. data/scripts/run_mutation.sh +46 -0
  104. metadata +45 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 81df7b877b08caffbfdafb9ab1f1c186739a04ef643a14e7b457be805c8b2b9d
4
- data.tar.gz: c0fd0ffad64df476c21e0205926df15589c0e654fed9675a6e8aef3589636f1c
3
+ metadata.gz: fbca82a7a23706719deda2e827af5a9b342c9b388d700929ca9eca19a531a2c9
4
+ data.tar.gz: b9727e2010acefc14738dbd5b71b5ea06b10f5ebd994a858dfbf1133e15ed003
5
5
  SHA512:
6
- metadata.gz: cb22a0d7f3edba46a46e9614f4cdad1641941164a641e17c1b3aa24ed07a3d7fb88b408304f1e9c5eaceac02ef8a1fa8503cfb0cffac3ae86b1dd9786756f5ac
7
- data.tar.gz: 4be7f67215d0b3b8381508f9ccf062fbfc8f41bb7a8a76299e2642634e78421c8ad5fcc551170db4e739c3db7e1cb8fd69ffad6982f50cdba8375f2237aa5ce9
6
+ metadata.gz: c33ee2c26a4b6e3d0470f4d95e30b04e9bc228cc87bdd807db1569c707888817f2d904f085822f91e94441e5c21e95ab348f0d7bc56b1ef87c72770ed4976e1d
7
+ data.tar.gz: 706148e7047ab570ca7d69f735f5767c2a983cf35312a70732723595ea80ed3b5f3efaaddd59d393f5680ec91c36c6ca3e01a70dcb671faa940ae9211df59b5a
data/.mutant.yml ADDED
@@ -0,0 +1,21 @@
1
+ ---
2
+ # Mutant configuration for Phronomy (opensource project)
3
+ # See: https://github.com/mbj/mutant
4
+
5
+ usage: opensource
6
+
7
+ integration: rspec
8
+
9
+ includes:
10
+ - lib
11
+
12
+ requires:
13
+ - phronomy
14
+
15
+ subjects:
16
+ - Phronomy::WorkflowContext
17
+ - Phronomy::WorkflowRunner
18
+ - Phronomy::Tool::Base
19
+ - Phronomy::Context::TokenBudget
20
+ - Phronomy::Context::TokenEstimator
21
+ - Phronomy::VectorStore::InMemory
data/CHANGELOG.md CHANGED
@@ -9,6 +9,344 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9
9
 
10
10
  ## [Unreleased]
11
11
 
12
+ ### Added
13
+
14
+ - **`VectorStore#size` — document count for all backends, contract coverage for RedisSearch and Pgvector** (#240):
15
+ `VectorStore::Base` gains `#size` as an abstract method; `InMemory`, `RedisSearch`,
16
+ and `Pgvector` all implement it. `RedisSearch#size` queries `FT.INFO num_docs`;
17
+ `Pgvector#size` delegates to `model_class.count`. The `a_vector_store` shared example
18
+ is applied to RedisSearch and Pgvector (nightly real-backend CI); unit specs add a
19
+ skip-guarded `it_behaves_like` reference and dedicated `#size` unit tests.
20
+ `empty_store` override hook added to the shared example for real-backend callers.
21
+
22
+ - **`force_kill: false` default in `dispatch_parallel`, `fan_out`, and `EventLoop#stop`** (#235):
23
+ `Thread#kill` is now opt-in. The default `force_kill: false` leaves timed-out workers
24
+ running and raises `TimeoutError` immediately, avoiding the risk of interrupted
25
+ `ensure` blocks or corrupted database transactions. Pass `force_kill: true` to
26
+ restore the previous behaviour (with a `logger.warn` to make it visible).
27
+ `EventLoop#stop` gains the same keyword and returns `:timeout` instead of
28
+ `:force_killed` when `force_kill: false` and the thread is still alive.
29
+
30
+ - **Public API compatibility snapshot spec** (#236):
31
+ `spec/phronomy/public_api_spec.rb` enumerates expected public methods for every
32
+ `Stable`-tagged constant. The spec runs as part of the default RSpec suite; any
33
+ accidental removal or rename of a listed method now fails CI immediately.
34
+
35
+ - **Nightly real-backend CI split into three independent job groups** (#238):
36
+ The nightly workflow (`nightly.yml`) now has three separately skippable jobs:
37
+ `real-backend-redis` (Redis Stack), `real-backend-pgvector` (PostgreSQL + pgvector),
38
+ and `real-backend-otel` (OpenTelemetry in-process SDK exporter). Each job runs only
39
+ the relevant spec with `--tag real_backend:<backend>`. The existing `redis_search_spec`
40
+ and `pgvector_spec` gain the `real_backend:` metadata tag. A new `otel_spec.rb`
41
+ verifies span emission, attribute attachment, and error recording via
42
+ `InMemorySpanExporter`.
43
+
44
+ - **`CancellationToken#raise_if_cancelled!` — convenience cancellation check** (#234):
45
+ New instance method that raises `Phronomy::CancellationError` when the token is
46
+ cancelled, or returns `nil` otherwise. Replaces the `if cancelled? then raise`
47
+ pattern inside tools, RAG loaders, and hooks.
48
+
49
+ - **Tool cooperative cancellation via `cancellation_token:` keyword** (#234):
50
+ `Tool::Base#call` now injects `Thread.current[:phronomy_cancellation_token]` as
51
+ `cancellation_token:` into `execute` when the method declares that keyword. Existing
52
+ tools without the keyword continue to work unchanged. Tool authors can opt in:
53
+ `def execute(query:, cancellation_token: nil)`.
54
+
55
+ - **`CancellationToken.timeout_after` — monotonic-clock deadline** (#225):
56
+ New `CancellationToken.timeout_after(seconds)` class method creates a token that
57
+ becomes cancelled after the specified number of seconds, measured with
58
+ `Process::CLOCK_MONOTONIC` (immune to NTP/DST drift). The existing `deadline:`
59
+ keyword for wall-clock deadlines remains supported for backward compatibility.
60
+
61
+ - **`EventLoop#stop` — drain mode and cooperative shutdown** (#233):
62
+ `EventLoop#stop` now accepts a `drain: true` keyword (default: `false`). When
63
+ set, the loop waits up to `Phronomy.configuration.event_loop_stop_grace_seconds`
64
+ (default: 5 s, configurable) for in-flight FSM sessions to complete before
65
+ joining threads. New sessions submitted while shutdown is pending are rejected
66
+ immediately with `Phronomy::CancellationError`. A new
67
+ `event_loop_stop_grace_seconds` configuration attribute is available on
68
+ `Phronomy::Configuration`.
69
+
70
+ - **`invoke_timeout` DSL and `Phronomy::TimeoutError`**: Agents can declare a per-invoke
71
+ timeout in seconds via `invoke_timeout N` in the class body. Exceeding the timeout raises
72
+ `Phronomy::TimeoutError` (a subclass of `Phronomy::Error`). The default remains unlimited.
73
+
74
+ - **`dispatch_parallel` / `fan_out` — per-call `timeout:` option** (#133): Both methods now
75
+ accept `timeout: nil` (default, unlimited) or a positive `Numeric` in seconds. Timed-out
76
+ tasks are treated the same as errors and follow the existing `on_error:` policy (`:raise`
77
+ or `:skip`).
78
+
79
+ - **MCP `HttpTransport` custom authentication headers** (#144): `McpTool.from_server` now
80
+ accepts `headers: {}`, forwarded all the way to `HttpTransport#initialize`. Arbitrary
81
+ headers (e.g. `Authorization: Bearer …`) are injected into every JSON-RPC request,
82
+ enabling use of MCP servers that require bearer tokens or API keys.
83
+
84
+ - **`StdioTransport` — `env:`, `cwd:`, and `startup_timeout:` options** (#145):
85
+ Three new keyword arguments are now accepted when constructing a `StdioTransport` (and
86
+ therefore via `McpTool.from_server`): `env: {}` merges extra variables into the child
87
+ process environment; `cwd: nil` sets the working directory; `startup_timeout: 5` limits
88
+ how long to wait for the child process to become ready.
89
+
90
+ - **Workflow DSL validates graph structure at build time** (#124): `Phronomy::Workflow.define`
91
+ now raises `ArgumentError` immediately for hard structural errors (no states declared,
92
+ transitions referencing undefined targets). Unreachable states emit a warning but do not
93
+ raise. Errors surface at load time rather than at the first `invoke`.
94
+
95
+ - **Expanded error taxonomy** (#149): Five new subclasses of `Phronomy::Error` are now
96
+ available: `TransportError` (MCP or LLM network-layer failure; subclasses are
97
+ `RateLimitError` for HTTP 429 and `AuthenticationError` for HTTP 401/403),
98
+ `ContextLengthError` (prompt exceeds model context window), and
99
+ `CancellationError` (explicit invocation cancellation, distinct from the
100
+ deadline-exceeded `TimeoutError`). All five are defined as subclasses of
101
+ `Phronomy::Error` so application code can rescue them uniformly.
102
+
103
+ - **`Agent::Base.static_knowledge_refresh!`** (#164): New class-level method that clears the
104
+ cached `static_knowledge` chunks so the next `invoke` re-fetches from all registered
105
+ sources. Essential for long-running processes (web servers, job workers) where knowledge
106
+ sources may be updated at runtime without a process restart.
107
+
108
+ - **`Phronomy::Configuration#logger`** (#158): New optional configuration attribute. Any
109
+ object responding to `#warn` (e.g. `Rails.logger`) can be assigned. Framework diagnostic
110
+ messages — starting with the unreachable-state warning from `Workflow.define` — are routed
111
+ through this logger instead of writing directly to `$stderr` via `Kernel#warn`.
112
+
113
+ - **`Phronomy.with_configuration` and `Phronomy.reset_runtime!`** (#206): Two new class
114
+ methods for runtime isolation. `with_configuration` yields the current `Configuration`
115
+ object and restores the original after the block — even on exception — enabling per-request
116
+ overrides and scoped test configuration. `reset_runtime!` stops any running `EventLoop`,
117
+ clears its singleton, and resets configuration to defaults; intended for test suites to
118
+ ensure clean state between examples. `spec_helper.rb` now calls `reset_runtime!` in an
119
+ `after(:each)` hook automatically.
120
+
121
+ - **`CancellationToken` — cooperative cancellation for agent invocations** (#216):
122
+ New class `Phronomy::CancellationToken` enables cooperative cancellation without
123
+ `Thread#kill`. Tokens are passed via `config: { cancellation_token: token }`.
124
+ `cancel!` marks the token (thread-safe via Mutex); `cancelled?` returns `true`
125
+ once cancelled or once an optional `deadline: Time` has passed. Agents check the
126
+ token in `_invoke_impl` (fail-fast before any LLM call) and again immediately
127
+ before `chat.ask`. `CancellationError` is never retried by the retry policy.
128
+ `dispatch_parallel` and `fan_out` accept `cancellation_token:` and automatically
129
+ inject it into every worker task's config unless the task already supplies its own.
130
+
131
+ ### Changed
132
+
133
+ - **`CancellationToken` checked at granular checkpoints** (#223):
134
+ The cancellation token (passed via `config: { cancellation_token: token }`) is
135
+ now checked at multiple additional points beyond the initial LLM call boundary:
136
+ before each `KnowledgeSource#fetch` in `build_context` (RAG phase); after each
137
+ streaming chunk in `_stream_impl`; before each tool-call batch in
138
+ `ParallelToolChat`; and after each `before_completion` hook. This ensures that
139
+ long-running retrieval, streaming, and tool-dispatch phases respect cancellation
140
+ with minimal latency.
141
+
142
+ - **`Agent::Orchestrator` uses `CancellationToken` for internal stop flag** (#224):
143
+ The boolean stop flag in `Orchestrator` is replaced with an internal
144
+ `CancellationToken`. FSM session loops perform cooperative cancellation checks
145
+ via `cancelled?`; `Thread#kill` is retained only as a last resort after
146
+ cooperative shutdown.
147
+
148
+ - **Error taxonomy classes are now raised at the retry boundary** (#204): The classes
149
+ `Phronomy::RateLimitError`, `Phronomy::AuthenticationError`, `Phronomy::ContextLengthError`,
150
+ and `Phronomy::TransportError` (introduced in #149) are now actually raised when the
151
+ corresponding `RubyLLM` exceptions occur. A new internal `ErrorTranslation` concern wraps
152
+ the retry-exhaustion path and maps `RubyLLM::*` exceptions to their Phronomy counterparts,
153
+ preserving the original exception as `#cause`. **Migration**: callers rescuing
154
+ `RubyLLM::RateLimitError` (or other `RubyLLM::*` errors) directly should migrate to
155
+ `rescue Phronomy::RateLimitError` / `Phronomy::TransportError` etc.
156
+
157
+ - **`Orchestrator#bounded_map` uses cooperative cancellation before force-kill** (#203):
158
+ Workers now check a shared `cancelled` flag at each loop iteration and stop picking up new
159
+ tasks once the timeout deadline passes. A 0.5 s grace period is given to in-flight workers
160
+ before `Thread#kill` is used as a last resort. `EventLoop#stop` similarly logs a warning
161
+ via `Phronomy.configuration.logger` when force-kill is triggered.
162
+
163
+ - **`Orchestrator#bounded_map` timeout deadline uses monotonic clock** (#209): Replaced
164
+ `Time.now` deadline arithmetic with `Process.clock_gettime(Process::CLOCK_MONOTONIC)` to
165
+ avoid sensitivity to NTP adjustments, DST transitions, and system-clock changes that could
166
+ inflate or deflate effective timeouts.
167
+
168
+ - **`EventLoop` warns on events for unknown `target_id`**: When the event loop receives an
169
+ event whose `target_id` does not match any registered session, a warning is emitted instead
170
+ of silently discarding the event.
171
+
172
+ - **`VectorStore#search` validates `k` is a positive integer**: All three backends
173
+ (`InMemory`, `RedisSearch`, `Pgvector`) now raise `ArgumentError` immediately when `k` is
174
+ not a positive integer, providing a clear error instead of a silent empty result or an
175
+ obscure database error.
176
+
177
+ - **`max_parallel_tools` DSL**: Agents can cap the number of concurrent tool-call threads
178
+ with `max_parallel_tools N` in the class body. Useful for rate-limiting external API calls.
179
+ The default is **10** (inheriting from `Base`); set explicitly to raise or lower the cap.
180
+
181
+ - **`max_parallel_tools` and `invoke_timeout` DSL argument validation** (#152): Both setters
182
+ now raise `ArgumentError` at class-definition time if the supplied value is invalid
183
+ (`max_parallel_tools` requires an `Integer >= 1`; `invoke_timeout` requires a positive
184
+ `Numeric`), surfacing configuration mistakes immediately.
185
+
186
+ - **`on_error :suppress` — canonical alias for `:return_empty`** (#165): `:suppress` is the
187
+ new preferred name for the error-suppression behaviour in `Tool::Base`. `:return_empty`
188
+ continues to function but emits a deprecation warning and will be removed in a future major
189
+ release. Migrate by replacing `on_error :return_empty` with `on_error :suppress`.
190
+
191
+ - **Tool nested object properties injected into JSON Schema** (#162): `Tool::Base#params_schema`
192
+ now recursively serialises nested `:object` param specs (including `enum` constraints and
193
+ further nesting) into the JSON Schema `properties` structure forwarded to the LLM,
194
+ enabling accurate structured argument generation for complex tool parameters.
195
+
196
+ ### Fixed
197
+
198
+ - **`EventLoop#start` is now idempotent; stale `:__stop__` sentinel race fixed** (#203):
199
+ Calling `start` on an already-running `EventLoop` is now a no-op. Fixed a race condition
200
+ where `stop` setting `@running = false` before the worker thread was scheduled left the
201
+ `:__stop__` sentinel unconsumed in the queue; a subsequent `start` would then immediately
202
+ terminate the new thread upon popping the stale sentinel. The sentinel is now treated as a
203
+ pure unblock signal for `queue.pop` (`next` instead of `break`) — loop termination is
204
+ driven solely by `@running`.
205
+
206
+ - **`trace_pii: false` now redacts both input and output**: Previously only the user input
207
+ was redacted when `trace_pii` was `false`; LLM responses and tool results were still
208
+ forwarded to the tracing backend unredacted. Both sides are now replaced with `[REDACTED]`.
209
+
210
+ - **`StdioTransport` — `read_timeout` prevents indefinite blocking**: A configurable
211
+ `read_timeout` (default 30 s) is now enforced on MCP stdio reads. A silent child process
212
+ could previously block the calling thread forever.
213
+
214
+ - **MCP schema `required` and `enum` constraints propagated to `param` DSL**:
215
+ `McpTool.from_server` now copies `required` and `enum` constraints from the MCP JSON Schema
216
+ into the generated `param` declarations so downstream validation sees them.
217
+
218
+ - **`FSMSession` notifies parent when child `AgentFSM` fails**: An unhandled error in a child
219
+ `AgentFSM` now correctly notifies the parent `FSMSession`, preventing it from waiting
220
+ indefinitely for a completion event that will never arrive.
221
+
222
+ - **`WorkflowContext.field` rejects plain `Array` or `Hash` defaults**: Passing a plain `Array`
223
+ or `Hash` as a field default now raises `ArgumentError` at class-definition time,
224
+ preventing accidental state sharing across workflow invocations. Other mutable objects
225
+ are not checked. Wrap collection defaults in a Proc: `default: -> { [] }`.
226
+
227
+ - **Tool aliases inherited by `Agent` subclasses**: `tool_aliases` declared in a parent
228
+ `Agent::Base` subclass are now correctly merged into subclasses rather than being silently
229
+ dropped.
230
+
231
+ - **`ReactAgent` output selection skips tool-role messages**: The final output selection
232
+ logic no longer misidentifies `tool`-role messages as the assistant response, fixing
233
+ spurious tool-call JSON appearing in `result[:output]`.
234
+
235
+ - **Thread-local context cache cleaned up after each `invoke`** (#128): `Agent::Base#invoke`
236
+ previously leaked thread-local context cache entries after each call, causing stale cache
237
+ hits in long-lived threads. The cache is now cleared in an `ensure` block.
238
+
239
+ - **Unknown tool parameters are rejected** (#130): `Tool::Base#call` now raises
240
+ `ArgumentError` when keyword arguments not declared via the `param` DSL are passed, instead
241
+ of forwarding them silently to `execute`.
242
+
243
+ - **`EventLoop#stop` uses cooperative shutdown instead of `Thread#kill`** (#135):
244
+ `Thread#kill` can interrupt `ensure` blocks midway and is unsafe. The event loop now sets a sentinel
245
+ flag and joins the worker thread, allowing it to flush pending events before termination.
246
+
247
+ - **`Orchestrator` propagates parent `config` and `thread_id` to sub-agents** (#132):
248
+ Sub-agents spawned via `dispatch` or `dispatch_parallel` now inherit the caller's `config`
249
+ hash and `thread_id`, enabling correct memory isolation and distributed tracing in
250
+ multi-agent pipelines.
251
+
252
+ - **`Agent::Base` caches `static_knowledge` fetch at the class level** (#127): The RAG
253
+ knowledge fetch was re-executed on every `invoke`. The result is now memoized at the class
254
+ level (`@static_knowledge_chunks ||= ...`), eliminating redundant vector-store queries.
255
+ The cache is **not** invalidated automatically when source content changes; call
256
+ `static_knowledge_refresh!` explicitly to force a reload.
257
+
258
+ - **`WorkflowContext#initialize` raises on unknown field keys** (#121): Passing an
259
+ unrecognised key to `WorkflowContext.new` was silently ignored. The constructor now raises
260
+ `ArgumentError`, surfacing typos and API mismatches immediately.
261
+
262
+ - **`WorkflowContext#merge` raises `ArgumentError` for unknown field keys** (#154): Passing
263
+ an unrecognised key to `WorkflowContext#merge` was silently ignored. The method now raises
264
+ `ArgumentError`, matching the guard added to `#initialize` in #121.
265
+
266
+ - **`WorkflowContext#deep_dup_value` rescues `TypeError` for non-dupable objects** (#156):
267
+ Objects that raise `TypeError` from `#dup` (e.g. `Method`, frozen `Proc`, `Integer`,
268
+ `Symbol`) are now returned as-is instead of crashing.
269
+
270
+ - **`Workflow.define` raises for undefined `from:` state in transitions** (#157): Transitions
271
+ that reference a `from:` state not declared in the DSL now raise `ArgumentError` at
272
+ build time, complementing the existing check for undefined `to:` targets.
273
+
274
+ - **`Workflow.define` unreachable-state warning routes through configured logger** (#158):
275
+ The diagnostic warning for unreachable states now uses `Phronomy.configuration.logger`
276
+ when set, falling back to `Kernel#warn`. Previously the warning always went to `$stderr`.
277
+
278
+ - **`require "set"` added to `workflow.rb`** (#159): Eliminates an implicit dependency on
279
+ `Set` being pre-loaded by another gem.
280
+
281
+ - **`Tool::Base#validate_nested_object` rejects undeclared extra keys** (#166): Keys present
282
+ in the LLM-supplied hash but absent from the tool's nested `param` schema now produce a
283
+ validation error rather than being silently forwarded.
284
+
285
+ - **`WorkflowContext#merge` deep-copies unchanged fields** (#123): Fields absent from the
286
+ `merge` argument were previously shared by reference with the original context, allowing
287
+ one branch to mutate another branch's state. All fields are now independently copied.
288
+
289
+ - **Robust metadata parsing in `VectorStore::Pgvector#search`** (#139): Metadata stored as a
290
+ PostgreSQL JSON string is now parsed correctly regardless of whether the database driver
291
+ returns a `String` or an already-decoded `Hash`.
292
+
293
+ - **`OutputParser::JsonParser` tries all fenced code blocks before falling back** (#146):
294
+ The parser now scans every fenced block in the LLM response (in order) and returns the
295
+ first one that parses as valid JSON, rather than only checking the first block. This
296
+ improves reliability with models that include prose before the JSON block.
297
+
298
+ - **`on_error: :return_empty` emits a warning and returns a descriptive string** (#147):
299
+ Errors in tools that declare `on_error :return_empty` are now logged to `warn` before the
300
+ tool returns. The placeholder string includes the tool name and a brief reason, making
301
+ silent failures easier to diagnose.
302
+
303
+ - **`context_version_cache` accessible after `invoke` completes**: The thread-local cache is
304
+ cleared in `invoke`'s `ensure` block, which caused `context_version_cache` to return `nil`
305
+ immediately after every call. The value is now persisted in `@last_context_version_cache`
306
+ so it remains readable post-invoke.
307
+
308
+ - **`WorkflowContext` field type `:merge` comment corrected**: The inline comment incorrectly
309
+ described `:merge` as a deep-merge. It performs a shallow merge (`Hash#merge`). The comment
310
+ has been updated.
311
+
312
+ - **`WorkflowContext` return value from entry actions now adopted in EventLoop mode** (#107):
313
+ `FSMSession` previously discarded the `WorkflowContext` returned by entry action callables,
314
+ causing `s.merge(...)` updates to be silently lost when `event_loop = true`. The context is
315
+ now correctly propagated, bringing EventLoop semantics in line with the synchronous
316
+ `WorkflowRunner`. Regression tests added in `spec/phronomy/fsm_session_spec.rb` (unit)
317
+ and `spec/integration/workflow_spec.rb` (integration, both sync and EventLoop paths).
318
+
319
+ ### Documentation
320
+
321
+ - **`trace_pii = false` description corrected** (#153): The inline comment and README Note
322
+ now correctly state that both the input and the output are redacted.
323
+
324
+ - **`invoke_timeout` is a wait timeout, not cancellation** (#163): YARD comment now
325
+ explicitly documents that the background agent thread and in-flight LLM/tool calls are
326
+ **not** interrupted when the timeout fires. Only the caller receives `TimeoutError`.
327
+
328
+ - **`context_version_cache` thread-safety limitation documented** (#161): A NOTE in the YARD
329
+ comment explains that the per-instance cache is not thread-safe when the same agent
330
+ instance is shared across threads.
331
+
332
+ - **`trace_pii` option documented in README**: The `trace_pii:` configuration key and its
333
+ behaviour (default `false`, redacts input and output in trace records) is now described in
334
+ the Configuration section of the README.
335
+
336
+ - **CJK token under-count warning in `TokenEstimator`**: A note in both the source and README
337
+ explains that the byte-based heuristic under-counts CJK characters by roughly 3×. Users
338
+ processing Chinese, Japanese, or Korean content should apply a correction factor or use a
339
+ model-specific tokenizer.
340
+
341
+ - **Stability labels, `reset_configuration!` caveat, CI, and gemspec** (#140 / #141 / #142 / #143 / #148 / #150):
342
+ README stability table revised for several APIs. `Phronomy.reset_configuration!` now carries
343
+ a warning that it is intended for test isolation only. Gemspec upper bounds added for
344
+ `ruby_llm` and `pg`. `ruby head` added to the CI test matrix. README API smoke tests added.
345
+
346
+ ---
347
+
348
+ ## [0.6.0] - 2026-05-21
349
+
12
350
  ### Removed
13
351
 
14
352
  - **`Phronomy::Guardrail::Builtin` module removed**: `PromptInjectionDetector`
data/CONTRIBUTING.md ADDED
@@ -0,0 +1,102 @@
1
+ # Contributing to phronomy
2
+
3
+ Thank you for your interest in contributing!
4
+
5
+ ---
6
+
7
+ ## Development Setup
8
+
9
+ ```bash
10
+ git clone https://github.com/Raizo-TCS/phronomy.git
11
+ cd phronomy
12
+ bundle install
13
+ ```
14
+
15
+ Run the test suite:
16
+
17
+ ```bash
18
+ bundle exec rspec --format documentation
19
+ bundle exec rspec --tag integration
20
+ ```
21
+
22
+ Run the linter:
23
+
24
+ ```bash
25
+ bundle exec standardrb
26
+ ```
27
+
28
+ Check that no Japanese characters appear in source files:
29
+
30
+ ```bash
31
+ ruby scripts/check_japanese.rb
32
+ ```
33
+
34
+ ---
35
+
36
+ ## Code Style
37
+
38
+ - All source files under `lib/` and `spec/` must begin with `# frozen_string_literal: true`.
39
+ - All comments, error messages (`raise`), and YARD documentation inside source files must be in **English**.
40
+ - Follow [Ruby Standard Style](https://github.com/standardrb/standard) (`standardrb`).
41
+
42
+ ---
43
+
44
+ ## Public API Changes
45
+
46
+ When adding, removing, or renaming a public method or class:
47
+
48
+ 1. Update the stability table in `README.md`.
49
+ 2. Add or update `@api private` YARD annotations for internal APIs.
50
+ 3. Regenerate the API compatibility snapshot:
51
+ ```bash
52
+ bundle exec ruby scripts/api_snapshot.rb --write
53
+ ```
54
+
55
+ ---
56
+
57
+ ## Architecture Decision Records
58
+
59
+ Key design decisions are documented as ADRs in
60
+ [docs/decisions/](docs/decisions/). Read these before making significant changes
61
+ to the threading model, caching strategy, or public API shape.
62
+
63
+ ---
64
+
65
+ ## Mutation Testing
66
+
67
+ Phronomy uses [mutant](https://github.com/mbj/mutant) to verify that each test
68
+ actually detects real code changes. Mutation tests are **not** part of the
69
+ required CI gate (they are slow), but they run nightly via `.github/workflows/nightly-mutation.yml`.
70
+
71
+ ### Run mutation tests locally
72
+
73
+ ```bash
74
+ # All subjects defined in .mutant.yml
75
+ bash scripts/run_mutation.sh
76
+
77
+ # Single subject
78
+ bash scripts/run_mutation.sh "Phronomy::WorkflowContext"
79
+ ```
80
+
81
+ ### Coverage targets
82
+
83
+ | Subject | Baseline | Target |
84
+ |---|---|---|
85
+ | `Phronomy::WorkflowContext` | 84.85% | ≥ 80% |
86
+ | `Phronomy::WorkflowRunner` | — | ≥ 80% |
87
+ | `Phronomy::Tool::Base` | 55.74% | ≥ 80% |
88
+ | `Phronomy::Context::TokenBudget` | — | ≥ 80% |
89
+ | `Phronomy::VectorStore::InMemory` | — | ≥ 80% |
90
+
91
+ When you add or modify tests for a covered subject, run mutation tests to confirm
92
+ the score does not regress.
93
+
94
+ ---
95
+
96
+ ## Releasing
97
+
98
+ See [RELEASE_CHECKLIST.md](RELEASE_CHECKLIST.md) for the full pre-release quality
99
+ gate and step-by-step release instructions.
100
+
101
+ **Never run `gem push` directly.** Releases are published via the GitHub Actions
102
+ `release.yml` workflow.