phronomy 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +22 -0
  3. data/CHANGELOG.md +488 -0
  4. data/CONTRIBUTING.md +102 -0
  5. data/README.md +374 -36
  6. data/RELEASE_CHECKLIST.md +86 -0
  7. data/Rakefile +33 -0
  8. data/SECURITY.md +80 -0
  9. data/benchmark/baseline.json +9 -0
  10. data/benchmark/bench_agent_invoke.rb +105 -0
  11. data/benchmark/bench_context_assembler.rb +46 -0
  12. data/benchmark/bench_regression.rb +172 -0
  13. data/benchmark/bench_token_estimator.rb +44 -0
  14. data/benchmark/bench_tool_schema.rb +69 -0
  15. data/benchmark/bench_vector_store.rb +39 -0
  16. data/benchmark/bench_workflow.rb +55 -0
  17. data/benchmark/run_all.rb +118 -0
  18. data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
  19. data/docs/decisions/002-workflow-context-immutability.md +42 -0
  20. data/docs/decisions/003-event-loop-singleton.md +48 -0
  21. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +75 -0
  22. data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
  23. data/docs/decisions/006-no-built-in-guardrails.md +66 -0
  24. data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
  25. data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
  26. data/docs/decisions/009-state-store-abstraction.md +141 -0
  27. data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
  28. data/lib/phronomy/agent/base.rb +416 -49
  29. data/lib/phronomy/agent/before_completion_context.rb +1 -0
  30. data/lib/phronomy/agent/checkpoint.rb +1 -0
  31. data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
  32. data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
  33. data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
  34. data/lib/phronomy/agent/concerns/retryable.rb +12 -1
  35. data/lib/phronomy/agent/concerns/suspendable.rb +19 -0
  36. data/lib/phronomy/agent/fsm.rb +44 -52
  37. data/lib/phronomy/agent/handoff.rb +3 -0
  38. data/lib/phronomy/agent/orchestrator.rb +191 -54
  39. data/lib/phronomy/agent/parallel_tool_chat.rb +87 -13
  40. data/lib/phronomy/agent/react_agent.rb +16 -6
  41. data/lib/phronomy/agent/runner.rb +2 -0
  42. data/lib/phronomy/agent/shared_state.rb +11 -0
  43. data/lib/phronomy/agent/suspend_signal.rb +2 -0
  44. data/lib/phronomy/agent/team_coordinator.rb +17 -5
  45. data/lib/phronomy/async_queue.rb +155 -0
  46. data/lib/phronomy/blocking_adapter_pool.rb +435 -0
  47. data/lib/phronomy/cancellation_scope.rb +123 -0
  48. data/lib/phronomy/cancellation_token.rb +133 -0
  49. data/lib/phronomy/concurrency_gate.rb +155 -0
  50. data/lib/phronomy/configuration.rb +168 -2
  51. data/lib/phronomy/context/assembler.rb +6 -0
  52. data/lib/phronomy/context/compaction_context.rb +2 -0
  53. data/lib/phronomy/context/context_version_cache.rb +2 -0
  54. data/lib/phronomy/context/token_budget.rb +3 -0
  55. data/lib/phronomy/context/token_estimator.rb +9 -2
  56. data/lib/phronomy/context/trigger_context.rb +1 -0
  57. data/lib/phronomy/context/trim_context.rb +4 -0
  58. data/lib/phronomy/deadline.rb +63 -0
  59. data/lib/phronomy/diagnostics.rb +62 -0
  60. data/lib/phronomy/embeddings/base.rb +22 -2
  61. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
  62. data/lib/phronomy/eval/comparison.rb +2 -0
  63. data/lib/phronomy/eval/dataset.rb +4 -0
  64. data/lib/phronomy/eval/metrics.rb +6 -0
  65. data/lib/phronomy/eval/runner.rb +11 -9
  66. data/lib/phronomy/eval/scorer/base.rb +1 -0
  67. data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
  68. data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
  69. data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
  70. data/lib/phronomy/event_loop.rb +275 -30
  71. data/lib/phronomy/fsm_session.rb +57 -4
  72. data/lib/phronomy/generator_verifier.rb +2 -0
  73. data/lib/phronomy/guardrail/base.rb +3 -0
  74. data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
  75. data/lib/phronomy/invocation_context.rb +152 -0
  76. data/lib/phronomy/knowledge_source/base.rb +24 -2
  77. data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
  78. data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
  79. data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
  80. data/lib/phronomy/llm_adapter/base.rb +104 -0
  81. data/lib/phronomy/llm_adapter/ruby_llm.rb +41 -0
  82. data/lib/phronomy/llm_adapter.rb +20 -0
  83. data/lib/phronomy/loader/base.rb +1 -0
  84. data/lib/phronomy/loader/csv_loader.rb +2 -0
  85. data/lib/phronomy/loader/markdown_loader.rb +2 -0
  86. data/lib/phronomy/loader/plain_text_loader.rb +1 -0
  87. data/lib/phronomy/metrics.rb +38 -0
  88. data/lib/phronomy/output_parser/base.rb +1 -0
  89. data/lib/phronomy/output_parser/json_parser.rb +22 -3
  90. data/lib/phronomy/output_parser/structured_parser.rb +2 -0
  91. data/lib/phronomy/prompt_template.rb +5 -0
  92. data/lib/phronomy/runnable.rb +20 -3
  93. data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
  94. data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
  95. data/lib/phronomy/runtime/gate_registry.rb +52 -0
  96. data/lib/phronomy/runtime/pool_registry.rb +57 -0
  97. data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
  98. data/lib/phronomy/runtime/scheduler.rb +98 -0
  99. data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
  100. data/lib/phronomy/runtime/task_registry.rb +48 -0
  101. data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
  102. data/lib/phronomy/runtime/timer_queue.rb +106 -0
  103. data/lib/phronomy/runtime/timer_service.rb +42 -0
  104. data/lib/phronomy/runtime.rb +374 -0
  105. data/lib/phronomy/splitter/base.rb +2 -0
  106. data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
  107. data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
  108. data/lib/phronomy/state_store/base.rb +48 -0
  109. data/lib/phronomy/state_store/in_memory.rb +62 -0
  110. data/lib/phronomy/task/backend.rb +80 -0
  111. data/lib/phronomy/task/fiber_backend.rb +157 -0
  112. data/lib/phronomy/task/immediate_backend.rb +89 -0
  113. data/lib/phronomy/task/thread_backend.rb +84 -0
  114. data/lib/phronomy/task.rb +275 -0
  115. data/lib/phronomy/task_group.rb +265 -0
  116. data/lib/phronomy/testing/fake_clock.rb +109 -0
  117. data/lib/phronomy/testing/fake_scheduler.rb +104 -0
  118. data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
  119. data/lib/phronomy/testing.rb +12 -0
  120. data/lib/phronomy/tool/agent_tool.rb +1 -0
  121. data/lib/phronomy/tool/base.rb +298 -28
  122. data/lib/phronomy/tool/mcp_tool.rb +103 -17
  123. data/lib/phronomy/tool/scope_policy.rb +50 -0
  124. data/lib/phronomy/tool_executor.rb +106 -0
  125. data/lib/phronomy/tracing/base.rb +3 -0
  126. data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
  127. data/lib/phronomy/tracing/open_telemetry_tracer.rb +36 -0
  128. data/lib/phronomy/vector_store/async_backend.rb +110 -0
  129. data/lib/phronomy/vector_store/base.rb +40 -7
  130. data/lib/phronomy/vector_store/in_memory.rb +16 -7
  131. data/lib/phronomy/vector_store/pgvector.rb +40 -9
  132. data/lib/phronomy/vector_store/redis_search.rb +29 -8
  133. data/lib/phronomy/version.rb +1 -1
  134. data/lib/phronomy/workflow.rb +147 -11
  135. data/lib/phronomy/workflow_context.rb +83 -6
  136. data/lib/phronomy/workflow_runner.rb +106 -7
  137. data/lib/phronomy.rb +112 -1
  138. data/scripts/api_snapshot.rb +91 -0
  139. data/scripts/check_api_annotations.rb +68 -0
  140. data/scripts/check_private_enforcement.rb +93 -0
  141. data/scripts/check_readme_runnable.rb +98 -0
  142. data/scripts/run_mutation.sh +46 -0
  143. metadata +83 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 81df7b877b08caffbfdafb9ab1f1c186739a04ef643a14e7b457be805c8b2b9d
4
- data.tar.gz: c0fd0ffad64df476c21e0205926df15589c0e654fed9675a6e8aef3589636f1c
3
+ metadata.gz: d9ae370d656048e38f700b6bced931fe249f731cea819ab94691eb4bcf6ef43c
4
+ data.tar.gz: 97d01ca3475f547a41397d1dad2ddb8ccaa10f6466d5a75c3f79e6875a7af0c6
5
5
  SHA512:
6
- metadata.gz: cb22a0d7f3edba46a46e9614f4cdad1641941164a641e17c1b3aa24ed07a3d7fb88b408304f1e9c5eaceac02ef8a1fa8503cfb0cffac3ae86b1dd9786756f5ac
7
- data.tar.gz: 4be7f67215d0b3b8381508f9ccf062fbfc8f41bb7a8a76299e2642634e78421c8ad5fcc551170db4e739c3db7e1cb8fd69ffad6982f50cdba8375f2237aa5ce9
6
+ metadata.gz: d3ab9ebd145e1ed706ad1741a2e3184c412aa8fd0eac32c95eb0b4a1ef87af38ae73eb5b4205b7f2894dd228929130c9a7569d24a1d7a571a5aa3ec5a68a4172
7
+ data.tar.gz: efa88afdbaa2f3d8fc38ee7cbc7044711479490546a888d44540f3b6bae6da60a3a3e64cfbbef455d65f78bab64dd9a68056e4c9f7ac7a360d512179364c8b23
data/.mutant.yml ADDED
@@ -0,0 +1,22 @@
1
+ ---
2
+ # Mutant configuration for Phronomy (opensource project)
3
+ # See: https://github.com/mbj/mutant
4
+
5
+ usage: opensource
6
+
7
+ integration: rspec
8
+
9
+ includes:
10
+ - lib
11
+
12
+ requires:
13
+ - phronomy
14
+
15
+ matcher:
16
+ subjects:
17
+ - Phronomy::WorkflowContext
18
+ - Phronomy::WorkflowRunner
19
+ - Phronomy::Tool::Base
20
+ - Phronomy::Context::TokenBudget
21
+ - Phronomy::Context::TokenEstimator
22
+ - Phronomy::VectorStore::InMemory
data/CHANGELOG.md CHANGED
@@ -9,6 +9,494 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9
9
 
10
10
  ## [Unreleased]
11
11
 
12
+ ### Added
13
+
14
+ - **`Phronomy::Diagnostics` and `SchedulerReentrancyError`** (#278, #279):
15
+ `Phronomy::Diagnostics` exposes a snapshot of current scheduler state
16
+ (`pending_count`, `active_tasks`, `pool_utilization`, etc.) for debugging and
17
+ monitoring. `SchedulerReentrancyError` is raised when a scheduler operation is
18
+ attempted from within a scheduler callback, preventing deadlocks.
19
+ `Phronomy.configure { |c| c.scheduler_debug = true }` enables verbose scheduler
20
+ logging.
21
+
22
+ - **`task_id` / `parent_task_id` on `InvocationContext`** (#277):
23
+ Every task spawned via `Task.spawn` now carries a `task_id` (a random UUID) and
24
+ an optional `parent_task_id`. These fields enable hierarchical task-tree tracing
25
+ and are forwarded automatically by `TaskGroup`.
26
+
27
+ - **`Phronomy::Metrics` — task-centric observability snapshot** (#276):
28
+ `Phronomy::Metrics.snapshot` returns a hash with scheduler statistics:
29
+ `tasks_started`, `tasks_completed`, `tasks_failed`, `pool_queue_depth`, and
30
+ `pool_active_threads`. Intended for metrics export and health-check endpoints.
31
+
32
+ - **`Phronomy::Testing::FakeClock` and `FakeScheduler`** (#273):
33
+ Two test helpers for deterministic concurrency testing.
34
+ `FakeClock` exposes `advance(seconds)` to control the passage of time without
35
+ sleeping. `FakeScheduler` replaces the real scheduler in specs, providing
36
+ synchronous execution and `flush` / `drain` helpers to drive task completion.
37
+
38
+ - **`ScopePolicy` and approval gate integration** (#270):
39
+ `Phronomy::Tool::ScopePolicy` is a callable that maps `(tool_class, scope, agent)`
40
+ to `:allow`, `:approve`, or `:reject`. The default policy (`ScopePolicy::DEFAULT`)
41
+ automatically routes tools declaring high-risk scopes (`:write`, `:admin`,
42
+ `:external_network`, `:filesystem`, `:process`, `:external_process`) through the
43
+ existing approval gate; tools with `scope :read_only` or no scope are allowed
44
+ unconditionally. Per-agent policy overrides are available via
45
+ `agent.scope_policy = my_policy`.
46
+ **Behaviour change**: tools with the above scopes that previously executed without
47
+ an approval handler will now be **rejected** unless an approval handler is
48
+ registered or the agent uses a custom permissive policy.
49
+
50
+ - **`PromptInjectionGuardrail`, `Tool::Base#redact_params`, and `#max_result_size`** (#271):
51
+ `Phronomy::Guardrail::PromptInjectionGuardrail` is a built-in `InputGuardrail`
52
+ subclass that detects prompt-injection patterns in user input.
53
+ `Tool::Base.redact_params(*names)` marks parameter names as sensitive; their
54
+ values are replaced with `"[REDACTED]"` in log and trace output.
55
+ `Tool::Base.max_result_size(n)` sets a per-tool character limit; results
56
+ exceeding the limit are truncated and a warning is logged. The global fallback is
57
+ `Phronomy.configure { |c| c.tool_result_max_size = n }` (default: no limit).
58
+
59
+ - **`execution_mode` DSL on `Tool::Base`** (#263):
60
+ `Tool::Base.execution_mode` accepts `:cooperative`, `:blocking_io` (default),
61
+ `:cpu_bound`, or `:external_process`. Tools marked `:blocking_io` (the default)
62
+ are dispatched through `BlockingAdapterPool` when a `Runtime` is available,
63
+ keeping the scheduler thread unblocked. Tools marked `:cooperative` are called
64
+ directly on the scheduler thread (suitable for pure in-memory operations).
65
+
66
+ - **`invoke_async` and `call_async` — async entry points** (#262):
67
+ `Agent::Base#invoke_async(input, **opts)` returns a `Phronomy::Task` wrapping
68
+ `#invoke`. `Workflow#invoke_async(input, config:)` does the same for workflows.
69
+ `Tool::Base#call_async(args, cancellation_token:)` returns a `Task` wrapping
70
+ `#call`. All three are backward-compatible with existing synchronous callers.
71
+
72
+ - **`LLMAdapter` abstraction** (#266):
73
+ `Phronomy::LLMAdapter::Base` decouples the agent pipeline from RubyLLM.
74
+ `Phronomy::LLMAdapter::RubyLLM` (registered by default) wraps the existing
75
+ integration. Custom adapters can be registered via
76
+ `Phronomy.configure { |c| c.llm_adapter = MyAdapter }` for testing or
77
+ alternative LLM backends.
78
+
79
+ - **`BlockingAdapterPool` backpressure limits** (#268):
80
+ `BlockingAdapterPool` now enforces configurable `pool_size` (default: 10) and
81
+ `queue_size` (default: 100) limits. Tasks submitted when the queue is full raise
82
+ `Phronomy::BackpressureError` immediately instead of growing the queue without
83
+ bound.
84
+
85
+ - **Cooperative scheduler fairness** (#269):
86
+ The scheduler measures per-task lag and emits starvation and dispatch warnings
87
+ via `Phronomy.configuration.logger` when tasks wait longer than configured
88
+ thresholds. Configurable via `scheduler_starvation_warn_ms` and
89
+ `scheduler_dispatch_warn_ms`.
90
+
91
+ - **Workflow entry actions awaitable with Task** (#264):
92
+ Entry action lambdas may now return a `Phronomy::Task`. The FSMSession awaits
93
+ the task on a background thread and posts `:action_completed` (with the resulting
94
+ `WorkflowContext`) or `:state_completed` back to the EventLoop without blocking
95
+ it. Backward-compatible: lambdas that return a `WorkflowContext` or `nil`
96
+ continue to work as before.
97
+
98
+ - **`Task`, `TaskGroup`, `AsyncQueue`, `Deadline`, `InvocationContext`, `Runtime` concurrency abstractions** (#255):
99
+ Six new concurrency primitives form the foundation of the async execution layer.
100
+ `Task` wraps a callable with cancellation, timeout (`Deadline`), and context
101
+ propagation (`InvocationContext`). `TaskGroup` runs tasks concurrently and waits
102
+ for all to finish (or the first failure). `AsyncQueue` is a bounded, cancellable
103
+ queue. `Runtime` is the top-level façade that resolves a `BlockingAdapterPool`
104
+ and provides `blocking_io { }` and `cpu_bound { }` dispatch helpers.
105
+
106
+ - **`BlockingAdapterPool`** (#256):
107
+ A bounded thread pool that isolates blocking I/O (LLM calls, database queries,
108
+ HTTP requests) from the cooperative scheduler thread. Default pool size is 10
109
+ threads with a queue depth of 100. Replaces direct `Thread.new` calls in core
110
+ agent and tool paths.
111
+
112
+ - **`VectorStore#size` — document count for all backends, contract coverage for RedisSearch and Pgvector** (#240):
113
+ `VectorStore::Base` gains `#size` as an abstract method; `InMemory`, `RedisSearch`,
114
+ and `Pgvector` all implement it. `RedisSearch#size` queries `FT.INFO num_docs`;
115
+ `Pgvector#size` delegates to `model_class.count`. The `a_vector_store` shared example
116
+ is applied to RedisSearch and Pgvector (nightly real-backend CI); unit specs add a
117
+ skip-guarded `it_behaves_like` reference and dedicated `#size` unit tests.
118
+ `empty_store` override hook added to the shared example for real-backend callers.
119
+
120
+ - **`force_kill: false` default in `dispatch_parallel`, `fan_out`, and `EventLoop#stop`** (#235):
121
+ Thread#kill is now opt-in. The default `force_kill: false` leaves timed-out workers
122
+ running and raises `TimeoutError` immediately, avoiding the risk of interrupted
123
+ `ensure` blocks or corrupted database transactions. Pass `force_kill: true` to
124
+ restore the previous behaviour (with a `logger.warn` to make it visible).
125
+ `EventLoop#stop` gains the same keyword and returns `:timeout` instead of
126
+ `:force_killed` when `force_kill: false` and the thread is still alive.
127
+
128
+ - **Public API compatibility snapshot spec** (#236):
129
+ `spec/phronomy/public_api_spec.rb` enumerates expected public methods for every
130
+ `Stable`-tagged constant. The spec runs as part of the default RSpec suite; any
131
+ accidental removal or rename of a listed method now fails CI immediately.
132
+
133
+ - **Nightly real-backend CI split into three independent job groups** (#238):
134
+ The nightly workflow (`nightly.yml`) now has three separately skippable jobs:
135
+ `real-backend-redis` (Redis Stack), `real-backend-pgvector` (PostgreSQL + pgvector),
136
+ and `real-backend-otel` (OpenTelemetry in-process SDK exporter). Each job runs only
137
+ the relevant spec with `--tag real_backend:<backend>`. The existing `redis_search_spec`
138
+ and `pgvector_spec` gain the `real_backend:` metadata tag. A new `otel_spec.rb`
139
+ verifies span emission, attribute attachment, and error recording via
140
+ `InMemorySpanExporter`.
141
+
142
+ - **`CancellationToken#raise_if_cancelled!` — convenience cancellation check** (#234):
143
+ New instance method that raises `Phronomy::CancellationError` when the token is
144
+ cancelled, or returns `nil` otherwise. Replaces the `if cancelled? then raise`
145
+ pattern inside tools, RAG loaders, and hooks.
146
+
147
+ - **Tool cooperative cancellation via `cancellation_token:` keyword** (#234):
148
+ `Tool::Base#call` now injects `Thread.current[:phronomy_cancellation_token]` as
149
+ `cancellation_token:` into `execute` when the method declares that keyword. Existing
150
+ tools without the keyword continue to work unchanged. Tool authors can opt in:
151
+ `def execute(query:, cancellation_token: nil)`.
152
+
153
+ - **`CancellationToken.timeout_after` — monotonic-clock deadline** (#225):
154
+ New `CancellationToken.timeout_after(seconds)` class method creates a token that
155
+ becomes cancelled after the specified number of seconds, measured with
156
+ `Process::CLOCK_MONOTONIC` (immune to NTP/DST drift). The existing `deadline:`
157
+ keyword for wall-clock deadlines remains supported for backward compatibility.
158
+
159
+ - **`EventLoop#stop` — drain mode and cooperative shutdown** (#233):
160
+ `EventLoop#stop` now accepts a `drain: true` keyword (default: `false`). When
161
+ set, the loop waits up to `Phronomy.configuration.event_loop_stop_grace_seconds`
162
+ (default: 5 s, configurable) for in-flight FSM sessions to complete before
163
+ joining threads. New sessions submitted while shutdown is pending are rejected
164
+ immediately with `Phronomy::CancellationError`. A new
165
+ `event_loop_stop_grace_seconds` configuration attribute is available on
166
+ `Phronomy::Configuration`.
167
+
168
+ - **`invoke_timeout` DSL and `Phronomy::TimeoutError`**: Agents can declare a per-invoke
169
+ timeout in seconds via `invoke_timeout N` in the class body. Exceeding the timeout raises
170
+ `Phronomy::TimeoutError` (a subclass of `Phronomy::Error`). The default remains unlimited.
171
+
172
+ - **`dispatch_parallel` / `fan_out` — per-call `timeout:` option** (#133): Both methods now
173
+ accept `timeout: nil` (default, unlimited) or a positive `Numeric` in seconds. Timed-out
174
+ tasks are treated the same as errors and follow the existing `on_error:` policy (`:raise`
175
+ or `:skip`).
176
+
177
+ - **MCP `HttpTransport` custom authentication headers** (#144): `McpTool.from_server` now
178
+ accepts `headers: {}`, forwarded all the way to `HttpTransport#initialize`. Arbitrary
179
+ headers (e.g. `Authorization: Bearer …`) are injected into every JSON-RPC request,
180
+ enabling use of MCP servers that require bearer tokens or API keys.
181
+
182
+ - **`StdioTransport` — `env:`, `cwd:`, and `startup_timeout:` options** (#145):
183
+ Three new keyword arguments are now accepted when constructing a `StdioTransport` (and
184
+ therefore via `McpTool.from_server`): `env: {}` merges extra variables into the child
185
+ process environment; `cwd: nil` sets the working directory; `startup_timeout: 5` limits
186
+ how long to wait for the child process to become ready.
187
+
188
+ - **Workflow DSL validates graph structure at build time** (#124): `Phronomy::Workflow.define`
189
+ now raises `ArgumentError` immediately for hard structural errors (no states declared,
190
+ transitions referencing undefined targets). Unreachable states emit a warning but do not
191
+ raise. Errors surface at load time rather than at the first `invoke`.
192
+
193
+ - **Expanded error taxonomy** (#149): Five new subclasses of `Phronomy::Error` are now
194
+ available: `TransportError` (MCP or LLM network-layer failure; subclasses are
195
+ `RateLimitError` for HTTP 429 and `AuthenticationError` for HTTP 401/403),
196
+ `ContextLengthError` (prompt exceeds model context window), and
197
+ `CancellationError` (explicit invocation cancellation, distinct from the
198
+ deadline-exceeded `TimeoutError`). All five are defined as subclasses of
199
+ `Phronomy::Error` so application code can rescue them uniformly.
200
+
201
+ - **`Agent::Base.static_knowledge_refresh!`** (#164): New class-level method that clears the
202
+ cached `static_knowledge` chunks so the next `invoke` re-fetches from all registered
203
+ sources. Essential for long-running processes (web servers, job workers) where knowledge
204
+ sources may be updated at runtime without a process restart.
205
+
206
+ - **`Phronomy::Configuration#logger`** (#158): New optional configuration attribute. Any
207
+ object responding to `#warn` (e.g. `Rails.logger`) can be assigned. Framework diagnostic
208
+ messages — starting with the unreachable-state warning from `Workflow.define` — are routed
209
+ through this logger instead of writing directly to `$stderr` via `Kernel#warn`.
210
+
211
+ - **`Phronomy.with_configuration` and `Phronomy.reset_runtime!`** (#206): Two new class
212
+ methods for runtime isolation. `with_configuration` yields the current `Configuration`
213
+ object and restores the original after the block — even on exception — enabling per-request
214
+ overrides and scoped test configuration. `reset_runtime!` stops any running `EventLoop`,
215
+ clears its singleton, and resets configuration to defaults; intended for test suites to
216
+ ensure clean state between examples. `spec_helper.rb` now calls `reset_runtime!` in an
217
+ `after(:each)` hook automatically.
218
+
219
+ - **`CancellationToken` — cooperative cancellation for agent invocations** (#216):
220
+ New class `Phronomy::CancellationToken` enables cooperative cancellation without
221
+ `Thread#kill`. Tokens are passed via `config: { cancellation_token: token }`.
222
+ `cancel!` marks the token (thread-safe via Mutex); `cancelled?` returns `true`
223
+ once cancelled or once an optional `deadline: Time` has passed. Agents check the
224
+ token in `_invoke_impl` (fail-fast before any LLM call) and again immediately
225
+ before `chat.ask`. `CancellationError` is never retried by the retry policy.
226
+ `dispatch_parallel` and `fan_out` accept `cancellation_token:` and automatically
227
+ inject it into every worker task's config unless the task already supplies its own.
228
+
229
+ ### Removed
230
+
231
+ - **BREAKING: `Agent::Base#run_as_child` drops `&result_writer` block parameter** (#265):
232
+ The optional block form `run_as_child(input, ctx: ctx) { |r| ctx.answer = r[:output] }`
233
+ is no longer supported. The result is now delivered **exclusively** as the
234
+ `:child_completed` event payload `{ output:, messages:, usage: }`. The parent
235
+ Workflow task is the sole owner of the `WorkflowContext`; no background thread
236
+ writes to it directly. Callers that were using the block to write back into the
237
+ context must update their workflow design (e.g. read the result in the target
238
+ state's entry action after the transition, or store output through an external
239
+ shared resource if needed).
240
+
241
+ - **BREAKING (internal): `AgentFSM#initialize` drops `result_writer:` keyword** (#265):
242
+ Direct callers of `AgentFSM.new(result_writer: ...)` must remove that keyword.
243
+ This class is considered internal; gem consumers should use `run_as_child` instead.
244
+
245
+ ### Changed
246
+
247
+ - **`AgentFSM`, `ParallelToolChat`, and `Orchestrator` use `Task`/`TaskGroup` instead of bare `Thread.new`** (#257, #258, #259):
248
+ All three components now spawn async work through the `Task` and `TaskGroup`
249
+ abstractions. This enables cancellation propagation, context threading, and
250
+ `BlockingAdapterPool` routing. No public API changes; behaviour is equivalent.
251
+
252
+ - **`Thread.current[:phronomy_*]` context propagation replaced with explicit `InvocationContext`** (#260):
253
+ Thread-local keys `phronomy_event_loop_thread`, `phronomy_cancellation_token`,
254
+ and `phronomy_context_version_caches` are no longer used as the primary
255
+ propagation channel. `InvocationContext` is threaded explicitly through call
256
+ stacks. Importantly, `Tool::Base#call` no longer falls back to
257
+ `Thread.current[:phronomy_cancellation_token]`; cancellation is only observed
258
+ when the caller passes `cancellation_token:` explicitly (or when
259
+ `ParallelToolChat` injects it). Tools that relied on the thread-local fallback
260
+ must be updated.
261
+
262
+ - **`Timeout.timeout` removed from core paths; replaced with `CancellationScope`** (#261):
263
+ `Agent::Base#invoke` and `McpTool::StdioTransport` no longer use `Timeout.timeout`
264
+ (which is unsafe with `Thread.new` and `ensure` blocks). A `CancellationScope`
265
+ with `deadline_in(seconds)` provides equivalent semantics without the
266
+ thread-interruption hazards. `ScopeTimeoutError < TimeoutError` is raised on expiry.
267
+
268
+ - **RAG/VectorStore blocking I/O placed behind `BlockingAdapterPool` async boundary** (#267):
269
+ `KnowledgeSource#fetch` and all three `VectorStore` backends now execute their
270
+ blocking I/O through `Runtime#blocking_io` when a `Runtime` is present. Callers
271
+ in a synchronous context see no change; callers in an EventLoop context benefit
272
+ from non-blocking scheduler behaviour.
273
+
274
+
275
+ - **Cancellation token checked at additional pipeline points**: The cancellation token (passed via `config: { cancellation_token: token }`) is
276
+ now checked at multiple additional points beyond the initial LLM call boundary:
277
+ before each `KnowledgeSource#fetch` in `build_context` (RAG phase); after each
278
+ streaming chunk in `_stream_impl`; before each tool-call batch in
279
+ `ParallelToolChat`; and after each `before_completion` hook. This ensures that
280
+ long-running retrieval, streaming, and tool-dispatch phases respect cancellation
281
+ with minimal latency.
282
+
283
+ - **`Agent::Orchestrator` uses `CancellationToken` for internal stop flag** (#224):
284
+ The boolean stop flag in `Orchestrator` is replaced with an internal
285
+ `CancellationToken`. FSM session loops perform cooperative cancellation checks
286
+ via `cancelled?`; `Thread#kill` is retained only as a last resort after
287
+ cooperative shutdown.
288
+
289
+ - **Error taxonomy classes are now raised at the retry boundary** (#204): The classes
290
+ `Phronomy::RateLimitError`, `Phronomy::AuthenticationError`, `Phronomy::ContextLengthError`,
291
+ and `Phronomy::TransportError` (introduced in #149) are now actually raised when the
292
+ corresponding `RubyLLM` exceptions occur. A new internal `ErrorTranslation` concern wraps
293
+ the retry-exhaustion path and maps `RubyLLM::*` exceptions to their Phronomy counterparts,
294
+ preserving the original exception as `#cause`. **Migration**: callers rescuing
295
+ `RubyLLM::RateLimitError` (or other `RubyLLM::*` errors) directly should migrate to
296
+ `rescue Phronomy::RateLimitError` / `Phronomy::TransportError` etc.
297
+
298
+ - **`Orchestrator#bounded_map` uses cooperative cancellation before force-kill** (#203):
299
+ Workers now check a shared `cancelled` flag at each loop iteration and stop picking up new
300
+ tasks once the timeout deadline passes. A 0.5 s grace period is given to in-flight workers
301
+ before `Thread#kill` is used as a last resort. `EventLoop#stop` similarly logs a warning
302
+ via `Phronomy.configuration.logger` when force-kill is triggered.
303
+
304
+ - **`Orchestrator#bounded_map` timeout deadline uses monotonic clock** (#209): Replaced
305
+ `Time.now` deadline arithmetic with `Process.clock_gettime(Process::CLOCK_MONOTONIC)` to
306
+ avoid sensitivity to NTP adjustments, DST transitions, and system-clock changes that could
307
+ inflate or deflate effective timeouts.
308
+
309
+ - **`EventLoop` warns on events for unknown `target_id`**: When the event loop receives an
310
+ event whose `target_id` does not match any registered session, a warning is emitted instead
311
+ of silently discarding the event.
312
+
313
+ - **`VectorStore#search` validates `k` is a positive integer**: All three backends
314
+ (`InMemory`, `RedisSearch`, `Pgvector`) now raise `ArgumentError` immediately when `k` is
315
+ not a positive integer, providing a clear error instead of a silent empty result or an
316
+ obscure database error.
317
+
318
+ - **`max_parallel_tools` DSL**: Agents can cap the number of concurrent tool-call threads
319
+ with `max_parallel_tools N` in the class body. Useful for rate-limiting external API calls.
320
+ The default is **10** (inheriting from `Base`); set explicitly to raise or lower the cap.
321
+
322
+ - **`max_parallel_tools` and `invoke_timeout` DSL argument validation** (#152): Both setters
323
+ now raise `ArgumentError` at class-definition time if the supplied value is invalid
324
+ (`max_parallel_tools` requires an `Integer >= 1`; `invoke_timeout` requires a positive
325
+ `Numeric`), surfacing configuration mistakes immediately.
326
+
327
+ - **`on_error :suppress` — canonical alias for `:return_empty`** (#165): `:suppress` is the
328
+ new preferred name for the error-suppression behaviour in `Tool::Base`. `:return_empty`
329
+ continues to function but emits a deprecation warning and will be removed in a future major
330
+ release. Migrate by replacing `on_error :return_empty` with `on_error :suppress`.
331
+
332
+ - **Tool nested object properties injected into JSON Schema** (#162): `Tool::Base#params_schema`
333
+ now recursively serialises nested `:object` param specs (including `enum` constraints and
334
+ further nesting) into the JSON Schema `properties` structure forwarded to the LLM,
335
+ enabling accurate structured argument generation for complex tool parameters.
336
+
337
+ ### Fixed
338
+
339
+ - **`tool_name` preserved in `Orchestrator#prepare_tool_class` anonymous subclass wrapper**:
340
+ When `Orchestrator#prepare_tool_class` wrapped a subagent tool in an anonymous
341
+ subclass (`Class.new(prepared)`), the class-level instance variable `@tool_name`
342
+ was not inherited, causing the wrapper's `tool_name` to return `nil`. RubyLLM
343
+ then registered the tool under a `nil` key, making it unreachable when the LLM
344
+ called it by name. The fix captures the effective name before subclassing and
345
+ calls `tool_name effective_name` explicitly inside the anonymous class body —
346
+ the same pattern already used by the approval-gate wrapper.
347
+
348
+ - **`EventLoop#start` is now idempotent; stale `:__stop__` sentinel race fixed** (#203):
349
+ Calling `start` on an already-running `EventLoop` is now a no-op. Fixed a race condition
350
+ where `stop` setting `@running = false` before the worker thread was scheduled left the
351
+ `:__stop__` sentinel unconsumed in the queue; a subsequent `start` would then immediately
352
+ terminate the new thread upon popping the stale sentinel. The sentinel is now treated as a
353
+ pure unblock signal for `queue.pop` (`next` instead of `break`) — loop termination is
354
+ driven solely by `@running`.
355
+
356
+ - **`trace_pii: false` now redacts both input and output**: Previously only the user input
357
+ was redacted when `trace_pii` was `false`; LLM responses and tool results were still
358
+ forwarded to the tracing backend unredacted. Both sides are now replaced with `[REDACTED]`.
359
+
360
+ - **`StdioTransport` — `read_timeout` prevents indefinite blocking**: A configurable
361
+ `read_timeout` (default 30 s) is now enforced on MCP stdio reads. A silent child process
362
+ could previously block the calling thread forever.
363
+
364
+ - **MCP schema `required` and `enum` constraints propagated to `param` DSL**:
365
+ `McpTool.from_server` now copies `required` and `enum` constraints from the MCP JSON Schema
366
+ into the generated `param` declarations so downstream validation sees them.
367
+
368
+ - **`FSMSession` notifies parent when child `AgentFSM` fails**: An unhandled error in a child
369
+ `AgentFSM` now correctly notifies the parent `FSMSession`, preventing it from waiting
370
+ indefinitely for a completion event that will never arrive.
371
+
372
+ - **`WorkflowContext.field` rejects plain `Array` or `Hash` defaults**: Passing a plain `Array`
373
+ or `Hash` as a field default now raises `ArgumentError` at class-definition time,
374
+ preventing accidental state sharing across workflow invocations. Other mutable objects
375
+ are not checked. Wrap collection defaults in a Proc: `default: -> { [] }`.
376
+
377
+ - **Tool aliases inherited by `Agent` subclasses**: `tool_aliases` declared in a parent
378
+ `Agent::Base` subclass are now correctly merged into subclasses rather than being silently
379
+ dropped.
380
+
381
+ - **`ReactAgent` output selection skips tool-role messages**: The final output selection
382
+ logic no longer misidentifies `tool`-role messages as the assistant response, fixing
383
+ spurious tool-call JSON appearing in `result[:output]`.
384
+
385
+ - **Thread-local context cache cleaned up after each `invoke`** (#128): `Agent::Base#invoke`
386
+ previously leaked thread-local context cache entries after each call, causing stale cache
387
+ hits in long-lived threads. The cache is now cleared in an `ensure` block.
388
+
389
+ - **Unknown tool parameters are rejected** (#130): `Tool::Base#call` now raises
390
+ `ArgumentError` when keyword arguments not declared via the `param` DSL are passed, instead
391
+ of forwarding them silently to `execute`.
392
+
393
+ - **`EventLoop#stop` uses cooperative shutdown instead of `Thread#kill`** (#135):
394
+ `Thread#kill` can interrupt the worker at an arbitrary point (even part-way through an `ensure` block) and is unsafe. The event loop now sets a sentinel
395
+ flag and joins the worker thread, allowing it to flush pending events before termination.
396
+
397
+ - **`Orchestrator` propagates parent `config` and `thread_id` to sub-agents** (#132):
398
+ Sub-agents spawned via `dispatch` or `dispatch_parallel` now inherit the caller's `config`
399
+ hash and `thread_id`, enabling correct memory isolation and distributed tracing in
400
+ multi-agent pipelines.
401
+
402
+ - **`Agent::Base` caches `static_knowledge` fetch at the class level** (#127): The RAG
403
+ knowledge fetch was re-executed on every `invoke`. The result is now memoized at the class
404
+ level (`@static_knowledge_chunks ||= ...`), eliminating redundant vector-store queries.
405
+ The cache is **not** invalidated automatically when source content changes; call
406
+ `static_knowledge_refresh!` explicitly to force a reload.
407
+
408
+ - **`WorkflowContext#initialize` raises on unknown field keys** (#121): Passing an
409
+ unrecognised key to `WorkflowContext.new` was silently ignored. The constructor now raises
410
+ `ArgumentError`, surfacing typos and API mismatches immediately.
411
+
412
+ - **`WorkflowContext#merge` raises `ArgumentError` for unknown field keys** (#154): Passing
413
+ an unrecognised key to `WorkflowContext#merge` was silently ignored. The method now raises
414
+ `ArgumentError`, matching the guard added to `#initialize` in #121.
415
+
416
+ - **`WorkflowContext#deep_dup_value` rescues `TypeError` for non-dupable objects** (#156):
417
+ Objects that raise `TypeError` from `#dup` (e.g. `Method`, frozen `Proc`, `Integer`,
418
+ `Symbol`) are now returned as-is instead of crashing.
419
+
420
+ - **`Workflow.define` raises for undefined `from:` state in transitions** (#157): Transitions
421
+ that reference a `from:` state not declared in the DSL now raise `ArgumentError` at
422
+ build time, complementing the existing check for undefined `to:` targets.
423
+
424
+ - **`Workflow.define` unreachable-state warning routes through configured logger** (#158):
425
+ The diagnostic warning for unreachable states now uses `Phronomy.configuration.logger`
426
+ when set, falling back to `Kernel#warn`. Previously the warning always went to `$stderr`.
427
+
428
+ - **`require "set"` added to `workflow.rb`** (#159): Eliminates an implicit dependency on
429
+ `Set` being pre-loaded by another gem.
430
+
431
+ - **`Tool::Base#validate_nested_object` rejects undeclared extra keys** (#166): Keys present
432
+ in the LLM-supplied hash but absent from the tool's nested `param` schema now produce a
433
+ validation error rather than being silently forwarded.
434
+
435
+ - **`WorkflowContext#merge` deep-copies unchanged fields** (#123): Fields absent from the
436
+ `merge` argument were previously shared by reference with the original context, allowing
437
+ one branch to mutate another branch's state. All fields are now independently copied.
438
+
439
+ - **Robust metadata parsing in `VectorStore::Pgvector#search`** (#139): Metadata stored as a
440
+ PostgreSQL JSON string is now parsed correctly regardless of whether the database driver
441
+ returns a `String` or an already-decoded `Hash`.
442
+
443
+ - **`OutputParser::JsonParser` tries all fenced code blocks before falling back** (#146):
444
+ The parser now scans every fenced block in the LLM response (in order) and returns the
445
+ first one that parses as valid JSON, rather than only checking the first block. This
446
+ improves reliability with models that emit prose or other non-JSON fenced blocks before the JSON block.
447
+
448
+ - **`on_error :return_empty` emits a warning and returns a descriptive string** (#147):
449
+ Errors in tools that declare `on_error :return_empty` are now logged to `warn` before the
450
+ tool returns. The placeholder string includes the tool name and a brief reason, making
451
+ silent failures easier to diagnose.
452
+
453
+ - **`context_version_cache` accessible after `invoke` completes**: The thread-local cache is
454
+ cleared in `invoke`'s `ensure` block, which caused `context_version_cache` to return `nil`
455
+ immediately after every call. The value is now persisted in `@last_context_version_cache`
456
+ so it remains readable post-invoke.
457
+
458
+ - **`WorkflowContext` field type `:merge` comment corrected**: The inline comment incorrectly
459
+ described `:merge` as a deep-merge. It performs a shallow merge (`Hash#merge`). The comment
460
+ has been updated.
461
+
462
+ - **`WorkflowContext` return value from entry actions now adopted in EventLoop mode** (#107):
463
+ `FSMSession` previously discarded the `WorkflowContext` returned by entry action callables,
464
+ causing `s.merge(...)` updates to be silently lost when `event_loop = true`. The context is
465
+ now correctly propagated, bringing EventLoop semantics in line with the synchronous
466
+ `WorkflowRunner`. Regression tests added in `spec/phronomy/fsm_session_spec.rb` (unit)
467
+ and `spec/integration/workflow_spec.rb` (integration, both sync and EventLoop paths).
468
+
469
+ ### Documentation
470
+
471
+ - **`trace_pii = false` description corrected** (#153): The inline comment and README Note
472
+ now correctly state that both the input and the output are redacted.
473
+
474
+ - **`invoke_timeout` is a wait timeout, not cancellation** (#163): YARD comment now
475
+ explicitly documents that the background agent thread and in-flight LLM/tool calls are
476
+ **not** interrupted when the timeout fires. Only the caller receives `TimeoutError`.
477
+
478
+ - **`context_version_cache` thread-safety limitation documented** (#161): A NOTE in the YARD
479
+ comment explains that the per-instance cache is not thread-safe when the same agent
480
+ instance is shared across threads.
481
+
482
+ - **`trace_pii` option documented in README**: The `trace_pii:` configuration key and its
483
+ behaviour (default `false`, redacts input and output in trace records) is now described in
484
+ the Configuration section of the README.
485
+
486
+ - **CJK token under-count warning in `TokenEstimator`**: A note in both the source and README
487
+ explains that the byte-based heuristic under-counts CJK characters by roughly 3×. Users
488
+ processing Chinese, Japanese, or Korean content should apply a correction factor or use a
489
+ model-specific tokenizer.
490
+
491
+ - **Stability labels, `reset_configuration!` caveat, CI, and gemspec** (#140 / #141 / #142 / #143 / #148 / #150):
492
+ README stability table revised for several APIs. `Phronomy.reset_configuration!` now carries
493
+ a warning that it is intended for test isolation only. Gemspec upper bounds added for
494
+ `ruby_llm` and `pg`. `ruby head` added to the CI test matrix. README API smoke tests added.
495
+
496
+ ---
497
+
498
+ ## [0.6.0] - 2026-05-21
499
+
12
500
  ### Removed
13
501
 
14
502
  - **`Phronomy::Guardrail::Builtin` module removed**: `PromptInjectionDetector`
data/CONTRIBUTING.md ADDED
@@ -0,0 +1,102 @@
1
+ # Contributing to phronomy
2
+
3
+ Thank you for your interest in contributing!
4
+
5
+ ---
6
+
7
+ ## Development Setup
8
+
9
+ ```bash
10
+ git clone https://github.com/Raizo-TCS/phronomy.git
11
+ cd phronomy
12
+ bundle install
13
+ ```
14
+
15
+ Run the test suite:
16
+
17
+ ```bash
18
+ bundle exec rspec --format documentation
19
+ bundle exec rspec --tag integration
20
+ ```
21
+
22
+ Run the linter:
23
+
24
+ ```bash
25
+ bundle exec standardrb
26
+ ```
27
+
28
+ Check that no Japanese characters appear in source files:
29
+
30
+ ```bash
31
+ ruby scripts/check_japanese.rb
32
+ ```
33
+
34
+ ---
35
+
36
+ ## Code Style
37
+
38
+ - All source files under `lib/` and `spec/` must begin with `# frozen_string_literal: true`.
39
+ - All comments, error messages (`raise`), and YARD documentation inside source files must be in **English**.
40
+ - Follow [Ruby Standard Style](https://github.com/standardrb/standard) (`standardrb`).
41
+
42
+ ---
43
+
44
+ ## Public API Changes
45
+
46
+ When adding, removing, or renaming a public method or class:
47
+
48
+ 1. Update the stability table in `README.md`.
49
+ 2. Add or update `@api private` YARD annotations for internal APIs.
50
+ 3. Regenerate the API compatibility snapshot:
51
+ ```bash
52
+ bundle exec ruby scripts/api_snapshot.rb --write
53
+ ```
54
+
55
+ ---
56
+
57
+ ## Architecture Decision Records
58
+
59
+ Key design decisions are documented as ADRs in
60
+ [docs/decisions/](docs/decisions/). Read these before making significant changes
61
+ to the threading model, caching strategy, or public API shape.
62
+
63
+ ---
64
+
65
+ ## Mutation Testing
66
+
67
+ Phronomy uses [mutant](https://github.com/mbj/mutant) to verify that each test
68
+ actually detects real code changes. Mutation tests are **not** part of the
69
+ required CI gate (they are slow), but they run nightly via `.github/workflows/nightly-mutation.yml`.
70
+
71
+ ### Run mutation tests locally
72
+
73
+ ```bash
74
+ # All subjects defined in .mutant.yml
75
+ bash scripts/run_mutation.sh
76
+
77
+ # Single subject
78
+ bash scripts/run_mutation.sh "Phronomy::WorkflowContext"
79
+ ```
80
+
81
+ ### Coverage targets
82
+
83
+ | Subject | Baseline | Target |
84
+ |---|---|---|
85
+ | `Phronomy::WorkflowContext` | 84.85% | ≥ 80% |
86
+ | `Phronomy::WorkflowRunner` | — | ≥ 80% |
87
+ | `Phronomy::Tool::Base` | 55.74% | ≥ 80% |
88
+ | `Phronomy::Context::TokenBudget` | — | ≥ 80% |
89
+ | `Phronomy::VectorStore::InMemory` | — | ≥ 80% |
90
+
91
+ When you add or modify tests for a covered subject, run mutation tests to confirm
92
+ the score does not regress.
93
+
94
+ ---
95
+
96
+ ## Releasing
97
+
98
+ See [RELEASE_CHECKLIST.md](RELEASE_CHECKLIST.md) for the full pre-release quality
99
+ gate and step-by-step release instructions.
100
+
101
+ **Never run `gem push` directly.** Releases are published via the GitHub Actions
102
+ `release.yml` workflow.