phronomy 0.5.4 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +21 -0
  3. data/CHANGELOG.md +379 -0
  4. data/CONTRIBUTING.md +102 -0
  5. data/README.md +262 -48
  6. data/RELEASE_CHECKLIST.md +86 -0
  7. data/SECURITY.md +80 -0
  8. data/benchmark/baseline.json +9 -0
  9. data/benchmark/bench_agent_invoke.rb +105 -0
  10. data/benchmark/bench_context_assembler.rb +46 -0
  11. data/benchmark/bench_regression.rb +171 -0
  12. data/benchmark/bench_token_estimator.rb +44 -0
  13. data/benchmark/bench_tool_schema.rb +69 -0
  14. data/benchmark/bench_vector_store.rb +39 -0
  15. data/benchmark/bench_workflow.rb +55 -0
  16. data/benchmark/run_all.rb +118 -0
  17. data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
  18. data/docs/decisions/002-workflow-context-immutability.md +42 -0
  19. data/docs/decisions/003-event-loop-singleton.md +48 -0
  20. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +51 -0
  21. data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
  22. data/docs/decisions/006-no-built-in-guardrails.md +48 -0
  23. data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
  24. data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
  25. data/docs/decisions/009-state-store-abstraction.md +141 -0
  26. data/lib/phronomy/agent/base.rb +281 -13
  27. data/lib/phronomy/agent/before_completion_context.rb +1 -0
  28. data/lib/phronomy/agent/checkpoint.rb +1 -0
  29. data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
  30. data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
  31. data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
  32. data/lib/phronomy/agent/concerns/retryable.rb +12 -1
  33. data/lib/phronomy/agent/concerns/suspendable.rb +4 -0
  34. data/lib/phronomy/agent/fsm.rb +180 -0
  35. data/lib/phronomy/agent/handoff.rb +3 -0
  36. data/lib/phronomy/agent/orchestrator.rb +123 -11
  37. data/lib/phronomy/agent/parallel_tool_chat.rb +92 -0
  38. data/lib/phronomy/agent/react_agent.rb +8 -6
  39. data/lib/phronomy/agent/runner.rb +2 -0
  40. data/lib/phronomy/agent/shared_state.rb +11 -0
  41. data/lib/phronomy/agent/suspend_signal.rb +2 -0
  42. data/lib/phronomy/agent/team_coordinator.rb +17 -5
  43. data/lib/phronomy/cancellation_token.rb +92 -0
  44. data/lib/phronomy/configuration.rb +32 -2
  45. data/lib/phronomy/context/assembler.rb +6 -0
  46. data/lib/phronomy/context/compaction_context.rb +2 -0
  47. data/lib/phronomy/context/context_version_cache.rb +2 -0
  48. data/lib/phronomy/context/token_budget.rb +3 -0
  49. data/lib/phronomy/context/token_estimator.rb +9 -2
  50. data/lib/phronomy/context/trigger_context.rb +1 -0
  51. data/lib/phronomy/context/trim_context.rb +4 -0
  52. data/lib/phronomy/context.rb +0 -1
  53. data/lib/phronomy/embeddings/base.rb +5 -2
  54. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
  55. data/lib/phronomy/eval/comparison.rb +2 -0
  56. data/lib/phronomy/eval/dataset.rb +4 -0
  57. data/lib/phronomy/eval/metrics.rb +6 -0
  58. data/lib/phronomy/eval/runner.rb +2 -0
  59. data/lib/phronomy/eval/scorer/base.rb +1 -0
  60. data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
  61. data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
  62. data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
  63. data/lib/phronomy/event.rb +14 -0
  64. data/lib/phronomy/event_loop.rb +254 -0
  65. data/lib/phronomy/fsm_session.rb +201 -0
  66. data/lib/phronomy/generator_verifier.rb +24 -22
  67. data/lib/phronomy/guardrail/base.rb +3 -0
  68. data/lib/phronomy/guardrail.rb +0 -1
  69. data/lib/phronomy/knowledge_source/base.rb +6 -2
  70. data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
  71. data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
  72. data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
  73. data/lib/phronomy/loader/base.rb +1 -0
  74. data/lib/phronomy/loader/csv_loader.rb +2 -0
  75. data/lib/phronomy/loader/markdown_loader.rb +2 -0
  76. data/lib/phronomy/loader/plain_text_loader.rb +1 -0
  77. data/lib/phronomy/output_parser/base.rb +1 -0
  78. data/lib/phronomy/output_parser/json_parser.rb +22 -3
  79. data/lib/phronomy/output_parser/structured_parser.rb +2 -0
  80. data/lib/phronomy/prompt_template.rb +5 -0
  81. data/lib/phronomy/runnable.rb +20 -3
  82. data/lib/phronomy/splitter/base.rb +2 -0
  83. data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
  84. data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
  85. data/lib/phronomy/state_store/base.rb +48 -0
  86. data/lib/phronomy/state_store/in_memory.rb +62 -0
  87. data/lib/phronomy/tool/agent_tool.rb +1 -0
  88. data/lib/phronomy/tool/base.rb +189 -27
  89. data/lib/phronomy/tool/mcp_tool.rb +68 -13
  90. data/lib/phronomy/tracing/base.rb +3 -0
  91. data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
  92. data/lib/phronomy/tracing/open_telemetry_tracer.rb +2 -0
  93. data/lib/phronomy/vector_store/base.rb +33 -7
  94. data/lib/phronomy/vector_store/in_memory.rb +16 -7
  95. data/lib/phronomy/vector_store/pgvector.rb +40 -9
  96. data/lib/phronomy/vector_store/redis_search.rb +29 -8
  97. data/lib/phronomy/version.rb +1 -1
  98. data/lib/phronomy/workflow.rb +175 -74
  99. data/lib/phronomy/workflow_context.rb +55 -5
  100. data/lib/phronomy/workflow_runner.rb +197 -114
  101. data/lib/phronomy.rb +74 -1
  102. data/scripts/api_snapshot.rb +91 -0
  103. data/scripts/check_api_annotations.rb +68 -0
  104. data/scripts/check_private_enforcement.rb +93 -0
  105. data/scripts/check_readme_runnable.rb +98 -0
  106. data/scripts/run_mutation.sh +46 -0
  107. metadata +50 -6
  108. data/lib/phronomy/context/builder.rb +0 -92
  109. data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +0 -100
  110. data/lib/phronomy/guardrail/builtin/prompt_injection_detector.rb +0 -67
  111. data/lib/phronomy/guardrail/builtin.rb +0 -16
data/README.md CHANGED
@@ -1,32 +1,52 @@
1
1
  # Phronomy
2
2
 
3
+ > **⚠️ Development Notice**
4
+ > This project is primarily developed and maintained by **AI coding agents**.
5
+ > As a result, `main` receives frequent, large, and unannounced changes.
6
+ > External contributors should expect significant churn and potential conflicts at any time.
7
+ > We apologise for the instability this may cause.
8
+
3
9
  **Phronomy** is a Ruby AI agent framework inspired by open-source AI agent frameworks.
4
10
  It provides composable building blocks — Workflows, Agents, Tools, Guardrails, RAG, and Tracing — all powered by [RubyLLM](https://github.com/crmne/ruby_llm) for LLM abstraction.
5
11
 
6
12
  ## Features
7
13
 
8
- > **Stability labels**: `Stable` production-ready, semver-protected API.
9
- > `Beta` functional but the API may change in a minor release.
10
- > `Experimental` — subject to breaking changes without notice.
14
+ > **Stability labels** (phronomy is pre-1.0, so `0.x` minor releases may include
15
+ > breaking changes even to `Stable` APIs; patch releases (`0.x.y`) are non-breaking):
16
+ > - `Stable` — API is considered complete and suitable for production use. Breaking changes
17
+ > within a minor series (i.e. in patch releases) are avoided, and any breaking changes in a minor bump are noted
18
+ > in `CHANGELOG.md`.
19
+ > - `Beta` — Functionality is complete and tested, but the API may change in a minor version release (0.x). Use with awareness that signatures or behaviour may evolve.
20
+ > - `Experimental` — Functionality may be incomplete or subject to breaking changes at any time without notice. Not recommended for production use.
21
+ >
22
+ > **Note**: The `main` branch contains unreleased development work. Pin to a released gem
23
+ > version (e.g. `gem "phronomy", "~> 0.7.0"`, which allows patch updates only) for stability in production.
11
24
 
12
25
  | Feature | Stability |
13
26
  |---|---|
14
27
  | **Workflow** — Stateful, branching workflows with wait_state/send_event | Stable |
28
+ | **Workflow EventLoop Mode** — Opt-in event-driven execution: `Phronomy.configure { \|c\| c.event_loop = true }` | Experimental |
29
+ | **Agent EventLoop Mode** — `Agent#invoke` (non-blocking via EventLoop), `Agent#run_as_child` (child-FSM pattern for Workflow integration), parallel tool dispatch via `ParallelToolChat` | Experimental |
15
30
  | **Workflow Parallel Node** — Concurrent branches via application-level threads | Beta |
16
31
  | **Agent** — ReAct-style tool-calling agents with guardrails and conversation history | Stable |
17
32
  | **Before-Completion Hook** — Three-tier LLM parameter injection | Stable |
18
33
  | **Context Management** — Token budget calculation, estimation, and pruning | Stable |
19
- | **Knowledge/RAG** — Retrieval sources with pluggable loaders, splitters, and vector stores | Beta |
34
+ | **Knowledge/RAG** — Retrieval sources with pluggable loaders, splitters, and vector stores; `static_knowledge_refresh!` for runtime cache invalidation | Beta |
35
+ | **`VectorStore#size`** — Returns document count for all three backends (InMemory, RedisSearch, Pgvector) | Beta |
20
36
  | **Multi-agent** — Agent-as-Tool pattern and hub-and-spoke handoff routing | Beta |
21
37
  | **GeneratorVerifier** — Generator-Verifier loop with injectable prompt builders/parsers | Beta |
22
38
  | **Agent::Orchestrator** — Parallel subagent dispatch, fan-out, and `subagent` DSL | Beta |
23
- | **Agent::TeamCoordinator** — Agent teams pattern: LLM coordinator + stateful worker pool with task queue (worker-local message history per run) | Beta |
39
+ | **Agent::TeamCoordinator** — Agent teams pattern: LLM coordinator + stateful workers with sequential task assignment (worker-local message history persisted across tasks) | Beta |
24
40
  | **Agent::SharedState** — Shared state pattern: peer agents collaborate via a shared KnowledgeStore; `member` DSL with per-agent instructions and `coordination` team protocol | Experimental |
25
- | **Guardrails** — Input/output validation; built-in PII and prompt-injection detectors | Beta |
41
+ | **Guardrails** — Input/output validation with custom `InputGuardrail`/`OutputGuardrail` | Beta |
26
42
  | **Output Parser** — JSON and Struct-mapped parsers for structured LLM responses | Stable |
27
43
  | **Eval Framework** — Dataset-driven evaluation with multiple scorer types | Beta |
28
44
  | **Tracing** — Pluggable span-based observability | Stable |
29
45
  | **MCP Tool** — Model Context Protocol server integration | Beta |
46
+ | **Error Taxonomy** — `RateLimitError`, `AuthenticationError`, `ContextLengthError`, `TransportError` (subclasses of `Phronomy::Error`) raised at the agent retry boundary | Beta |
47
+ | **`Phronomy.with_configuration` / `Phronomy.reset_runtime!`** — Scoped configuration override and full runtime reset for test isolation | Beta |
48
+ | **CancellationToken** — Cooperative cancellation via `cancel!`/`cancelled?`/`raise_if_cancelled!`; `timeout_after(seconds)` for monotonic-clock deadlines; optional `deadline:` (wall-clock) for backward compatibility; passed as `config: { cancellation_token: token }` to agents and `dispatch_parallel`; injected into `tool.execute` when the method declares a `cancellation_token:` keyword | Experimental |
49
+ | **`dispatch_parallel` / `fan_out` `force_kill:` option** — `force_kill: false` (default) leaves timed-out workers running and raises `TimeoutError` immediately; `force_kill: true` restores the old `Thread#kill` behaviour with a `logger.warn` | Beta |
30
50
 
31
51
  ## Installation
32
52
 
@@ -42,17 +62,42 @@ Then run:
42
62
  bundle install
43
63
  ```
44
64
 
65
+ ### RubyLLM setup
66
+
67
+ Phronomy uses [RubyLLM](https://github.com/crmne/ruby_llm) for LLM access.
68
+ Configure your provider credentials before using agents or chains:
69
+
70
+ ```ruby
71
+ RubyLLM.configure do |c|
72
+ c.openai_api_key = ENV["OPENAI_API_KEY"]
73
+ # c.anthropic_api_key = ENV["ANTHROPIC_API_KEY"]
74
+ end
75
+ ```
76
+
77
+ See the [RubyLLM documentation](https://rubyllm.com) for all supported providers.
78
+
79
+ ### Optional dependencies
80
+
81
+ Install additional gems only for the features you use:
82
+
83
+ | Gem | Required for |
84
+ |-----|-------------|
85
+ | `pgvector` | `Phronomy::VectorStore::Pgvector` |
86
+ | `redis` | `Phronomy::VectorStore::RedisSearch` |
87
+ | `opentelemetry-api` | `Phronomy::Tracing::OpenTelemetryTracer` |
88
+
45
89
  ## Quick Start
46
90
 
47
91
  ### Agent — ReAct tool-calling agent
48
92
 
49
- ```ruby
93
+ ```ruby runnable
50
94
  class WebSearch < Phronomy::Tool::Base
51
95
  description "Search the web"
52
96
  param :query, type: :string, desc: "Search query"
53
97
 
54
98
  def execute(query:)
55
- # ... call a search API
99
+ # Replace with a real search API call (e.g., SerpAPI, Tavily)
100
+ "Mock search result for: #{query}"
56
101
  end
57
102
  end
58
103
 
@@ -69,7 +114,7 @@ puts result[:output]
69
114
 
70
115
  ### Workflow — Stateful workflow with wait_state/send_event
71
116
 
72
- ```ruby
117
+ ```ruby runnable
73
118
  class ReviewContext
74
119
  include Phronomy::WorkflowContext
75
120
  field :draft, type: :replace
@@ -77,17 +122,21 @@ class ReviewContext
77
122
  field :approved, type: :replace, default: false
78
123
  end
79
124
 
125
+ # Placeholder callables representing your own implementation
126
+ write_draft = ->(state) { state.merge(draft: "Draft content here") }
127
+ review_draft = ->(state) { state.merge(feedback: "Feedback on: #{state.draft}") }
128
+
80
129
  app = Phronomy::Workflow.define(ReviewContext) do
81
130
  initial :write
82
- state :write, action: ->(s) { s.merge(draft: Writer.call(s)) }
83
- state :review, action: ->(s) { s.merge(feedback: Reviewer.call(s.draft)) }
131
+ state :write, action: write_draft
132
+ state :review, action: review_draft
84
133
  wait_state :awaiting_approval # halts here for human decision
85
134
  state :finalize, action: ->(s) { s.merge(approved: true) }
86
- after :write, to: :review
87
- after :review, to: :awaiting_approval
88
- after :finalize, to: :__finish__
89
- event :approve, from: :awaiting_approval, to: :finalize
90
- event :reject, from: :awaiting_approval, to: :write
135
+ transition from: :write, to: :review
136
+ transition from: :review, to: :awaiting_approval
137
+ transition from: :finalize, to: :__finish__
138
+ transition from: :awaiting_approval, on: :approve, to: :finalize
139
+ transition from: :awaiting_approval, on: :reject, to: :write
91
140
  end
92
141
 
93
142
  # First run — halts at :awaiting_approval
@@ -100,6 +149,19 @@ final = app.send_event(state: state, event: :approve)
100
149
  puts "Approved: #{final.approved}" # => true
101
150
  ```
102
151
 
152
+ In EventLoop mode (`c.event_loop = true`), `Agent#run_as_child` spawns a child agent
153
+ asynchronously. When the child succeeds, `:child_completed` is dispatched; when it fails,
154
+ `:child_failed` is dispatched. Always declare both transitions to avoid a stuck workflow:
155
+
156
+ ```ruby
157
+ # EventLoop mode: workflow that runs an agent as a child FSM
158
+ entry :run_agent, ->(ctx) {
159
+ MyAgent.new.run_as_child(ctx.query, ctx: ctx) { |r| ctx.answer = r[:output] }
160
+ }
161
+ transition from: :run_agent, on: :child_completed, to: :done
162
+ transition from: :run_agent, on: :child_failed, to: :handle_error
163
+ ```
164
+
103
165
  ### Multi-Agent — Agent-as-Tool pattern
104
166
 
105
167
  Wrap sub-agents as `Tool::Base` subclasses so the orchestrator LLM can call them on demand.
@@ -114,6 +176,11 @@ class ResearchTool < Phronomy::Tool::Base
114
176
  end
115
177
  end
116
178
 
179
+ class WriterAgent < Phronomy::Agent::Base
180
+ model "gpt-4o"
181
+ instructions "You are a professional technical writer."
182
+ end
183
+
117
184
  class WriteTool < Phronomy::Tool::Base
118
185
  description "Write a technical blog post given research notes and a writing brief."
119
186
  param :instructions, type: :string, desc: "Writing brief including research notes"
@@ -135,6 +202,9 @@ puts result[:output]
135
202
 
136
203
  ### Guardrails — Input/output validation
137
204
 
205
+ Call `fail!(reason)` inside `check` to reject — it raises `Phronomy::GuardrailError`.
206
+ When a guardrail rejects, `invoke` raises instead of returning an output.
207
+
138
208
  ```ruby
139
209
  class NoSensitiveDataGuardrail < Phronomy::Guardrail::InputGuardrail
140
210
  def check(input)
@@ -144,18 +214,19 @@ end
144
214
 
145
215
  agent = ResearchAgent.new
146
216
  agent.add_input_guardrail(NoSensitiveDataGuardrail.new)
147
- ```
148
-
149
- ### Built-in Guardrails — PII and prompt injection detection
150
-
151
- ```ruby
152
- # Detect SSNs, credit cards, emails, and phone numbers
153
- agent.add_input_guardrail(Phronomy::Guardrail::Builtin::PIIPatternDetector.new)
154
217
 
155
- # Block common prompt-injection attempts
156
- agent.add_input_guardrail(Phronomy::Guardrail::Builtin::PromptInjectionDetector.new)
218
+ begin
219
+ agent.invoke("Charge 4111-1111-1111-1111")
220
+ rescue Phronomy::GuardrailError => e
221
+ puts e.message # => "Credit card numbers are not allowed"
222
+ end
157
223
  ```
158
224
 
225
+ > **Limitations:** Phronomy ships no built-in guardrail implementations. There is no
226
+ > built-in prompt injection detector, PII scanner, or content classifier. All guardrail
227
+ > logic must be implemented by the application. Reference implementations for common
228
+ > patterns are available in `phronomy-examples` (example 06).
229
+
159
230
  ### Knowledge/RAG — Context injection and vector retrieval
160
231
 
161
232
  ```ruby
@@ -169,6 +240,13 @@ policy = Phronomy::KnowledgeSource::StaticKnowledge.new(
169
240
  # RAG retrieval from a vector store
170
241
  store = Phronomy::VectorStore::InMemory.new
171
242
  embeddings = Phronomy::Embeddings::RubyLLMEmbeddings.new(model: "text-embedding-3-small")
243
+
244
+ # Add documents before querying
245
+ text1 = "Refunds are processed within 5 business days."
246
+ text2 = "Contact support@example.com for refund requests."
247
+ store.add(id: "doc-1", embedding: embeddings.embed(text1), metadata: { content: text1, source: "policy.md" })
248
+ store.add(id: "doc-2", embedding: embeddings.embed(text2), metadata: { content: text2, source: "policy.md" })
249
+
172
250
  rag = Phronomy::KnowledgeSource::RAGKnowledge.new(store: store, embeddings: embeddings, k: 5)
173
251
 
174
252
  # Inject at invocation time
@@ -176,6 +254,15 @@ result = MyAgent.new.invoke("What is the refund policy?",
176
254
  config: { knowledge_sources: [policy, rag] })
177
255
  ```
178
256
 
257
+ `static_knowledge_refresh!` invalidates the class-level cache of *static* knowledge sources
258
+ (not RAG stores). Call it when the underlying file or content has changed:
259
+
260
+ ```ruby
261
+ # Static knowledge sources are cached at the class level after the first fetch.
262
+ # Call static_knowledge_refresh! when the underlying content changes (e.g. after reloading policy.md).
263
+ MyAgent.static_knowledge_refresh!
264
+ ```
265
+
179
266
  Load and split documents with built-in loaders:
180
267
 
181
268
  ```ruby
@@ -219,7 +306,7 @@ Phronomy.configure do |c|
219
306
  end
220
307
  ```
221
308
 
222
- Hooks are called in order — global → class → instance — and deep-merged.
309
+ Hooks are called in order — global → class → instance — and shallow-merged (`Hash#merge`; last hook wins on key conflicts).
223
310
 
224
311
  ### GeneratorVerifier — Generator-Verifier loop with custom prompt builders
225
312
 
@@ -271,10 +358,11 @@ end
271
358
 
272
359
  ### Agent::Orchestrator — Parallel subagent dispatch
273
360
 
274
- > **Note:** `dispatch_parallel` and `fan_out` use plain Ruby threads and are
275
- > intended for small-scale fan-out (a handful of subagents). For large-scale
276
- > parallel dispatch, manage concurrency (thread pools, rate limiting) at the
277
- > application level.
361
+ > **Note:** `dispatch_parallel` and `fan_out` use plain Ruby threads. Use
362
+ > `max_concurrency:` to cap the number of concurrent workers and `on_error:`
363
+ > to control failure handling (`:raise` re-raises the first error after all
364
+ > tasks complete; `:skip` fills failed slots with `nil`). For very large
365
+ > fan-outs consider additional rate-limiting at the application level.
278
366
 
279
367
  ```ruby
280
368
  class ResearchOrchestrator < Phronomy::Agent::Orchestrator
@@ -297,16 +385,24 @@ class MyOrchestrator < Phronomy::Agent::Orchestrator
297
385
  instructions "Orchestrate."
298
386
 
299
387
  def run(query)
300
- # Heterogeneous agents in parallel
388
+ # Heterogeneous agents in parallel (cap at 4 threads; skip failures; 30 s timeout)
301
389
  results = dispatch_parallel(
302
390
  {agent: SearchAgent, input: "topic A"},
303
- {agent: AnalysisAgent, input: query}
391
+ {agent: AnalysisAgent, input: query},
392
+ max_concurrency: 4,
393
+ on_error: :skip,
394
+ timeout: 30
304
395
  )
305
396
 
306
397
  # Fan-out — same agent, multiple inputs
307
- translations = fan_out(agent: TranslationAgent, inputs: %w[Hello World])
398
+ translations = fan_out(
399
+ agent: TranslationAgent,
400
+ inputs: %w[Hello World],
401
+ max_concurrency: 2,
402
+ timeout: 20
403
+ )
308
404
 
309
- results.map { |r| r[:output] }.join("\n")
405
+ results.compact.map { |r| r[:output] }.join("\n")
310
406
  end
311
407
  end
312
408
  ```
@@ -325,15 +421,18 @@ end
325
421
  app = Phronomy::Workflow.define(EnrichContext) do
326
422
  initial :enrich
327
423
  state :enrich, action: ->(s) do
328
- results = {}
329
- threads = [
330
- Thread.new { results[:summary] = Summarizer.call(s) },
331
- Thread.new { results[:tags] = Tagger.call(s) }
332
- ]
333
- threads.each { |t| t.join(10) } # 10-second timeout
334
- s.merge(summary: results[:summary], tags: Array(results[:tags]))
424
+ # Use Thread#value to collect results safely — avoids concurrent Hash writes
425
+ threads = {
426
+ summary: Thread.new { Summarizer.call(s) },
427
+ tags: Thread.new { Tagger.call(s) }
428
+ }
429
+ # For production use, wrap with Timeout.timeout to avoid unbounded waits:
430
+ # require "timeout"
431
+ # Timeout.timeout(30) { threads.each_value(&:join) }
432
+ threads.each_value(&:join)
433
+ s.merge(summary: threads[:summary].value, tags: Array(threads[:tags].value))
335
434
  end
336
- after :enrich, to: :__finish__
435
+ transition from: :enrich, to: :__finish__
337
436
  end
338
437
 
339
438
  state = app.invoke({}, config: { thread_id: "t1" })
@@ -420,22 +519,38 @@ puts result2[:output] # => "Your name is Alice."
420
519
  `result[:messages]` contains the complete message history after each invocation.
421
520
  Persist it however suits your application (in-memory hash, Redis, ActiveRecord, etc.).
422
521
 
522
+ > **Note on `thread_id`**: `thread_id` is a correlation identifier used internally for
523
+ > checkpoint/compaction context and EventLoop routing. It does **not** automatically persist or
524
+ > restore conversation history — you must pass `messages:` explicitly on each turn as shown above.
525
+
423
526
 
424
527
  ## Configuration
425
528
 
426
529
  ```ruby
427
530
  Phronomy.configure do |c|
428
- c.default_model = "gpt-4o-mini"
429
- c.recursion_limit = 25
430
- c.tracer = Phronomy::Tracing::NullTracer.new
431
- c.before_completion = nil # optional; global hook lambda
531
+ c.default_model = "gpt-4o-mini"
532
+ c.recursion_limit = 25
533
+ c.tracer = Phronomy::Tracing::NullTracer.new
534
+ c.before_completion = nil # optional; global hook lambda
535
+ c.trace_pii = false # default; set to true only when trace data contains no PII
536
+ c.logger = nil # optional; any object responding to #warn (e.g. Rails.logger)
537
+ c.event_loop_stop_grace_seconds = 5 # seconds to wait for sessions to drain on EventLoop#stop(drain: true)
432
538
  end
433
539
  ```
434
540
 
541
+ `c.logger` receives framework diagnostic messages (e.g. unreachable-state warnings from
542
+ `Workflow.define`). When `nil` (default), messages are written to `$stderr` via `Kernel#warn`.
543
+
544
+ > **Note**: When `trace_pii = false`, both the _input_ and the _output_ (LLM
545
+ > responses and tool results) are replaced with `[REDACTED]` in trace spans.
546
+ > The default is `false` (PII protection enabled). Set to `true` only when
547
+ > trace data does not contain sensitive information.
548
+
435
549
  ## Context Management
436
550
 
437
- Phronomy includes a context window management layer so agents automatically
438
- stay within the token limits of the underlying model.
551
+ Phronomy includes a context window management layer. When model metadata is
552
+ available (either from the built-in registry or via an explicit `context_window:` setting),
553
+ agents automatically stay within the configured token limit.
439
554
 
440
555
  ### TokenBudget
441
556
 
@@ -467,12 +582,82 @@ class MyAgent < Phronomy::Agent::Base
467
582
  model "gpt-4o"
468
583
  max_output_tokens 4096 # override max_output_tokens from registry
469
584
  context_overhead 600 # extra reservation for system prompt + tools
585
+ invoke_timeout 30 # raise Phronomy::TimeoutError after 30 s (wait timeout, not cancellation)
586
+ max_parallel_tools 4 # cap concurrent tool-call threads (default: 10)
470
587
  end
471
588
  ```
472
589
 
473
590
  `Agent::Base#invoke` builds a `TokenBudget` automatically. When the model is not in the
474
591
  registry the budget is silently skipped.
475
592
 
593
+ > **Note on CJK languages**: The default `TokenEstimator` uses a character-ratio heuristic
594
+ > calibrated for ASCII/Latin text (4 chars/token). For Chinese, Japanese, and Korean text,
595
+ > actual token counts are approximately **4× higher** than the estimate because CJK
596
+ > characters are typically 1 token each. For accurate CJK token counting, supply a
597
+ > tokenizer-backed callable:
598
+ >
599
+ > ```ruby
600
+ > require "tiktoken_ruby"
601
+ > enc = Tiktoken.encoding_for_model("gpt-4o")
602
+ > Phronomy::Context::TokenEstimator.tokenizer = ->(text) { enc.encode(text).length }
603
+ > ```
604
+
605
+
606
+ ### CancellationToken — Cooperative cancellation
607
+
608
+ Pass a `CancellationToken` to any agent via `config: { cancellation_token: token }`.
609
+ Cancellation is checked at multiple granular checkpoints: before the LLM call, before
610
+ each RAG knowledge-source fetch, after each streaming chunk, before each parallel
611
+ tool-call batch, and after each `before_completion` hook. `CancellationError` is
612
+ raised as soon as a checkpoint observes the cancellation and is never retried. No threads are force-killed — `ensure`
613
+ blocks always execute.
614
+
615
+ > **Cooperative cancellation — not preemptive**
616
+ >
617
+ > Phronomy uses _cooperative boundary cancellation_. The token is polled at the
618
+ > checkpoints listed above; it is **not** injected as a signal into a running
619
+ > operation. This means the following are **not** interrupted mid-execution:
620
+ >
621
+ > - A single `KnowledgeSource#fetch` that is already blocking (e.g. HTTP call)
622
+ > - A single `chat.ask` call that is not streaming
623
+ > - A single `tool.execute` call that is already running
624
+ > - Any external I/O (database query, vector search, HTTP request) inside those calls
625
+ >
626
+ > For deep in-flight safety, complement `CancellationToken` with per-source or
627
+ > per-tool timeouts (e.g. `Net::HTTP#read_timeout`, `Timeout.timeout`, connection
628
+ > pool limits). Ruby cannot forcibly interrupt an in-flight call except by asynchronous means such as
629
+ > `Thread#kill`, which Phronomy avoids by default due to resource safety concerns.
630
+
631
+ ```ruby
632
+ token = Phronomy::CancellationToken.new
633
+
634
+ # Cancel from another thread after 5 s
635
+ Thread.new { sleep 5; token.cancel! }
636
+
637
+ begin
638
+ result = MyAgent.new.invoke("...", config: { cancellation_token: token })
639
+ rescue Phronomy::CancellationError
640
+ puts "cancelled"
641
+ end
642
+
643
+ # Hard deadline via monotonic clock (recommended — immune to NTP/DST changes)
644
+ token = Phronomy::CancellationToken.timeout_after(30)
645
+ result = MyAgent.new.invoke("...", config: { cancellation_token: token })
646
+
647
+ # Hard deadline via wall-clock (legacy — still supported)
648
+ token = Phronomy::CancellationToken.new(deadline: Time.now + 30)
649
+ result = MyAgent.new.invoke("...", config: { cancellation_token: token })
650
+
651
+ # Propagate to all parallel workers via dispatch_parallel / fan_out
652
+ token = Phronomy::CancellationToken.new
653
+ Thread.new { sleep 10; token.cancel! }
654
+
655
+ orchestrator.dispatch_parallel(
656
+ {agent: SearchAgent, input: "topic A"},
657
+ {agent: AnalysisAgent, input: "topic B"},
658
+ cancellation_token: token
659
+ )
660
+ ```
476
661
 
477
662
  ## Examples
478
663
 
@@ -543,6 +728,35 @@ bin/console
543
728
 
544
729
  Bug reports and pull requests are welcome on GitHub at https://github.com/Raizo-TCS/phronomy.
545
730
 
731
+ ## Security & Privacy
732
+
733
+ **API credentials** — Phronomy does not store or transmit your LLM API keys. All
734
+ credentials are handled by RubyLLM and passed directly to the provider.
735
+
736
+ **Tracing and PII** — When tracing is enabled (`Phronomy::Tracing::OpenTelemetryTracer`
737
+ or a custom tracer), agent inputs and LLM outputs are replaced with `[REDACTED]` in
738
+ span attributes by default (`trace_pii: false`). To include full content in traces
739
+ (e.g., for debugging in a non-production environment), set `trace_pii: true` in your
740
+ Phronomy configuration. Evaluate whether your tracing backend (OTLP collector, Jaeger,
741
+ Honeycomb, etc.) meets your data-retention and privacy requirements.
742
+
743
+ **Prompt injection** — Phronomy provides no built-in prompt injection detection.
744
+ Applications that process untrusted user input should implement their own input
745
+ guardrails (see the Guardrails section above).
746
+
747
+ **Tool and MCP security** — Tools can perform real-world side effects (database
748
+ writes, API calls, file deletion). Treat tool execution as a privileged operation:
749
+ use the interrupt/approval mechanism for high-risk tools (e.g., payment processing,
750
+ file deletion) rather than allowing fully autonomous execution. MCP servers are
751
+ external trust boundaries: connect only to servers you control. A compromised MCP
752
+ server can inject instructions that manipulate agent behavior (tool-level prompt
753
+ injection). Avoid passing secrets as direct tool parameters — if `trace_pii: true`
754
+ is set, tool arguments are captured in trace spans.
755
+
756
+ **Vulnerability reports** — Please report security vulnerabilities privately via
757
+ GitHub's [Security Advisories](https://github.com/Raizo-TCS/phronomy/security/advisories)
758
+ rather than opening a public issue.
759
+
546
760
  ## License
547
761
 
548
762
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,86 @@
1
+ # Release Checklist
2
+
3
+ Use this checklist before every release of the `phronomy` gem.
4
+ Copy it into the GitHub Release draft and check off each item.
5
+
6
+ ---
7
+
8
+ ## Pre-release
9
+
10
+ - [ ] `CHANGELOG.md` updated (Added / Changed / Fixed / Removed / Deprecated / Security)
11
+ - [ ] Version bumped in `lib/phronomy/version.rb`
12
+ - [ ] Stability table in `README.md` reflects any API additions, removals, or promotions
13
+ - [ ] `@api private` annotations are consistent with the README stability table (Issue #205)
14
+ - [ ] Public API compatibility snapshot regenerated if any Stable API changed:
15
+ ```bash
16
+ bundle exec ruby scripts/api_snapshot.rb --write
17
+ ```
18
+ (Issue #210)
19
+ - [ ] Migration notes or deprecation warnings added for any breaking changes
20
+
21
+ ---
22
+
23
+ ## Quality Gates (all must pass before tagging)
24
+
25
+ - [ ] `bundle exec rspec --format documentation` — 0 failures
26
+ - [ ] `bundle exec rspec --tag integration` — 0 failures; any pending examples are expected ones
27
+ - [ ] `ruby scripts/check_japanese.rb` — exit 0 (no Japanese in source)
28
+ - [ ] `bundle exec standardrb` — 0 offenses
29
+ - [ ] `COVERAGE=1 bundle exec rspec` — coverage above configured threshold (Issue #207)
30
+ - [ ] CI green on all Ruby matrix versions (3.2 / 3.3 / 3.4 / head)
31
+
32
+ ---
33
+
34
+ ## Security Review
35
+
36
+ - [ ] `SECURITY.md` is up to date (supported versions table, contact info)
37
+ - [ ] No new `trace_pii`-sensitive data paths introduced without redaction
38
+ - [ ] Every new `requires_approval` tool is covered by the approval gate
39
+ - [ ] No secrets, credentials, or PII in tool descriptions, schema strings, or spec fixtures
40
+ - [ ] Dependency audit passes: `bundle exec bundler-audit check --update`
41
+
42
+ ---
43
+
44
+ ## Release Steps
45
+
46
+ > **Do not use `gem push` directly.** The GitHub Actions release workflow handles
47
+ > gem publication. Follow the steps below exactly.
48
+
49
+ 1. Commit the version bump:
50
+ ```bash
51
+ git commit -m "bump version to X.Y.Z"
52
+ git push origin main
53
+ ```
54
+ 2. Create and push the tag:
55
+ ```bash
56
+ git tag vX.Y.Z
57
+ git push origin vX.Y.Z
58
+ ```
59
+ 3. Trigger the release workflow:
60
+ ```bash
61
+ gh workflow run release.yml --field tag=vX.Y.Z
62
+ ```
63
+ 4. Monitor the workflow run:
64
+ ```bash
65
+ gh run list --workflow release.yml --limit 3
66
+ ```
67
+ 5. Verify the new version X.Y.Z is listed on RubyGems: `gem search phronomy`
68
+
69
+ ---
70
+
71
+ ## Post-release
72
+
73
+ - [ ] `phronomy-examples` `Gemfile` updated to the new version
74
+ ```bash
75
+ cd ../phronomy-examples && bundle update phronomy
76
+ ```
77
+ - [ ] `phronomy-examples` tests pass after the update
78
+ - [ ] GitHub Release description includes the relevant CHANGELOG excerpt
79
+
80
+ ---
81
+
82
+ ## Reference Issues
83
+
84
+ - #205 — `@api private` annotation policy
85
+ - #207 — SimpleCov coverage gate
86
+ - #210 — Public API compatibility snapshot
data/SECURITY.md ADDED
@@ -0,0 +1,80 @@
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ | Version | Supported |
6
+ |---------|-----------|
7
+ | Latest release (main branch) | ✅ |
8
+ | Older versions | ❌ — please upgrade |
9
+
10
+ Only the latest released version of `phronomy` receives security patches. If you
11
+ are running an older version, please upgrade before filing a report.
12
+
13
+ ---
14
+
15
+ ## Reporting a Vulnerability
16
+
17
+ **Please do NOT open a public GitHub Issue for security vulnerabilities.**
18
+
19
+ Use [GitHub's private vulnerability reporting](https://docs.github.com/en/code-security/security-advisories/guidance-on-reporting-and-writing/privately-reporting-a-security-vulnerability)
20
+ instead:
21
+
22
+ 1. Navigate to the [Security tab](https://github.com/Raizo-TCS/phronomy/security)
23
+ of this repository.
24
+ 2. Click **"Report a vulnerability"**.
25
+ 3. Fill in the advisory form with as much detail as possible.
26
+
27
+ This creates a private draft advisory visible only to you and the maintainers, keeping the
28
+ details confidential until a fix is prepared and released.
29
+
30
+ ---
31
+
32
+ ## Response Timeline
33
+
34
+ | Milestone | Target |
35
+ |-----------|--------|
36
+ | Acknowledgement of report | Within **7 days** |
37
+ | Triage and initial assessment | Within **14 days** |
38
+ | Patch release (critical / high severity) | Within **30 days** |
39
+ | Patch release (medium / low severity) | Best effort; typically within **60 days** |
40
+
41
+ If you do not receive an acknowledgement within 7 days, please follow up by
42
+ opening a **public** Issue with the subject "Security report follow-up (no
43
+ response)" — do **not** include vulnerability details in the public Issue.
44
+
45
+ ---
46
+
47
+ ## Scope
48
+
49
+ **In scope:**
50
+
51
+ - Vulnerabilities in the `phronomy` gem source code (`lib/`, `spec/`).
52
+ - Dependency vulnerabilities that affect gem consumers when `phronomy` is used as intended.
53
+ - Information disclosure via tracing/logging APIs (e.g. sensitive content reaching traces or logs despite `trace_pii: false`).
54
+ - Approval gate bypasses (tool execution without the registered approval handler).
55
+
56
+ **Out of scope:**
57
+
58
+ - Security of consumer applications built on top of `phronomy`.
59
+ - Vulnerabilities in the LLM provider (OpenAI, Anthropic, etc.) or in `ruby_llm`.
60
+ - Attacks that require an attacker to already have write access to the host system.
61
+ - Prompt injection via LLM output — the gem forwards LLM output faithfully; prompt
62
+ injection resistance is the responsibility of the LLM provider and the application.
63
+
64
+ ---
65
+
66
+ ## Disclosure Policy
67
+
68
+ - Maintainers will coordinate with you on the release date and credit you in the
69
+ `CHANGELOG.md` entry and GitHub release notes.
70
+ - If you wish to remain anonymous, let us know in the advisory.
71
+ - We follow a **coordinated disclosure** model: the advisory will be made public
72
+ after a patch is released or 90 days after the initial report, whichever comes first.
73
+
74
+ ---
75
+
76
+ ## Credit
77
+
78
+ Security reporters are credited in the `CHANGELOG.md` entry for the patch release,
79
+ in the GitHub Security Advisory, and in the release notes — unless they request
80
+ anonymity.
@@ -0,0 +1,9 @@
1
+ {
2
+ "workflow_context_merge": 124364.81010472385,
3
+ "workflow_define": 2179.945274115319,
4
+ "tool_params_schema_definition": 19534379.159046534,
5
+ "dispatch_parallel_10": 1483.2255243486482,
6
+ "cancellation_token_cancelled": 4335060.97443425,
7
+ "cancellation_token_raise_if_cancelled_noop": 3566903.189098373,
8
+ "trim_context_remove_2000": 1761.5700678986254
9
+ }