phronomy 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +21 -0
  3. data/CHANGELOG.md +338 -0
  4. data/CONTRIBUTING.md +102 -0
  5. data/README.md +242 -27
  6. data/RELEASE_CHECKLIST.md +86 -0
  7. data/SECURITY.md +80 -0
  8. data/benchmark/baseline.json +9 -0
  9. data/benchmark/bench_agent_invoke.rb +105 -0
  10. data/benchmark/bench_context_assembler.rb +46 -0
  11. data/benchmark/bench_regression.rb +171 -0
  12. data/benchmark/bench_token_estimator.rb +44 -0
  13. data/benchmark/bench_tool_schema.rb +69 -0
  14. data/benchmark/bench_vector_store.rb +39 -0
  15. data/benchmark/bench_workflow.rb +55 -0
  16. data/benchmark/run_all.rb +118 -0
  17. data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
  18. data/docs/decisions/002-workflow-context-immutability.md +42 -0
  19. data/docs/decisions/003-event-loop-singleton.md +48 -0
  20. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +51 -0
  21. data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
  22. data/docs/decisions/006-no-built-in-guardrails.md +48 -0
  23. data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
  24. data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
  25. data/docs/decisions/009-state-store-abstraction.md +141 -0
  26. data/lib/phronomy/agent/base.rb +194 -12
  27. data/lib/phronomy/agent/before_completion_context.rb +1 -0
  28. data/lib/phronomy/agent/checkpoint.rb +1 -0
  29. data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
  30. data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
  31. data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
  32. data/lib/phronomy/agent/concerns/retryable.rb +12 -1
  33. data/lib/phronomy/agent/concerns/suspendable.rb +4 -0
  34. data/lib/phronomy/agent/fsm.rb +15 -0
  35. data/lib/phronomy/agent/handoff.rb +3 -0
  36. data/lib/phronomy/agent/orchestrator.rb +123 -11
  37. data/lib/phronomy/agent/parallel_tool_chat.rb +21 -4
  38. data/lib/phronomy/agent/react_agent.rb +8 -6
  39. data/lib/phronomy/agent/runner.rb +2 -0
  40. data/lib/phronomy/agent/shared_state.rb +11 -0
  41. data/lib/phronomy/agent/suspend_signal.rb +2 -0
  42. data/lib/phronomy/agent/team_coordinator.rb +17 -5
  43. data/lib/phronomy/cancellation_token.rb +92 -0
  44. data/lib/phronomy/configuration.rb +26 -2
  45. data/lib/phronomy/context/assembler.rb +6 -0
  46. data/lib/phronomy/context/compaction_context.rb +2 -0
  47. data/lib/phronomy/context/context_version_cache.rb +2 -0
  48. data/lib/phronomy/context/token_budget.rb +3 -0
  49. data/lib/phronomy/context/token_estimator.rb +9 -2
  50. data/lib/phronomy/context/trigger_context.rb +1 -0
  51. data/lib/phronomy/context/trim_context.rb +4 -0
  52. data/lib/phronomy/embeddings/base.rb +5 -2
  53. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
  54. data/lib/phronomy/eval/comparison.rb +2 -0
  55. data/lib/phronomy/eval/dataset.rb +4 -0
  56. data/lib/phronomy/eval/metrics.rb +6 -0
  57. data/lib/phronomy/eval/runner.rb +2 -0
  58. data/lib/phronomy/eval/scorer/base.rb +1 -0
  59. data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
  60. data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
  61. data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
  62. data/lib/phronomy/event_loop.rb +114 -7
  63. data/lib/phronomy/fsm_session.rb +8 -1
  64. data/lib/phronomy/generator_verifier.rb +2 -0
  65. data/lib/phronomy/guardrail/base.rb +3 -0
  66. data/lib/phronomy/knowledge_source/base.rb +6 -2
  67. data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
  68. data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
  69. data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
  70. data/lib/phronomy/loader/base.rb +1 -0
  71. data/lib/phronomy/loader/csv_loader.rb +2 -0
  72. data/lib/phronomy/loader/markdown_loader.rb +2 -0
  73. data/lib/phronomy/loader/plain_text_loader.rb +1 -0
  74. data/lib/phronomy/output_parser/base.rb +1 -0
  75. data/lib/phronomy/output_parser/json_parser.rb +22 -3
  76. data/lib/phronomy/output_parser/structured_parser.rb +2 -0
  77. data/lib/phronomy/prompt_template.rb +5 -0
  78. data/lib/phronomy/runnable.rb +20 -3
  79. data/lib/phronomy/splitter/base.rb +2 -0
  80. data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
  81. data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
  82. data/lib/phronomy/state_store/base.rb +48 -0
  83. data/lib/phronomy/state_store/in_memory.rb +62 -0
  84. data/lib/phronomy/tool/agent_tool.rb +1 -0
  85. data/lib/phronomy/tool/base.rb +189 -27
  86. data/lib/phronomy/tool/mcp_tool.rb +68 -13
  87. data/lib/phronomy/tracing/base.rb +3 -0
  88. data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
  89. data/lib/phronomy/tracing/open_telemetry_tracer.rb +2 -0
  90. data/lib/phronomy/vector_store/base.rb +33 -7
  91. data/lib/phronomy/vector_store/in_memory.rb +16 -7
  92. data/lib/phronomy/vector_store/pgvector.rb +40 -9
  93. data/lib/phronomy/vector_store/redis_search.rb +29 -8
  94. data/lib/phronomy/version.rb +1 -1
  95. data/lib/phronomy/workflow.rb +96 -7
  96. data/lib/phronomy/workflow_context.rb +54 -4
  97. data/lib/phronomy/workflow_runner.rb +35 -7
  98. data/lib/phronomy.rb +70 -1
  99. data/scripts/api_snapshot.rb +91 -0
  100. data/scripts/check_api_annotations.rb +68 -0
  101. data/scripts/check_private_enforcement.rb +93 -0
  102. data/scripts/check_readme_runnable.rb +98 -0
  103. data/scripts/run_mutation.sh +46 -0
  104. metadata +45 -2
@@ -2,10 +2,12 @@
2
2
 
3
3
  require "digest"
4
4
  require "securerandom"
5
+ require "timeout"
5
6
  require_relative "concerns/retryable"
6
7
  require_relative "concerns/guardrailable"
7
8
  require_relative "concerns/before_completion"
8
9
  require_relative "concerns/suspendable"
10
+ require_relative "concerns/error_translation"
9
11
 
10
12
  module Phronomy
11
13
  module Agent
@@ -36,6 +38,7 @@ module Phronomy
36
38
  include Concerns::Guardrailable
37
39
  include Concerns::BeforeCompletion
38
40
  include Concerns::Suspendable
41
+ include Concerns::ErrorTranslation
39
42
 
40
43
  class << self
41
44
  # Sets or reads the LLM model identifier for this agent.
@@ -48,6 +51,7 @@ module Phronomy
48
51
  # class MyAgent < Phronomy::Agent::Base
49
52
  # model "gpt-4o"
50
53
  # end
54
+ # @api public
51
55
  def model(name = nil)
52
56
  if name
53
57
  @model = name
@@ -71,6 +75,7 @@ module Phronomy
71
75
  # class MyAgent < Phronomy::Agent::Base
72
76
  # instructions { |input| "Answer in #{input[:lang]}." }
73
77
  # end
78
+ # @api public
74
79
  def instructions(text = nil, &block)
75
80
  if text || block_given?
76
81
  @instructions = text || block
@@ -96,6 +101,7 @@ module Phronomy
96
101
  # Places::SearchTool => "places_search",
97
102
  # CurrentTimeTool => nil
98
103
  # )
104
+ # @api public
99
105
  def tools(*args)
100
106
  if args.empty?
101
107
  if instance_variable_defined?(:@tools)
@@ -115,9 +121,17 @@ module Phronomy
115
121
  end
116
122
 
117
123
  # Returns the alias map registered via the hash form of .tools.
124
+ # Merges parent class aliases so subclasses inherit their parent's mappings.
125
+ # Subclass-specific aliases take precedence over parent aliases.
118
126
  # @return [Hash{Class => String}]
127
+ # @api public
119
128
  def tool_aliases
120
- @tool_aliases ||= {}
129
+ own = @tool_aliases || {}
130
+ if superclass.respond_to?(:tool_aliases)
131
+ superclass.tool_aliases.merge(own)
132
+ else
133
+ own
134
+ end
121
135
  end
122
136
 
123
137
  # Sets or reads the LLM provider for this agent.
@@ -131,6 +145,7 @@ module Phronomy
131
145
  # model "openai/gpt-oss-20b"
132
146
  # provider :openai
133
147
  # end
148
+ # @api public
134
149
  def provider(name = nil)
135
150
  if name
136
151
  @provider = name
@@ -149,6 +164,7 @@ module Phronomy
149
164
  # class MyAgent < Phronomy::Agent::Base
150
165
  # temperature 0.2
151
166
  # end
167
+ # @api public
152
168
  def temperature(val = nil)
153
169
  if val
154
170
  @temperature = val
@@ -166,6 +182,7 @@ module Phronomy
166
182
  # class MyAgent < Phronomy::Agent::Base
167
183
  # max_iterations 5
168
184
  # end
185
+ # @api public
169
186
  def max_iterations(val = nil)
170
187
  if val
171
188
  @max_iterations = val
@@ -174,27 +191,121 @@ module Phronomy
174
191
  end
175
192
  end
176
193
 
194
+ # Sets or reads the maximum number of tool calls executed concurrently
195
+ # when the LLM returns multiple tool calls in a single response
196
+ # (ParallelToolChat mode, active inside an AgentFSM IO thread).
197
+ #
198
+ # Defaults to 10. Set to 1 to force sequential execution.
199
+ # Inherited by subclasses; the most-specific definition wins.
200
+ #
201
+ # @param val [Integer, nil]
202
+ # @return [Integer]
203
+ # @example
204
+ # class MyAgent < Phronomy::Agent::Base
205
+ # max_parallel_tools 4
206
+ # end
207
+ # @api public
208
+ def max_parallel_tools(val = nil)
209
+ if val.nil?
210
+ @max_parallel_tools ||
211
+ (superclass.respond_to?(:max_parallel_tools) ? superclass.max_parallel_tools : 10)
212
+ else
213
+ unless val.is_a?(Integer) && val >= 1
214
+ raise ArgumentError,
215
+ "max_parallel_tools must be a positive Integer (>= 1), got #{val.inspect}"
216
+ end
217
+ @max_parallel_tools = val
218
+ end
219
+ end
220
+
221
+ # Sets or reads the per-invocation timeout (in seconds) for EventLoop-mode
222
+ # agent calls. When set, +invoke+ raises {Phronomy::TimeoutError} if the
223
+ # agent does not finish within the given number of seconds.
224
+ #
225
+ # Has no effect when EventLoop mode is disabled (direct invoke path).
226
+ # Defaults to +nil+ (no timeout).
227
+ # Inherited by subclasses; the most-specific definition wins.
228
+ #
229
+ # **Note**: +invoke_timeout+ is a *wait timeout*, not a cancellation.
230
+ # When the timeout fires, +Phronomy::TimeoutError+ is raised to the
231
+ # caller, but the background agent thread and any in-flight LLM or tool
232
+ # calls are **not** interrupted — they continue running until they
233
+ # complete naturally. The agent therefore keeps consuming threads,
234
+ # memory, and external API credits after the caller has already received
235
+ # the error. True cancellation is not yet supported.
236
+ #
237
+ # @param val [Numeric, nil]
238
+ # @return [Numeric, nil]
239
+ # @example
240
+ # class MyAgent < Phronomy::Agent::Base
241
+ # invoke_timeout 30
242
+ # end
243
+ # @api public
244
+ def invoke_timeout(val = nil)
245
+ if val.nil?
246
+ return @invoke_timeout if defined?(@invoke_timeout)
247
+ superclass.respond_to?(:invoke_timeout) ? superclass.invoke_timeout : nil
248
+ else
249
+ unless val.is_a?(Numeric) && val > 0
250
+ raise ArgumentError,
251
+ "invoke_timeout must be a positive number, got #{val.inspect}"
252
+ end
253
+ @invoke_timeout = val
254
+ end
255
+ end
256
+
177
257
  # Registers one or more static knowledge sources on the agent class.
178
- # Static sources are fetched once per agent instance and their content
179
- # is cached in ContextVersionCache keyed by a fingerprint of the
180
- # instruction text + source content. The cache is invalidated automatically
181
- # when the fingerprint changes (e.g. because a source was updated).
258
+ # Static source content is fetched and memoized at the **class** level
259
+ # the first time +invoke+ is called. The cache persists for the lifetime
260
+ # of the process; call {.static_knowledge_refresh!} to force a reload.
182
261
  #
183
262
  # @param sources [Array<Phronomy::KnowledgeSource::Base>]
184
263
  # @example
185
264
  # class PolicyAgent < Phronomy::Agent::Base
186
265
  # static_knowledge Phronomy::KnowledgeSource::StaticKnowledge.new(POLICY_TEXT)
187
266
  # end
267
+ # @api public
188
268
  def static_knowledge(*sources)
189
269
  @static_knowledge_sources = sources.flatten
270
+ # Invalidate the cached chunks so the new sources are fetched on
271
+ # the next call to static_knowledge_chunks.
272
+ @static_knowledge_chunks = nil
190
273
  end
191
274
 
192
275
  # Returns the registered static knowledge sources.
193
276
  # @return [Array<Phronomy::KnowledgeSource::Base>]
277
+ # @api public
194
278
  def static_knowledge_sources
195
279
  @static_knowledge_sources || []
196
280
  end
197
281
 
282
+ # Returns the fetched content from all static knowledge sources.
283
+ # Results are cached at the class level so that each source is fetched
284
+ # only once regardless of how many times the agent is invoked.
285
+ # @return [Array<Hash>]
286
+ # @api public
287
+ def static_knowledge_chunks
288
+ @static_knowledge_chunks ||= static_knowledge_sources.flat_map { |ks|
289
+ ks.fetch(query: nil)
290
+ }
291
+ end
292
+
293
+ # Clears the class-level knowledge cache so that the next +invoke+ call
294
+ # re-fetches content from all registered static knowledge sources.
295
+ #
296
+ # Call this method when the underlying knowledge source has been updated
297
+ # at runtime (e.g. a file was rewritten, a DB record changed) and you
298
+ # want the agent to pick up the new content without restarting the
299
+ # process.
300
+ #
301
+ # @return [nil]
302
+ # @example Refresh after updating a knowledge file
303
+ # MyAgent.static_knowledge_refresh!
304
+ # @api public
305
+ def static_knowledge_refresh!
306
+ @static_knowledge_chunks = nil
307
+ end
308
+
198
309
  # Registers a callback that is invoked before every LLM call so the
199
310
  # application can remove stale or irrelevant messages from the
200
311
  # conversation history.
@@ -209,11 +320,13 @@ module Phronomy
209
320
  # limit = ctx.budget&.available(used: 0) || Float::INFINITY
210
321
  # ctx.remove(ctx.message_elements.first[:seq]) if ctx.total_tokens > limit * 0.8
211
322
  # end
323
+ # @api public
212
324
  def on_trim(&block)
213
325
  @on_trim_callback = block
214
326
  end
215
327
 
216
328
  # @return [Proc, nil]
329
+ # @api private
217
330
  def _on_trim_callback
218
331
  @on_trim_callback
219
332
  end
@@ -232,11 +345,13 @@ module Phronomy
232
345
  # limit = ctx.budget&.available(used: 0) || Float::INFINITY
233
346
  # ctx.total_tokens > limit * 0.7
234
347
  # end
348
+ # @api public
235
349
  def on_compaction_trigger(&block)
236
350
  @on_compaction_trigger_callback = block
237
351
  end
238
352
 
239
353
  # @return [Proc, nil]
354
+ # @api private
240
355
  def _on_compaction_trigger_callback
241
356
  @on_compaction_trigger_callback
242
357
  end
@@ -254,11 +369,13 @@ module Phronomy
254
369
  # "Earlier conversation summary: #{texts}"
255
370
  # end
256
371
  # end
372
+ # @api public
257
373
  def on_compact(&block)
258
374
  @on_compact_callback = block
259
375
  end
260
376
 
261
377
  # @return [Proc, nil]
378
+ # @api private
262
379
  def _on_compact_callback
263
380
  @on_compact_callback
264
381
  end
@@ -278,6 +395,7 @@ module Phronomy
278
395
  # provider :anthropic
279
396
  # cache_instructions true
280
397
  # end
398
+ # @api public
281
399
  def cache_instructions(enabled = nil)
282
400
  if enabled.nil?
283
401
  @cache_instructions
@@ -293,6 +411,7 @@ module Phronomy
293
411
  # class MyAgent < Phronomy::Agent::Base
294
412
  # max_output_tokens 4096
295
413
  # end
414
+ # @api public
296
415
  def max_output_tokens(val = nil)
297
416
  if val.nil?
298
417
  @max_output_tokens
@@ -310,6 +429,7 @@ module Phronomy
310
429
  # class MyAgent < Phronomy::Agent::Base
311
430
  # context_window 4096
312
431
  # end
432
+ # @api public
313
433
  def context_window(val = nil)
314
434
  if val.nil?
315
435
  @context_window
@@ -325,6 +445,7 @@ module Phronomy
325
445
  # class MyAgent < Phronomy::Agent::Base
326
446
  # context_overhead 500
327
447
  # end
448
+ # @api public
328
449
  def context_overhead(val = nil)
329
450
  if val.nil?
330
451
  @context_overhead || 0
@@ -338,6 +459,7 @@ module Phronomy
338
459
  # Called by Runner during construction when routes are configured.
339
460
  # @param tool_class [Class<Phronomy::Tool::Base>]
340
461
  # @return [self]
462
+ # @api private
341
463
  def _add_handoff_tool(tool_class)
342
464
  @_handoff_tools ||= []
343
465
  @_handoff_tools << tool_class
@@ -346,6 +468,7 @@ module Phronomy
346
468
 
347
469
  # Returns handoff tool classes registered on this instance by Runner.
348
470
  # @return [Array<Class>]
471
+ # @api private
349
472
  def _handoff_tools
350
473
  @_handoff_tools || []
351
474
  end
@@ -382,6 +505,7 @@ module Phronomy
382
505
  # result = agent.resume(result[:checkpoint], approved: true)
383
506
  # end
384
507
  # puts result[:output]
508
+ # @api public
385
509
  def invoke(input, messages: [], thread_id: nil, config: {})
386
510
  if Phronomy.configuration.event_loop
387
511
  # Protect against blocking the EventLoop thread itself.
@@ -399,12 +523,27 @@ module Phronomy
399
523
  config: config
400
524
  )
401
525
  completion_queue = Phronomy::EventLoop.instance.register(fsm)
402
- result = completion_queue.pop
526
+ timeout_sec = self.class.invoke_timeout
527
+ result = if timeout_sec
528
+ begin
529
+ Timeout.timeout(timeout_sec) { completion_queue.pop }
530
+ rescue Timeout::Error
531
+ raise Phronomy::TimeoutError,
532
+ "Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
533
+ end
534
+ else
535
+ completion_queue.pop
536
+ end
403
537
  raise result if result.is_a?(Exception)
404
538
  result
405
539
  else
406
540
  _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
407
541
  end
542
+ ensure
543
+ # Remove this agent's context cache entry from the current thread to
544
+ # prevent unbounded growth of the thread-local hash in long-lived
545
+ # processes (e.g. Rails servers).
546
+ Thread.current[:phronomy_context_version_caches]&.delete(object_id)
408
547
  end
409
548
 
410
549
  # Registers this agent as a child {AgentFSM} inside the given Workflow context.
@@ -441,6 +580,7 @@ module Phronomy
441
580
  # @return [nil] the caller must not wait on any return value;
442
581
  # the result arrives as a +:child_completed+ event
443
582
  # @raise [Phronomy::Error] when EventLoop mode is not enabled
583
+ # @api public
444
584
  def run_as_child(input, ctx:, messages: [], config: {}, &result_writer)
445
585
  unless Phronomy.configuration.event_loop
446
586
  raise Phronomy::Error,
@@ -477,6 +617,7 @@ module Phronomy
477
617
  # @param config [Hash] same as #invoke
478
618
  # @yield [Phronomy::Agent::StreamEvent]
479
619
  # @return [Hash] { output:, messages:, usage: } — same as #invoke
620
+ # @api public
480
621
  def stream(input, messages: [], thread_id: nil, config: {}, &block)
481
622
  return invoke(input, messages: messages, thread_id: thread_id, config: config) unless block
482
623
 
@@ -486,10 +627,19 @@ module Phronomy
486
627
  raise
487
628
  end
488
629
 
489
- # Returns the {Context::ContextVersionCache} for the current thread.
630
+ # Returns the {Context::ContextVersionCache} built during the most recent
631
+ # {#invoke} call on this agent instance. The thread-local cache entry is
632
+ # cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
633
+ # in +@last_context_version_cache+ so callers can inspect it after invoke
634
+ # returns.
635
+ #
636
+ # NOTE: Not thread-safe. When the same Agent instance is used concurrently,
637
+ # +@last_context_version_cache+ reflects the most recent +invoke+ on *any*
638
+ # thread. For per-invocation isolation, use a separate Agent instance per
639
+ # thread.
490
640
  # @api private
491
641
  def context_version_cache
492
- (Thread.current[:phronomy_context_version_caches] ||= {})[object_id]
642
+ @last_context_version_cache
493
643
  end
494
644
 
495
645
  private
@@ -531,6 +681,7 @@ module Phronomy
531
681
 
532
682
  response = chat.ask(user_message) do |chunk|
533
683
  block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
684
+ check_cancellation!(config, "invocation cancelled during streaming")
534
685
  end
535
686
 
536
687
  output = response.content
@@ -554,6 +705,7 @@ module Phronomy
554
705
  # @param thread_id [String, nil] conversation thread identifier
555
706
  # @param config [Hash] the invocation config (see #invoke)
556
707
  # @return [Hash] { system: String|nil, messages: Array }
708
+ # @api public
557
709
  def build_context(input, messages: [], thread_id: nil, config: {})
558
710
  history = prepare_history(messages: messages, thread_id: thread_id, config: config)
559
711
  budget = build_token_budget
@@ -564,7 +716,8 @@ module Phronomy
564
716
  assembler.add_instruction(system_text) if system_text
565
717
 
566
718
  Array(config[:knowledge_sources]).each do |ks|
567
- ks.fetch(query: user_message).each do |chunk|
719
+ check_cancellation!(config, "invocation cancelled during RAG fetch")
720
+ ks.fetch(query: user_message, cancellation_token: config[:cancellation_token]).each do |chunk|
568
721
  assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
569
722
  end
570
723
  end
@@ -585,6 +738,7 @@ module Phronomy
585
738
  # @param thread_id [String, nil] conversation thread identifier
586
739
  # @param config [Hash] additional invocation options
587
740
  # @return [Array] filtered and/or compacted message objects
741
+ # @api public
588
742
  def prepare_history(messages: [], thread_id: nil, config: {})
589
743
  budget = build_token_budget
590
744
  elements = build_message_elements(Array(messages))
@@ -641,6 +795,15 @@ module Phronomy
641
795
  # synchronous on_approval_required handler is already registered).
642
796
  _register_suspension_hook!(chat)
643
797
 
798
+ # Check for cancellation immediately before the LLM call.
799
+ check_cancellation!(config, "invocation cancelled before LLM call")
800
+
801
+ # Forward the cancellation token to ParallelToolChat via a thread-local
802
+ # so that tool dispatch batches can observe cancellation without needing
803
+ # direct access to config.
804
+ prev_ct = Thread.current[:phronomy_cancellation_token]
805
+ Thread.current[:phronomy_cancellation_token] = config[:cancellation_token]
806
+
644
807
  begin
645
808
  response = chat.ask(user_message)
646
809
  rescue SuspendSignal => signal
@@ -654,6 +817,8 @@ module Phronomy
654
817
  )
655
818
  suspended_result = {output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}
656
819
  next [suspended_result, nil]
820
+ ensure
821
+ Thread.current[:phronomy_cancellation_token] = prev_ct
657
822
  end
658
823
 
659
824
  output = response.content
@@ -699,6 +864,7 @@ module Phronomy
699
864
  #
700
865
  # @param messages [Array] message-like objects with #role and #content
701
866
  # @return [Array<Hash>]
867
+ # @api public
702
868
  def build_message_elements(messages)
703
869
  Array(messages).each_with_index.map do |msg, idx|
704
870
  tokens = Context::TokenEstimator.estimate(msg.content.to_s)
@@ -714,12 +880,11 @@ module Phronomy
714
880
  #
715
881
  # @param input [String, Hash] the agent's current input (used for template evaluation)
716
882
  # @return [String, nil] assembled system text, or nil when empty
883
+ # @api public
717
884
  def build_cached_system_text(input)
718
885
  instruction = build_instructions(input)
719
886
 
720
- static_chunks = self.class.static_knowledge_sources.flat_map { |ks|
721
- ks.fetch(query: nil)
722
- }
887
+ static_chunks = self.class.static_knowledge_chunks
723
888
 
724
889
  fingerprint = Digest::SHA256.hexdigest(
725
890
  [instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
@@ -736,6 +901,11 @@ module Phronomy
736
901
  cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
737
902
  end
738
903
 
904
+ # Persist a reference on the instance so that context_version_cache
905
+ # remains accessible after invoke's ensure block cleans up the
906
+ # thread-local entry.
907
+ @last_context_version_cache = cache
908
+
739
909
  cache.system_text.empty? ? nil : cache.system_text
740
910
  end
741
911
 
@@ -811,6 +981,18 @@ module Phronomy
811
981
  end
812
982
  end
813
983
 
984
+ # Raises CancellationError if the cancellation_token in config is cancelled.
985
+ # No-op when config has no cancellation_token or the token is not cancelled.
986
+ #
987
+ # @param config [Hash] the invocation config hash
988
+ # @param message [String] the message for the CancellationError
989
+ # @raise [Phronomy::CancellationError]
990
+ # @api public
991
+ def check_cancellation!(config, message = "invocation cancelled")
992
+ ct = config[:cancellation_token]
993
+ raise Phronomy::CancellationError, message if ct&.cancelled?
994
+ end
995
+
814
996
  # Builds the final tool class to register with the chat.
815
997
  #
816
998
  # Two transformations are applied in order:
@@ -35,6 +35,7 @@ module Phronomy
35
35
  # @param messages [Array]
36
36
  # @param config [Hash]
37
37
  # @param params [Hash] initial params (model, temperature already set on chat)
38
+ # @api public
38
39
  def initialize(agent:, messages:, config:, params: {})
39
40
  @agent = agent
40
41
  @messages = messages.dup.freeze
@@ -47,6 +47,7 @@ module Phronomy
47
47
  # @param pending_tool_name [String]
48
48
  # @param pending_tool_args [Hash]
49
49
  # @param pending_tool_call_id [String]
50
+ # @api public
50
51
  def initialize(thread_id:, original_input:, messages:, pending_tool_name:, pending_tool_args:, pending_tool_call_id:)
51
52
  @thread_id = thread_id
52
53
  @original_input = original_input
@@ -8,6 +8,7 @@ module Phronomy
8
8
  # Included in {Phronomy::Agent::Base}. Hooks are executed just before every
9
9
  # LLM call (global → class → instance order) and may inject or override
10
10
  # LLM parameters such as temperature or model.
11
+ # @api private
11
12
  module BeforeCompletion
12
13
  def self.included(base)
13
14
  base.extend(ClassMethods)
@@ -26,6 +27,7 @@ module Phronomy
26
27
  # class MyAgent < Phronomy::Agent::Base
27
28
  # before_completion ->(ctx) { { temperature: 0.2 } }
28
29
  # end
30
+ # @api private
29
31
  def before_completion(callable = nil)
30
32
  if callable.nil? && !block_given?
31
33
  @before_completion
@@ -35,6 +37,7 @@ module Phronomy
35
37
  end
36
38
 
37
39
  # @return [#call, nil]
40
+ # @api private
38
41
  def _before_completion
39
42
  @before_completion
40
43
  end
@@ -53,6 +56,7 @@ module Phronomy
53
56
  # @param chat [RubyLLM::Chat] the assembled chat object
54
57
  # @param config [Hash] the invocation config hash
55
58
  # @return [Hash] the merged params applied to the chat
59
+ # @api private
56
60
  def run_before_completion_hooks!(chat, config)
57
61
  hooks = [
58
62
  Phronomy.configuration.before_completion,
@@ -72,6 +76,7 @@ module Phronomy
72
76
  merged = {}
73
77
  hooks.each do |hook|
74
78
  result = hook.call(ctx)
79
+ check_cancellation!(config, "invocation cancelled during before_completion hook")
75
80
  merged.merge!(result) if result.is_a?(Hash)
76
81
  end
77
82
 
@@ -86,6 +91,7 @@ module Phronomy
86
91
  #
87
92
  # @param chat [RubyLLM::Chat]
88
93
  # @param params [Hash]
94
+ # @api private
89
95
  def apply_before_completion_params!(chat, params)
90
96
  params.each do |key, value|
91
97
  case key
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Phronomy
4
+ module Agent
5
+ module Concerns
6
+ # Translates RubyLLM transport errors into the corresponding Phronomy error
7
+ # classes so that callers can rescue Phronomy-namespaced exceptions rather
8
+ # than coupling themselves to the underlying provider library.
9
+ #
10
+ # Included in {Phronomy::Agent::Base}.
11
+ module ErrorTranslation
12
+ private
13
+
14
+ # Re-raises +error+ as the most specific Phronomy error class that
15
+ # corresponds to it. Non-RubyLLM errors are re-raised unchanged.
16
+ # The original exception is available as +#cause+ on the translated error.
17
+ #
18
+ # Must be called from within an active +rescue+ block so that Ruby
19
+ # automatically sets +#cause+ on the new exception.
20
+ #
21
+ # @param error [Exception]
22
+ # @raise [Phronomy::RateLimitError] for provider HTTP 429
23
+ # @raise [Phronomy::AuthenticationError] for provider HTTP 401 / 403
24
+ # @raise [Phronomy::ContextLengthError] for context window overflow
25
+ # @raise [Phronomy::TransportError] for all other +RubyLLM::Error+ subclasses
26
+ # @raise re-raises +error+ unchanged for non-RubyLLM exceptions
27
+ # @api private
28
+ def translate_and_reraise!(error)
29
+ case error
30
+ when RubyLLM::RateLimitError
31
+ raise Phronomy::RateLimitError, error.message
32
+ when RubyLLM::UnauthorizedError, RubyLLM::ForbiddenError
33
+ raise Phronomy::AuthenticationError, error.message
34
+ when RubyLLM::ContextLengthExceededError
35
+ raise Phronomy::ContextLengthError, error.message
36
+ when RubyLLM::Error
37
+ raise Phronomy::TransportError, error.message
38
+ else
39
+ raise # bare re-raise preserves $! and its backtrace unchanged
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -8,10 +8,12 @@ module Phronomy
8
8
  # Included in {Phronomy::Agent::Base}. Guardrails are run on the raw
9
9
  # input string before the LLM is called, and on the raw output string
10
10
  # before the result is returned to the caller.
11
+ # @api private
11
12
  module Guardrailable
12
13
  # Attach a guardrail that validates input before every #invoke call.
13
14
  # @param guardrail [Phronomy::Guardrail::InputGuardrail]
14
15
  # @return [self]
16
+ # @api private
15
17
  def add_input_guardrail(guardrail)
16
18
  @input_guardrails ||= []
17
19
  @input_guardrails << guardrail
@@ -21,6 +23,7 @@ module Phronomy
21
23
  # Attach a guardrail that validates output before it is returned.
22
24
  # @param guardrail [Phronomy::Guardrail::OutputGuardrail]
23
25
  # @return [self]
26
+ # @api private
24
27
  def add_output_guardrail(guardrail)
25
28
  @output_guardrails ||= []
26
29
  @output_guardrails << guardrail
@@ -7,6 +7,7 @@ module Phronomy
7
7
  #
8
8
  # Included in {Phronomy::Agent::Base}. The retry loop wraps the full
9
9
  # #invoke_once call; {Phronomy::GuardrailError} is never retried.
10
+ # @api private
10
11
  module Retryable
11
12
  def self.included(base)
12
13
  base.extend(ClassMethods)
@@ -25,6 +26,7 @@ module Phronomy
25
26
  # class MyAgent < Phronomy::Agent::Base
26
27
  # retry_policy times: 2, wait: :exponential, base: 1.0
27
28
  # end
29
+ # @api private
28
30
  def retry_policy(times: 0, wait: 0, base: 1.0)
29
31
  @_retry_policy = {times: times, wait: wait, base: base}
30
32
  end
@@ -35,6 +37,7 @@ module Phronomy
35
37
 
36
38
  # Injectable sleep callable for testing (shared with Tool::Base pattern).
37
39
  # @return [#call]
40
+ # @api private
38
41
  def _sleep_proc
39
42
  @_sleep_proc || method(:sleep)
40
43
  end
@@ -48,12 +51,19 @@ module Phronomy
48
51
 
49
52
  # Retry loop for #invoke. Separated so that ReactAgent can override #invoke_once.
50
53
  def _invoke_impl(input, messages: [], thread_id: nil, config: {})
54
+ # Fail fast when the token is already cancelled before any LLM call.
55
+ if (token = config[:cancellation_token]) && token.cancelled?
56
+ raise Phronomy::CancellationError, "invocation cancelled"
57
+ end
58
+
51
59
  policy = self.class._retry_policy
52
60
  attempt = 0
53
61
  begin
54
62
  invoke_once(input, messages: messages, thread_id: thread_id, config: config)
55
63
  rescue Phronomy::GuardrailError
56
64
  raise
65
+ rescue Phronomy::CancellationError
66
+ raise # Never retry after cancellation.
57
67
  rescue
58
68
  if policy && attempt < policy[:times]
59
69
  wait = compute_agent_retry_wait(policy[:wait], policy[:base], attempt)
@@ -61,7 +71,7 @@ module Phronomy
61
71
  attempt += 1
62
72
  retry
63
73
  end
64
- raise
74
+ translate_and_reraise!($!)
65
75
  end
66
76
  end
67
77
 
@@ -70,6 +80,7 @@ module Phronomy
70
80
  # @param base [Float]
71
81
  # @param attempt [Integer]
72
82
  # @return [Float]
83
+ # @api private
73
84
  def compute_agent_retry_wait(strategy, base, attempt)
74
85
  case strategy
75
86
  when :exponential