phronomy 0.5.4 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +21 -0
  3. data/CHANGELOG.md +379 -0
  4. data/CONTRIBUTING.md +102 -0
  5. data/README.md +262 -48
  6. data/RELEASE_CHECKLIST.md +86 -0
  7. data/SECURITY.md +80 -0
  8. data/benchmark/baseline.json +9 -0
  9. data/benchmark/bench_agent_invoke.rb +105 -0
  10. data/benchmark/bench_context_assembler.rb +46 -0
  11. data/benchmark/bench_regression.rb +171 -0
  12. data/benchmark/bench_token_estimator.rb +44 -0
  13. data/benchmark/bench_tool_schema.rb +69 -0
  14. data/benchmark/bench_vector_store.rb +39 -0
  15. data/benchmark/bench_workflow.rb +55 -0
  16. data/benchmark/run_all.rb +118 -0
  17. data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
  18. data/docs/decisions/002-workflow-context-immutability.md +42 -0
  19. data/docs/decisions/003-event-loop-singleton.md +48 -0
  20. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +51 -0
  21. data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
  22. data/docs/decisions/006-no-built-in-guardrails.md +48 -0
  23. data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
  24. data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
  25. data/docs/decisions/009-state-store-abstraction.md +141 -0
  26. data/lib/phronomy/agent/base.rb +281 -13
  27. data/lib/phronomy/agent/before_completion_context.rb +1 -0
  28. data/lib/phronomy/agent/checkpoint.rb +1 -0
  29. data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
  30. data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
  31. data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
  32. data/lib/phronomy/agent/concerns/retryable.rb +12 -1
  33. data/lib/phronomy/agent/concerns/suspendable.rb +4 -0
  34. data/lib/phronomy/agent/fsm.rb +180 -0
  35. data/lib/phronomy/agent/handoff.rb +3 -0
  36. data/lib/phronomy/agent/orchestrator.rb +123 -11
  37. data/lib/phronomy/agent/parallel_tool_chat.rb +92 -0
  38. data/lib/phronomy/agent/react_agent.rb +8 -6
  39. data/lib/phronomy/agent/runner.rb +2 -0
  40. data/lib/phronomy/agent/shared_state.rb +11 -0
  41. data/lib/phronomy/agent/suspend_signal.rb +2 -0
  42. data/lib/phronomy/agent/team_coordinator.rb +17 -5
  43. data/lib/phronomy/cancellation_token.rb +92 -0
  44. data/lib/phronomy/configuration.rb +32 -2
  45. data/lib/phronomy/context/assembler.rb +6 -0
  46. data/lib/phronomy/context/compaction_context.rb +2 -0
  47. data/lib/phronomy/context/context_version_cache.rb +2 -0
  48. data/lib/phronomy/context/token_budget.rb +3 -0
  49. data/lib/phronomy/context/token_estimator.rb +9 -2
  50. data/lib/phronomy/context/trigger_context.rb +1 -0
  51. data/lib/phronomy/context/trim_context.rb +4 -0
  52. data/lib/phronomy/context.rb +0 -1
  53. data/lib/phronomy/embeddings/base.rb +5 -2
  54. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
  55. data/lib/phronomy/eval/comparison.rb +2 -0
  56. data/lib/phronomy/eval/dataset.rb +4 -0
  57. data/lib/phronomy/eval/metrics.rb +6 -0
  58. data/lib/phronomy/eval/runner.rb +2 -0
  59. data/lib/phronomy/eval/scorer/base.rb +1 -0
  60. data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
  61. data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
  62. data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
  63. data/lib/phronomy/event.rb +14 -0
  64. data/lib/phronomy/event_loop.rb +254 -0
  65. data/lib/phronomy/fsm_session.rb +201 -0
  66. data/lib/phronomy/generator_verifier.rb +24 -22
  67. data/lib/phronomy/guardrail/base.rb +3 -0
  68. data/lib/phronomy/guardrail.rb +0 -1
  69. data/lib/phronomy/knowledge_source/base.rb +6 -2
  70. data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
  71. data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
  72. data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
  73. data/lib/phronomy/loader/base.rb +1 -0
  74. data/lib/phronomy/loader/csv_loader.rb +2 -0
  75. data/lib/phronomy/loader/markdown_loader.rb +2 -0
  76. data/lib/phronomy/loader/plain_text_loader.rb +1 -0
  77. data/lib/phronomy/output_parser/base.rb +1 -0
  78. data/lib/phronomy/output_parser/json_parser.rb +22 -3
  79. data/lib/phronomy/output_parser/structured_parser.rb +2 -0
  80. data/lib/phronomy/prompt_template.rb +5 -0
  81. data/lib/phronomy/runnable.rb +20 -3
  82. data/lib/phronomy/splitter/base.rb +2 -0
  83. data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
  84. data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
  85. data/lib/phronomy/state_store/base.rb +48 -0
  86. data/lib/phronomy/state_store/in_memory.rb +62 -0
  87. data/lib/phronomy/tool/agent_tool.rb +1 -0
  88. data/lib/phronomy/tool/base.rb +189 -27
  89. data/lib/phronomy/tool/mcp_tool.rb +68 -13
  90. data/lib/phronomy/tracing/base.rb +3 -0
  91. data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
  92. data/lib/phronomy/tracing/open_telemetry_tracer.rb +2 -0
  93. data/lib/phronomy/vector_store/base.rb +33 -7
  94. data/lib/phronomy/vector_store/in_memory.rb +16 -7
  95. data/lib/phronomy/vector_store/pgvector.rb +40 -9
  96. data/lib/phronomy/vector_store/redis_search.rb +29 -8
  97. data/lib/phronomy/version.rb +1 -1
  98. data/lib/phronomy/workflow.rb +175 -74
  99. data/lib/phronomy/workflow_context.rb +55 -5
  100. data/lib/phronomy/workflow_runner.rb +197 -114
  101. data/lib/phronomy.rb +74 -1
  102. data/scripts/api_snapshot.rb +91 -0
  103. data/scripts/check_api_annotations.rb +68 -0
  104. data/scripts/check_private_enforcement.rb +93 -0
  105. data/scripts/check_readme_runnable.rb +98 -0
  106. data/scripts/run_mutation.sh +46 -0
  107. metadata +50 -6
  108. data/lib/phronomy/context/builder.rb +0 -92
  109. data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +0 -100
  110. data/lib/phronomy/guardrail/builtin/prompt_injection_detector.rb +0 -67
  111. data/lib/phronomy/guardrail/builtin.rb +0 -16
@@ -1,10 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "digest"
4
+ require "securerandom"
5
+ require "timeout"
4
6
  require_relative "concerns/retryable"
5
7
  require_relative "concerns/guardrailable"
6
8
  require_relative "concerns/before_completion"
7
9
  require_relative "concerns/suspendable"
10
+ require_relative "concerns/error_translation"
8
11
 
9
12
  module Phronomy
10
13
  module Agent
@@ -35,6 +38,7 @@ module Phronomy
35
38
  include Concerns::Guardrailable
36
39
  include Concerns::BeforeCompletion
37
40
  include Concerns::Suspendable
41
+ include Concerns::ErrorTranslation
38
42
 
39
43
  class << self
40
44
  # Sets or reads the LLM model identifier for this agent.
@@ -47,6 +51,7 @@ module Phronomy
47
51
  # class MyAgent < Phronomy::Agent::Base
48
52
  # model "gpt-4o"
49
53
  # end
54
+ # @api public
50
55
  def model(name = nil)
51
56
  if name
52
57
  @model = name
@@ -70,6 +75,7 @@ module Phronomy
70
75
  # class MyAgent < Phronomy::Agent::Base
71
76
  # instructions { |input| "Answer in #{input[:lang]}." }
72
77
  # end
78
+ # @api public
73
79
  def instructions(text = nil, &block)
74
80
  if text || block_given?
75
81
  @instructions = text || block
@@ -95,6 +101,7 @@ module Phronomy
95
101
  # Places::SearchTool => "places_search",
96
102
  # CurrentTimeTool => nil
97
103
  # )
104
+ # @api public
98
105
  def tools(*args)
99
106
  if args.empty?
100
107
  if instance_variable_defined?(:@tools)
@@ -114,9 +121,17 @@ module Phronomy
114
121
  end
115
122
 
116
123
  # Returns the alias map registered via the hash form of .tools.
124
+ # Merges parent class aliases so subclasses inherit their parent's mappings.
125
+ # Subclass-specific aliases take precedence over parent aliases.
117
126
  # @return [Hash{Class => String}]
127
+ # @api public
118
128
  def tool_aliases
119
- @tool_aliases ||= {}
129
+ own = @tool_aliases || {}
130
+ if superclass.respond_to?(:tool_aliases)
131
+ superclass.tool_aliases.merge(own)
132
+ else
133
+ own
134
+ end
120
135
  end
121
136
 
122
137
  # Sets or reads the LLM provider for this agent.
@@ -130,6 +145,7 @@ module Phronomy
130
145
  # model "openai/gpt-oss-20b"
131
146
  # provider :openai
132
147
  # end
148
+ # @api public
133
149
  def provider(name = nil)
134
150
  if name
135
151
  @provider = name
@@ -148,6 +164,7 @@ module Phronomy
148
164
  # class MyAgent < Phronomy::Agent::Base
149
165
  # temperature 0.2
150
166
  # end
167
+ # @api public
151
168
  def temperature(val = nil)
152
169
  if val
153
170
  @temperature = val
@@ -165,6 +182,7 @@ module Phronomy
165
182
  # class MyAgent < Phronomy::Agent::Base
166
183
  # max_iterations 5
167
184
  # end
185
+ # @api public
168
186
  def max_iterations(val = nil)
169
187
  if val
170
188
  @max_iterations = val
@@ -173,27 +191,121 @@ module Phronomy
173
191
  end
174
192
  end
175
193
 
194
+ # Sets or reads the maximum number of tool calls executed concurrently
195
+ # when the LLM returns multiple tool calls in a single response
196
+ # (ParallelToolChat mode, active inside an AgentFSM IO thread).
197
+ #
198
+ # Defaults to 10. Set to 1 to force sequential execution.
199
+ # Inherited by subclasses; the most-specific definition wins.
200
+ #
201
+ # @param val [Integer, nil]
202
+ # @return [Integer]
203
+ # @example
204
+ # class MyAgent < Phronomy::Agent::Base
205
+ # max_parallel_tools 4
206
+ # end
207
+ # @api public
208
+ def max_parallel_tools(val = nil)
209
+ if val.nil?
210
+ @max_parallel_tools ||
211
+ (superclass.respond_to?(:max_parallel_tools) ? superclass.max_parallel_tools : 10)
212
+ else
213
+ unless val.is_a?(Integer) && val >= 1
214
+ raise ArgumentError,
215
+ "max_parallel_tools must be a positive Integer (>= 1), got #{val.inspect}"
216
+ end
217
+ @max_parallel_tools = val
218
+ end
219
+ end
220
+
221
+ # Sets or reads the per-invocation timeout (in seconds) for EventLoop-mode
222
+ # agent calls. When set, +invoke+ raises {Phronomy::TimeoutError} if the
223
+ # agent does not finish within the given number of seconds.
224
+ #
225
+ # Has no effect when EventLoop mode is disabled (direct invoke path).
226
+ # Defaults to +nil+ (no timeout).
227
+ # Inherited by subclasses; the most-specific definition wins.
228
+ #
229
+ # **Note**: +invoke_timeout+ is a *wait timeout*, not a cancellation.
230
+ # When the timeout fires, +Phronomy::TimeoutError+ is raised to the
231
+ # caller, but the background agent thread and any in-flight LLM or tool
232
+ # calls are **not** interrupted — they continue running until they
233
+ # complete naturally. The agent therefore keeps consuming threads,
234
+ # memory, and external API credits after the caller has already received
235
+ # the error. True cancellation is not yet supported.
236
+ #
237
+ # @param val [Numeric, nil]
238
+ # @return [Numeric, nil]
239
+ # @example
240
+ # class MyAgent < Phronomy::Agent::Base
241
+ # invoke_timeout 30
242
+ # end
243
+ # @api public
244
+ def invoke_timeout(val = nil)
245
+ if val.nil?
246
+ return @invoke_timeout if defined?(@invoke_timeout)
247
+ superclass.respond_to?(:invoke_timeout) ? superclass.invoke_timeout : nil
248
+ else
249
+ unless val.is_a?(Numeric) && val > 0
250
+ raise ArgumentError,
251
+ "invoke_timeout must be a positive number, got #{val.inspect}"
252
+ end
253
+ @invoke_timeout = val
254
+ end
255
+ end
256
+
176
257
  # Registers one or more static knowledge sources on the agent class.
177
- # Static sources are fetched once per agent instance and their content
178
- # is cached in ContextVersionCache keyed by a fingerprint of the
179
- # instruction text + source content. The cache is invalidated automatically
180
- # when the fingerprint changes (e.g. because a source was updated).
258
+ # Static source content is fetched and memoized at the **class** level
259
+ # the first time +invoke+ is called. The cache persists for the lifetime
260
+ # of the process; call {.static_knowledge_refresh!} to force a reload.
181
261
  #
182
262
  # @param sources [Array<Phronomy::KnowledgeSource::Base>]
183
263
  # @example
184
264
  # class PolicyAgent < Phronomy::Agent::Base
185
265
  # static_knowledge Phronomy::KnowledgeSource::StaticKnowledge.new(POLICY_TEXT)
186
266
  # end
267
+ # @api public
187
268
  def static_knowledge(*sources)
188
269
  @static_knowledge_sources = sources.flatten
270
+ # Invalidate the cached chunks so the new sources are fetched on
271
+ # the next call to static_knowledge_chunks.
272
+ @static_knowledge_chunks = nil
189
273
  end
190
274
 
191
275
  # Returns the registered static knowledge sources.
192
276
  # @return [Array<Phronomy::KnowledgeSource::Base>]
277
+ # @api public
193
278
  def static_knowledge_sources
194
279
  @static_knowledge_sources || []
195
280
  end
196
281
 
282
+ # Returns the fetched content from all static knowledge sources.
283
+ # Results are cached at the class level so that each source is fetched
284
+ # only once regardless of how many times the agent is invoked.
285
+ # @return [Array<Hash>]
286
+ # @api public
287
+ def static_knowledge_chunks
288
+ @static_knowledge_chunks ||= static_knowledge_sources.flat_map { |ks|
289
+ ks.fetch(query: nil)
290
+ }
291
+ end
292
+
293
+ # Clears the class-level knowledge cache so that the next +invoke+ call
294
+ # re-fetches content from all registered static knowledge sources.
295
+ #
296
+ # Call this method when the underlying knowledge source has been updated
297
+ # at runtime (e.g. a file was rewritten, a DB record changed) and you
298
+ # want the agent to pick up the new content without restarting the
299
+ # process.
300
+ #
301
+ # @return [nil]
302
+ # @example Refresh after updating a knowledge file
303
+ # MyAgent.static_knowledge_refresh!
304
+ # @api public
305
+ def static_knowledge_refresh!
306
+ @static_knowledge_chunks = nil
307
+ end
308
+
197
309
  # Registers a callback that is invoked before every LLM call so the
198
310
  # application can remove stale or irrelevant messages from the
199
311
  # conversation history.
@@ -208,11 +320,13 @@ module Phronomy
208
320
  # limit = ctx.budget&.available(used: 0) || Float::INFINITY
209
321
  # ctx.remove(ctx.message_elements.first[:seq]) if ctx.total_tokens > limit * 0.8
210
322
  # end
323
+ # @api public
211
324
  def on_trim(&block)
212
325
  @on_trim_callback = block
213
326
  end
214
327
 
215
328
  # @return [Proc, nil]
329
+ # @api private
216
330
  def _on_trim_callback
217
331
  @on_trim_callback
218
332
  end
@@ -231,11 +345,13 @@ module Phronomy
231
345
  # limit = ctx.budget&.available(used: 0) || Float::INFINITY
232
346
  # ctx.total_tokens > limit * 0.7
233
347
  # end
348
+ # @api public
234
349
  def on_compaction_trigger(&block)
235
350
  @on_compaction_trigger_callback = block
236
351
  end
237
352
 
238
353
  # @return [Proc, nil]
354
+ # @api private
239
355
  def _on_compaction_trigger_callback
240
356
  @on_compaction_trigger_callback
241
357
  end
@@ -253,11 +369,13 @@ module Phronomy
253
369
  # "Earlier conversation summary: #{texts}"
254
370
  # end
255
371
  # end
372
+ # @api public
256
373
  def on_compact(&block)
257
374
  @on_compact_callback = block
258
375
  end
259
376
 
260
377
  # @return [Proc, nil]
378
+ # @api private
261
379
  def _on_compact_callback
262
380
  @on_compact_callback
263
381
  end
@@ -277,6 +395,7 @@ module Phronomy
277
395
  # provider :anthropic
278
396
  # cache_instructions true
279
397
  # end
398
+ # @api public
280
399
  def cache_instructions(enabled = nil)
281
400
  if enabled.nil?
282
401
  @cache_instructions
@@ -292,6 +411,7 @@ module Phronomy
292
411
  # class MyAgent < Phronomy::Agent::Base
293
412
  # max_output_tokens 4096
294
413
  # end
414
+ # @api public
295
415
  def max_output_tokens(val = nil)
296
416
  if val.nil?
297
417
  @max_output_tokens
@@ -309,6 +429,7 @@ module Phronomy
309
429
  # class MyAgent < Phronomy::Agent::Base
310
430
  # context_window 4096
311
431
  # end
432
+ # @api public
312
433
  def context_window(val = nil)
313
434
  if val.nil?
314
435
  @context_window
@@ -324,6 +445,7 @@ module Phronomy
324
445
  # class MyAgent < Phronomy::Agent::Base
325
446
  # context_overhead 500
326
447
  # end
448
+ # @api public
327
449
  def context_overhead(val = nil)
328
450
  if val.nil?
329
451
  @context_overhead || 0
@@ -337,6 +459,7 @@ module Phronomy
337
459
  # Called by Runner during construction when routes are configured.
338
460
  # @param tool_class [Class<Phronomy::Tool::Base>]
339
461
  # @return [self]
462
+ # @api private
340
463
  def _add_handoff_tool(tool_class)
341
464
  @_handoff_tools ||= []
342
465
  @_handoff_tools << tool_class
@@ -345,6 +468,7 @@ module Phronomy
345
468
 
346
469
  # Returns handoff tool classes registered on this instance by Runner.
347
470
  # @return [Array<Class>]
471
+ # @api private
348
472
  def _handoff_tools
349
473
  @_handoff_tools || []
350
474
  end
@@ -381,8 +505,100 @@ module Phronomy
381
505
  # result = agent.resume(result[:checkpoint], approved: true)
382
506
  # end
383
507
  # puts result[:output]
508
+ # @api public
384
509
  def invoke(input, messages: [], thread_id: nil, config: {})
385
- _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
510
+ if Phronomy.configuration.event_loop
511
+ # Protect against blocking the EventLoop thread itself.
512
+ if Thread.current[:phronomy_event_loop_thread]
513
+ raise Phronomy::Error,
514
+ "Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
515
+ "entry action. Use agent.run_as_child(input, ctx: ctx) instead."
516
+ end
517
+
518
+ fsm = Agent::FSM.new(
519
+ agent: self,
520
+ input: input,
521
+ messages: messages,
522
+ thread_id: thread_id || SecureRandom.uuid,
523
+ config: config
524
+ )
525
+ completion_queue = Phronomy::EventLoop.instance.register(fsm)
526
+ timeout_sec = self.class.invoke_timeout
527
+ result = if timeout_sec
528
+ begin
529
+ Timeout.timeout(timeout_sec) { completion_queue.pop }
530
+ rescue Timeout::Error
531
+ raise Phronomy::TimeoutError,
532
+ "Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
533
+ end
534
+ else
535
+ completion_queue.pop
536
+ end
537
+ raise result if result.is_a?(Exception)
538
+ result
539
+ else
540
+ _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
541
+ end
542
+ ensure
543
+ # Remove this agent's context cache entry from the current thread to
544
+ # prevent unbounded growth of the thread-local hash in long-lived
545
+ # processes (e.g. Rails servers).
546
+ Thread.current[:phronomy_context_version_caches]&.delete(object_id)
547
+ end
548
+
549
+ # Registers this agent as a child {AgentFSM} inside the given Workflow context.
550
+ #
551
+ # Use this method from a Workflow entry action (running on the EventLoop thread)
552
+ # instead of {#invoke}, which would raise a deadlock error because +invoke+ blocks
553
+ # on a +Thread::Queue+ when EventLoop mode is active.
554
+ #
555
+ # The agent runs asynchronously in a background IO thread. When it finishes, the
556
+ # parent {FSMSession} receives a +:child_completed+ event whose payload is the
557
+ # result hash +{ output:, messages:, usage: }+. Declare an +on: :child_completed+
558
+ # transition in your Workflow to advance to the next state.
559
+ #
560
+ # An optional block may be provided to write the result back into the parent
561
+ # WorkflowContext <b>before</b> the +:child_completed+ event is dispatched.
562
+ # +Thread::Queue+ provides the happens-before guarantee — no Mutex is needed.
563
+ #
564
+ # @example Without block (result available only as event payload)
565
+ # entry :run_agent, ->(ctx) { MyAgent.new.run_as_child(ctx.query, ctx: ctx) }
566
+ # transition from: :run_agent, on: :child_completed, to: :process_result
567
+ #
568
+ # @example With block (writes result into context)
569
+ # entry :run_agent, ->(ctx) {
570
+ # MyAgent.new.run_as_child(ctx.query, ctx: ctx) { |r| ctx.answer = r[:output] }
571
+ # }
572
+ # transition from: :run_agent, on: :child_completed, to: :process_result
573
+ #
574
+ # @param input [String, Hash] user input passed to the agent
575
+ # @param ctx [Object] a WorkflowContext that responds to +#thread_id+
576
+ # @param messages [Array] prior conversation history
577
+ # @param config [Hash] invocation config (forwarded to +_invoke_impl+)
578
+ # @yield [Hash] result hash +{ output:, messages:, usage: }+ — called from the
579
+ # agent IO thread before +:child_completed+ is posted
580
+ # @return [nil] the caller must not wait on any return value;
581
+ # the result arrives as a +:child_completed+ event
582
+ # @raise [Phronomy::Error] when EventLoop mode is not enabled
583
+ # @api public
584
+ def run_as_child(input, ctx:, messages: [], config: {}, &result_writer)
585
+ unless Phronomy.configuration.event_loop
586
+ raise Phronomy::Error,
587
+ "run_as_child requires EventLoop mode. " \
588
+ "Enable with: Phronomy.configure { |c| c.event_loop = true }"
589
+ end
590
+
591
+ fsm = Agent::FSM.new(
592
+ agent: self,
593
+ input: input,
594
+ messages: messages,
595
+ thread_id: "#{ctx.thread_id}_agent_#{SecureRandom.uuid}",
596
+ config: config,
597
+ parent_id: ctx.thread_id,
598
+ result_writer: result_writer
599
+ )
600
+ Phronomy::EventLoop.instance.enqueue_child(fsm)
601
+ nil
386
602
  end
387
603
 
388
604
  # Streaming version of #invoke. Yields {Phronomy::Agent::StreamEvent} objects
@@ -401,6 +617,7 @@ module Phronomy
401
617
  # @param config [Hash] same as #invoke
402
618
  # @yield [Phronomy::Agent::StreamEvent]
403
619
  # @return [Hash] { output:, messages:, usage: } — same as #invoke
620
+ # @api public
404
621
  def stream(input, messages: [], thread_id: nil, config: {}, &block)
405
622
  return invoke(input, messages: messages, thread_id: thread_id, config: config) unless block
406
623
 
@@ -410,10 +627,19 @@ module Phronomy
410
627
  raise
411
628
  end
412
629
 
413
- # Returns the {Context::ContextVersionCache} for the current thread.
630
+ # Returns the {Context::ContextVersionCache} built during the most recent
631
+ # {#invoke} call on this agent instance. The thread-local cache entry is
632
+ # cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
633
+ # in +@last_context_version_cache+ so callers can inspect it after invoke
634
+ # returns.
635
+ #
636
+ # NOTE: Not thread-safe. When the same Agent instance is used concurrently,
637
+ # +@last_context_version_cache+ reflects the most recent +invoke+ on *any*
638
+ # thread. For per-invocation isolation, use a separate Agent instance per
639
+ # thread.
414
640
  # @api private
415
641
  def context_version_cache
416
- (Thread.current[:phronomy_context_version_caches] ||= {})[object_id]
642
+ @last_context_version_cache
417
643
  end
418
644
 
419
645
  private
@@ -455,6 +681,7 @@ module Phronomy
455
681
 
456
682
  response = chat.ask(user_message) do |chunk|
457
683
  block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
684
+ check_cancellation!(config, "invocation cancelled during streaming")
458
685
  end
459
686
 
460
687
  output = response.content
@@ -478,6 +705,7 @@ module Phronomy
478
705
  # @param thread_id [String, nil] conversation thread identifier
479
706
  # @param config [Hash] the invocation config (see #invoke)
480
707
  # @return [Hash] { system: String|nil, messages: Array }
708
+ # @api public
481
709
  def build_context(input, messages: [], thread_id: nil, config: {})
482
710
  history = prepare_history(messages: messages, thread_id: thread_id, config: config)
483
711
  budget = build_token_budget
@@ -488,7 +716,8 @@ module Phronomy
488
716
  assembler.add_instruction(system_text) if system_text
489
717
 
490
718
  Array(config[:knowledge_sources]).each do |ks|
491
- ks.fetch(query: user_message).each do |chunk|
719
+ check_cancellation!(config, "invocation cancelled during RAG fetch")
720
+ ks.fetch(query: user_message, cancellation_token: config[:cancellation_token]).each do |chunk|
492
721
  assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
493
722
  end
494
723
  end
@@ -509,6 +738,7 @@ module Phronomy
509
738
  # @param thread_id [String, nil] conversation thread identifier
510
739
  # @param config [Hash] additional invocation options
511
740
  # @return [Array] filtered and/or compacted message objects
741
+ # @api public
512
742
  def prepare_history(messages: [], thread_id: nil, config: {})
513
743
  budget = build_token_budget
514
744
  elements = build_message_elements(Array(messages))
@@ -565,6 +795,15 @@ module Phronomy
565
795
  # synchronous on_approval_required handler is already registered).
566
796
  _register_suspension_hook!(chat)
567
797
 
798
+ # Check for cancellation immediately before the LLM call.
799
+ check_cancellation!(config, "invocation cancelled before LLM call")
800
+
801
+ # Forward the cancellation token to ParallelToolChat via a thread-local
802
+ # so that tool dispatch batches can observe cancellation without needing
803
+ # direct access to config.
804
+ prev_ct = Thread.current[:phronomy_cancellation_token]
805
+ Thread.current[:phronomy_cancellation_token] = config[:cancellation_token]
806
+
568
807
  begin
569
808
  response = chat.ask(user_message)
570
809
  rescue SuspendSignal => signal
@@ -578,6 +817,8 @@ module Phronomy
578
817
  )
579
818
  suspended_result = {output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}
580
819
  next [suspended_result, nil]
820
+ ensure
821
+ Thread.current[:phronomy_cancellation_token] = prev_ct
581
822
  end
582
823
 
583
824
  output = response.content
@@ -623,6 +864,7 @@ module Phronomy
623
864
  #
624
865
  # @param messages [Array] message-like objects with #role and #content
625
866
  # @return [Array<Hash>]
867
+ # @api public
626
868
  def build_message_elements(messages)
627
869
  Array(messages).each_with_index.map do |msg, idx|
628
870
  tokens = Context::TokenEstimator.estimate(msg.content.to_s)
@@ -638,12 +880,11 @@ module Phronomy
638
880
  #
639
881
  # @param input [String, Hash] the agent's current input (used for template evaluation)
640
882
  # @return [String, nil] assembled system text, or nil when empty
883
+ # @api public
641
884
  def build_cached_system_text(input)
642
885
  instruction = build_instructions(input)
643
886
 
644
- static_chunks = self.class.static_knowledge_sources.flat_map { |ks|
645
- ks.fetch(query: nil)
646
- }
887
+ static_chunks = self.class.static_knowledge_chunks
647
888
 
648
889
  fingerprint = Digest::SHA256.hexdigest(
649
890
  [instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
@@ -660,11 +901,25 @@ module Phronomy
660
901
  cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
661
902
  end
662
903
 
904
+ # Persist a reference on the instance so that context_version_cache
905
+ # remains accessible after invoke's ensure block cleans up the
906
+ # thread-local entry.
907
+ @last_context_version_cache = cache
908
+
663
909
  cache.system_text.empty? ? nil : cache.system_text
664
910
  end
665
911
 
666
912
  # Load messages from a ConversationManager.
667
913
  #
914
+ # Returns the chat class to instantiate for this invocation.
915
+ # When the +:phronomy_agent_parallel_tools+ thread-local flag is set
916
+ # (i.e. inside an {AgentFSM} IO thread), returns {ParallelToolChat} so
917
+ # that concurrent tool dispatch is enabled. Falls back to +nil+ otherwise,
918
+ # signalling {#build_chat} to use the standard +RubyLLM.chat+ factory.
919
+ def build_chat_class
920
+ Thread.current[:phronomy_agent_parallel_tools] ? Agent::ParallelToolChat : nil
921
+ end
922
+
668
923
  def build_chat
669
924
  opts = {}
670
925
  m = self.class.model
@@ -675,7 +930,8 @@ module Phronomy
675
930
  opts[:assume_model_exists] = true
676
931
  end
677
932
  t = self.class.temperature
678
- chat = RubyLLM.chat(**opts)
933
+ parallel_class = build_chat_class
934
+ chat = parallel_class ? parallel_class.new(**opts) : RubyLLM.chat(**opts)
679
935
  chat.with_temperature(t) if t
680
936
  self.class.tools.each do |tool_class|
681
937
  chat.with_tool(prepare_tool_class(tool_class))
@@ -725,6 +981,18 @@ module Phronomy
725
981
  end
726
982
  end
727
983
 
984
+ # Raises CancellationError if the cancellation_token in config is cancelled.
985
+ # No-op when config has no cancellation_token or the token is not cancelled.
986
+ #
987
+ # @param config [Hash] the invocation config hash
988
+ # @param message [String] the message for the CancellationError
989
+ # @raise [Phronomy::CancellationError]
990
+ # @api public
991
+ def check_cancellation!(config, message = "invocation cancelled")
992
+ ct = config[:cancellation_token]
993
+ raise Phronomy::CancellationError, message if ct&.cancelled?
994
+ end
995
+
728
996
  # Builds the final tool class to register with the chat.
729
997
  #
730
998
  # Two transformations are applied in order:
@@ -35,6 +35,7 @@ module Phronomy
35
35
  # @param messages [Array]
36
36
  # @param config [Hash]
37
37
  # @param params [Hash] initial params (model, temperature already set on chat)
38
+ # @api public
38
39
  def initialize(agent:, messages:, config:, params: {})
39
40
  @agent = agent
40
41
  @messages = messages.dup.freeze
@@ -47,6 +47,7 @@ module Phronomy
47
47
  # @param pending_tool_name [String]
48
48
  # @param pending_tool_args [Hash]
49
49
  # @param pending_tool_call_id [String]
50
+ # @api public
50
51
  def initialize(thread_id:, original_input:, messages:, pending_tool_name:, pending_tool_args:, pending_tool_call_id:)
51
52
  @thread_id = thread_id
52
53
  @original_input = original_input
@@ -8,6 +8,7 @@ module Phronomy
8
8
  # Included in {Phronomy::Agent::Base}. Hooks are executed just before every
9
9
  # LLM call (global → class → instance order) and may inject or override
10
10
  # LLM parameters such as temperature or model.
11
+ # @api private
11
12
  module BeforeCompletion
12
13
  def self.included(base)
13
14
  base.extend(ClassMethods)
@@ -26,6 +27,7 @@ module Phronomy
26
27
  # class MyAgent < Phronomy::Agent::Base
27
28
  # before_completion ->(ctx) { { temperature: 0.2 } }
28
29
  # end
30
+ # @api private
29
31
  def before_completion(callable = nil)
30
32
  if callable.nil? && !block_given?
31
33
  @before_completion
@@ -35,6 +37,7 @@ module Phronomy
35
37
  end
36
38
 
37
39
  # @return [#call, nil]
40
+ # @api private
38
41
  def _before_completion
39
42
  @before_completion
40
43
  end
@@ -53,6 +56,7 @@ module Phronomy
53
56
  # @param chat [RubyLLM::Chat] the assembled chat object
54
57
  # @param config [Hash] the invocation config hash
55
58
  # @return [Hash] the merged params applied to the chat
59
+ # @api private
56
60
  def run_before_completion_hooks!(chat, config)
57
61
  hooks = [
58
62
  Phronomy.configuration.before_completion,
@@ -72,6 +76,7 @@ module Phronomy
72
76
  merged = {}
73
77
  hooks.each do |hook|
74
78
  result = hook.call(ctx)
79
+ check_cancellation!(config, "invocation cancelled during before_completion hook")
75
80
  merged.merge!(result) if result.is_a?(Hash)
76
81
  end
77
82
 
@@ -86,6 +91,7 @@ module Phronomy
86
91
  #
87
92
  # @param chat [RubyLLM::Chat]
88
93
  # @param params [Hash]
94
+ # @api private
89
95
  def apply_before_completion_params!(chat, params)
90
96
  params.each do |key, value|
91
97
  case key
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Phronomy
4
+ module Agent
5
+ module Concerns
6
+ # Translates RubyLLM transport errors into the corresponding Phronomy error
7
+ # classes so that callers can rescue Phronomy-namespaced exceptions rather
8
+ # than coupling themselves to the underlying provider library.
9
+ #
10
+ # Included in {Phronomy::Agent::Base}.
11
+ module ErrorTranslation
12
+ private
13
+
14
+ # Re-raises +error+ as the most specific Phronomy error class that
15
+ # corresponds to it. Non-RubyLLM errors are re-raised unchanged.
16
+ # The original exception is available as +#cause+ on the translated error.
17
+ #
18
+ # Must be called from within an active +rescue+ block so that Ruby
19
+ # automatically sets +#cause+ on the new exception.
20
+ #
21
+ # @param error [Exception]
22
+ # @raise [Phronomy::RateLimitError] for provider HTTP 429
23
+ # @raise [Phronomy::AuthenticationError] for provider HTTP 401 / 403
24
+ # @raise [Phronomy::ContextLengthError] for context window overflow
25
+ # @raise [Phronomy::TransportError] for all other +RubyLLM::Error+ subclasses
26
+ # @raise re-raises +error+ unchanged for non-RubyLLM exceptions
27
+ # @api private
28
+ def translate_and_reraise!(error)
29
+ case error
30
+ when RubyLLM::RateLimitError
31
+ raise Phronomy::RateLimitError, error.message
32
+ when RubyLLM::UnauthorizedError, RubyLLM::ForbiddenError
33
+ raise Phronomy::AuthenticationError, error.message
34
+ when RubyLLM::ContextLengthExceededError
35
+ raise Phronomy::ContextLengthError, error.message
36
+ when RubyLLM::Error
37
+ raise Phronomy::TransportError, error.message
38
+ else
39
+ raise # bare re-raise preserves $! and its backtrace unchanged
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -8,10 +8,12 @@ module Phronomy
8
8
  # Included in {Phronomy::Agent::Base}. Guardrails are run on the raw
9
9
  # input string before the LLM is called, and on the raw output string
10
10
  # before the result is returned to the caller.
11
+ # @api private
11
12
  module Guardrailable
12
13
  # Attach a guardrail that validates input before every #invoke call.
13
14
  # @param guardrail [Phronomy::Guardrail::InputGuardrail]
14
15
  # @return [self]
16
+ # @api private
15
17
  def add_input_guardrail(guardrail)
16
18
  @input_guardrails ||= []
17
19
  @input_guardrails << guardrail
@@ -21,6 +23,7 @@ module Phronomy
21
23
  # Attach a guardrail that validates output before it is returned.
22
24
  # @param guardrail [Phronomy::Guardrail::OutputGuardrail]
23
25
  # @return [self]
26
+ # @api private
24
27
  def add_output_guardrail(guardrail)
25
28
  @output_guardrails ||= []
26
29
  @output_guardrails << guardrail