phronomy 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +22 -0
  3. data/CHANGELOG.md +488 -0
  4. data/CONTRIBUTING.md +102 -0
  5. data/README.md +374 -36
  6. data/RELEASE_CHECKLIST.md +86 -0
  7. data/Rakefile +33 -0
  8. data/SECURITY.md +80 -0
  9. data/benchmark/baseline.json +9 -0
  10. data/benchmark/bench_agent_invoke.rb +105 -0
  11. data/benchmark/bench_context_assembler.rb +46 -0
  12. data/benchmark/bench_regression.rb +172 -0
  13. data/benchmark/bench_token_estimator.rb +44 -0
  14. data/benchmark/bench_tool_schema.rb +69 -0
  15. data/benchmark/bench_vector_store.rb +39 -0
  16. data/benchmark/bench_workflow.rb +55 -0
  17. data/benchmark/run_all.rb +118 -0
  18. data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
  19. data/docs/decisions/002-workflow-context-immutability.md +42 -0
  20. data/docs/decisions/003-event-loop-singleton.md +48 -0
  21. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +75 -0
  22. data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
  23. data/docs/decisions/006-no-built-in-guardrails.md +66 -0
  24. data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
  25. data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
  26. data/docs/decisions/009-state-store-abstraction.md +141 -0
  27. data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
  28. data/lib/phronomy/agent/base.rb +416 -49
  29. data/lib/phronomy/agent/before_completion_context.rb +1 -0
  30. data/lib/phronomy/agent/checkpoint.rb +1 -0
  31. data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
  32. data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
  33. data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
  34. data/lib/phronomy/agent/concerns/retryable.rb +12 -1
  35. data/lib/phronomy/agent/concerns/suspendable.rb +19 -0
  36. data/lib/phronomy/agent/fsm.rb +44 -52
  37. data/lib/phronomy/agent/handoff.rb +3 -0
  38. data/lib/phronomy/agent/orchestrator.rb +191 -54
  39. data/lib/phronomy/agent/parallel_tool_chat.rb +87 -13
  40. data/lib/phronomy/agent/react_agent.rb +16 -6
  41. data/lib/phronomy/agent/runner.rb +2 -0
  42. data/lib/phronomy/agent/shared_state.rb +11 -0
  43. data/lib/phronomy/agent/suspend_signal.rb +2 -0
  44. data/lib/phronomy/agent/team_coordinator.rb +17 -5
  45. data/lib/phronomy/async_queue.rb +155 -0
  46. data/lib/phronomy/blocking_adapter_pool.rb +435 -0
  47. data/lib/phronomy/cancellation_scope.rb +123 -0
  48. data/lib/phronomy/cancellation_token.rb +133 -0
  49. data/lib/phronomy/concurrency_gate.rb +155 -0
  50. data/lib/phronomy/configuration.rb +168 -2
  51. data/lib/phronomy/context/assembler.rb +6 -0
  52. data/lib/phronomy/context/compaction_context.rb +2 -0
  53. data/lib/phronomy/context/context_version_cache.rb +2 -0
  54. data/lib/phronomy/context/token_budget.rb +3 -0
  55. data/lib/phronomy/context/token_estimator.rb +9 -2
  56. data/lib/phronomy/context/trigger_context.rb +1 -0
  57. data/lib/phronomy/context/trim_context.rb +4 -0
  58. data/lib/phronomy/deadline.rb +63 -0
  59. data/lib/phronomy/diagnostics.rb +62 -0
  60. data/lib/phronomy/embeddings/base.rb +22 -2
  61. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
  62. data/lib/phronomy/eval/comparison.rb +2 -0
  63. data/lib/phronomy/eval/dataset.rb +4 -0
  64. data/lib/phronomy/eval/metrics.rb +6 -0
  65. data/lib/phronomy/eval/runner.rb +11 -9
  66. data/lib/phronomy/eval/scorer/base.rb +1 -0
  67. data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
  68. data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
  69. data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
  70. data/lib/phronomy/event_loop.rb +275 -30
  71. data/lib/phronomy/fsm_session.rb +57 -4
  72. data/lib/phronomy/generator_verifier.rb +2 -0
  73. data/lib/phronomy/guardrail/base.rb +3 -0
  74. data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
  75. data/lib/phronomy/invocation_context.rb +152 -0
  76. data/lib/phronomy/knowledge_source/base.rb +24 -2
  77. data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
  78. data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
  79. data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
  80. data/lib/phronomy/llm_adapter/base.rb +104 -0
  81. data/lib/phronomy/llm_adapter/ruby_llm.rb +41 -0
  82. data/lib/phronomy/llm_adapter.rb +20 -0
  83. data/lib/phronomy/loader/base.rb +1 -0
  84. data/lib/phronomy/loader/csv_loader.rb +2 -0
  85. data/lib/phronomy/loader/markdown_loader.rb +2 -0
  86. data/lib/phronomy/loader/plain_text_loader.rb +1 -0
  87. data/lib/phronomy/metrics.rb +38 -0
  88. data/lib/phronomy/output_parser/base.rb +1 -0
  89. data/lib/phronomy/output_parser/json_parser.rb +22 -3
  90. data/lib/phronomy/output_parser/structured_parser.rb +2 -0
  91. data/lib/phronomy/prompt_template.rb +5 -0
  92. data/lib/phronomy/runnable.rb +20 -3
  93. data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
  94. data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
  95. data/lib/phronomy/runtime/gate_registry.rb +52 -0
  96. data/lib/phronomy/runtime/pool_registry.rb +57 -0
  97. data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
  98. data/lib/phronomy/runtime/scheduler.rb +98 -0
  99. data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
  100. data/lib/phronomy/runtime/task_registry.rb +48 -0
  101. data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
  102. data/lib/phronomy/runtime/timer_queue.rb +106 -0
  103. data/lib/phronomy/runtime/timer_service.rb +42 -0
  104. data/lib/phronomy/runtime.rb +374 -0
  105. data/lib/phronomy/splitter/base.rb +2 -0
  106. data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
  107. data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
  108. data/lib/phronomy/state_store/base.rb +48 -0
  109. data/lib/phronomy/state_store/in_memory.rb +62 -0
  110. data/lib/phronomy/task/backend.rb +80 -0
  111. data/lib/phronomy/task/fiber_backend.rb +157 -0
  112. data/lib/phronomy/task/immediate_backend.rb +89 -0
  113. data/lib/phronomy/task/thread_backend.rb +84 -0
  114. data/lib/phronomy/task.rb +275 -0
  115. data/lib/phronomy/task_group.rb +265 -0
  116. data/lib/phronomy/testing/fake_clock.rb +109 -0
  117. data/lib/phronomy/testing/fake_scheduler.rb +104 -0
  118. data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
  119. data/lib/phronomy/testing.rb +12 -0
  120. data/lib/phronomy/tool/agent_tool.rb +1 -0
  121. data/lib/phronomy/tool/base.rb +298 -28
  122. data/lib/phronomy/tool/mcp_tool.rb +103 -17
  123. data/lib/phronomy/tool/scope_policy.rb +50 -0
  124. data/lib/phronomy/tool_executor.rb +106 -0
  125. data/lib/phronomy/tracing/base.rb +3 -0
  126. data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
  127. data/lib/phronomy/tracing/open_telemetry_tracer.rb +36 -0
  128. data/lib/phronomy/vector_store/async_backend.rb +110 -0
  129. data/lib/phronomy/vector_store/base.rb +40 -7
  130. data/lib/phronomy/vector_store/in_memory.rb +16 -7
  131. data/lib/phronomy/vector_store/pgvector.rb +40 -9
  132. data/lib/phronomy/vector_store/redis_search.rb +29 -8
  133. data/lib/phronomy/version.rb +1 -1
  134. data/lib/phronomy/workflow.rb +147 -11
  135. data/lib/phronomy/workflow_context.rb +83 -6
  136. data/lib/phronomy/workflow_runner.rb +106 -7
  137. data/lib/phronomy.rb +112 -1
  138. data/scripts/api_snapshot.rb +91 -0
  139. data/scripts/check_api_annotations.rb +68 -0
  140. data/scripts/check_private_enforcement.rb +93 -0
  141. data/scripts/check_readme_runnable.rb +98 -0
  142. data/scripts/run_mutation.sh +46 -0
  143. metadata +83 -2
@@ -6,6 +6,7 @@ require_relative "concerns/retryable"
6
6
  require_relative "concerns/guardrailable"
7
7
  require_relative "concerns/before_completion"
8
8
  require_relative "concerns/suspendable"
9
+ require_relative "concerns/error_translation"
9
10
 
10
11
  module Phronomy
11
12
  module Agent
@@ -36,6 +37,7 @@ module Phronomy
36
37
  include Concerns::Guardrailable
37
38
  include Concerns::BeforeCompletion
38
39
  include Concerns::Suspendable
40
+ include Concerns::ErrorTranslation
39
41
 
40
42
  class << self
41
43
  # Sets or reads the LLM model identifier for this agent.
@@ -48,6 +50,7 @@ module Phronomy
48
50
  # class MyAgent < Phronomy::Agent::Base
49
51
  # model "gpt-4o"
50
52
  # end
53
+ # @api public
51
54
  def model(name = nil)
52
55
  if name
53
56
  @model = name
@@ -71,6 +74,7 @@ module Phronomy
71
74
  # class MyAgent < Phronomy::Agent::Base
72
75
  # instructions { |input| "Answer in #{input[:lang]}." }
73
76
  # end
77
+ # @api public
74
78
  def instructions(text = nil, &block)
75
79
  if text || block_given?
76
80
  @instructions = text || block
@@ -96,6 +100,7 @@ module Phronomy
96
100
  # Places::SearchTool => "places_search",
97
101
  # CurrentTimeTool => nil
98
102
  # )
103
+ # @api public
99
104
  def tools(*args)
100
105
  if args.empty?
101
106
  if instance_variable_defined?(:@tools)
@@ -115,9 +120,17 @@ module Phronomy
115
120
  end
116
121
 
117
122
  # Returns the alias map registered via the hash form of .tools.
123
+ # Merges parent class aliases so subclasses inherit their parent's mappings.
124
+ # Subclass-specific aliases take precedence over parent aliases.
118
125
  # @return [Hash{Class => String}]
126
+ # @api public
119
127
  def tool_aliases
120
- @tool_aliases ||= {}
128
+ own = @tool_aliases || {}
129
+ if superclass.respond_to?(:tool_aliases)
130
+ superclass.tool_aliases.merge(own)
131
+ else
132
+ own
133
+ end
121
134
  end
122
135
 
123
136
  # Sets or reads the LLM provider for this agent.
@@ -131,6 +144,7 @@ module Phronomy
131
144
  # model "openai/gpt-oss-20b"
132
145
  # provider :openai
133
146
  # end
147
+ # @api public
134
148
  def provider(name = nil)
135
149
  if name
136
150
  @provider = name
@@ -149,6 +163,7 @@ module Phronomy
149
163
  # class MyAgent < Phronomy::Agent::Base
150
164
  # temperature 0.2
151
165
  # end
166
+ # @api public
152
167
  def temperature(val = nil)
153
168
  if val
154
169
  @temperature = val
@@ -166,6 +181,7 @@ module Phronomy
166
181
  # class MyAgent < Phronomy::Agent::Base
167
182
  # max_iterations 5
168
183
  # end
184
+ # @api public
169
185
  def max_iterations(val = nil)
170
186
  if val
171
187
  @max_iterations = val
@@ -174,27 +190,118 @@ module Phronomy
174
190
  end
175
191
  end
176
192
 
193
+ # Sets or reads the maximum number of tool calls executed concurrently
194
+ # when the LLM returns multiple tool calls in a single response
195
+ # (ParallelToolChat mode, active inside an AgentFSM IO thread).
196
+ #
197
+ # Defaults to 10. Set to 1 to force sequential execution.
198
+ # Inherited by subclasses; the most-specific definition wins.
199
+ #
200
+ # @param val [Integer, nil]
201
+ # @return [Integer]
202
+ # @example
203
+ # class MyAgent < Phronomy::Agent::Base
204
+ # max_parallel_tools 4
205
+ # end
206
+ # @api public
207
+ def max_parallel_tools(val = nil)
208
+ if val.nil?
209
+ @max_parallel_tools ||
210
+ (superclass.respond_to?(:max_parallel_tools) ? superclass.max_parallel_tools : 10)
211
+ else
212
+ unless val.is_a?(Integer) && val >= 1
213
+ raise ArgumentError,
214
+ "max_parallel_tools must be a positive Integer (>= 1), got #{val.inspect}"
215
+ end
216
+ @max_parallel_tools = val
217
+ end
218
+ end
219
+
220
+ # Sets or reads the per-invocation timeout (in seconds) for EventLoop-mode
221
+ # agent calls. When set, +invoke+ raises {Phronomy::TimeoutError} if the
222
+ # agent does not finish within the given number of seconds.
223
+ #
224
+ # Has no effect when EventLoop mode is disabled (direct invoke path).
225
+ # Defaults to +nil+ (no timeout).
226
+ # Inherited by subclasses; the most-specific definition wins.
227
+ #
228
+ # When the timeout fires, a {Phronomy::CancellationScope} is cancelled
229
+ # and its token is propagated to the FSM config so that in-flight LLM,
230
+ # tool, and RAG calls observe cancellation via their +cancellation_token:+
231
+ # keyword argument. +Phronomy::TimeoutError+ is raised to the caller.
232
+ #
233
+ # @param val [Numeric, nil]
234
+ # @return [Numeric, nil]
235
+ # @example
236
+ # class MyAgent < Phronomy::Agent::Base
237
+ # invoke_timeout 30
238
+ # end
239
+ # @api public
240
+ def invoke_timeout(val = nil)
241
+ if val.nil?
242
+ return @invoke_timeout if defined?(@invoke_timeout)
243
+ superclass.respond_to?(:invoke_timeout) ? superclass.invoke_timeout : nil
244
+ else
245
+ unless val.is_a?(Numeric) && val > 0
246
+ raise ArgumentError,
247
+ "invoke_timeout must be a positive number, got #{val.inspect}"
248
+ end
249
+ @invoke_timeout = val
250
+ end
251
+ end
252
+
177
253
  # Registers one or more static knowledge sources on the agent class.
178
- # Static sources are fetched once per agent instance and their content
179
- # is cached in ContextVersionCache keyed by a fingerprint of the
180
- # instruction text + source content. The cache is invalidated automatically
181
- # when the fingerprint changes (e.g. because a source was updated).
254
+ # Static source content is fetched and memoized at the **class** level
255
+ # the first time +invoke+ is called. The cache persists for the lifetime
256
+ # of the process; call {.static_knowledge_refresh!} to force a reload.
182
257
  #
183
258
  # @param sources [Array<Phronomy::KnowledgeSource::Base>]
184
259
  # @example
185
260
  # class PolicyAgent < Phronomy::Agent::Base
186
261
  # static_knowledge Phronomy::KnowledgeSource::StaticKnowledge.new(POLICY_TEXT)
187
262
  # end
263
+ # @api public
188
264
  def static_knowledge(*sources)
189
265
  @static_knowledge_sources = sources.flatten
266
+ # Invalidate the cached chunks so the new sources are fetched on
267
+ # the next call to static_knowledge_chunks.
268
+ @static_knowledge_chunks = nil
190
269
  end
191
270
 
192
271
  # Returns the registered static knowledge sources.
193
272
  # @return [Array<Phronomy::KnowledgeSource::Base>]
273
+ # @api public
194
274
  def static_knowledge_sources
195
275
  @static_knowledge_sources || []
196
276
  end
197
277
 
278
+ # Returns the fetched content from all static knowledge sources.
279
+ # Results are cached at the class level so that each source is fetched
280
+ # only once regardless of how many times the agent is invoked.
281
+ # @return [Array<Hash>]
282
+ # @api public
283
+ def static_knowledge_chunks
284
+ @static_knowledge_chunks ||= static_knowledge_sources.flat_map { |ks|
285
+ ks.fetch(query: nil)
286
+ }
287
+ end
288
+
289
+ # Clears the class-level knowledge cache so that the next +invoke+ call
290
+ # re-fetches content from all registered static knowledge sources.
291
+ #
292
+ # Call this method when the underlying knowledge source has been updated
293
+ # at runtime (e.g. a file was rewritten, a DB record changed) and you
294
+ # want the agent to pick up the new content without restarting the
295
+ # process.
296
+ #
297
+ # @return [nil]
298
+ # @example Refresh after updating a knowledge file
299
+ # MyAgent.static_knowledge_refresh!
300
+ # @api public
301
+ def static_knowledge_refresh!
302
+ @static_knowledge_chunks = nil
303
+ end
304
+
198
305
  # Registers a callback that is invoked before every LLM call so the
199
306
  # application can remove stale or irrelevant messages from the
200
307
  # conversation history.
@@ -209,11 +316,13 @@ module Phronomy
209
316
  # limit = ctx.budget&.available(used: 0) || Float::INFINITY
210
317
  # ctx.remove(ctx.message_elements.first[:seq]) if ctx.total_tokens > limit * 0.8
211
318
  # end
319
+ # @api public
212
320
  def on_trim(&block)
213
321
  @on_trim_callback = block
214
322
  end
215
323
 
216
324
  # @return [Proc, nil]
325
+ # @api private
217
326
  def _on_trim_callback
218
327
  @on_trim_callback
219
328
  end
@@ -232,11 +341,13 @@ module Phronomy
232
341
  # limit = ctx.budget&.available(used: 0) || Float::INFINITY
233
342
  # ctx.total_tokens > limit * 0.7
234
343
  # end
344
+ # @api public
235
345
  def on_compaction_trigger(&block)
236
346
  @on_compaction_trigger_callback = block
237
347
  end
238
348
 
239
349
  # @return [Proc, nil]
350
+ # @api private
240
351
  def _on_compaction_trigger_callback
241
352
  @on_compaction_trigger_callback
242
353
  end
@@ -254,11 +365,13 @@ module Phronomy
254
365
  # "Earlier conversation summary: #{texts}"
255
366
  # end
256
367
  # end
368
+ # @api public
257
369
  def on_compact(&block)
258
370
  @on_compact_callback = block
259
371
  end
260
372
 
261
373
  # @return [Proc, nil]
374
+ # @api private
262
375
  def _on_compact_callback
263
376
  @on_compact_callback
264
377
  end
@@ -278,6 +391,7 @@ module Phronomy
278
391
  # provider :anthropic
279
392
  # cache_instructions true
280
393
  # end
394
+ # @api public
281
395
  def cache_instructions(enabled = nil)
282
396
  if enabled.nil?
283
397
  @cache_instructions
@@ -293,6 +407,7 @@ module Phronomy
293
407
  # class MyAgent < Phronomy::Agent::Base
294
408
  # max_output_tokens 4096
295
409
  # end
410
+ # @api public
296
411
  def max_output_tokens(val = nil)
297
412
  if val.nil?
298
413
  @max_output_tokens
@@ -310,6 +425,7 @@ module Phronomy
310
425
  # class MyAgent < Phronomy::Agent::Base
311
426
  # context_window 4096
312
427
  # end
428
+ # @api public
313
429
  def context_window(val = nil)
314
430
  if val.nil?
315
431
  @context_window
@@ -325,6 +441,7 @@ module Phronomy
325
441
  # class MyAgent < Phronomy::Agent::Base
326
442
  # context_overhead 500
327
443
  # end
444
+ # @api public
328
445
  def context_overhead(val = nil)
329
446
  if val.nil?
330
447
  @context_overhead || 0
@@ -338,6 +455,7 @@ module Phronomy
338
455
  # Called by Runner during construction when routes are configured.
339
456
  # @param tool_class [Class<Phronomy::Tool::Base>]
340
457
  # @return [self]
458
+ # @api private
341
459
  def _add_handoff_tool(tool_class)
342
460
  @_handoff_tools ||= []
343
461
  @_handoff_tools << tool_class
@@ -346,6 +464,7 @@ module Phronomy
346
464
 
347
465
  # Returns handoff tool classes registered on this instance by Runner.
348
466
  # @return [Array<Class>]
467
+ # @api private
349
468
  def _handoff_tools
350
469
  @_handoff_tools || []
351
470
  end
@@ -366,6 +485,11 @@ module Phronomy
366
485
  # +:knowledge_sources+ (Array) — dynamic knowledge sources for this turn
367
486
  # +:user_id+ (+String+, optional) — caller identity forwarded to the tracer
368
487
  # +:session_id+ (+String+, optional) — session identity forwarded to the tracer
488
+ # @param invocation_context [Phronomy::InvocationContext, nil] optional first-class context
489
+ # object. When present, +thread_id+, +cancellation_token+, and +deadline+ are
490
+ # derived from it (existing +config:+ keys take precedence as backward-compat
491
+ # aliases). The object is also stored in +config[:invocation_context]+ so that
492
+ # +task_id+ / +parent_task_id+ appear in trace spans automatically.
369
493
  # @return [Hash] +{ output: String, messages: Array, usage: Phronomy::TokenUsage }+,
370
494
  # or +{ output: nil, suspended: true, checkpoint: Phronomy::Agent::Checkpoint,
371
495
  # messages: Array }+ when the invocation was suspended awaiting tool approval.
@@ -382,28 +506,111 @@ module Phronomy
382
506
  # result = agent.resume(result[:checkpoint], approved: true)
383
507
  # end
384
508
  # puts result[:output]
385
- def invoke(input, messages: [], thread_id: nil, config: {})
509
+ # @example With InvocationContext (deadline-based timeout)
510
+ # ctx = Phronomy::InvocationContext.new(
511
+ # thread_id: "conv-123",
512
+ # deadline: Phronomy::Deadline.in(30),
513
+ # task_id: SecureRandom.uuid
514
+ # )
515
+ # result = MyAgent.new.invoke("Hello", invocation_context: ctx)
516
+ # @api public
517
+ def invoke(input, messages: [], thread_id: nil, config: {}, invocation_context: nil)
518
+ if invocation_context
519
+ thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
520
+ end
386
521
  if Phronomy.configuration.event_loop
387
522
  # Protect against blocking the EventLoop thread itself.
388
- if Thread.current[:phronomy_event_loop_thread]
523
+ if Phronomy::EventLoop.current?
389
524
  raise Phronomy::Error,
390
525
  "Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
391
526
  "entry action. Use agent.run_as_child(input, ctx: ctx) instead."
392
527
  end
393
528
 
529
+ # Build an effective config that includes the invoke_timeout scope's
530
+ # CancellationToken before constructing the FSM. This ensures that
531
+ # every LLM, tool, and RAG call made inside _invoke_impl observes
532
+ # cancellation when the deadline fires.
533
+ timeout_sec = self.class.invoke_timeout
534
+ effective_config, scope = if timeout_sec
535
+ s = Phronomy::CancellationScope.new(parent_token: config[:cancellation_token])
536
+ s.deadline_in(timeout_sec)
537
+ [config.merge(cancellation_token: s.token), s]
538
+ else
539
+ [config, nil]
540
+ end
541
+
394
542
  fsm = Agent::FSM.new(
395
543
  agent: self,
396
544
  input: input,
397
545
  messages: messages,
398
546
  thread_id: thread_id || SecureRandom.uuid,
399
- config: config
547
+ config: effective_config
400
548
  )
401
549
  completion_queue = Phronomy::EventLoop.instance.register(fsm)
402
- result = completion_queue.pop
550
+ result = if scope
551
+ scope.pop_queue(completion_queue) do
552
+ raise Phronomy::TimeoutError,
553
+ "Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
554
+ end
555
+ else
556
+ completion_queue.pop
557
+ end
403
558
  raise result if result.is_a?(Exception)
404
559
  result
405
560
  else
406
- _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
561
+ # Guard: calling invoke from inside a scheduler task would block the task
562
+ # against itself when using a cooperative backend. Use invoke_async
563
+ # instead to compose agents without introducing a blocking wait.
564
+ if Phronomy::Task.current
565
+ msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
566
+ "This blocks the scheduler until the inner invocation completes, preventing " \
567
+ "other tasks from making progress. Use invoke_async + await instead."
568
+ if Phronomy.configuration.strict_runtime_guards
569
+ raise Phronomy::SchedulerReentrancyError, msg
570
+ elsif Phronomy.configuration.logger
571
+ Phronomy.configuration.logger.warn(msg)
572
+ else
573
+ Kernel.warn("[phronomy] WARNING: #{msg}")
574
+ end
575
+ end
576
+ invoke_async(input, messages: messages, thread_id: thread_id, config: config).await
577
+ end
578
+ end
579
+
580
+ # Invokes this agent asynchronously and returns a {Phronomy::Task}.
581
+ #
582
+ # This is the primary async entry point. {#invoke} is a synchronous wrapper
583
+ # that calls this method and blocks the caller until the task completes.
584
+ # Calling {#invoke} from inside an active scheduler task logs a warning, or
585
+ # raises {Phronomy::SchedulerReentrancyError} when +strict_runtime_guards+ is
586
+ # enabled; use +invoke_async+ directly in that context.
587
+ #
588
+ # The task is registered with the Runtime task registry so {Runtime#shutdown}
589
+ # drains in-flight invocations before process exit.
590
+ #
591
+ # @example
592
+ # task = agent.invoke_async("Hello!")
593
+ # result = task.await # => { output: "...", messages: [...], usage: ... }
594
+ #
595
+ # @param input [String, Hash]
596
+ # @param messages [Array]
597
+ # @param thread_id [String, nil]
598
+ # @param config [Hash]
599
+ # @param invocation_context [Phronomy::InvocationContext, nil]
600
+ # @return [Phronomy::Task]
601
+ # @api public
602
+ def invoke_async(input, messages: [], thread_id: nil, config: {}, invocation_context: nil)
603
+ if invocation_context
604
+ thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
605
+ end
606
+ bp = Phronomy.configuration.backpressure
607
+ on_full = (bp == :raise) ? :reject : (bp || :wait)
608
+ bp_timeout = Phronomy.configuration.backpressure_timeout
609
+ gate = Phronomy::Runtime.instance.gate(:agent)
610
+ Phronomy::Runtime.instance.spawn(name: "agent-#{(self.class.name || "anonymous").downcase}-async") do
611
+ gate.acquire(on_full: on_full, timeout: bp_timeout) do
612
+ _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
613
+ end
407
614
  end
408
615
  end
409
616
 
@@ -418,30 +625,24 @@ module Phronomy
418
625
  # result hash +{ output:, messages:, usage: }+. Declare an +on: :child_completed+
419
626
  # transition in your Workflow to advance to the next state.
420
627
  #
421
- # An optional block may be provided to write the result back into the parent
422
- # WorkflowContext <b>before</b> the +:child_completed+ event is dispatched.
423
- # +Thread::Queue+ provides the happens-before guarantee — no Mutex is needed.
628
+ # The result is delivered exclusively as the +:child_completed+ event payload.
629
+ # The parent Workflow task is the sole owner of the parent +WorkflowContext+ and
630
+ # applies the result after receiving the event — no background thread writes to
631
+ # the parent context directly.
424
632
  #
425
- # @example Without block (result available only as event payload)
633
+ # @example
426
634
  # entry :run_agent, ->(ctx) { MyAgent.new.run_as_child(ctx.query, ctx: ctx) }
427
635
  # transition from: :run_agent, on: :child_completed, to: :process_result
428
636
  #
429
- # @example With block (writes result into context)
430
- # entry :run_agent, ->(ctx) {
431
- # MyAgent.new.run_as_child(ctx.query, ctx: ctx) { |r| ctx.answer = r[:output] }
432
- # }
433
- # transition from: :run_agent, on: :child_completed, to: :process_result
434
- #
435
637
  # @param input [String, Hash] user input passed to the agent
436
638
  # @param ctx [Object] a WorkflowContext that responds to +#thread_id+
437
639
  # @param messages [Array] prior conversation history
438
640
  # @param config [Hash] invocation config (forwarded to +_invoke_impl+)
439
- # @yield [Hash] result hash +{ output:, messages:, usage: }+ — called from the
440
- # agent IO thread before +:child_completed+ is posted
441
641
  # @return [nil] the caller must not wait on any return value;
442
642
  # the result arrives as a +:child_completed+ event
443
643
  # @raise [Phronomy::Error] when EventLoop mode is not enabled
444
- def run_as_child(input, ctx:, messages: [], config: {}, &result_writer)
644
+ # @api public
645
+ def run_as_child(input, ctx:, messages: [], config: {})
445
646
  unless Phronomy.configuration.event_loop
446
647
  raise Phronomy::Error,
447
648
  "run_as_child requires EventLoop mode. " \
@@ -454,8 +655,7 @@ module Phronomy
454
655
  messages: messages,
455
656
  thread_id: "#{ctx.thread_id}_agent_#{SecureRandom.uuid}",
456
657
  config: config,
457
- parent_id: ctx.thread_id,
458
- result_writer: result_writer
658
+ parent_id: ctx.thread_id
459
659
  )
460
660
  Phronomy::EventLoop.instance.enqueue_child(fsm)
461
661
  nil
@@ -477,6 +677,7 @@ module Phronomy
477
677
  # @param config [Hash] same as #invoke
478
678
  # @yield [Phronomy::Agent::StreamEvent]
479
679
  # @return [Hash] { output:, messages:, usage: } — same as #invoke
680
+ # @api public
480
681
  def stream(input, messages: [], thread_id: nil, config: {}, &block)
481
682
  return invoke(input, messages: messages, thread_id: thread_id, config: config) unless block
482
683
 
@@ -486,19 +687,50 @@ module Phronomy
486
687
  raise
487
688
  end
488
689
 
489
- # Returns the {Context::ContextVersionCache} for the current thread.
690
+ # Returns the {Context::ContextVersionCache} built during the most recent
691
+ # {#invoke} call on this agent instance. The thread-local cache entry is
692
+ # cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
693
+ # in +@last_context_version_cache+ so callers can inspect it after invoke
694
+ # returns.
695
+ #
696
+ # NOTE: Not thread-safe. When the same Agent instance is used concurrently,
697
+ # +@last_context_version_cache+ reflects the most recent +invoke+ on *any*
698
+ # thread. For per-invocation isolation, use a separate Agent instance per
699
+ # thread.
490
700
  # @api private
491
701
  def context_version_cache
492
- (Thread.current[:phronomy_context_version_caches] ||= {})[object_id]
702
+ @last_context_version_cache
493
703
  end
494
704
 
495
705
  private
496
706
 
707
+ # Merges an {InvocationContext} into the +thread_id+ / +config+ pair.
708
+ # Returns +[effective_thread_id, effective_config]+.
709
+ #
710
+ # Precedence rules (existing explicit values always win):
711
+ # - +thread_id+ argument > +ic.thread_id+
712
+ # - +config[:cancellation_token]+ > +ic.cancellation_token+ > token derived from +ic.deadline+
713
+ # - +ic+ is stored in +config[:invocation_context]+ (overwriting any previous value)
714
+ def _apply_invocation_context(thread_id, config, ic)
715
+ effective_thread_id = thread_id || ic.thread_id
716
+ effective_config = config.merge(invocation_context: ic)
717
+ if effective_config[:cancellation_token].nil?
718
+ if (tok = ic.effective_timeout_token)
719
+ effective_config = effective_config.merge(cancellation_token: tok)
720
+ end
721
+ end
722
+ [effective_thread_id, effective_config]
723
+ end
724
+
497
725
  # Streaming implementation for #stream.
498
726
  def _stream_impl(input, messages: [], thread_id: nil, config: {}, &block)
499
727
  caller_meta = {}
500
728
  caller_meta[:user_id] = config[:user_id] if config[:user_id]
501
729
  caller_meta[:session_id] = config[:session_id] if config[:session_id]
730
+ if (ic = config[:invocation_context])
731
+ caller_meta[:task_id] = ic.task_id if ic.task_id
732
+ caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
733
+ end
502
734
 
503
735
  trace("agent.invoke", input: input, **caller_meta) do |_span|
504
736
  run_input_guardrails!(input)
@@ -529,10 +761,26 @@ module Phronomy
529
761
  # Run before_completion hooks (global → class → instance) before the LLM call.
530
762
  run_before_completion_hooks!(chat, config)
531
763
 
532
- response = chat.ask(user_message) do |chunk|
764
+ # Route the LLM streaming call through the configured LLMAdapter.
765
+ # Chunks are pushed into a token queue by the pool worker thread and
766
+ # drained here (on the caller's side) so that the user block is never
767
+ # executed on a BlockingAdapterPool worker thread.
768
+ # The queue capacity is bounded by Configuration#stream_queue_max_size
769
+ # (nil = unbounded) to provide backpressure against a fast LLM producer.
770
+ adapter = Phronomy.configuration.llm_adapter
771
+ chunk_queue = Phronomy::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
772
+ pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
773
+
774
+ # Drain the chunk queue on this side (scheduler task / caller thread).
775
+ loop do
776
+ chunk = chunk_queue.pop
777
+ break if chunk.nil? # queue closed — LLM streaming complete
533
778
  block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
779
+ check_cancellation!(config, "invocation cancelled during streaming")
534
780
  end
535
781
 
782
+ response = pending.await
783
+
536
784
  output = response.content
537
785
  usage = Phronomy::TokenUsage.from_tokens(response.tokens)
538
786
 
@@ -554,6 +802,7 @@ module Phronomy
554
802
  # @param thread_id [String, nil] conversation thread identifier
555
803
  # @param config [Hash] the invocation config (see #invoke)
556
804
  # @return [Hash] { system: String|nil, messages: Array }
805
+ # @api public
557
806
  def build_context(input, messages: [], thread_id: nil, config: {})
558
807
  history = prepare_history(messages: messages, thread_id: thread_id, config: config)
559
808
  budget = build_token_budget
@@ -563,9 +812,49 @@ module Phronomy
563
812
  assembler = Context::Assembler.new(budget: budget)
564
813
  assembler.add_instruction(system_text) if system_text
565
814
 
566
- Array(config[:knowledge_sources]).each do |ks|
567
- ks.fetch(query: user_message).each do |chunk|
568
- assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
815
+ sources = Array(config[:knowledge_sources])
816
+ unless sources.empty?
817
+ check_cancellation!(config, "invocation cancelled before RAG fetch")
818
+ # Determine TaskGroup failure policy: :skip (default) ignores per-source
819
+ # failures so the agent can still answer with partial context; :fail
820
+ # surfaces the first error immediately via :fail_fast.
821
+ failure_policy =
822
+ case config[:rag_failure_policy]
823
+ when :fail then :fail_fast
824
+ else :skip_failed
825
+ end
826
+
827
+ group = Phronomy::Runtime.instance.task_group(failure_policy: failure_policy)
828
+
829
+ bp = Phronomy.configuration.backpressure
830
+ rag_on_full = (bp == :raise) ? :reject : (bp || :wait)
831
+ rag_bp_timeout = Phronomy.configuration.backpressure_timeout
832
+
833
+ # Spawn all fetches concurrently. Results are returned in spawn order
834
+ # (i.e. registration order of knowledge sources) by TaskGroup#await_all.
835
+ sources.each do |ks|
836
+ group.spawn do
837
+ Phronomy::Runtime.instance.gate(:rag).acquire(on_full: rag_on_full, timeout: rag_bp_timeout) do
838
+ t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
839
+ result = ks.fetch_async(
840
+ query: user_message,
841
+ cancellation_token: config[:cancellation_token],
842
+ timeout: config[:rag_timeout]
843
+ ).await
844
+ elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0
845
+ Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{(elapsed * 1000).round}ms" }
846
+ result
847
+ end
848
+ end
849
+ end
850
+
851
+ # await_all returns results in spawn order; nil entries indicate
852
+ # skipped failures when using :skip_failed.
853
+ per_source_chunks = group.await_all
854
+ per_source_chunks.each do |chunks|
855
+ Array(chunks).each do |chunk|
856
+ assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
857
+ end
569
858
  end
570
859
  end
571
860
 
@@ -585,6 +874,7 @@ module Phronomy
585
874
  # @param thread_id [String, nil] conversation thread identifier
586
875
  # @param config [Hash] additional invocation options
587
876
  # @return [Array] filtered and/or compacted message objects
877
+ # @api public
588
878
  def prepare_history(messages: [], thread_id: nil, config: {})
589
879
  budget = build_token_budget
590
880
  elements = build_message_elements(Array(messages))
@@ -620,6 +910,10 @@ module Phronomy
620
910
  caller_meta = {}
621
911
  caller_meta[:user_id] = config[:user_id] if config[:user_id]
622
912
  caller_meta[:session_id] = config[:session_id] if config[:session_id]
913
+ if (ic = config[:invocation_context])
914
+ caller_meta[:task_id] = ic.task_id if ic.task_id
915
+ caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
916
+ end
623
917
 
624
918
  trace("agent.invoke", input: input, **caller_meta) do |_span|
625
919
  # Run input guardrails before touching the LLM.
@@ -641,8 +935,20 @@ module Phronomy
641
935
  # synchronous on_approval_required handler is already registered).
642
936
  _register_suspension_hook!(chat)
643
937
 
938
+ # Check for cancellation immediately before the LLM call.
939
+ check_cancellation!(config, "invocation cancelled before LLM call")
940
+
941
+ # Forward the cancellation token to ParallelToolChat explicitly
942
+ # via the chat instance so that tool dispatch batches can observe
943
+ # cancellation without needing Thread.current.
944
+ chat.cancellation_token = config[:cancellation_token] if chat.respond_to?(:cancellation_token=)
945
+
644
946
  begin
645
- response = chat.ask(user_message)
947
+ # Route the LLM call through the configured LLMAdapter so that the
948
+ # blocking HTTP request runs inside BlockingAdapterPool and the
949
+ # adapter can be swapped without changing agent code.
950
+ adapter = Phronomy.configuration.llm_adapter
951
+ response = adapter.complete_async(chat, user_message, config: config).await
646
952
  rescue SuspendSignal => signal
647
953
  checkpoint = Checkpoint.new(
648
954
  thread_id: thread_id,
@@ -654,6 +960,9 @@ module Phronomy
654
960
  )
655
961
  suspended_result = {output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}
656
962
  next [suspended_result, nil]
963
+ ensure
964
+ # Clear the chat's cancellation token reference after each LLM call.
965
+ chat.cancellation_token = nil if chat.respond_to?(:cancellation_token=)
657
966
  end
658
967
 
659
968
  output = response.content
@@ -699,6 +1008,7 @@ module Phronomy
699
1008
  #
700
1009
  # @param messages [Array] message-like objects with #role and #content
701
1010
  # @return [Array<Hash>]
1011
+ # @api public
702
1012
  def build_message_elements(messages)
703
1013
  Array(messages).each_with_index.map do |msg, idx|
704
1014
  tokens = Context::TokenEstimator.estimate(msg.content.to_s)
@@ -714,20 +1024,17 @@ module Phronomy
714
1024
  #
715
1025
  # @param input [String, Hash] the agent's current input (used for template evaluation)
716
1026
  # @return [String, nil] assembled system text, or nil when empty
1027
+ # @api public
717
1028
  def build_cached_system_text(input)
718
1029
  instruction = build_instructions(input)
719
1030
 
720
- static_chunks = self.class.static_knowledge_sources.flat_map { |ks|
721
- ks.fetch(query: nil)
722
- }
1031
+ static_chunks = self.class.static_knowledge_chunks
723
1032
 
724
1033
  fingerprint = Digest::SHA256.hexdigest(
725
1034
  [instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
726
1035
  )
727
1036
 
728
- agent_id = object_id
729
- cache = (Thread.current[:phronomy_context_version_caches] ||= {})[agent_id] ||=
730
- Context::ContextVersionCache.new
1037
+ cache = (@context_version_cache ||= Context::ContextVersionCache.new)
731
1038
  unless cache.valid?(fingerprint)
732
1039
  parts = [instruction]
733
1040
  static_chunks.each do |chunk|
@@ -736,18 +1043,20 @@ module Phronomy
736
1043
  cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
737
1044
  end
738
1045
 
1046
+ # Persist a reference on the instance so that context_version_cache
1047
+ # remains accessible after invoke completes.
1048
+ @last_context_version_cache = cache
1049
+
739
1050
  cache.system_text.empty? ? nil : cache.system_text
740
1051
  end
741
1052
 
742
- # Load messages from a ConversationManager.
743
- #
744
1053
  # Returns the chat class to instantiate for this invocation.
745
- # When the +:phronomy_agent_parallel_tools+ thread-local flag is set
746
- # (i.e. inside an {AgentFSM} IO thread), returns {ParallelToolChat} so
747
- # that concurrent tool dispatch is enabled. Falls back to +nil+ otherwise,
748
- # signalling {#build_chat} to use the standard +RubyLLM.chat+ factory.
1054
+ # When EventLoop mode is enabled ({Phronomy.configuration.event_loop}),
1055
+ # returns {ParallelToolChat} so that concurrent tool dispatch is enabled.
1056
+ # Falls back to +nil+ otherwise, signalling {#build_chat} to use the
1057
+ # standard +RubyLLM.chat+ factory.
749
1058
  def build_chat_class
750
- Thread.current[:phronomy_agent_parallel_tools] ? Agent::ParallelToolChat : nil
1059
+ Phronomy.configuration.event_loop ? Agent::ParallelToolChat : nil
751
1060
  end
752
1061
 
753
1062
  def build_chat
@@ -761,7 +1070,11 @@ module Phronomy
761
1070
  end
762
1071
  t = self.class.temperature
763
1072
  parallel_class = build_chat_class
764
- chat = parallel_class ? parallel_class.new(**opts) : RubyLLM.chat(**opts)
1073
+ chat = if parallel_class
1074
+ parallel_class.new(max_parallel_tools: self.class.max_parallel_tools, **opts)
1075
+ else
1076
+ RubyLLM.chat(**opts)
1077
+ end
765
1078
  chat.with_temperature(t) if t
766
1079
  self.class.tools.each do |tool_class|
767
1080
  chat.with_tool(prepare_tool_class(tool_class))
@@ -811,17 +1124,44 @@ module Phronomy
811
1124
  end
812
1125
  end
813
1126
 
1127
+ # Raises CancellationError if the cancellation_token in config is cancelled.
1128
+ # No-op when config has no cancellation_token or the token is not cancelled.
1129
+ #
1130
+ # @param config [Hash] the invocation config hash
1131
+ # @param message [String] the message for the CancellationError
1132
+ # @raise [Phronomy::CancellationError]
1133
+ # @api public
1134
+ def check_cancellation!(config, message = "invocation cancelled")
1135
+ ct = config[:cancellation_token]
1136
+ raise Phronomy::CancellationError, message if ct&.cancelled?
1137
+ end
1138
+
814
1139
  # Builds the final tool class to register with the chat.
815
1140
  #
816
- # Two transformations are applied in order:
1141
+ # When an already-instantiated tool object is passed (e.g. a
1142
+ # {Phronomy::Tool::McpTool} returned by +McpTool.from_server+), it is
1143
+ # returned as-is. RubyLLM's +with_tool+ accepts both classes and
1144
+ # instances, so no wrapping is needed.
1145
+ #
1146
+ # For tool classes, three transformations are applied in order:
817
1147
  # 1. Alias override — when the Hash form of .tools maps this class to an
818
1148
  # explicit name, an anonymous subclass with that tool_name is returned.
819
- # 2. Approval gate — when the tool class has +requires_approval+ set AND
1149
+ # 2. Scope policy — when a scope is declared on the tool, the configured
1150
+ # {Phronomy::Tool::ScopePolicy} (or the default) is evaluated.
1151
+ # +:reject+ wraps the tool to return a denial message without executing.
1152
+ # +:approve+ behaves like requiring approval (same as step 3 when the
1153
+ # tool does not already have +requires_approval+).
1154
+ # 3. Approval gate — when the tool class has +requires_approval+ set AND
820
1155
  # an approval handler has been registered via #on_approval_required,
821
1156
  # the tool's #call method is wrapped: the handler is invoked with
822
1157
  # (tool_name, args) and, if it returns falsy, the tool returns a denial
823
1158
  # message instead of executing.
824
1159
  def prepare_tool_class(tool_class)
1160
+ # When an instantiated tool object is passed (e.g. McpTool.from_server
1161
+ # returns an instance, not a class), skip class-level processing and
1162
+ # return it directly. RubyLLM#with_tool handles both forms.
1163
+ return tool_class unless tool_class.is_a?(Class)
1164
+
825
1165
  # Step 1: apply alias if needed.
826
1166
  resolved = if (alias_name = self.class.tool_aliases[tool_class])
827
1167
  parent_description = tool_class.description
@@ -833,7 +1173,34 @@ module Phronomy
833
1173
  tool_class
834
1174
  end
835
1175
 
836
- # Step 2: wrap with approval gate when handler is registered.
1176
+ # Step 2: evaluate scope policy.
1177
+ scope = resolved.scope
1178
+ if scope
1179
+ policy = @scope_policy || Phronomy::Tool::ScopePolicy::DEFAULT
1180
+ decision = policy.call(resolved, scope, self)
1181
+ case decision
1182
+ when :reject
1183
+ effective_name = resolved.new.name
1184
+ rejected_class = Class.new(resolved) do
1185
+ tool_name effective_name
1186
+ define_method(:call) do |_args|
1187
+ "Tool execution denied: scope :#{scope} is not permitted."
1188
+ end
1189
+ end
1190
+ return rejected_class
1191
+ when :approve
1192
+ # Treat as requires_approval unless the tool already has that flag.
1193
+ unless resolved.requires_approval
1194
+ effective_name = resolved.new.name
1195
+ resolved = Class.new(resolved) do
1196
+ tool_name effective_name
1197
+ requires_approval true
1198
+ end
1199
+ end
1200
+ end
1201
+ end
1202
+
1203
+ # Step 3: wrap with approval gate when handler is registered.
837
1204
  return resolved unless resolved.requires_approval && @approval_handler
838
1205
 
839
1206
  handler = @approval_handler