phronomy 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +8 -7
  3. data/CHANGELOG.md +151 -1
  4. data/README.md +170 -47
  5. data/Rakefile +33 -0
  6. data/benchmark/baseline.json +1 -1
  7. data/benchmark/bench_context_assembler.rb +2 -2
  8. data/benchmark/bench_regression.rb +6 -5
  9. data/benchmark/bench_token_estimator.rb +5 -5
  10. data/benchmark/bench_tool_schema.rb +1 -1
  11. data/benchmark/bench_vector_store.rb +1 -1
  12. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +24 -0
  13. data/docs/decisions/006-no-built-in-guardrails.md +20 -2
  14. data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
  15. data/lib/phronomy/agent/base.rb +285 -137
  16. data/lib/phronomy/agent/checkpoint.rb +118 -0
  17. data/lib/phronomy/agent/concerns/suspendable.rb +15 -0
  18. data/lib/phronomy/agent/context/conversation/compaction_context.rb +117 -0
  19. data/lib/phronomy/agent/context/conversation/trigger_context.rb +43 -0
  20. data/lib/phronomy/agent/context/conversation/trim_context.rb +82 -0
  21. data/lib/phronomy/agent/context/instruction/prompt_template.rb +102 -0
  22. data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +45 -0
  23. data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +51 -0
  24. data/lib/phronomy/agent/context/knowledge/loader/base.rb +31 -0
  25. data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +62 -0
  26. data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +82 -0
  27. data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +28 -0
  28. data/lib/phronomy/agent/context/knowledge/source/base.rb +60 -0
  29. data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +102 -0
  30. data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +63 -0
  31. data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +58 -0
  32. data/lib/phronomy/agent/context/knowledge/splitter/base.rb +53 -0
  33. data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +57 -0
  34. data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +111 -0
  35. data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +116 -0
  36. data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +95 -0
  37. data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +109 -0
  38. data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +133 -0
  39. data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +198 -0
  40. data/lib/phronomy/agent/fsm.rb +42 -65
  41. data/lib/phronomy/agent/invocation_pipeline.rb +99 -0
  42. data/lib/phronomy/agent/lifecycle/fsm_session.rb +251 -0
  43. data/lib/phronomy/agent/lifecycle/phase_machine_builder.rb +249 -0
  44. data/lib/phronomy/agent/react_agent.rb +27 -14
  45. data/lib/phronomy/agent/runner.rb +2 -2
  46. data/lib/phronomy/agent/tool_executor.rb +108 -0
  47. data/lib/phronomy/concurrency/async_queue.rb +157 -0
  48. data/lib/phronomy/concurrency/blocking_adapter_pool.rb +443 -0
  49. data/lib/phronomy/concurrency/cancellation_scope.rb +125 -0
  50. data/lib/phronomy/concurrency/cancellation_token.rb +140 -0
  51. data/lib/phronomy/concurrency/concurrency_gate.rb +157 -0
  52. data/lib/phronomy/concurrency/deadline.rb +65 -0
  53. data/lib/phronomy/concurrency/gate_registry.rb +52 -0
  54. data/lib/phronomy/concurrency/pool_registry.rb +57 -0
  55. data/lib/phronomy/configuration.rb +142 -0
  56. data/lib/phronomy/context.rb +2 -8
  57. data/lib/phronomy/diagnostics.rb +62 -0
  58. data/lib/phronomy/embeddings.rb +2 -2
  59. data/lib/phronomy/eval/runner.rb +13 -9
  60. data/lib/phronomy/eval/scorer/llm_judge.rb +12 -1
  61. data/lib/phronomy/event_loop.rb +184 -46
  62. data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
  63. data/lib/phronomy/invocation_context.rb +152 -0
  64. data/lib/phronomy/knowledge_source.rb +0 -5
  65. data/lib/phronomy/llm_adapter/base.rb +104 -0
  66. data/lib/phronomy/llm_adapter/ruby_llm.rb +47 -0
  67. data/lib/phronomy/llm_adapter.rb +20 -0
  68. data/lib/phronomy/{context → llm_context_window}/assembler.rb +18 -3
  69. data/lib/phronomy/{context → llm_context_window}/context_version_cache.rb +1 -1
  70. data/lib/phronomy/{context → llm_context_window}/token_budget.rb +7 -4
  71. data/lib/phronomy/{context → llm_context_window}/token_estimator.rb +3 -3
  72. data/lib/phronomy/loader.rb +4 -4
  73. data/lib/phronomy/metrics.rb +38 -0
  74. data/lib/phronomy/{agent → multi_agent}/handoff.rb +2 -2
  75. data/lib/phronomy/{agent → multi_agent}/orchestrator.rb +151 -126
  76. data/lib/phronomy/multi_agent/parallel_tool_chat.rb +149 -0
  77. data/lib/phronomy/{agent → multi_agent}/team_coordinator.rb +2 -2
  78. data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
  79. data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
  80. data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
  81. data/lib/phronomy/runtime/scheduler.rb +98 -0
  82. data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
  83. data/lib/phronomy/runtime/task_registry.rb +48 -0
  84. data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
  85. data/lib/phronomy/runtime/timer_queue.rb +106 -0
  86. data/lib/phronomy/runtime/timer_service.rb +42 -0
  87. data/lib/phronomy/runtime.rb +389 -0
  88. data/lib/phronomy/splitter.rb +3 -3
  89. data/lib/phronomy/task/backend.rb +80 -0
  90. data/lib/phronomy/task/fiber_backend.rb +157 -0
  91. data/lib/phronomy/task/immediate_backend.rb +89 -0
  92. data/lib/phronomy/task/thread_backend.rb +84 -0
  93. data/lib/phronomy/task.rb +275 -0
  94. data/lib/phronomy/task_group.rb +265 -0
  95. data/lib/phronomy/testing/fake_clock.rb +109 -0
  96. data/lib/phronomy/testing/fake_scheduler.rb +104 -0
  97. data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
  98. data/lib/phronomy/testing.rb +12 -0
  99. data/lib/phronomy/tool/base.rb +156 -7
  100. data/lib/phronomy/tool/mcp_tool.rb +47 -16
  101. data/lib/phronomy/tool/scope_policy.rb +50 -0
  102. data/lib/phronomy/tracing/null_tracer.rb +3 -1
  103. data/lib/phronomy/tracing/open_telemetry_tracer.rb +34 -0
  104. data/lib/phronomy/vector_store.rb +2 -2
  105. data/lib/phronomy/version.rb +1 -1
  106. data/lib/phronomy/workflow.rb +52 -5
  107. data/lib/phronomy/workflow_context.rb +37 -2
  108. data/lib/phronomy/workflow_runner.rb +28 -77
  109. data/lib/phronomy.rb +43 -0
  110. metadata +73 -33
  111. data/lib/phronomy/agent/parallel_tool_chat.rb +0 -92
  112. data/lib/phronomy/cancellation_token.rb +0 -92
  113. data/lib/phronomy/context/compaction_context.rb +0 -111
  114. data/lib/phronomy/context/trigger_context.rb +0 -39
  115. data/lib/phronomy/context/trim_context.rb +0 -75
  116. data/lib/phronomy/embeddings/base.rb +0 -22
  117. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +0 -45
  118. data/lib/phronomy/fsm_session.rb +0 -201
  119. data/lib/phronomy/knowledge_source/base.rb +0 -36
  120. data/lib/phronomy/knowledge_source/entity_knowledge.rb +0 -96
  121. data/lib/phronomy/knowledge_source/rag_knowledge.rb +0 -57
  122. data/lib/phronomy/knowledge_source/static_knowledge.rb +0 -52
  123. data/lib/phronomy/loader/base.rb +0 -25
  124. data/lib/phronomy/loader/csv_loader.rb +0 -56
  125. data/lib/phronomy/loader/markdown_loader.rb +0 -76
  126. data/lib/phronomy/loader/plain_text_loader.rb +0 -22
  127. data/lib/phronomy/prompt_template.rb +0 -96
  128. data/lib/phronomy/splitter/base.rb +0 -47
  129. data/lib/phronomy/splitter/fixed_size_splitter.rb +0 -51
  130. data/lib/phronomy/splitter/recursive_splitter.rb +0 -105
  131. data/lib/phronomy/vector_store/base.rb +0 -82
  132. data/lib/phronomy/vector_store/in_memory.rb +0 -93
  133. data/lib/phronomy/vector_store/pgvector.rb +0 -127
  134. data/lib/phronomy/vector_store/redis_search.rb +0 -192
@@ -2,7 +2,6 @@
2
2
 
3
3
  require "digest"
4
4
  require "securerandom"
5
- require "timeout"
6
5
  require_relative "concerns/retryable"
7
6
  require_relative "concerns/guardrailable"
8
7
  require_relative "concerns/before_completion"
@@ -61,12 +60,12 @@ module Phronomy
61
60
  end
62
61
 
63
62
  # Sets or reads the system instructions for this agent.
64
- # Accepts a String, a {Phronomy::PromptTemplate}, or a block (Proc).
63
+ # Accepts a String, a {Phronomy::Agent::Context::Instruction::PromptTemplate}, or a block (Proc).
65
64
  # When used as a reader (no argument, no block), returns the stored value.
66
65
  #
67
- # @param text [String, Phronomy::PromptTemplate, nil]
66
+ # @param text [String, Phronomy::Agent::Context::Instruction::PromptTemplate, nil]
68
67
  # @yield optionally provide instructions as a block
69
- # @return [String, Phronomy::PromptTemplate, Proc, nil]
68
+ # @return [String, Phronomy::Agent::Context::Instruction::PromptTemplate, Proc, nil]
70
69
  # @example String instructions
71
70
  # class MyAgent < Phronomy::Agent::Base
72
71
  # instructions "You are a helpful assistant."
@@ -226,13 +225,10 @@ module Phronomy
226
225
  # Defaults to +nil+ (no timeout).
227
226
  # Inherited by subclasses; the most-specific definition wins.
228
227
  #
229
- # **Note**: +invoke_timeout+ is a *wait timeout*, not a cancellation.
230
- # When the timeout fires, +Phronomy::TimeoutError+ is raised to the
231
- # caller, but the background agent thread and any in-flight LLM or tool
232
- # calls are **not** interrupted — they continue running until they
233
- # complete naturally. The agent therefore keeps consuming threads,
234
- # memory, and external API credits after the caller has already received
235
- # the error. True cancellation is not yet supported.
228
+ # When the timeout fires, a {Phronomy::Concurrency::CancellationScope} is cancelled
229
+ # and its token is propagated to the FSM config so that in-flight LLM,
230
+ # tool, and RAG calls observe cancellation via their +cancellation_token:+
231
+ # keyword argument. +Phronomy::TimeoutError+ is raised to the caller.
236
232
  #
237
233
  # @param val [Numeric, nil]
238
234
  # @return [Numeric, nil]
@@ -259,10 +255,10 @@ module Phronomy
259
255
  # the first time +invoke+ is called. The cache persists for the lifetime
260
256
  # of the process; call {.static_knowledge_refresh!} to force a reload.
261
257
  #
262
- # @param sources [Array<Phronomy::KnowledgeSource::Base>]
258
+ # @param sources [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
263
259
  # @example
264
260
  # class PolicyAgent < Phronomy::Agent::Base
265
- # static_knowledge Phronomy::KnowledgeSource::StaticKnowledge.new(POLICY_TEXT)
261
+ # static_knowledge Phronomy::Agent::Context::Knowledge::Source::StaticKnowledge.new(POLICY_TEXT)
266
262
  # end
267
263
  # @api public
268
264
  def static_knowledge(*sources)
@@ -273,7 +269,7 @@ module Phronomy
273
269
  end
274
270
 
275
271
  # Returns the registered static knowledge sources.
276
- # @return [Array<Phronomy::KnowledgeSource::Base>]
272
+ # @return [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
277
273
  # @api public
278
274
  def static_knowledge_sources
279
275
  @static_knowledge_sources || []
@@ -310,11 +306,11 @@ module Phronomy
310
306
  # application can remove stale or irrelevant messages from the
311
307
  # conversation history.
312
308
  #
313
- # The block receives a {Phronomy::Context::TrimContext} and may call
309
+ # The block receives a {Phronomy::Agent::Context::Conversation::TrimContext} and may call
314
310
  # +ctx.remove(seqs)+ to drop messages by seq number. Changes affect
315
311
  # only the current invocation; the underlying memory store is unchanged.
316
312
  #
317
- # @yield [ctx] Phronomy::Context::TrimContext
313
+ # @yield [ctx] Phronomy::Agent::Context::Conversation::TrimContext
318
314
  # @example Drop the oldest message when over 80% of budget is used
319
315
  # on_trim do |ctx|
320
316
  # limit = ctx.budget&.available(used: 0) || Float::INFINITY
@@ -336,9 +332,9 @@ module Phronomy
336
332
  # truthy AND an +on_compact+ callback is also registered, the compact
337
333
  # pipeline is executed.
338
334
  #
339
- # The block receives a read-only {Phronomy::Context::TriggerContext}.
335
+ # The block receives a read-only {Phronomy::Agent::Context::Conversation::TriggerContext}.
340
336
  #
341
- # @yield [ctx] Phronomy::Context::TriggerContext
337
+ # @yield [ctx] Phronomy::Agent::Context::Conversation::TriggerContext
342
338
  # @return [Boolean] truthy → run on_compact; falsy → skip
343
339
  # @example Trigger when messages exceed 70% of token budget
344
340
  # on_compaction_trigger do |ctx|
@@ -358,10 +354,10 @@ module Phronomy
358
354
 
359
355
  # Registers a callback that performs the actual compaction when the
360
356
  # +on_compaction_trigger+ callback fires. The block receives a
361
- # {Phronomy::Context::CompactionContext} and should call +ctx.compact+
357
+ # {Phronomy::Agent::Context::Conversation::CompactionContext} and should call +ctx.compact+
362
358
  # to specify which messages to summarise.
363
359
  #
364
- # @yield [ctx] Phronomy::Context::CompactionContext
360
+ # @yield [ctx] Phronomy::Agent::Context::Conversation::CompactionContext
365
361
  # @example Replace the first 4 messages with a short summary
366
362
  # on_compact do |ctx|
367
363
  # ctx.compact(0..3) do |elements|
@@ -489,6 +485,11 @@ module Phronomy
489
485
  # +:knowledge_sources+ (Array) — dynamic knowledge sources for this turn
490
486
  # +:user_id+ (+String+, optional) — caller identity forwarded to the tracer
491
487
  # +:session_id+ (+String+, optional) — session identity forwarded to the tracer
488
+ # @param invocation_context [Phronomy::InvocationContext, nil] optional first-class context
489
+ # object. When present, +thread_id+, +cancellation_token+, and +deadline+ are
490
+ # derived from it (existing +config:+ keys take precedence as backward-compat
491
+ # aliases). The object is also stored in +config[:invocation_context]+ so that
492
+ # +task_id+ / +parent_task_id+ appear in trace spans automatically.
492
493
  # @return [Hash] +{ output: String, messages: Array, usage: Phronomy::TokenUsage }+,
493
494
  # or +{ output: nil, suspended: true, checkpoint: Phronomy::Agent::Checkpoint,
494
495
  # messages: Array }+ when the invocation was suspended awaiting tool approval.
@@ -505,29 +506,49 @@ module Phronomy
505
506
  # result = agent.resume(result[:checkpoint], approved: true)
506
507
  # end
507
508
  # puts result[:output]
509
+ # @example With InvocationContext (deadline-based timeout)
510
+ # ctx = Phronomy::InvocationContext.new(
511
+ # thread_id: "conv-123",
512
+ # deadline: Phronomy::Concurrency::Deadline.in(30),
513
+ # task_id: SecureRandom.uuid
514
+ # )
515
+ # result = MyAgent.new.invoke("Hello", invocation_context: ctx)
508
516
  # @api public
509
- def invoke(input, messages: [], thread_id: nil, config: {})
517
+ def invoke(input, messages: [], thread_id: nil, config: {}, invocation_context: nil)
518
+ if invocation_context
519
+ thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
520
+ end
510
521
  if Phronomy.configuration.event_loop
511
522
  # Protect against blocking the EventLoop thread itself.
512
- if Thread.current[:phronomy_event_loop_thread]
523
+ if Phronomy::EventLoop.current?
513
524
  raise Phronomy::Error,
514
525
  "Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
515
526
  "entry action. Use agent.run_as_child(input, ctx: ctx) instead."
516
527
  end
517
528
 
529
+ # Build an effective config that includes the invoke_timeout scope's
530
+ # CancellationToken before constructing the FSM. This ensures that
531
+ # every LLM, tool, and RAG call made inside _invoke_impl observes
532
+ # cancellation when the deadline fires.
533
+ timeout_sec = self.class.invoke_timeout
534
+ effective_config, scope = if timeout_sec
535
+ s = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
536
+ s.deadline_in(timeout_sec)
537
+ [config.merge(cancellation_token: s.token), s]
538
+ else
539
+ [config, nil]
540
+ end
541
+
518
542
  fsm = Agent::FSM.new(
519
543
  agent: self,
520
544
  input: input,
521
545
  messages: messages,
522
546
  thread_id: thread_id || SecureRandom.uuid,
523
- config: config
547
+ config: effective_config
524
548
  )
525
549
  completion_queue = Phronomy::EventLoop.instance.register(fsm)
526
- timeout_sec = self.class.invoke_timeout
527
- result = if timeout_sec
528
- begin
529
- Timeout.timeout(timeout_sec) { completion_queue.pop }
530
- rescue Timeout::Error
550
+ result = if scope
551
+ scope.pop_queue(completion_queue) do
531
552
  raise Phronomy::TimeoutError,
532
553
  "Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
533
554
  end
@@ -537,13 +558,60 @@ module Phronomy
537
558
  raise result if result.is_a?(Exception)
538
559
  result
539
560
  else
540
- _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
561
+ # Guard: calling invoke from inside a scheduler task would block the task
562
+ # against itself when using a cooperative backend. Use invoke_async
563
+ # instead to compose agents without introducing a blocking wait.
564
+ if Phronomy::Task.current
565
+ msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
566
+ "This blocks the scheduler until the inner invocation completes, preventing " \
567
+ "other tasks from making progress. Use invoke_async + await instead."
568
+ if Phronomy.configuration.strict_runtime_guards
569
+ raise Phronomy::SchedulerReentrancyError, msg
570
+ elsif Phronomy.configuration.logger
571
+ Phronomy.configuration.logger.warn(msg)
572
+ else
573
+ Kernel.warn("[phronomy] WARNING: #{msg}")
574
+ end
575
+ end
576
+ invoke_async(input, messages: messages, thread_id: thread_id, config: config).await
577
+ end
578
+ end
579
+
580
+ # Invokes this agent asynchronously and returns a {Phronomy::Task}.
581
+ #
582
+ # This is the primary async entry point. {#invoke} is a synchronous wrapper
583
+ # that calls this method and blocks the caller until the task completes.
584
+ # Calling {#invoke} from inside an active scheduler task logs a warning, or raises
585
+ # {Phronomy::SchedulerReentrancyError} when +strict_runtime_guards+ is enabled; use +invoke_async+ directly in that
586
+ # context.
587
+ #
588
+ # The task is registered with the Runtime task registry so {Runtime#shutdown}
589
+ # drains in-flight invocations before process exit.
590
+ #
591
+ # @example
592
+ # task = agent.invoke_async("Hello!")
593
+ # result = task.await # => { output: "...", messages: [...], usage: ... }
594
+ #
595
+ # @param input [String, Hash]
596
+ # @param messages [Array]
597
+ # @param thread_id [String, nil]
598
+ # @param config [Hash]
599
+ # @param invocation_context [Phronomy::InvocationContext, nil]
600
+ # @return [Phronomy::Task]
601
+ # @api public
602
+ def invoke_async(input, messages: [], thread_id: nil, config: {}, invocation_context: nil)
603
+ if invocation_context
604
+ thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
605
+ end
606
+ bp = Phronomy.configuration.backpressure
607
+ on_full = (bp == :raise) ? :reject : (bp || :wait)
608
+ bp_timeout = Phronomy.configuration.backpressure_timeout
609
+ gate = Phronomy::Runtime.instance.gate(:agent)
610
+ Phronomy::Runtime.instance.spawn(name: "agent-#{(self.class.name || "anonymous").downcase}-async") do
611
+ gate.acquire(on_full: on_full, timeout: bp_timeout) do
612
+ _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
613
+ end
541
614
  end
542
- ensure
543
- # Remove this agent's context cache entry from the current thread to
544
- # prevent unbounded growth of the thread-local hash in long-lived
545
- # processes (e.g. Rails servers).
546
- Thread.current[:phronomy_context_version_caches]&.delete(object_id)
547
615
  end
548
616
 
549
617
  # Registers this agent as a child {AgentFSM} inside the given Workflow context.
@@ -557,31 +625,24 @@ module Phronomy
557
625
  # result hash +{ output:, messages:, usage: }+. Declare an +on: :child_completed+
558
626
  # transition in your Workflow to advance to the next state.
559
627
  #
560
- # An optional block may be provided to write the result back into the parent
561
- # WorkflowContext <b>before</b> the +:child_completed+ event is dispatched.
562
- # +Thread::Queue+ provides the happens-before guarantee — no Mutex is needed.
628
+ # The result is delivered exclusively as the +:child_completed+ event payload.
629
+ # The parent Workflow task is the sole owner of the parent +WorkflowContext+ and
630
+ # applies the result after receiving the event — no background thread writes to
631
+ # the parent context directly.
563
632
  #
564
- # @example Without block (result available only as event payload)
633
+ # @example
565
634
  # entry :run_agent, ->(ctx) { MyAgent.new.run_as_child(ctx.query, ctx: ctx) }
566
635
  # transition from: :run_agent, on: :child_completed, to: :process_result
567
636
  #
568
- # @example With block (writes result into context)
569
- # entry :run_agent, ->(ctx) {
570
- # MyAgent.new.run_as_child(ctx.query, ctx: ctx) { |r| ctx.answer = r[:output] }
571
- # }
572
- # transition from: :run_agent, on: :child_completed, to: :process_result
573
- #
574
637
  # @param input [String, Hash] user input passed to the agent
575
638
  # @param ctx [Object] a WorkflowContext that responds to +#thread_id+
576
639
  # @param messages [Array] prior conversation history
577
640
  # @param config [Hash] invocation config (forwarded to +_invoke_impl+)
578
- # @yield [Hash] result hash +{ output:, messages:, usage: }+ — called from the
579
- # agent IO thread before +:child_completed+ is posted
580
641
  # @return [nil] the caller must not wait on any return value;
581
642
  # the result arrives as a +:child_completed+ event
582
643
  # @raise [Phronomy::Error] when EventLoop mode is not enabled
583
644
  # @api public
584
- def run_as_child(input, ctx:, messages: [], config: {}, &result_writer)
645
+ def run_as_child(input, ctx:, messages: [], config: {})
585
646
  unless Phronomy.configuration.event_loop
586
647
  raise Phronomy::Error,
587
648
  "run_as_child requires EventLoop mode. " \
@@ -594,8 +655,7 @@ module Phronomy
594
655
  messages: messages,
595
656
  thread_id: "#{ctx.thread_id}_agent_#{SecureRandom.uuid}",
596
657
  config: config,
597
- parent_id: ctx.thread_id,
598
- result_writer: result_writer
658
+ parent_id: ctx.thread_id
599
659
  )
600
660
  Phronomy::EventLoop.instance.enqueue_child(fsm)
601
661
  nil
@@ -627,7 +687,7 @@ module Phronomy
627
687
  raise
628
688
  end
629
689
 
630
- # Returns the {Context::ContextVersionCache} built during the most recent
690
+ # Returns the {LlmContextWindow::ContextVersionCache} built during the most recent
631
691
  # {#invoke} call on this agent instance. The thread-local cache entry is
632
692
  # cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
633
693
  # in +@last_context_version_cache+ so callers can inspect it after invoke
@@ -644,11 +704,33 @@ module Phronomy
644
704
 
645
705
  private
646
706
 
707
+ # Merges an {InvocationContext} into the +thread_id+ / +config+ pair.
708
+ # Returns +[effective_thread_id, effective_config]+.
709
+ #
710
+ # Precedence rules (existing explicit values always win):
711
+ # - +thread_id+ argument > +ic.thread_id+
712
+ # - +config[:cancellation_token]+ > +ic.cancellation_token+ > token derived from +ic.deadline+
713
+ # - +ic+ is stored in +config[:invocation_context]+ (overwriting any previous value)
714
+ def _apply_invocation_context(thread_id, config, ic)
715
+ effective_thread_id = thread_id || ic.thread_id
716
+ effective_config = config.merge(invocation_context: ic)
717
+ if effective_config[:cancellation_token].nil?
718
+ if (tok = ic.effective_timeout_token)
719
+ effective_config = effective_config.merge(cancellation_token: tok)
720
+ end
721
+ end
722
+ [effective_thread_id, effective_config]
723
+ end
724
+
647
725
  # Streaming implementation for #stream.
648
726
  def _stream_impl(input, messages: [], thread_id: nil, config: {}, &block)
649
727
  caller_meta = {}
650
728
  caller_meta[:user_id] = config[:user_id] if config[:user_id]
651
729
  caller_meta[:session_id] = config[:session_id] if config[:session_id]
730
+ if (ic = config[:invocation_context])
731
+ caller_meta[:task_id] = ic.task_id if ic.task_id
732
+ caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
733
+ end
652
734
 
653
735
  trace("agent.invoke", input: input, **caller_meta) do |_span|
654
736
  run_input_guardrails!(input)
@@ -679,11 +761,26 @@ module Phronomy
679
761
  # Run before_completion hooks (global → class → instance) before the LLM call.
680
762
  run_before_completion_hooks!(chat, config)
681
763
 
682
- response = chat.ask(user_message) do |chunk|
764
+ # Route the LLM streaming call through the configured LLMAdapter.
765
+ # Chunks are pushed into a token queue by the pool worker thread and
766
+ # drained here (on the caller's side) so that the user block is never
767
+ # executed on a BlockingAdapterPool worker thread.
768
+ # The queue capacity is bounded by Configuration#stream_queue_max_size
769
+ # (nil = unbounded) to provide backpressure against a fast LLM producer.
770
+ adapter = Phronomy.configuration.llm_adapter
771
+ chunk_queue = Phronomy::Concurrency::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
772
+ pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
773
+
774
+ # Drain the chunk queue on this side (scheduler task / caller thread).
775
+ loop do
776
+ chunk = chunk_queue.pop
777
+ break if chunk.nil? # queue closed — LLM streaming complete
683
778
  block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
684
779
  check_cancellation!(config, "invocation cancelled during streaming")
685
780
  end
686
781
 
782
+ response = pending.await
783
+
687
784
  output = response.content
688
785
  usage = Phronomy::TokenUsage.from_tokens(response.tokens)
689
786
 
@@ -712,21 +809,74 @@ module Phronomy
712
809
  system_text = build_cached_system_text(input)
713
810
  user_message = extract_message(input)
714
811
 
715
- assembler = Context::Assembler.new(budget: budget)
812
+ assembler = LlmContextWindow::Assembler.new(budget: budget)
716
813
  assembler.add_instruction(system_text) if system_text
717
-
718
- Array(config[:knowledge_sources]).each do |ks|
719
- check_cancellation!(config, "invocation cancelled during RAG fetch")
720
- ks.fetch(query: user_message, cancellation_token: config[:cancellation_token]).each do |chunk|
721
- assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
722
- end
814
+ fetch_knowledge_chunks(user_message, config).each do |chunk|
815
+ assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
723
816
  end
724
-
725
817
  assembler.add_messages(history)
726
818
  assembler.build
727
819
  end
728
820
  protected :build_context
729
821
 
822
+ # Fetches knowledge chunks from all registered sources concurrently.
823
+ #
824
+ # Each source is spawned as a separate task within a {Phronomy::TaskGroup};
825
+ # the RAG concurrency gate enforces the +max_concurrent_rag_fetches+ cap.
826
+ # Results are returned in registration order (spawn order) as a flat array.
827
+ #
828
+ # This method is available to subclasses as a building block when
829
+ # overriding {#build_context}. Pass a custom +query+ to implement
830
+ # multi-hop RAG or other retrieval strategies.
831
+ #
832
+ # @param query [String] RAG query string (typically the current user message)
833
+ # @param config [Hash] invocation config; relevant keys:
834
+ # +:knowledge_sources+, +:rag_failure_policy+, +:cancellation_token+, +:rag_timeout+
835
+ # @return [Array<Hash>] flat list of chunk hashes with +:content+, +:type+, +:source+
836
+ # @api private
837
+ def fetch_knowledge_chunks(query, config)
838
+ sources = Array(config[:knowledge_sources])
839
+ return [] if sources.empty?
840
+
841
+ check_cancellation!(config, "invocation cancelled before RAG fetch")
842
+
843
+ # :skip (default) — ignore per-source failures so the agent can still
844
+ # answer with partial context. :fail surfaces the first error immediately.
845
+ failure_policy =
846
+ case config[:rag_failure_policy]
847
+ when :fail then :fail_fast
848
+ else :skip_failed
849
+ end
850
+
851
+ group = Phronomy::Runtime.instance.task_group(failure_policy: failure_policy)
852
+ bp = Phronomy.configuration.backpressure
853
+ rag_on_full = (bp == :raise) ? :reject : (bp || :wait)
854
+ rag_bp_timeout = Phronomy.configuration.backpressure_timeout
855
+
856
+ # Spawn all fetches concurrently. Results are returned in spawn order
857
+ # (i.e. registration order of knowledge sources) by TaskGroup#await_all.
858
+ sources.each do |ks|
859
+ group.spawn do
860
+ Phronomy::Runtime.instance.gate(:rag).acquire(on_full: rag_on_full, timeout: rag_bp_timeout) do
861
+ result, elapsed_ms = Phronomy::Runtime.measure_ms do
862
+ ks.fetch_async(
863
+ query: query,
864
+ cancellation_token: config[:cancellation_token],
865
+ timeout: config[:rag_timeout]
866
+ ).await
867
+ end
868
+ Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{elapsed_ms}ms" }
869
+ result
870
+ end
871
+ end
872
+ end
873
+
874
+ # await_all returns results in spawn order; nil entries indicate
875
+ # skipped failures when using :skip_failed.
876
+ group.await_all.flat_map { |chunks| Array(chunks) }
877
+ end
878
+ protected :fetch_knowledge_chunks
879
+
730
880
  # Runs the on_trim / on_compaction_trigger / on_compact pipeline on the
731
881
  # supplied message array and returns the final Array of message objects
732
882
  # ready to pass to the Assembler.
@@ -744,16 +894,16 @@ module Phronomy
744
894
  elements = build_message_elements(Array(messages))
745
895
 
746
896
  if (trim_cb = self.class._on_trim_callback)
747
- trim_ctx = Context::TrimContext.new(message_elements: elements, budget: budget)
897
+ trim_ctx = Context::Conversation::TrimContext.new(message_elements: elements, budget: budget)
748
898
  trim_cb.call(trim_ctx)
749
899
  elements = trim_ctx.message_elements
750
900
  end
751
901
 
752
902
  if (trigger_cb = self.class._on_compaction_trigger_callback)
753
- trigger_ctx = Context::TriggerContext.new(message_elements: elements, budget: budget)
903
+ trigger_ctx = Context::Conversation::TriggerContext.new(message_elements: elements, budget: budget)
754
904
  if trigger_cb.call(trigger_ctx)
755
905
  if (compact_cb = self.class._on_compact_callback)
756
- compact_ctx = Context::CompactionContext.new(
906
+ compact_ctx = Context::Conversation::CompactionContext.new(
757
907
  message_elements: elements,
758
908
  budget: budget,
759
909
  thread_id: thread_id
@@ -774,61 +924,18 @@ module Phronomy
774
924
  caller_meta = {}
775
925
  caller_meta[:user_id] = config[:user_id] if config[:user_id]
776
926
  caller_meta[:session_id] = config[:session_id] if config[:session_id]
927
+ if (ic = config[:invocation_context])
928
+ caller_meta[:task_id] = ic.task_id if ic.task_id
929
+ caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
930
+ end
777
931
 
778
932
  trace("agent.invoke", input: input, **caller_meta) do |_span|
779
- # Run input guardrails before touching the LLM.
780
- run_input_guardrails!(input)
781
-
782
- user_message = extract_message(input)
783
- chat = build_chat
784
-
785
- # Assemble context (system prompt + history). Override #build_context to
786
- # inject custom context editing logic at the Agent subclass level.
787
- context = build_context(input, messages: messages, thread_id: thread_id, config: config)
788
- apply_instructions(chat, context[:system]) if context[:system]
789
- context[:messages].each { |msg| chat.messages << msg }
790
-
791
- # Run before_completion hooks (global → class → instance) before the LLM call.
792
- run_before_completion_hooks!(chat, config)
793
-
794
- # Register suspension hook for approval-required tools (no-op when a
795
- # synchronous on_approval_required handler is already registered).
796
- _register_suspension_hook!(chat)
797
-
798
- # Check for cancellation immediately before the LLM call.
799
- check_cancellation!(config, "invocation cancelled before LLM call")
800
-
801
- # Forward the cancellation token to ParallelToolChat via a thread-local
802
- # so that tool dispatch batches can observe cancellation without needing
803
- # direct access to config.
804
- prev_ct = Thread.current[:phronomy_cancellation_token]
805
- Thread.current[:phronomy_cancellation_token] = config[:cancellation_token]
806
-
807
- begin
808
- response = chat.ask(user_message)
809
- rescue SuspendSignal => signal
810
- checkpoint = Checkpoint.new(
811
- thread_id: thread_id,
812
- original_input: input,
813
- messages: chat.messages.dup,
814
- pending_tool_name: signal.tool_name,
815
- pending_tool_args: signal.args,
816
- pending_tool_call_id: signal.tool_call_id
817
- )
818
- suspended_result = {output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}
819
- next [suspended_result, nil]
820
- ensure
821
- Thread.current[:phronomy_cancellation_token] = prev_ct
822
- end
823
-
824
- output = response.content
825
- usage = Phronomy::TokenUsage.from_tokens(response.tokens)
826
-
827
- # Run output guardrails before returning to the caller.
828
- run_output_guardrails!(output)
829
-
830
- result = {output: output, messages: chat.messages, usage: usage}
831
- [result, usage]
933
+ Agent::InvocationPipeline.new(self).run(
934
+ input,
935
+ messages: messages,
936
+ thread_id: thread_id,
937
+ config: config
938
+ )
832
939
  end
833
940
  end
834
941
 
@@ -842,19 +949,19 @@ module Phronomy
842
949
  return nil unless model_name
843
950
 
844
951
  if (cw = self.class.context_window)
845
- Phronomy::Context::TokenBudget.new(
952
+ Phronomy::LlmContextWindow::TokenBudget.new(
846
953
  context_window: cw,
847
954
  max_output_tokens: self.class.max_output_tokens || 0,
848
955
  overhead: self.class.context_overhead
849
956
  )
850
957
  else
851
- Phronomy::Context::TokenBudget.new(
958
+ Phronomy::LlmContextWindow::TokenBudget.new(
852
959
  model: model_name,
853
960
  max_output_tokens: self.class.max_output_tokens,
854
961
  overhead: self.class.context_overhead
855
962
  )
856
963
  end
857
- rescue Phronomy::Context::UnknownModelError, RubyLLM::ModelNotFoundError
964
+ rescue Phronomy::LlmContextWindow::UnknownModelError, RubyLLM::ModelNotFoundError
858
965
  nil
859
966
  end
860
967
 
@@ -867,7 +974,7 @@ module Phronomy
867
974
  # @api public
868
975
  def build_message_elements(messages)
869
976
  Array(messages).each_with_index.map do |msg, idx|
870
- tokens = Context::TokenEstimator.estimate(msg.content.to_s)
977
+ tokens = LlmContextWindow::TokenEstimator.estimate(msg.content.to_s)
871
978
  {seq: idx, message: msg, tokens: tokens, role: msg.role}
872
979
  end
873
980
  end
@@ -890,34 +997,29 @@ module Phronomy
890
997
  [instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
891
998
  )
892
999
 
893
- agent_id = object_id
894
- cache = (Thread.current[:phronomy_context_version_caches] ||= {})[agent_id] ||=
895
- Context::ContextVersionCache.new
1000
+ cache = (@context_version_cache ||= LlmContextWindow::ContextVersionCache.new)
896
1001
  unless cache.valid?(fingerprint)
897
1002
  parts = [instruction]
898
1003
  static_chunks.each do |chunk|
899
- parts << Context::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
1004
+ parts << LlmContextWindow::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
900
1005
  end
901
1006
  cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
902
1007
  end
903
1008
 
904
1009
  # Persist a reference on the instance so that context_version_cache
905
- # remains accessible after invoke's ensure block cleans up the
906
- # thread-local entry.
1010
+ # remains accessible after invoke completes.
907
1011
  @last_context_version_cache = cache
908
1012
 
909
1013
  cache.system_text.empty? ? nil : cache.system_text
910
1014
  end
911
1015
 
912
- # Load messages from a ConversationManager.
913
- #
914
1016
  # Returns the chat class to instantiate for this invocation.
915
- # When the +:phronomy_agent_parallel_tools+ thread-local flag is set
916
- # (i.e. inside an {AgentFSM} IO thread), returns {ParallelToolChat} so
917
- # that concurrent tool dispatch is enabled. Falls back to +nil+ otherwise,
918
- # signalling {#build_chat} to use the standard +RubyLLM.chat+ factory.
1017
+ # When EventLoop mode is enabled ({Phronomy.configuration.event_loop}),
1018
+ # returns {Phronomy::MultiAgent::ParallelToolChat} so that concurrent tool dispatch is enabled.
1019
+ # Falls back to +nil+ otherwise, signalling {#build_chat} to use the
1020
+ # standard +RubyLLM.chat+ factory.
919
1021
  def build_chat_class
920
- Thread.current[:phronomy_agent_parallel_tools] ? Agent::ParallelToolChat : nil
1022
+ Phronomy.configuration.event_loop ? Phronomy::MultiAgent::ParallelToolChat : nil
921
1023
  end
922
1024
 
923
1025
  def build_chat
@@ -931,7 +1033,11 @@ module Phronomy
931
1033
  end
932
1034
  t = self.class.temperature
933
1035
  parallel_class = build_chat_class
934
- chat = parallel_class ? parallel_class.new(**opts) : RubyLLM.chat(**opts)
1036
+ chat = if parallel_class
1037
+ parallel_class.new(max_parallel_tools: self.class.max_parallel_tools, **opts)
1038
+ else
1039
+ RubyLLM.chat(**opts)
1040
+ end
935
1041
  chat.with_temperature(t) if t
936
1042
  self.class.tools.each do |tool_class|
937
1043
  chat.with_tool(prepare_tool_class(tool_class))
@@ -943,7 +1049,7 @@ module Phronomy
943
1049
  def build_instructions(input)
944
1050
  instr = self.class.instructions
945
1051
  case instr
946
- when Phronomy::PromptTemplate
1052
+ when Phronomy::Agent::Context::Instruction::PromptTemplate
947
1053
  vars = input.is_a?(Hash) ? input : {input: input}
948
1054
  instr.format_system(**vars) || instr.format(**vars)
949
1055
  when String then instr
@@ -995,15 +1101,30 @@ module Phronomy
995
1101
 
996
1102
  # Builds the final tool class to register with the chat.
997
1103
  #
998
- # Two transformations are applied in order:
1104
+ # When an already-instantiated tool object is passed (e.g. a
1105
+ # {Phronomy::Tool::McpTool} returned by +McpTool.from_server+), it is
1106
+ # returned as-is. RubyLLM's +with_tool+ accepts both classes and
1107
+ # instances, so no wrapping is needed.
1108
+ #
1109
+ # For tool classes, three transformations are applied in order:
999
1110
  # 1. Alias override — when the Hash form of .tools maps this class to an
1000
1111
  # explicit name, an anonymous subclass with that tool_name is returned.
1001
- # 2. Approval gate — when the tool class has +requires_approval+ set AND
1112
+ # 2. Scope policy — when a scope is declared on the tool, the configured
1113
+ # {Phronomy::Tool::ScopePolicy} (or the default) is evaluated.
1114
+ # +:reject+ wraps the tool to return a denial message without executing.
1115
+ # +:approve+ marks the tool as requiring approval when it does not already
1116
+ # have +requires_approval+, so step 3 treats it like any approval-required tool.
1117
+ # 3. Approval gate — when the tool class has +requires_approval+ set AND
1002
1118
  # an approval handler has been registered via #on_approval_required,
1003
1119
  # the tool's #call method is wrapped: the handler is invoked with
1004
1120
  # (tool_name, args) and, if it returns falsy, the tool returns a denial
1005
1121
  # message instead of executing.
1006
1122
  def prepare_tool_class(tool_class)
1123
+ # When an instantiated tool object is passed (e.g. McpTool.from_server
1124
+ # returns an instance, not a class), skip class-level processing and
1125
+ # return it directly. The chat's with_tool method handles both forms.
1126
+ return tool_class unless tool_class.is_a?(Class)
1127
+
1007
1128
  # Step 1: apply alias if needed.
1008
1129
  resolved = if (alias_name = self.class.tool_aliases[tool_class])
1009
1130
  parent_description = tool_class.description
@@ -1015,7 +1136,34 @@ module Phronomy
1015
1136
  tool_class
1016
1137
  end
1017
1138
 
1018
- # Step 2: wrap with approval gate when handler is registered.
1139
+ # Step 2: evaluate scope policy.
1140
+ scope = resolved.scope
1141
+ if scope
1142
+ policy = @scope_policy || Phronomy::Tool::ScopePolicy::DEFAULT
1143
+ decision = policy.call(resolved, scope, self)
1144
+ case decision
1145
+ when :reject
1146
+ effective_name = resolved.new.name
1147
+ rejected_class = Class.new(resolved) do
1148
+ tool_name effective_name
1149
+ define_method(:call) do |_args|
1150
+ "Tool execution denied: scope :#{scope} is not permitted."
1151
+ end
1152
+ end
1153
+ return rejected_class
1154
+ when :approve
1155
+ # Treat as requires_approval unless the tool already has that flag.
1156
+ unless resolved.requires_approval
1157
+ effective_name = resolved.new.name
1158
+ resolved = Class.new(resolved) do
1159
+ tool_name effective_name
1160
+ requires_approval true
1161
+ end
1162
+ end
1163
+ end
1164
+ end
1165
+
1166
+ # Step 3: wrap with approval gate when handler is registered.
1019
1167
  return resolved unless resolved.requires_approval && @approval_handler
1020
1168
 
1021
1169
  handler = @approval_handler