phronomy 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +40 -4
  3. data/README.md +32 -41
  4. data/benchmark/baseline.json +1 -1
  5. data/benchmark/bench_agent_invoke.rb +1 -1
  6. data/benchmark/bench_context_assembler.rb +9 -1
  7. data/benchmark/bench_regression.rb +8 -8
  8. data/benchmark/bench_tool_schema.rb +2 -2
  9. data/benchmark/bench_vector_store.rb +1 -1
  10. data/docs/decisions/011-build-context-as-single-llm-input-authority.md +224 -0
  11. data/lib/phronomy/agent/base.rb +328 -366
  12. data/lib/phronomy/agent/checkpoint.rb +30 -1
  13. data/lib/phronomy/agent/checkpoint_store.rb +97 -0
  14. data/lib/phronomy/agent/concerns/retryable.rb +1 -1
  15. data/lib/phronomy/agent/concerns/suspendable.rb +63 -8
  16. data/lib/phronomy/agent/context/capability/base.rb +689 -0
  17. data/lib/phronomy/agent/context/capability/scope_policy.rb +54 -0
  18. data/lib/phronomy/agent/context/knowledge/base.rb +58 -0
  19. data/lib/phronomy/agent/context/knowledge/entity_knowledge.rb +102 -0
  20. data/lib/phronomy/agent/context/knowledge/static_knowledge.rb +58 -0
  21. data/lib/phronomy/agent/shared_state.rb +2 -2
  22. data/lib/phronomy/agent/tool_executor.rb +1 -1
  23. data/lib/phronomy/concurrency/gate_registry.rb +0 -1
  24. data/lib/phronomy/configuration.rb +13 -6
  25. data/lib/phronomy/event_loop.rb +1 -18
  26. data/lib/phronomy/llm_context_window/assembler.rb +77 -44
  27. data/lib/phronomy/multi_agent/handoff.rb +4 -4
  28. data/lib/phronomy/multi_agent/orchestrator.rb +1 -1
  29. data/lib/phronomy/multi_agent/team_coordinator.rb +2 -2
  30. data/lib/phronomy/runtime/runtime_metrics.rb +0 -1
  31. data/lib/phronomy/runtime.rb +1 -2
  32. data/lib/phronomy/tool.rb +3 -4
  33. data/lib/phronomy/{tool/agent_tool.rb → tools/agent.rb} +8 -9
  34. data/lib/phronomy/{tool/mcp_tool.rb → tools/mcp.rb} +9 -9
  35. data/lib/phronomy/tools/vector_search.rb +70 -0
  36. data/lib/phronomy/vector_store/async_backend.rb +110 -0
  37. data/lib/phronomy/vector_store/base.rb +89 -0
  38. data/lib/phronomy/vector_store/embeddings/base.rb +41 -0
  39. data/lib/phronomy/vector_store/embeddings/ruby_llm_embeddings.rb +47 -0
  40. data/lib/phronomy/vector_store/in_memory.rb +103 -0
  41. data/lib/phronomy/vector_store/loader/base.rb +27 -0
  42. data/lib/phronomy/vector_store/loader/csv_loader.rb +58 -0
  43. data/lib/phronomy/vector_store/loader/markdown_loader.rb +78 -0
  44. data/lib/phronomy/vector_store/loader/plain_text_loader.rb +24 -0
  45. data/lib/phronomy/vector_store/pgvector.rb +127 -0
  46. data/lib/phronomy/vector_store/redis_search.rb +192 -0
  47. data/lib/phronomy/vector_store/splitter/base.rb +49 -0
  48. data/lib/phronomy/vector_store/splitter/fixed_size_splitter.rb +53 -0
  49. data/lib/phronomy/vector_store/splitter/recursive_splitter.rb +107 -0
  50. data/lib/phronomy/vector_store.rb +16 -4
  51. data/lib/phronomy/version.rb +1 -1
  52. data/lib/phronomy/workflow/fsm_session.rb +249 -0
  53. data/lib/phronomy/workflow/phase_machine_builder.rb +247 -0
  54. data/lib/phronomy/workflow_runner.rb +2 -2
  55. data/lib/phronomy.rb +10 -3
  56. data/scripts/api_snapshot.rb +11 -10
  57. metadata +31 -37
  58. data/lib/phronomy/agent/context/conversation/compaction_context.rb +0 -117
  59. data/lib/phronomy/agent/context/conversation/trigger_context.rb +0 -43
  60. data/lib/phronomy/agent/context/conversation/trim_context.rb +0 -82
  61. data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +0 -45
  62. data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +0 -51
  63. data/lib/phronomy/agent/context/knowledge/loader/base.rb +0 -31
  64. data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +0 -62
  65. data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +0 -82
  66. data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +0 -28
  67. data/lib/phronomy/agent/context/knowledge/source/base.rb +0 -60
  68. data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +0 -102
  69. data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +0 -63
  70. data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +0 -58
  71. data/lib/phronomy/agent/context/knowledge/splitter/base.rb +0 -53
  72. data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +0 -57
  73. data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +0 -111
  74. data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +0 -116
  75. data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +0 -95
  76. data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +0 -109
  77. data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +0 -133
  78. data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +0 -198
  79. data/lib/phronomy/agent/fsm.rb +0 -157
  80. data/lib/phronomy/agent/invocation_pipeline.rb +0 -99
  81. data/lib/phronomy/agent/lifecycle/fsm_session.rb +0 -251
  82. data/lib/phronomy/agent/lifecycle/phase_machine_builder.rb +0 -249
  83. data/lib/phronomy/agent/react_agent.rb +0 -204
  84. data/lib/phronomy/embeddings.rb +0 -11
  85. data/lib/phronomy/loader.rb +0 -13
  86. data/lib/phronomy/splitter.rb +0 -12
  87. data/lib/phronomy/tool/base.rb +0 -685
  88. data/lib/phronomy/tool/scope_policy.rb +0 -50
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "digest"
4
3
  require "securerandom"
4
+ require_relative "checkpoint_store"
5
5
  require_relative "concerns/retryable"
6
6
  require_relative "concerns/guardrailable"
7
7
  require_relative "concerns/before_completion"
@@ -255,10 +255,10 @@ module Phronomy
255
255
  # the first time +invoke+ is called. The cache persists for the lifetime
256
256
  # of the process; call {.static_knowledge_refresh!} to force a reload.
257
257
  #
258
- # @param sources [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
258
+ # @param sources [Array<Phronomy::Agent::Context::Knowledge::Base>]
259
259
  # @example
260
260
  # class PolicyAgent < Phronomy::Agent::Base
261
- # static_knowledge Phronomy::Agent::Context::Knowledge::Source::StaticKnowledge.new(POLICY_TEXT)
261
+ # static_knowledge Phronomy::Agent::Context::Knowledge::StaticKnowledge.new(POLICY_TEXT)
262
262
  # end
263
263
  # @api public
264
264
  def static_knowledge(*sources)
@@ -269,7 +269,7 @@ module Phronomy
269
269
  end
270
270
 
271
271
  # Returns the registered static knowledge sources.
272
- # @return [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
272
+ # @return [Array<Phronomy::Agent::Context::Knowledge::Base>]
273
273
  # @api public
274
274
  def static_knowledge_sources
275
275
  @static_knowledge_sources || []
@@ -302,80 +302,6 @@ module Phronomy
302
302
  @static_knowledge_chunks = nil
303
303
  end
304
304
 
305
- # Registers a callback that is invoked before every LLM call so the
306
- # application can remove stale or irrelevant messages from the
307
- # conversation history.
308
- #
309
- # The block receives a {Phronomy::Agent::Context::Conversation::TrimContext} and may call
310
- # +ctx.remove(seqs)+ to drop messages by seq number. Changes affect
311
- # only the current invocation; the underlying memory store is unchanged.
312
- #
313
- # @yield [ctx] Phronomy::Agent::Context::Conversation::TrimContext
314
- # @example Drop the oldest message when over 80% of budget is used
315
- # on_trim do |ctx|
316
- # limit = ctx.budget&.available(used: 0) || Float::INFINITY
317
- # ctx.remove(ctx.message_elements.first[:seq]) if ctx.total_tokens > limit * 0.8
318
- # end
319
- # @api public
320
- def on_trim(&block)
321
- @on_trim_callback = block
322
- end
323
-
324
- # @return [Proc, nil]
325
- # @api private
326
- def _on_trim_callback
327
- @on_trim_callback
328
- end
329
-
330
- # Registers a callback that decides whether compaction should run.
331
- # Evaluated before every LLM call (after on_trim). If the block returns
332
- # truthy AND an +on_compact+ callback is also registered, the compact
333
- # pipeline is executed.
334
- #
335
- # The block receives a read-only {Phronomy::Agent::Context::Conversation::TriggerContext}.
336
- #
337
- # @yield [ctx] Phronomy::Agent::Context::Conversation::TriggerContext
338
- # @return [Boolean] truthy → run on_compact; falsy → skip
339
- # @example Trigger when messages exceed 70% of token budget
340
- # on_compaction_trigger do |ctx|
341
- # limit = ctx.budget&.available(used: 0) || Float::INFINITY
342
- # ctx.total_tokens > limit * 0.7
343
- # end
344
- # @api public
345
- def on_compaction_trigger(&block)
346
- @on_compaction_trigger_callback = block
347
- end
348
-
349
- # @return [Proc, nil]
350
- # @api private
351
- def _on_compaction_trigger_callback
352
- @on_compaction_trigger_callback
353
- end
354
-
355
- # Registers a callback that performs the actual compaction when the
356
- # +on_compaction_trigger+ callback fires. The block receives a
357
- # {Phronomy::Agent::Context::Conversation::CompactionContext} and should call +ctx.compact+
358
- # to specify which messages to summarise.
359
- #
360
- # @yield [ctx] Phronomy::Agent::Context::Conversation::CompactionContext
361
- # @example Replace the first 4 messages with a short summary
362
- # on_compact do |ctx|
363
- # ctx.compact(0..3) do |elements|
364
- # texts = elements.map { |e| e[:message].content }.join(" | ")
365
- # "Earlier conversation summary: #{texts}"
366
- # end
367
- # end
368
- # @api public
369
- def on_compact(&block)
370
- @on_compact_callback = block
371
- end
372
-
373
- # @return [Proc, nil]
374
- # @api private
375
- def _on_compact_callback
376
- @on_compact_callback
377
- end
378
-
379
305
  # When enabled, attaches Anthropic prompt-cache markers to the system
380
306
  # message so that the fixed instructions are served from cache on
381
307
  # subsequent turns, reducing input-token costs.
@@ -449,11 +375,32 @@ module Phronomy
449
375
  @context_overhead = val.to_i
450
376
  end
451
377
  end
378
+
379
+ # Resumes a suspended invocation identified by +checkpoint+ without
380
+ # requiring the original agent instance to be kept in memory.
381
+ #
382
+ # Validates that the checkpoint was created by this agent class, then
383
+ # instantiates a fresh agent and delegates to {Suspendable#resume}.
384
+ #
385
+ # @param checkpoint [Phronomy::Agent::Checkpoint]
386
+ # @param approved [Boolean] +true+ to execute the pending tool; +false+ to deny
387
+ # @param config [Hash] same runtime options as {#invoke}
388
+ # @return [Hash] same shape as {#invoke} — may contain +suspended: true+ if
389
+ # another approval-required tool is encountered during continuation
390
+ # @raise [ArgumentError] when +checkpoint.agent_class+ does not match this class
391
+ # @api public
392
+ def resume(checkpoint, approved:, config: {})
393
+ if checkpoint.agent_class && checkpoint.agent_class != name
394
+ raise ArgumentError,
395
+ "checkpoint belongs to #{checkpoint.agent_class}, cannot resume with #{name}"
396
+ end
397
+ new.resume(checkpoint, approved: approved, config: config)
398
+ end
452
399
  end
453
400
 
454
401
  # Registers an anonymous handoff tool class on this agent instance.
455
402
  # Called by Runner during construction when routes are configured.
456
- # @param tool_class [Class<Phronomy::Tool::Base>]
403
+ # @param tool_class [Class<Phronomy::Agent::Context::Capability::Base>]
457
404
  # @return [self]
458
405
  # @api private
459
406
  def _add_handoff_tool(tool_class)
@@ -482,7 +429,6 @@ module Phronomy
482
429
  # @param thread_id [String, nil] conversation thread identifier, forwarded
483
430
  # to the compaction context when on_compact is configured.
484
431
  # @param config [Hash] additional runtime options:
485
- # +:knowledge_sources+ (Array) — dynamic knowledge sources for this turn
486
432
  # +:user_id+ (+String+, optional) — caller identity forwarded to the tracer
487
433
  # +:session_id+ (+String+, optional) — session identity forwarded to the tracer
488
434
  # @param invocation_context [Phronomy::InvocationContext, nil] optional first-class context
@@ -518,63 +464,35 @@ module Phronomy
518
464
  if invocation_context
519
465
  thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
520
466
  end
521
- if Phronomy.configuration.event_loop
522
- # Protect against blocking the EventLoop thread itself.
523
- if Phronomy::EventLoop.current?
524
- raise Phronomy::Error,
525
- "Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
526
- "entry action. Use agent.run_as_child(input, ctx: ctx) instead."
527
- end
467
+ _check_scheduler_reentrancy
528
468
 
529
- # Build an effective config that includes the invoke_timeout scope's
530
- # CancellationToken before constructing the FSM. This ensures that
531
- # every LLM, tool, and RAG call made inside _invoke_impl observes
532
- # cancellation when the deadline fires.
533
- timeout_sec = self.class.invoke_timeout
534
- effective_config, scope = if timeout_sec
535
- s = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
536
- s.deadline_in(timeout_sec)
537
- [config.merge(cancellation_token: s.token), s]
538
- else
539
- [config, nil]
540
- end
469
+ timeout_sec = self.class.invoke_timeout
470
+ unless timeout_sec
471
+ return invoke_async(input, messages: messages, thread_id: thread_id, config: config).await
472
+ end
541
473
 
542
- fsm = Agent::FSM.new(
543
- agent: self,
544
- input: input,
545
- messages: messages,
546
- thread_id: thread_id || SecureRandom.uuid,
547
- config: effective_config
548
- )
549
- completion_queue = Phronomy::EventLoop.instance.register(fsm)
550
- result = if scope
551
- scope.pop_queue(completion_queue) do
552
- raise Phronomy::TimeoutError,
553
- "Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
554
- end
555
- else
556
- completion_queue.pop
557
- end
558
- raise result if result.is_a?(Exception)
559
- result
560
- else
561
- # Guard: calling invoke from inside a scheduler task would block the task
562
- # against itself when using a cooperative backend. Use invoke_async
563
- # instead to compose agents without introducing a blocking wait.
564
- if Phronomy::Task.current
565
- msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
566
- "This blocks the scheduler until the inner invocation completes, preventing " \
567
- "other tasks from making progress. Use invoke_async + await instead."
568
- if Phronomy.configuration.strict_runtime_guards
569
- raise Phronomy::SchedulerReentrancyError, msg
570
- elsif Phronomy.configuration.logger
571
- Phronomy.configuration.logger.warn(msg)
572
- else
573
- Kernel.warn("[phronomy] WARNING: #{msg}")
574
- end
575
- end
576
- invoke_async(input, messages: messages, thread_id: thread_id, config: config).await
474
+ # invoke_timeout: create a CancellationScope with deadline, pass its token
475
+ # to the async invocation, and use scope.pop_queue so the calling thread
476
+ # unblocks as soon as either the result arrives or the deadline fires.
477
+ scope = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
478
+ scope.deadline_in(timeout_sec)
479
+ effective_config = config.merge(cancellation_token: scope.token)
480
+ task = invoke_async(input, messages: messages, thread_id: thread_id, config: effective_config)
481
+
482
+ # Bridge the task result to an AsyncQueue so scope.pop_queue can observe the deadline.
483
+ completion_queue = Phronomy::Concurrency::AsyncQueue.new
484
+ Phronomy::Runtime.instance.spawn(name: "invoke-timeout-bridge:#{(self.class.name || "agent").downcase}") do
485
+ completion_queue.push(task.await)
486
+ rescue => e
487
+ completion_queue.push(e)
577
488
  end
489
+
490
+ result = scope.pop_queue(completion_queue) do
491
+ raise Phronomy::TimeoutError,
492
+ "Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
493
+ end
494
+ raise result if result.is_a?(Exception)
495
+ result
578
496
  end
579
497
 
580
498
  # Invokes this agent asynchronously and returns a {Phronomy::Task}.
@@ -649,15 +567,18 @@ module Phronomy
649
567
  "Enable with: Phronomy.configure { |c| c.event_loop = true }"
650
568
  end
651
569
 
652
- fsm = Agent::FSM.new(
653
- agent: self,
654
- input: input,
655
- messages: messages,
656
- thread_id: "#{ctx.thread_id}_agent_#{SecureRandom.uuid}",
657
- config: config,
658
- parent_id: ctx.thread_id
659
- )
660
- Phronomy::EventLoop.instance.enqueue_child(fsm)
570
+ parent_id = ctx.thread_id
571
+ thread_id = "#{parent_id}_agent_#{SecureRandom.uuid}"
572
+ Phronomy::Runtime.instance.spawn(name: "agent-child:#{thread_id}") do
573
+ result = _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
574
+ Phronomy::EventLoop.instance.post(
575
+ Phronomy::Event.new(type: :child_completed, target_id: parent_id, payload: result)
576
+ )
577
+ rescue => e
578
+ Phronomy::EventLoop.instance.post(
579
+ Phronomy::Event.new(type: :child_failed, target_id: parent_id, payload: e)
580
+ )
581
+ end
661
582
  nil
662
583
  end
663
584
 
@@ -666,8 +587,8 @@ module Phronomy
666
587
  #
667
588
  # Events emitted (in order):
668
589
  # :token — each content delta from the LLM
669
- # :tool_call — when the LLM requests a tool (ReactAgent subclasses only)
670
- # :tool_result — after a tool completes (ReactAgent subclasses only)
590
+ # :tool_call — when the LLM requests a tool
591
+ # :tool_result — after a tool completes
671
592
  # :done — final event carrying output, messages, and usage
672
593
  # :error — if an unrecoverable error occurs
673
594
  #
@@ -687,19 +608,11 @@ module Phronomy
687
608
  raise
688
609
  end
689
610
 
690
- # Returns the {LlmContextWindow::ContextVersionCache} built during the most recent
691
- # {#invoke} call on this agent instance. The thread-local cache entry is
692
- # cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
693
- # in +@last_context_version_cache+ so callers can inspect it after invoke
694
- # returns.
695
- #
696
- # NOTE: Not thread-safe. When the same Agent instance is used concurrently,
697
- # +@last_context_version_cache+ reflects the most recent +invoke+ on *any*
698
- # thread. For per-invocation isolation, use a separate Agent instance per
699
- # thread.
611
+ # @deprecated The context version cache has been removed. Returns nil.
612
+ # Retained for backward compatibility with callers using safe navigation (+&.reset+).
700
613
  # @api private
701
614
  def context_version_cache
702
- @last_context_version_cache
615
+ nil
703
616
  end
704
617
 
705
618
  private
@@ -722,29 +635,39 @@ module Phronomy
722
635
  [effective_thread_id, effective_config]
723
636
  end
724
637
 
725
- # Streaming implementation for #stream.
726
- def _stream_impl(input, messages: [], thread_id: nil, config: {}, &block)
727
- caller_meta = {}
728
- caller_meta[:user_id] = config[:user_id] if config[:user_id]
729
- caller_meta[:session_id] = config[:session_id] if config[:session_id]
730
- if (ic = config[:invocation_context])
731
- caller_meta[:task_id] = ic.task_id if ic.task_id
732
- caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
638
+ def _check_scheduler_reentrancy
639
+ return unless Phronomy::Task.current
640
+
641
+ msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
642
+ "This blocks the scheduler until the inner invocation completes, preventing " \
643
+ "other tasks from making progress. Use invoke_async + await instead."
644
+ if Phronomy.configuration.strict_runtime_guards
645
+ raise Phronomy::SchedulerReentrancyError, msg
646
+ elsif Phronomy.configuration.logger
647
+ Phronomy.configuration.logger.warn(msg)
648
+ else
649
+ Kernel.warn("[phronomy] WARNING: #{msg}")
733
650
  end
651
+ end
734
652
 
735
- trace("agent.invoke", input: input, **caller_meta) do |_span|
653
+ # Streaming implementation for #stream.
654
+ def _stream_impl(input, messages: [], thread_id: nil, config: {}, &block)
655
+ trace("agent.invoke", input: input, **_build_caller_meta(config)) do |_span|
736
656
  run_input_guardrails!(input)
737
657
 
738
658
  chat = build_chat
739
659
  user_message = extract_message(input)
660
+ context = build_context(
661
+ input,
662
+ messages: messages,
663
+ thread_id: thread_id,
664
+ config: config,
665
+ budget: build_token_budget,
666
+ instruction: build_instructions(input),
667
+ tools: self.class.tools + _handoff_tools
668
+ )
669
+ _apply_context_to_chat(chat, context)
740
670
 
741
- # Assemble context (system prompt + history). Override #build_context to
742
- # inject custom context editing logic at the Agent subclass level.
743
- context = build_context(input, messages: messages, thread_id: thread_id, config: config)
744
- apply_instructions(chat, context[:system]) if context[:system]
745
- context[:messages].each { |msg| chat.messages << msg }
746
-
747
- # Wire per-event callbacks to yield StreamEvents.
748
671
  current_tool_call = nil
749
672
  chat.on_tool_call do |tool_call|
750
673
  current_tool_call = tool_call
@@ -758,32 +681,9 @@ module Phronomy
758
681
  }))
759
682
  end
760
683
 
761
- # Run before_completion hooks (global → class → instance) before the LLM call.
762
684
  run_before_completion_hooks!(chat, config)
763
685
 
764
- # Route the LLM streaming call through the configured LLMAdapter.
765
- # Chunks are pushed into a token queue by the pool worker thread and
766
- # drained here (on the caller's side) so that the user block is never
767
- # executed on a BlockingAdapterPool worker thread.
768
- # The queue capacity is bounded by Configuration#stream_queue_max_size
769
- # (nil = unbounded) to provide backpressure against a fast LLM producer.
770
- adapter = Phronomy.configuration.llm_adapter
771
- chunk_queue = Phronomy::Concurrency::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
772
- pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
773
-
774
- # Drain the chunk queue on this side (scheduler task / caller thread).
775
- loop do
776
- chunk = chunk_queue.pop
777
- break if chunk.nil? # queue closed — LLM streaming complete
778
- block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
779
- check_cancellation!(config, "invocation cancelled during streaming")
780
- end
781
-
782
- response = pending.await
783
-
784
- output = response.content
785
- usage = Phronomy::TokenUsage.from_tokens(response.tokens)
786
-
686
+ output, usage = _drain_stream(chat, user_message, config, &block)
787
687
  run_output_guardrails!(output)
788
688
 
789
689
  result = {output: output, messages: chat.messages, usage: usage}
@@ -797,146 +697,260 @@ module Phronomy
797
697
  # inject custom context editing logic without having to override
798
698
  # the full #invoke_once pipeline.
799
699
  #
800
- # @param input [String, Hash] the user's input for this turn
801
- # @param messages [Array<RubyLLM::Message>] raw conversation history
802
- # @param thread_id [String, nil] conversation thread identifier
803
- # @param config [Hash] the invocation config (see #invoke)
804
- # @return [Hash] { system: String|nil, messages: Array }
700
+ # The keyword arguments +budget+, +instruction+, +tools+, and +knowledge+
701
+ # carry pre-computed values. Override them in a subclass call to +super+
702
+ # to inject custom context without recomputing the defaults.
703
+ #
704
+ # @param input [String, Hash] the user's input for this turn
705
+ # @param messages [Array<RubyLLM::Message>] raw conversation history
706
+ # @param thread_id [String, nil] conversation thread identifier
707
+ # @param config [Hash] the invocation config (see #invoke)
708
+ # @param budget [LlmContextWindow::TokenBudget, nil] pre-computed token budget
709
+ # @param instruction [String, nil] pre-computed system instruction
710
+ # @param tools [Array<Class>] tool classes to expose
711
+ # @param knowledge [Array<Hash>] knowledge chunks ({ content:, type:, source: })
712
+ # @return [Hash] { system: String|nil, messages: Array, tool_classes: Array }
805
713
  # @api public
806
- def build_context(input, messages: [], thread_id: nil, config: {})
807
- history = prepare_history(messages: messages, thread_id: thread_id, config: config)
808
- budget = build_token_budget
809
- system_text = build_cached_system_text(input)
810
- user_message = extract_message(input)
811
-
714
+ def build_context(
715
+ input,
716
+ messages: [],
717
+ thread_id: nil,
718
+ config: {},
719
+ budget: build_token_budget,
720
+ instruction: build_instructions(input),
721
+ tools: self.class.tools + _handoff_tools,
722
+ knowledge: self.class.static_knowledge_chunks + instance_knowledge_chunks
723
+ )
812
724
  assembler = LlmContextWindow::Assembler.new(budget: budget)
813
- assembler.add_instruction(system_text) if system_text
814
- fetch_knowledge_chunks(user_message, config).each do |chunk|
815
- assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
725
+ assembler.add_instruction(instruction) if instruction
726
+ assembler.add_capability(tools)
727
+ knowledge.each { |chunk| assembler.add_knowledge(chunk[:content], type: chunk[:type] || :static, trusted: true, source: chunk[:source]) }
728
+
729
+ msgs = Array(messages)
730
+
731
+ if budget && budget_exceeded?(msgs)
732
+ # Default strategy when the token budget is tight:
733
+ # 1. Compact: keep the most recent half of the messages verbatim and
734
+ # replace the older half with a brief omission marker.
735
+ # 2. Trim: if the compacted history still exceeds the budget, call
736
+ # trim_to_budget with the :safe strategy, which discards the oldest
737
+ # message one at a time until the history fits.
738
+ # Subclasses can override build_context to apply a different strategy
739
+ # (e.g. LLM-based summarisation) before calling super.
740
+ keep = [msgs.size / 2, 2].max
741
+ msgs = compact_messages(msgs, keep_tail: keep) do |dropped|
742
+ "[#{dropped.size} earlier messages omitted]"
743
+ end
744
+ remaining = assembler.available_for_messages
745
+ msgs = trim_to_budget(msgs, remaining: remaining, strategy: :safe)
816
746
  end
817
- assembler.add_messages(history)
818
- assembler.build
747
+
748
+ assembler.add_messages(msgs)
749
+ @last_context = assembler.build
819
750
  end
820
751
  protected :build_context
821
752
 
822
- # Fetches knowledge chunks from all registered sources concurrently.
823
- #
824
- # Each source is spawned as a separate task within a {Phronomy::TaskGroup};
825
- # the RAG concurrency gate enforces the +max_concurrent_rag_fetches+ cap.
826
- # Results are returned in registration order (spawn order) as a flat array.
827
- #
828
- # This method is available to subclasses as a building block when
829
- # overriding {#build_context}. Pass a custom +query+ to implement
830
- # multi-hop RAG or other retrieval strategies.
753
+ # Keeps the last +keep+ messages from +messages+, discarding older ones.
754
+ # Use this inside a +build_context+ override to trim conversation history.
831
755
  #
832
- # @param query [String] RAG query string (typically the current user message)
833
- # @param config [Hash] invocation config; relevant keys:
834
- # +:knowledge_sources+, +:rag_failure_policy+, +:cancellation_token+, +:rag_timeout+
835
- # @return [Array<Hash>] flat list of chunk hashes with +:content+, +:type+, +:source+
836
- # @api private
837
- def fetch_knowledge_chunks(query, config)
838
- sources = Array(config[:knowledge_sources])
839
- return [] if sources.empty?
840
-
841
- check_cancellation!(config, "invocation cancelled before RAG fetch")
842
-
843
- # :skip (default) — ignore per-source failures so the agent can still
844
- # answer with partial context. :fail surfaces the first error immediately.
845
- failure_policy =
846
- case config[:rag_failure_policy]
847
- when :fail then :fail_fast
848
- else :skip_failed
849
- end
756
+ # @param messages [Array<RubyLLM::Message>] conversation history
757
+ # @param keep [Integer] number of messages to retain (from the tail)
758
+ # @return [Array<RubyLLM::Message>]
759
+ # @api public
760
+ def trim_messages(messages, keep:)
761
+ Array(messages).last(keep)
762
+ end
763
+ protected :trim_messages
850
764
 
851
- group = Phronomy::Runtime.instance.task_group(failure_policy: failure_policy)
852
- bp = Phronomy.configuration.backpressure
853
- rag_on_full = (bp == :raise) ? :reject : (bp || :wait)
854
- rag_bp_timeout = Phronomy.configuration.backpressure_timeout
855
-
856
- # Spawn all fetches concurrently. Results are returned in spawn order
857
- # (i.e. registration order of knowledge sources) by TaskGroup#await_all.
858
- sources.each do |ks|
859
- group.spawn do
860
- Phronomy::Runtime.instance.gate(:rag).acquire(on_full: rag_on_full, timeout: rag_bp_timeout) do
861
- result, elapsed_ms = Phronomy::Runtime.measure_ms do
862
- ks.fetch_async(
863
- query: query,
864
- cancellation_token: config[:cancellation_token],
865
- timeout: config[:rag_timeout]
866
- ).await
867
- end
868
- Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{elapsed_ms}ms" }
869
- result
870
- end
871
- end
872
- end
765
+ # Removes the oldest messages one at a time until the count is within +limit+.
766
+ #
767
+ # @param messages [Array<RubyLLM::Message>] conversation history
768
+ # @param limit [Integer] maximum number of messages to retain
769
+ # @return [Array<RubyLLM::Message>]
770
+ # @api public
771
+ def drop_messages_over(messages, limit:)
772
+ msgs = Array(messages).dup
773
+ msgs.shift while msgs.size > limit
774
+ msgs
775
+ end
776
+ protected :drop_messages_over
873
777
 
874
- # await_all returns results in spawn order; nil entries indicate
875
- # skipped failures when using :skip_failed.
876
- group.await_all.flat_map { |chunks| Array(chunks) }
778
+ # Replaces all but the last +keep_tail+ messages with a single system summary.
779
+ # The block receives the dropped messages and must return a summary String.
780
+ #
781
+ # @param messages [Array<RubyLLM::Message>] conversation history
782
+ # @param keep_tail [Integer] number of recent messages to preserve verbatim
783
+ # @yield [Array<RubyLLM::Message>] the messages being summarised
784
+ # @yieldreturn [String] summary text
785
+ # @return [Array<RubyLLM::Message>]
786
+ # @api public
787
+ def compact_messages(messages, keep_tail:, &summariser)
788
+ msgs = Array(messages)
789
+ return msgs if msgs.size <= keep_tail
790
+ tail = msgs.last(keep_tail)
791
+ dropped = msgs.first(msgs.size - keep_tail)
792
+ summary_text = summariser.call(dropped)
793
+ [RubyLLM::Message.new(role: :system, content: summary_text)] + tail
877
794
  end
878
- protected :fetch_knowledge_chunks
795
+ protected :compact_messages
879
796
 
880
- # Runs the on_trim / on_compaction_trigger / on_compact pipeline on the
881
- # supplied message array and returns the final Array of message objects
882
- # ready to pass to the Assembler.
797
+ # Trims +messages+ to fit within +remaining+ tokens using the given
798
+ # +strategy+. Returns the trimmed message array without touching the
799
+ # assembler. The caller is responsible for passing the result to
800
+ # +assembler.add_messages+ and calling +assembler.build+.
883
801
  #
884
- # Override this method in a subclass to customize how conversation
885
- # history is filtered or compressed before context assembly.
802
+ # Supported strategies:
803
+ # +:safe+ discard the oldest message one at a time (default)
886
804
  #
887
- # @param messages [Array<RubyLLM::Message>] raw conversation history
888
- # @param thread_id [String, nil] conversation thread identifier
889
- # @param config [Hash] additional invocation options
890
- # @return [Array] filtered and/or compacted message objects
805
+ # @param messages [Array<RubyLLM::Message>] conversation history
806
+ # @param remaining [Integer, nil] token allowance for messages; when +nil+
807
+ # the messages are returned unchanged
808
+ # @param strategy [Symbol] trim strategy (default +:safe+)
809
+ # @return [Array<RubyLLM::Message>]
891
810
  # @api public
892
- def prepare_history(messages: [], thread_id: nil, config: {})
893
- budget = build_token_budget
894
- elements = build_message_elements(Array(messages))
895
-
896
- if (trim_cb = self.class._on_trim_callback)
897
- trim_ctx = Context::Conversation::TrimContext.new(message_elements: elements, budget: budget)
898
- trim_cb.call(trim_ctx)
899
- elements = trim_ctx.message_elements
811
+ def trim_to_budget(messages, remaining:, strategy: :safe)
812
+ return Array(messages) unless remaining
813
+ msgs = Array(messages)
814
+ loop do
815
+ used = msgs.sum { |m| LlmContextWindow::TokenEstimator.estimate(m.content.to_s) }
816
+ return msgs if used <= remaining
817
+ break if msgs.empty?
818
+ msgs = trim_messages(msgs, keep: msgs.size - 1)
900
819
  end
820
+ msgs
821
+ end
822
+ protected :trim_to_budget
901
823
 
902
- if (trigger_cb = self.class._on_compaction_trigger_callback)
903
- trigger_ctx = Context::Conversation::TriggerContext.new(message_elements: elements, budget: budget)
904
- if trigger_cb.call(trigger_ctx)
905
- if (compact_cb = self.class._on_compact_callback)
906
- compact_ctx = Context::Conversation::CompactionContext.new(
907
- message_elements: elements,
908
- budget: budget,
909
- thread_id: thread_id
910
- )
911
- compact_cb.call(compact_ctx)
912
- elements = build_message_elements(compact_ctx.result_messages)
913
- end
914
- end
915
- end
824
+ # Returns +true+ when the estimated token usage of +messages+ exceeds
825
+ # +threshold+ times the available context budget.
826
+ # Always returns +false+ when no token budget is available.
827
+ #
828
+ # @param messages [Array<RubyLLM::Message>] conversation history
829
+ # @param threshold [Float] fraction of the available budget (default 0.8)
830
+ # @return [Boolean]
831
+ # @api public
832
+ def budget_exceeded?(messages, threshold: 0.8)
833
+ return false unless (b = build_token_budget)
834
+ total = Array(messages).sum { |m| LlmContextWindow::TokenEstimator.estimate(m.content.to_s) }
835
+ limit = b.available(used: 0)
836
+ total > limit * threshold
837
+ end
838
+ protected :budget_exceeded?
916
839
 
917
- elements.map { |e| e[:message] }
840
+ # Registers a per-instance knowledge source. Knowledge chunks from all
841
+ # registered sources are included in every LLM call via +build_context+.
842
+ #
843
+ # @param source [#fetch] any object responding to +fetch(query:)+
844
+ # @return [void]
845
+ # @api public
846
+ def add_knowledge_source(source)
847
+ @instance_knowledge_sources ||= []
848
+ @instance_knowledge_sources << source
849
+ end
850
+ protected :add_knowledge_source
851
+
852
+ # Returns knowledge chunks fetched from all instance-level knowledge sources.
853
+ #
854
+ # @return [Array<Hash>]
855
+ # @api private
856
+ def instance_knowledge_chunks
857
+ return [] unless @instance_knowledge_sources
858
+ @instance_knowledge_sources.flat_map { |ks| ks.fetch(query: nil) }
918
859
  end
919
- protected :prepare_history
860
+ protected :instance_knowledge_chunks
920
861
 
921
862
  # Performs a single (non-retried) invocation. Extracted so that #invoke can
922
863
  # wrap it in a retry loop without duplicating the LLM interaction logic.
923
864
  def invoke_once(input, messages: [], thread_id: nil, config: {})
924
- caller_meta = {}
925
- caller_meta[:user_id] = config[:user_id] if config[:user_id]
926
- caller_meta[:session_id] = config[:session_id] if config[:session_id]
865
+ trace("agent.invoke", input: input, **_build_caller_meta(config)) do |_span|
866
+ run_input_guardrails!(input)
867
+
868
+ user_message = extract_message(input)
869
+ chat = build_chat
870
+ context = build_context(
871
+ input,
872
+ messages: messages, thread_id: thread_id, config: config,
873
+ budget: build_token_budget, instruction: build_instructions(input),
874
+ tools: self.class.tools + _handoff_tools
875
+ )
876
+ _apply_context_to_chat(chat, context)
877
+
878
+ run_before_completion_hooks!(chat, config)
879
+ _register_suspension_hook!(chat)
880
+ check_cancellation!(config, "invocation cancelled before LLM call")
881
+
882
+ result, usage = _complete_with_suspension_guard(
883
+ chat, user_message, config,
884
+ thread_id: thread_id, original_input: input
885
+ )
886
+ next [result, usage] if result[:suspended]
887
+
888
+ run_output_guardrails!(result[:output])
889
+ [result, usage]
890
+ end
891
+ end
892
+
893
+ def _build_caller_meta(config)
894
+ meta = {}
895
+ meta[:user_id] = config[:user_id] if config[:user_id]
896
+ meta[:session_id] = config[:session_id] if config[:session_id]
927
897
  if (ic = config[:invocation_context])
928
- caller_meta[:task_id] = ic.task_id if ic.task_id
929
- caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
898
+ meta[:task_id] = ic.task_id if ic.task_id
899
+ meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
930
900
  end
901
+ meta
902
+ end
931
903
 
932
- trace("agent.invoke", input: input, **caller_meta) do |_span|
933
- Agent::InvocationPipeline.new(self).run(
934
- input,
935
- messages: messages,
904
+ def _apply_context_to_chat(chat, context)
905
+ apply_instructions(chat, context[:system]) if context[:system]
906
+ (context[:tool_classes] || []).each { |tc| chat.with_tool(prepare_tool_class(tc)) }
907
+ context[:messages].each { |msg| chat.messages << msg }
908
+ end
909
+
910
+ # Submits the LLM call via LLMAdapter and handles SuspendSignal.
911
+ # Sets/clears the chat cancellation token around the call so that
912
+ # ParallelToolChat can observe cancellation without Thread.current.
913
+ # Returns [result_hash, usage_or_nil].
914
+ def _complete_with_suspension_guard(chat, user_message, config, thread_id:, original_input:)
915
+ chat.cancellation_token = config[:cancellation_token] if chat.respond_to?(:cancellation_token=)
916
+ begin
917
+ adapter = Phronomy.configuration.llm_adapter
918
+ response = adapter.complete_async(chat, user_message, config: config).await
919
+ rescue SuspendSignal => signal
920
+ checkpoint = Checkpoint.new(
921
+ checkpoint_id: SecureRandom.uuid,
922
+ agent_class: self.class.name,
923
+ requested_at: Time.now.utc,
936
924
  thread_id: thread_id,
937
- config: config
925
+ original_input: original_input,
926
+ messages: chat.messages.dup,
927
+ pending_tool_name: signal.tool_name,
928
+ pending_tool_args: signal.args,
929
+ pending_tool_call_id: signal.tool_call_id
938
930
  )
931
+ return [{output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}, nil]
932
+ ensure
933
+ chat.cancellation_token = nil if chat.respond_to?(:cancellation_token=)
939
934
  end
935
+ output = response.content
936
+ usage = Phronomy::TokenUsage.from_tokens(response.tokens)
937
+ [{output: output, messages: chat.messages, usage: usage}, usage]
938
+ end
939
+
940
+ def _drain_stream(chat, user_message, config, &block)
941
+ adapter = Phronomy.configuration.llm_adapter
942
+ chunk_queue = Phronomy::Concurrency::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
943
+ pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
944
+
945
+ loop do
946
+ chunk = chunk_queue.pop
947
+ break if chunk.nil?
948
+ block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
949
+ check_cancellation!(config, "invocation cancelled during streaming")
950
+ end
951
+
952
+ response = pending.await
953
+ [response.content, Phronomy::TokenUsage.from_tokens(response.tokens)]
940
954
  end
941
955
 
942
956
  # Builds a TokenBudget for this agent's model if possible.
@@ -965,61 +979,13 @@ module Phronomy
965
979
  nil
966
980
  end
967
981
 
968
- # Converts a flat Array of message objects into the internal message_elements
969
- # format used by TrimContext, TriggerContext, and CompactionContext.
970
- # Each element receives a 0-based synthetic seq number.
971
- #
972
- # @param messages [Array] message-like objects with #role and #content
973
- # @return [Array<Hash>]
974
- # @api public
975
- def build_message_elements(messages)
976
- Array(messages).each_with_index.map do |msg, idx|
977
- tokens = LlmContextWindow::TokenEstimator.estimate(msg.content.to_s)
978
- {seq: idx, message: msg, tokens: tokens, role: msg.role}
979
- end
980
- end
981
-
982
- # Builds (or returns a cached) system prompt text.
983
- # The fingerprint is a SHA-256 digest of the instruction text concatenated
984
- # with the content of every registered static knowledge source.
985
- # When the fingerprint is unchanged the ContextVersionCache returns the
986
- # previously assembled text without re-fetching any sources.
987
- #
988
- # @param input [String, Hash] the agent's current input (used for template evaluation)
989
- # @return [String, nil] assembled system text, or nil when empty
990
- # @api public
991
- def build_cached_system_text(input)
992
- instruction = build_instructions(input)
993
-
994
- static_chunks = self.class.static_knowledge_chunks
995
-
996
- fingerprint = Digest::SHA256.hexdigest(
997
- [instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
998
- )
999
-
1000
- cache = (@context_version_cache ||= LlmContextWindow::ContextVersionCache.new)
1001
- unless cache.valid?(fingerprint)
1002
- parts = [instruction]
1003
- static_chunks.each do |chunk|
1004
- parts << LlmContextWindow::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
1005
- end
1006
- cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
1007
- end
1008
-
1009
- # Persist a reference on the instance so that context_version_cache
1010
- # remains accessible after invoke completes.
1011
- @last_context_version_cache = cache
1012
-
1013
- cache.system_text.empty? ? nil : cache.system_text
1014
- end
1015
-
1016
982
  # Returns the chat class to instantiate for this invocation.
1017
- # When EventLoop mode is enabled ({Phronomy.configuration.event_loop}),
983
+ # When {Phronomy.configuration.parallel_tool_execution} is true,
1018
984
  # returns {ParallelToolChat} so that concurrent tool dispatch is enabled.
1019
985
  # Falls back to +nil+ otherwise, signalling {#build_chat} to use the
1020
986
  # standard +RubyLLM.chat+ factory.
1021
987
  def build_chat_class
1022
- Phronomy.configuration.event_loop ? Phronomy::MultiAgent::ParallelToolChat : nil
988
+ Phronomy.configuration.parallel_tool_execution ? Phronomy::MultiAgent::ParallelToolChat : nil
1023
989
  end
1024
990
 
1025
991
  def build_chat
@@ -1039,10 +1005,6 @@ module Phronomy
1039
1005
  RubyLLM.chat(**opts)
1040
1006
  end
1041
1007
  chat.with_temperature(t) if t
1042
- self.class.tools.each do |tool_class|
1043
- chat.with_tool(prepare_tool_class(tool_class))
1044
- end
1045
- _handoff_tools.each { |tc| chat.with_tool(tc) }
1046
1008
  chat
1047
1009
  end
1048
1010
 
@@ -1102,7 +1064,7 @@ module Phronomy
1102
1064
  # Builds the final tool class to register with the chat.
1103
1065
  #
1104
1066
  # When an already-instantiated tool object is passed (e.g. a
1105
- # {Phronomy::Tool::McpTool} returned by +McpTool.from_server+), it is
1067
+ # {Phronomy::Tools::Mcp} returned by +Phronomy::Tools::Mcp.from_server+), it is
1106
1068
  # returned as-is. RubyLLM's +with_tool+ accepts both classes and
1107
1069
  # instances, so no wrapping is needed.
1108
1070
  #
@@ -1110,7 +1072,7 @@ module Phronomy
1110
1072
  # 1. Alias override — when the Hash form of .tools maps this class to an
1111
1073
  # explicit name, an anonymous subclass with that tool_name is returned.
1112
1074
  # 2. Scope policy — when a scope is declared on the tool, the configured
1113
- # {Phronomy::Tool::ScopePolicy} (or the default) is evaluated.
1075
+ # {Phronomy::Agent::Context::Capability::ScopePolicy} (or the default) is evaluated.
1114
1076
  # +:reject+ wraps the tool to return a denial message without executing.
1115
1077
  # +:approve+ behaves like requiring approval (same as step 3 when the
1116
1078
  # tool does not already have +requires_approval+).
@@ -1120,7 +1082,7 @@ module Phronomy
1120
1082
  # (tool_name, args) and, if it returns falsy, the tool returns a denial
1121
1083
  # message instead of executing.
1122
1084
  def prepare_tool_class(tool_class)
1123
- # When an instantiated tool object is passed (e.g. McpTool.from_server
1085
+ # When an instantiated tool object is passed (e.g. Phronomy::Tools::Mcp.from_server
1124
1086
  # returns an instance, not a class), skip class-level processing and
1125
1087
  # return it directly. RubyLLM#with_tool handles both forms.
1126
1088
  return tool_class unless tool_class.is_a?(Class)
@@ -1139,7 +1101,7 @@ module Phronomy
1139
1101
  # Step 2: evaluate scope policy.
1140
1102
  scope = resolved.scope
1141
1103
  if scope
1142
- policy = @scope_policy || Phronomy::Tool::ScopePolicy::DEFAULT
1104
+ policy = @scope_policy || Phronomy::Agent::Context::Capability::ScopePolicy::DEFAULT
1143
1105
  decision = policy.call(resolved, scope, self)
1144
1106
  case decision
1145
1107
  when :reject