phronomy 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. checksums.yaml +4 -4
  2. data/README.md +31 -41
  3. data/benchmark/baseline.json +1 -1
  4. data/benchmark/bench_agent_invoke.rb +1 -1
  5. data/benchmark/bench_context_assembler.rb +9 -1
  6. data/benchmark/bench_regression.rb +8 -8
  7. data/benchmark/bench_tool_schema.rb +2 -2
  8. data/benchmark/bench_vector_store.rb +1 -1
  9. data/docs/decisions/011-build-context-as-single-llm-input-authority.md +224 -0
  10. data/lib/phronomy/agent/base.rb +253 -351
  11. data/lib/phronomy/agent/concerns/suspendable.rb +6 -6
  12. data/lib/phronomy/agent/context/capability/base.rb +689 -0
  13. data/lib/phronomy/agent/context/capability/scope_policy.rb +54 -0
  14. data/lib/phronomy/agent/context/knowledge/base.rb +58 -0
  15. data/lib/phronomy/agent/context/knowledge/entity_knowledge.rb +102 -0
  16. data/lib/phronomy/agent/context/knowledge/static_knowledge.rb +58 -0
  17. data/lib/phronomy/agent/invocation_pipeline.rb +10 -1
  18. data/lib/phronomy/agent/react_agent.rb +24 -23
  19. data/lib/phronomy/agent/shared_state.rb +2 -2
  20. data/lib/phronomy/agent/tool_executor.rb +1 -1
  21. data/lib/phronomy/concurrency/gate_registry.rb +0 -1
  22. data/lib/phronomy/configuration.rb +0 -6
  23. data/lib/phronomy/llm_context_window/assembler.rb +77 -44
  24. data/lib/phronomy/multi_agent/handoff.rb +4 -4
  25. data/lib/phronomy/multi_agent/orchestrator.rb +1 -1
  26. data/lib/phronomy/multi_agent/team_coordinator.rb +2 -2
  27. data/lib/phronomy/runtime/runtime_metrics.rb +0 -1
  28. data/lib/phronomy/runtime.rb +1 -2
  29. data/lib/phronomy/tool.rb +3 -4
  30. data/lib/phronomy/{tool/agent_tool.rb → tools/agent.rb} +6 -6
  31. data/lib/phronomy/{tool/mcp_tool.rb → tools/mcp.rb} +9 -9
  32. data/lib/phronomy/tools/vector_search.rb +70 -0
  33. data/lib/phronomy/vector_store/async_backend.rb +110 -0
  34. data/lib/phronomy/vector_store/base.rb +89 -0
  35. data/lib/phronomy/vector_store/embeddings/base.rb +41 -0
  36. data/lib/phronomy/vector_store/embeddings/ruby_llm_embeddings.rb +47 -0
  37. data/lib/phronomy/vector_store/in_memory.rb +103 -0
  38. data/lib/phronomy/vector_store/loader/base.rb +27 -0
  39. data/lib/phronomy/vector_store/loader/csv_loader.rb +58 -0
  40. data/lib/phronomy/vector_store/loader/markdown_loader.rb +78 -0
  41. data/lib/phronomy/vector_store/loader/plain_text_loader.rb +24 -0
  42. data/lib/phronomy/vector_store/pgvector.rb +127 -0
  43. data/lib/phronomy/vector_store/redis_search.rb +192 -0
  44. data/lib/phronomy/vector_store/splitter/base.rb +49 -0
  45. data/lib/phronomy/vector_store/splitter/fixed_size_splitter.rb +53 -0
  46. data/lib/phronomy/vector_store/splitter/recursive_splitter.rb +107 -0
  47. data/lib/phronomy/vector_store.rb +16 -4
  48. data/lib/phronomy/version.rb +1 -1
  49. data/lib/phronomy.rb +2 -1
  50. data/scripts/api_snapshot.rb +11 -9
  51. metadata +28 -32
  52. data/lib/phronomy/agent/context/conversation/compaction_context.rb +0 -117
  53. data/lib/phronomy/agent/context/conversation/trigger_context.rb +0 -43
  54. data/lib/phronomy/agent/context/conversation/trim_context.rb +0 -82
  55. data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +0 -45
  56. data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +0 -51
  57. data/lib/phronomy/agent/context/knowledge/loader/base.rb +0 -31
  58. data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +0 -62
  59. data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +0 -82
  60. data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +0 -28
  61. data/lib/phronomy/agent/context/knowledge/source/base.rb +0 -60
  62. data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +0 -102
  63. data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +0 -63
  64. data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +0 -58
  65. data/lib/phronomy/agent/context/knowledge/splitter/base.rb +0 -53
  66. data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +0 -57
  67. data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +0 -111
  68. data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +0 -116
  69. data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +0 -95
  70. data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +0 -109
  71. data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +0 -133
  72. data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +0 -198
  73. data/lib/phronomy/embeddings.rb +0 -11
  74. data/lib/phronomy/loader.rb +0 -13
  75. data/lib/phronomy/splitter.rb +0 -12
  76. data/lib/phronomy/tool/base.rb +0 -685
  77. data/lib/phronomy/tool/scope_policy.rb +0 -50
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "digest"
4
3
  require "securerandom"
5
4
  require_relative "concerns/retryable"
6
5
  require_relative "concerns/guardrailable"
@@ -255,10 +254,10 @@ module Phronomy
255
254
  # the first time +invoke+ is called. The cache persists for the lifetime
256
255
  # of the process; call {.static_knowledge_refresh!} to force a reload.
257
256
  #
258
- # @param sources [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
257
+ # @param sources [Array<Phronomy::Agent::Context::Knowledge::Base>]
259
258
  # @example
260
259
  # class PolicyAgent < Phronomy::Agent::Base
261
- # static_knowledge Phronomy::Agent::Context::Knowledge::Source::StaticKnowledge.new(POLICY_TEXT)
260
+ # static_knowledge Phronomy::Agent::Context::Knowledge::StaticKnowledge.new(POLICY_TEXT)
262
261
  # end
263
262
  # @api public
264
263
  def static_knowledge(*sources)
@@ -269,7 +268,7 @@ module Phronomy
269
268
  end
270
269
 
271
270
  # Returns the registered static knowledge sources.
272
- # @return [Array<Phronomy::Agent::Context::Knowledge::Source::Base>]
271
+ # @return [Array<Phronomy::Agent::Context::Knowledge::Base>]
273
272
  # @api public
274
273
  def static_knowledge_sources
275
274
  @static_knowledge_sources || []
@@ -302,80 +301,6 @@ module Phronomy
302
301
  @static_knowledge_chunks = nil
303
302
  end
304
303
 
305
- # Registers a callback that is invoked before every LLM call so the
306
- # application can remove stale or irrelevant messages from the
307
- # conversation history.
308
- #
309
- # The block receives a {Phronomy::Agent::Context::Conversation::TrimContext} and may call
310
- # +ctx.remove(seqs)+ to drop messages by seq number. Changes affect
311
- # only the current invocation; the underlying memory store is unchanged.
312
- #
313
- # @yield [ctx] Phronomy::Agent::Context::Conversation::TrimContext
314
- # @example Drop the oldest message when over 80% of budget is used
315
- # on_trim do |ctx|
316
- # limit = ctx.budget&.available(used: 0) || Float::INFINITY
317
- # ctx.remove(ctx.message_elements.first[:seq]) if ctx.total_tokens > limit * 0.8
318
- # end
319
- # @api public
320
- def on_trim(&block)
321
- @on_trim_callback = block
322
- end
323
-
324
- # @return [Proc, nil]
325
- # @api private
326
- def _on_trim_callback
327
- @on_trim_callback
328
- end
329
-
330
- # Registers a callback that decides whether compaction should run.
331
- # Evaluated before every LLM call (after on_trim). If the block returns
332
- # truthy AND an +on_compact+ callback is also registered, the compact
333
- # pipeline is executed.
334
- #
335
- # The block receives a read-only {Phronomy::Agent::Context::Conversation::TriggerContext}.
336
- #
337
- # @yield [ctx] Phronomy::Agent::Context::Conversation::TriggerContext
338
- # @return [Boolean] truthy → run on_compact; falsy → skip
339
- # @example Trigger when messages exceed 70% of token budget
340
- # on_compaction_trigger do |ctx|
341
- # limit = ctx.budget&.available(used: 0) || Float::INFINITY
342
- # ctx.total_tokens > limit * 0.7
343
- # end
344
- # @api public
345
- def on_compaction_trigger(&block)
346
- @on_compaction_trigger_callback = block
347
- end
348
-
349
- # @return [Proc, nil]
350
- # @api private
351
- def _on_compaction_trigger_callback
352
- @on_compaction_trigger_callback
353
- end
354
-
355
- # Registers a callback that performs the actual compaction when the
356
- # +on_compaction_trigger+ callback fires. The block receives a
357
- # {Phronomy::Agent::Context::Conversation::CompactionContext} and should call +ctx.compact+
358
- # to specify which messages to summarise.
359
- #
360
- # @yield [ctx] Phronomy::Agent::Context::Conversation::CompactionContext
361
- # @example Replace the first 4 messages with a short summary
362
- # on_compact do |ctx|
363
- # ctx.compact(0..3) do |elements|
364
- # texts = elements.map { |e| e[:message].content }.join(" | ")
365
- # "Earlier conversation summary: #{texts}"
366
- # end
367
- # end
368
- # @api public
369
- def on_compact(&block)
370
- @on_compact_callback = block
371
- end
372
-
373
- # @return [Proc, nil]
374
- # @api private
375
- def _on_compact_callback
376
- @on_compact_callback
377
- end
378
-
379
304
  # When enabled, attaches Anthropic prompt-cache markers to the system
380
305
  # message so that the fixed instructions are served from cache on
381
306
  # subsequent turns, reducing input-token costs.
@@ -453,7 +378,7 @@ module Phronomy
453
378
 
454
379
  # Registers an anonymous handoff tool class on this agent instance.
455
380
  # Called by Runner during construction when routes are configured.
456
- # @param tool_class [Class<Phronomy::Tool::Base>]
381
+ # @param tool_class [Class<Phronomy::Agent::Context::Capability::Base>]
457
382
  # @return [self]
458
383
  # @api private
459
384
  def _add_handoff_tool(tool_class)
@@ -482,7 +407,6 @@ module Phronomy
482
407
  # @param thread_id [String, nil] conversation thread identifier, forwarded
483
408
  # to the compaction context when on_compact is configured.
484
409
  # @param config [Hash] additional runtime options:
485
- # +:knowledge_sources+ (Array) — dynamic knowledge sources for this turn
486
410
  # +:user_id+ (+String+, optional) — caller identity forwarded to the tracer
487
411
  # +:session_id+ (+String+, optional) — session identity forwarded to the tracer
488
412
  # @param invocation_context [Phronomy::InvocationContext, nil] optional first-class context
@@ -519,60 +443,9 @@ module Phronomy
519
443
  thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
520
444
  end
521
445
  if Phronomy.configuration.event_loop
522
- # Protect against blocking the EventLoop thread itself.
523
- if Phronomy::EventLoop.current?
524
- raise Phronomy::Error,
525
- "Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
526
- "entry action. Use agent.run_as_child(input, ctx: ctx) instead."
527
- end
528
-
529
- # Build an effective config that includes the invoke_timeout scope's
530
- # CancellationToken before constructing the FSM. This ensures that
531
- # every LLM, tool, and RAG call made inside _invoke_impl observes
532
- # cancellation when the deadline fires.
533
- timeout_sec = self.class.invoke_timeout
534
- effective_config, scope = if timeout_sec
535
- s = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
536
- s.deadline_in(timeout_sec)
537
- [config.merge(cancellation_token: s.token), s]
538
- else
539
- [config, nil]
540
- end
541
-
542
- fsm = Agent::FSM.new(
543
- agent: self,
544
- input: input,
545
- messages: messages,
546
- thread_id: thread_id || SecureRandom.uuid,
547
- config: effective_config
548
- )
549
- completion_queue = Phronomy::EventLoop.instance.register(fsm)
550
- result = if scope
551
- scope.pop_queue(completion_queue) do
552
- raise Phronomy::TimeoutError,
553
- "Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
554
- end
555
- else
556
- completion_queue.pop
557
- end
558
- raise result if result.is_a?(Exception)
559
- result
446
+ _invoke_via_event_loop(input, messages: messages, thread_id: thread_id, config: config)
560
447
  else
561
- # Guard: calling invoke from inside a scheduler task would block the task
562
- # against itself when using a cooperative backend. Use invoke_async
563
- # instead to compose agents without introducing a blocking wait.
564
- if Phronomy::Task.current
565
- msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
566
- "This blocks the scheduler until the inner invocation completes, preventing " \
567
- "other tasks from making progress. Use invoke_async + await instead."
568
- if Phronomy.configuration.strict_runtime_guards
569
- raise Phronomy::SchedulerReentrancyError, msg
570
- elsif Phronomy.configuration.logger
571
- Phronomy.configuration.logger.warn(msg)
572
- else
573
- Kernel.warn("[phronomy] WARNING: #{msg}")
574
- end
575
- end
448
+ _check_scheduler_reentrancy
576
449
  invoke_async(input, messages: messages, thread_id: thread_id, config: config).await
577
450
  end
578
451
  end
@@ -687,19 +560,11 @@ module Phronomy
687
560
  raise
688
561
  end
689
562
 
690
- # Returns the {LlmContextWindow::ContextVersionCache} built during the most recent
691
- # {#invoke} call on this agent instance. The thread-local cache entry is
692
- # cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
693
- # in +@last_context_version_cache+ so callers can inspect it after invoke
694
- # returns.
695
- #
696
- # NOTE: Not thread-safe. When the same Agent instance is used concurrently,
697
- # +@last_context_version_cache+ reflects the most recent +invoke+ on *any*
698
- # thread. For per-invocation isolation, use a separate Agent instance per
699
- # thread.
563
+ # @deprecated The context version cache has been removed. Returns nil.
564
+ # Retained for backward compatibility with callers using safe navigation (+&.reset+).
700
565
  # @api private
701
566
  def context_version_cache
702
- @last_context_version_cache
567
+ nil
703
568
  end
704
569
 
705
570
  private
@@ -722,29 +587,75 @@ module Phronomy
722
587
  [effective_thread_id, effective_config]
723
588
  end
724
589
 
725
- # Streaming implementation for #stream.
726
- def _stream_impl(input, messages: [], thread_id: nil, config: {}, &block)
727
- caller_meta = {}
728
- caller_meta[:user_id] = config[:user_id] if config[:user_id]
729
- caller_meta[:session_id] = config[:session_id] if config[:session_id]
730
- if (ic = config[:invocation_context])
731
- caller_meta[:task_id] = ic.task_id if ic.task_id
732
- caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
590
+ def _invoke_via_event_loop(input, messages:, thread_id:, config:)
591
+ if Phronomy::EventLoop.current?
592
+ raise Phronomy::Error,
593
+ "Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
594
+ "entry action. Use agent.run_as_child(input, ctx: ctx) instead."
595
+ end
596
+
597
+ timeout_sec = self.class.invoke_timeout
598
+ effective_config, scope = if timeout_sec
599
+ s = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
600
+ s.deadline_in(timeout_sec)
601
+ [config.merge(cancellation_token: s.token), s]
602
+ else
603
+ [config, nil]
604
+ end
605
+
606
+ fsm = Agent::FSM.new(
607
+ agent: self,
608
+ input: input,
609
+ messages: messages,
610
+ thread_id: thread_id || SecureRandom.uuid,
611
+ config: effective_config
612
+ )
613
+ completion_queue = Phronomy::EventLoop.instance.register(fsm)
614
+ result = if scope
615
+ scope.pop_queue(completion_queue) do
616
+ raise Phronomy::TimeoutError,
617
+ "Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
618
+ end
619
+ else
620
+ completion_queue.pop
733
621
  end
622
+ raise result if result.is_a?(Exception)
623
+ result
624
+ end
734
625
 
735
- trace("agent.invoke", input: input, **caller_meta) do |_span|
626
+ def _check_scheduler_reentrancy
627
+ return unless Phronomy::Task.current
628
+
629
+ msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
630
+ "This blocks the scheduler until the inner invocation completes, preventing " \
631
+ "other tasks from making progress. Use invoke_async + await instead."
632
+ if Phronomy.configuration.strict_runtime_guards
633
+ raise Phronomy::SchedulerReentrancyError, msg
634
+ elsif Phronomy.configuration.logger
635
+ Phronomy.configuration.logger.warn(msg)
636
+ else
637
+ Kernel.warn("[phronomy] WARNING: #{msg}")
638
+ end
639
+ end
640
+
641
+ # Streaming implementation for #stream.
642
+ def _stream_impl(input, messages: [], thread_id: nil, config: {}, &block)
643
+ trace("agent.invoke", input: input, **_build_caller_meta(config)) do |_span|
736
644
  run_input_guardrails!(input)
737
645
 
738
646
  chat = build_chat
739
647
  user_message = extract_message(input)
648
+ context = build_context(
649
+ input,
650
+ messages: messages,
651
+ thread_id: thread_id,
652
+ config: config,
653
+ budget: build_token_budget,
654
+ instruction: build_instructions(input),
655
+ tools: self.class.tools + _handoff_tools
656
+ )
657
+ _apply_context_to_chat(chat, context)
740
658
 
741
- # Assemble context (system prompt + history). Override #build_context to
742
- # inject custom context editing logic at the Agent subclass level.
743
- context = build_context(input, messages: messages, thread_id: thread_id, config: config)
744
- apply_instructions(chat, context[:system]) if context[:system]
745
- context[:messages].each { |msg| chat.messages << msg }
746
-
747
- # Wire per-event callbacks to yield StreamEvents.
748
659
  current_tool_call = nil
749
660
  chat.on_tool_call do |tool_call|
750
661
  current_tool_call = tool_call
@@ -758,32 +669,9 @@ module Phronomy
758
669
  }))
759
670
  end
760
671
 
761
- # Run before_completion hooks (global → class → instance) before the LLM call.
762
672
  run_before_completion_hooks!(chat, config)
763
673
 
764
- # Route the LLM streaming call through the configured LLMAdapter.
765
- # Chunks are pushed into a token queue by the pool worker thread and
766
- # drained here (on the caller's side) so that the user block is never
767
- # executed on a BlockingAdapterPool worker thread.
768
- # The queue capacity is bounded by Configuration#stream_queue_max_size
769
- # (nil = unbounded) to provide backpressure against a fast LLM producer.
770
- adapter = Phronomy.configuration.llm_adapter
771
- chunk_queue = Phronomy::Concurrency::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
772
- pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
773
-
774
- # Drain the chunk queue on this side (scheduler task / caller thread).
775
- loop do
776
- chunk = chunk_queue.pop
777
- break if chunk.nil? # queue closed — LLM streaming complete
778
- block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
779
- check_cancellation!(config, "invocation cancelled during streaming")
780
- end
781
-
782
- response = pending.await
783
-
784
- output = response.content
785
- usage = Phronomy::TokenUsage.from_tokens(response.tokens)
786
-
674
+ output, usage = _drain_stream(chat, user_message, config, &block)
787
675
  run_output_guardrails!(output)
788
676
 
789
677
  result = {output: output, messages: chat.messages, usage: usage}
@@ -797,139 +685,172 @@ module Phronomy
797
685
  # inject custom context editing logic without having to override
798
686
  # the full #invoke_once pipeline.
799
687
  #
800
- # @param input [String, Hash] the user's input for this turn
801
- # @param messages [Array<RubyLLM::Message>] raw conversation history
802
- # @param thread_id [String, nil] conversation thread identifier
803
- # @param config [Hash] the invocation config (see #invoke)
804
- # @return [Hash] { system: String|nil, messages: Array }
688
+ # The keyword arguments +budget+, +instruction+, +tools+, and +knowledge+
689
+ # carry pre-computed values. Override them in a subclass call to +super+
690
+ # to inject custom context without recomputing the defaults.
691
+ #
692
+ # @param input [String, Hash] the user's input for this turn
693
+ # @param messages [Array<RubyLLM::Message>] raw conversation history
694
+ # @param thread_id [String, nil] conversation thread identifier
695
+ # @param config [Hash] the invocation config (see #invoke)
696
+ # @param budget [LlmContextWindow::TokenBudget, nil] pre-computed token budget
697
+ # @param instruction [String, nil] pre-computed system instruction
698
+ # @param tools [Array<Class>] tool classes to expose
699
+ # @param knowledge [Array<Hash>] knowledge chunks ({ content:, type:, source: })
700
+ # @return [Hash] { system: String|nil, messages: Array, tool_classes: Array }
805
701
  # @api public
806
- def build_context(input, messages: [], thread_id: nil, config: {})
807
- history = prepare_history(messages: messages, thread_id: thread_id, config: config)
808
- budget = build_token_budget
809
- system_text = build_cached_system_text(input)
810
- user_message = extract_message(input)
811
-
702
+ def build_context(
703
+ input,
704
+ messages: [],
705
+ thread_id: nil,
706
+ config: {},
707
+ budget: build_token_budget,
708
+ instruction: build_instructions(input),
709
+ tools: self.class.tools + _handoff_tools,
710
+ knowledge: self.class.static_knowledge_chunks + instance_knowledge_chunks
711
+ )
812
712
  assembler = LlmContextWindow::Assembler.new(budget: budget)
813
- assembler.add_instruction(system_text) if system_text
814
- fetch_knowledge_chunks(user_message, config).each do |chunk|
815
- assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
713
+ assembler.add_instruction(instruction) if instruction
714
+ assembler.add_capability(tools)
715
+ knowledge.each { |chunk| assembler.add_knowledge(chunk[:content], type: chunk[:type] || :static, trusted: true, source: chunk[:source]) }
716
+
717
+ msgs = Array(messages)
718
+
719
+ if budget && budget_exceeded?(msgs)
720
+ # Default strategy when the token budget is tight:
721
+ # 1. Compact: keep the most recent half of the messages verbatim and
722
+ # replace the older half with a brief omission marker.
723
+ # 2. Trim: if the compacted history still exceeds the budget, call
724
+ # trim_to_budget with the :safe strategy, which discards the oldest
725
+ # message one at a time until the history fits.
726
+ # Subclasses can override build_context to apply a different strategy
727
+ # (e.g. LLM-based summarisation) before calling super.
728
+ keep = [msgs.size / 2, 2].max
729
+ msgs = compact_messages(msgs, keep_tail: keep) do |dropped|
730
+ "[#{dropped.size} earlier messages omitted]"
731
+ end
732
+ remaining = assembler.available_for_messages
733
+ msgs = trim_to_budget(msgs, remaining: remaining, strategy: :safe)
816
734
  end
817
- assembler.add_messages(history)
818
- assembler.build
735
+
736
+ assembler.add_messages(msgs)
737
+ @last_context = assembler.build
819
738
  end
820
739
  protected :build_context
821
740
 
822
- # Fetches knowledge chunks from all registered sources concurrently.
823
- #
824
- # Each source is spawned as a separate task within a {Phronomy::TaskGroup};
825
- # the RAG concurrency gate enforces the +max_concurrent_rag_fetches+ cap.
826
- # Results are returned in registration order (spawn order) as a flat array.
741
+ # Keeps the last +keep+ messages from +messages+, discarding older ones.
742
+ # Use this inside a +build_context+ override to trim conversation history.
827
743
  #
828
- # This method is available to subclasses as a building block when
829
- # overriding {#build_context}. Pass a custom +query+ to implement
830
- # multi-hop RAG or other retrieval strategies.
831
- #
832
- # @param query [String] RAG query string (typically the current user message)
833
- # @param config [Hash] invocation config; relevant keys:
834
- # +:knowledge_sources+, +:rag_failure_policy+, +:cancellation_token+, +:rag_timeout+
835
- # @return [Array<Hash>] flat list of chunk hashes with +:content+, +:type+, +:source+
836
- # @api private
837
- def fetch_knowledge_chunks(query, config)
838
- sources = Array(config[:knowledge_sources])
839
- return [] if sources.empty?
840
-
841
- check_cancellation!(config, "invocation cancelled before RAG fetch")
842
-
843
- # :skip (default) — ignore per-source failures so the agent can still
844
- # answer with partial context. :fail surfaces the first error immediately.
845
- failure_policy =
846
- case config[:rag_failure_policy]
847
- when :fail then :fail_fast
848
- else :skip_failed
849
- end
744
+ # @param messages [Array<RubyLLM::Message>] conversation history
745
+ # @param keep [Integer] number of messages to retain (from the tail)
746
+ # @return [Array<RubyLLM::Message>]
747
+ # @api public
748
+ def trim_messages(messages, keep:)
749
+ Array(messages).last(keep)
750
+ end
751
+ protected :trim_messages
850
752
 
851
- group = Phronomy::Runtime.instance.task_group(failure_policy: failure_policy)
852
- bp = Phronomy.configuration.backpressure
853
- rag_on_full = (bp == :raise) ? :reject : (bp || :wait)
854
- rag_bp_timeout = Phronomy.configuration.backpressure_timeout
855
-
856
- # Spawn all fetches concurrently. Results are returned in spawn order
857
- # (i.e. registration order of knowledge sources) by TaskGroup#await_all.
858
- sources.each do |ks|
859
- group.spawn do
860
- Phronomy::Runtime.instance.gate(:rag).acquire(on_full: rag_on_full, timeout: rag_bp_timeout) do
861
- result, elapsed_ms = Phronomy::Runtime.measure_ms do
862
- ks.fetch_async(
863
- query: query,
864
- cancellation_token: config[:cancellation_token],
865
- timeout: config[:rag_timeout]
866
- ).await
867
- end
868
- Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{elapsed_ms}ms" }
869
- result
870
- end
871
- end
872
- end
753
+ # Removes the oldest messages one at a time until the count is within +limit+.
754
+ #
755
+ # @param messages [Array<RubyLLM::Message>] conversation history
756
+ # @param limit [Integer] maximum number of messages to retain
757
+ # @return [Array<RubyLLM::Message>]
758
+ # @api public
759
+ def drop_messages_over(messages, limit:)
760
+ msgs = Array(messages).dup
761
+ msgs.shift while msgs.size > limit
762
+ msgs
763
+ end
764
+ protected :drop_messages_over
873
765
 
874
- # await_all returns results in spawn order; nil entries indicate
875
- # skipped failures when using :skip_failed.
876
- group.await_all.flat_map { |chunks| Array(chunks) }
766
+ # Replaces all but the last +keep_tail+ messages with a single system summary.
767
+ # The block receives the dropped messages and must return a summary String.
768
+ #
769
+ # @param messages [Array<RubyLLM::Message>] conversation history
770
+ # @param keep_tail [Integer] number of recent messages to preserve verbatim
771
+ # @yield [Array<RubyLLM::Message>] the messages being summarised
772
+ # @yieldreturn [String] summary text
773
+ # @return [Array<RubyLLM::Message>]
774
+ # @api public
775
+ def compact_messages(messages, keep_tail:, &summariser)
776
+ msgs = Array(messages)
777
+ return msgs if msgs.size <= keep_tail
778
+ tail = msgs.last(keep_tail)
779
+ dropped = msgs.first(msgs.size - keep_tail)
780
+ summary_text = summariser.call(dropped)
781
+ [RubyLLM::Message.new(role: :system, content: summary_text)] + tail
877
782
  end
878
- protected :fetch_knowledge_chunks
783
+ protected :compact_messages
879
784
 
880
- # Runs the on_trim / on_compaction_trigger / on_compact pipeline on the
881
- # supplied message array and returns the final Array of message objects
882
- # ready to pass to the Assembler.
785
+ # Trims +messages+ to fit within +remaining+ tokens using the given
786
+ # +strategy+. Returns the trimmed message array without touching the
787
+ # assembler. The caller is responsible for passing the result to
788
+ # +assembler.add_messages+ and calling +assembler.build+.
883
789
  #
884
- # Override this method in a subclass to customize how conversation
885
- # history is filtered or compressed before context assembly.
790
+ # Supported strategies:
791
+ # +:safe+ discard the oldest message one at a time (default)
886
792
  #
887
- # @param messages [Array<RubyLLM::Message>] raw conversation history
888
- # @param thread_id [String, nil] conversation thread identifier
889
- # @param config [Hash] additional invocation options
890
- # @return [Array] filtered and/or compacted message objects
793
+ # @param messages [Array<RubyLLM::Message>] conversation history
794
+ # @param remaining [Integer, nil] token allowance for messages; when +nil+
795
+ # the messages are returned unchanged
796
+ # @param strategy [Symbol] trim strategy (default +:safe+)
797
+ # @return [Array<RubyLLM::Message>]
891
798
  # @api public
892
- def prepare_history(messages: [], thread_id: nil, config: {})
893
- budget = build_token_budget
894
- elements = build_message_elements(Array(messages))
895
-
896
- if (trim_cb = self.class._on_trim_callback)
897
- trim_ctx = Context::Conversation::TrimContext.new(message_elements: elements, budget: budget)
898
- trim_cb.call(trim_ctx)
899
- elements = trim_ctx.message_elements
799
+ def trim_to_budget(messages, remaining:, strategy: :safe)
800
+ return Array(messages) unless remaining
801
+ msgs = Array(messages)
802
+ loop do
803
+ used = msgs.sum { |m| LlmContextWindow::TokenEstimator.estimate(m.content.to_s) }
804
+ return msgs if used <= remaining
805
+ break if msgs.empty?
806
+ msgs = trim_messages(msgs, keep: msgs.size - 1)
900
807
  end
808
+ msgs
809
+ end
810
+ protected :trim_to_budget
901
811
 
902
- if (trigger_cb = self.class._on_compaction_trigger_callback)
903
- trigger_ctx = Context::Conversation::TriggerContext.new(message_elements: elements, budget: budget)
904
- if trigger_cb.call(trigger_ctx)
905
- if (compact_cb = self.class._on_compact_callback)
906
- compact_ctx = Context::Conversation::CompactionContext.new(
907
- message_elements: elements,
908
- budget: budget,
909
- thread_id: thread_id
910
- )
911
- compact_cb.call(compact_ctx)
912
- elements = build_message_elements(compact_ctx.result_messages)
913
- end
914
- end
915
- end
812
+ # Returns +true+ when the estimated token usage of +messages+ exceeds
813
+ # +threshold+ times the available context budget.
814
+ # Always returns +false+ when no token budget is available.
815
+ #
816
+ # @param messages [Array<RubyLLM::Message>] conversation history
817
+ # @param threshold [Float] fraction of the available budget (default 0.8)
818
+ # @return [Boolean]
819
+ # @api public
820
+ def budget_exceeded?(messages, threshold: 0.8)
821
+ return false unless (b = build_token_budget)
822
+ total = Array(messages).sum { |m| LlmContextWindow::TokenEstimator.estimate(m.content.to_s) }
823
+ limit = b.available(used: 0)
824
+ total > limit * threshold
825
+ end
826
+ protected :budget_exceeded?
916
827
 
917
- elements.map { |e| e[:message] }
828
+ # Registers a per-instance knowledge source. Knowledge chunks from all
829
+ # registered sources are included in every LLM call via +build_context+.
830
+ #
831
+ # @param source [#fetch] any object responding to +fetch(query:)+
832
+ # @return [void]
833
+ # @api public
834
+ def add_knowledge_source(source)
835
+ @instance_knowledge_sources ||= []
836
+ @instance_knowledge_sources << source
918
837
  end
919
- protected :prepare_history
838
+ protected :add_knowledge_source
839
+
840
+ # Returns knowledge chunks fetched from all instance-level knowledge sources.
841
+ #
842
+ # @return [Array<Hash>]
843
+ # @api private
844
+ def instance_knowledge_chunks
845
+ return [] unless @instance_knowledge_sources
846
+ @instance_knowledge_sources.flat_map { |ks| ks.fetch(query: nil) }
847
+ end
848
+ protected :instance_knowledge_chunks
920
849
 
921
850
  # Performs a single (non-retried) invocation. Extracted so that #invoke can
922
851
  # wrap it in a retry loop without duplicating the LLM interaction logic.
923
852
  def invoke_once(input, messages: [], thread_id: nil, config: {})
924
- caller_meta = {}
925
- caller_meta[:user_id] = config[:user_id] if config[:user_id]
926
- caller_meta[:session_id] = config[:session_id] if config[:session_id]
927
- if (ic = config[:invocation_context])
928
- caller_meta[:task_id] = ic.task_id if ic.task_id
929
- caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
930
- end
931
-
932
- trace("agent.invoke", input: input, **caller_meta) do |_span|
853
+ trace("agent.invoke", input: input, **_build_caller_meta(config)) do |_span|
933
854
  Agent::InvocationPipeline.new(self).run(
934
855
  input,
935
856
  messages: messages,
@@ -939,6 +860,39 @@ module Phronomy
939
860
  end
940
861
  end
941
862
 
863
+ def _build_caller_meta(config)
864
+ meta = {}
865
+ meta[:user_id] = config[:user_id] if config[:user_id]
866
+ meta[:session_id] = config[:session_id] if config[:session_id]
867
+ if (ic = config[:invocation_context])
868
+ meta[:task_id] = ic.task_id if ic.task_id
869
+ meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
870
+ end
871
+ meta
872
+ end
873
+
874
+ def _apply_context_to_chat(chat, context)
875
+ apply_instructions(chat, context[:system]) if context[:system]
876
+ (context[:tool_classes] || []).each { |tc| chat.with_tool(prepare_tool_class(tc)) }
877
+ context[:messages].each { |msg| chat.messages << msg }
878
+ end
879
+
880
+ def _drain_stream(chat, user_message, config, &block)
881
+ adapter = Phronomy.configuration.llm_adapter
882
+ chunk_queue = Phronomy::Concurrency::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
883
+ pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
884
+
885
+ loop do
886
+ chunk = chunk_queue.pop
887
+ break if chunk.nil?
888
+ block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
889
+ check_cancellation!(config, "invocation cancelled during streaming")
890
+ end
891
+
892
+ response = pending.await
893
+ [response.content, Phronomy::TokenUsage.from_tokens(response.tokens)]
894
+ end
895
+
942
896
  # Builds a TokenBudget for this agent's model if possible.
943
897
  # When context_window is set at the class level, that value is used directly
944
898
  # (bypassing the RubyLLM catalogue) — useful for locally-hosted models where
@@ -965,54 +919,6 @@ module Phronomy
965
919
  nil
966
920
  end
967
921
 
968
- # Converts a flat Array of message objects into the internal message_elements
969
- # format used by TrimContext, TriggerContext, and CompactionContext.
970
- # Each element receives a 0-based synthetic seq number.
971
- #
972
- # @param messages [Array] message-like objects with #role and #content
973
- # @return [Array<Hash>]
974
- # @api public
975
- def build_message_elements(messages)
976
- Array(messages).each_with_index.map do |msg, idx|
977
- tokens = LlmContextWindow::TokenEstimator.estimate(msg.content.to_s)
978
- {seq: idx, message: msg, tokens: tokens, role: msg.role}
979
- end
980
- end
981
-
982
- # Builds (or returns a cached) system prompt text.
983
- # The fingerprint is a SHA-256 digest of the instruction text concatenated
984
- # with the content of every registered static knowledge source.
985
- # When the fingerprint is unchanged the ContextVersionCache returns the
986
- # previously assembled text without re-fetching any sources.
987
- #
988
- # @param input [String, Hash] the agent's current input (used for template evaluation)
989
- # @return [String, nil] assembled system text, or nil when empty
990
- # @api public
991
- def build_cached_system_text(input)
992
- instruction = build_instructions(input)
993
-
994
- static_chunks = self.class.static_knowledge_chunks
995
-
996
- fingerprint = Digest::SHA256.hexdigest(
997
- [instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
998
- )
999
-
1000
- cache = (@context_version_cache ||= LlmContextWindow::ContextVersionCache.new)
1001
- unless cache.valid?(fingerprint)
1002
- parts = [instruction]
1003
- static_chunks.each do |chunk|
1004
- parts << LlmContextWindow::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
1005
- end
1006
- cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
1007
- end
1008
-
1009
- # Persist a reference on the instance so that context_version_cache
1010
- # remains accessible after invoke completes.
1011
- @last_context_version_cache = cache
1012
-
1013
- cache.system_text.empty? ? nil : cache.system_text
1014
- end
1015
-
1016
922
  # Returns the chat class to instantiate for this invocation.
1017
923
  # When EventLoop mode is enabled ({Phronomy.configuration.event_loop}),
1018
924
  # returns {ParallelToolChat} so that concurrent tool dispatch is enabled.
@@ -1039,10 +945,6 @@ module Phronomy
1039
945
  RubyLLM.chat(**opts)
1040
946
  end
1041
947
  chat.with_temperature(t) if t
1042
- self.class.tools.each do |tool_class|
1043
- chat.with_tool(prepare_tool_class(tool_class))
1044
- end
1045
- _handoff_tools.each { |tc| chat.with_tool(tc) }
1046
948
  chat
1047
949
  end
1048
950
 
@@ -1102,7 +1004,7 @@ module Phronomy
1102
1004
  # Builds the final tool class to register with the chat.
1103
1005
  #
1104
1006
  # When an already-instantiated tool object is passed (e.g. a
1105
- # {Phronomy::Tool::McpTool} returned by +McpTool.from_server+), it is
1007
+ # {Phronomy::Tools::Mcp} returned by +Phronomy::Tools::Mcp.from_server+), it is
1106
1008
  # returned as-is. RubyLLM's +with_tool+ accepts both classes and
1107
1009
  # instances, so no wrapping is needed.
1108
1010
  #
@@ -1110,7 +1012,7 @@ module Phronomy
1110
1012
  # 1. Alias override — when the Hash form of .tools maps this class to an
1111
1013
  # explicit name, an anonymous subclass with that tool_name is returned.
1112
1014
  # 2. Scope policy — when a scope is declared on the tool, the configured
1113
- # {Phronomy::Tool::ScopePolicy} (or the default) is evaluated.
1015
+ # {Phronomy::Agent::Context::Capability::ScopePolicy} (or the default) is evaluated.
1114
1016
  # +:reject+ wraps the tool to return a denial message without executing.
1115
1017
  # +:approve+ behaves like requiring approval (same as step 3 when the
1116
1018
  # tool does not already have +requires_approval+).
@@ -1120,7 +1022,7 @@ module Phronomy
1120
1022
  # (tool_name, args) and, if it returns falsy, the tool returns a denial
1121
1023
  # message instead of executing.
1122
1024
  def prepare_tool_class(tool_class)
1123
- # When an instantiated tool object is passed (e.g. McpTool.from_server
1025
+ # When an instantiated tool object is passed (e.g. Phronomy::Tools::Mcp.from_server
1124
1026
  # returns an instance, not a class), skip class-level processing and
1125
1027
  # return it directly. RubyLLM#with_tool handles both forms.
1126
1028
  return tool_class unless tool_class.is_a?(Class)
@@ -1139,7 +1041,7 @@ module Phronomy
1139
1041
  # Step 2: evaluate scope policy.
1140
1042
  scope = resolved.scope
1141
1043
  if scope
1142
- policy = @scope_policy || Phronomy::Tool::ScopePolicy::DEFAULT
1044
+ policy = @scope_policy || Phronomy::Agent::Context::Capability::ScopePolicy::DEFAULT
1143
1045
  decision = policy.call(resolved, scope, self)
1144
1046
  case decision
1145
1047
  when :reject