phronomy 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +31 -41
- data/benchmark/baseline.json +1 -1
- data/benchmark/bench_agent_invoke.rb +1 -1
- data/benchmark/bench_context_assembler.rb +9 -1
- data/benchmark/bench_regression.rb +8 -8
- data/benchmark/bench_tool_schema.rb +2 -2
- data/benchmark/bench_vector_store.rb +1 -1
- data/docs/decisions/011-build-context-as-single-llm-input-authority.md +224 -0
- data/lib/phronomy/agent/base.rb +253 -351
- data/lib/phronomy/agent/concerns/suspendable.rb +6 -6
- data/lib/phronomy/agent/context/capability/base.rb +689 -0
- data/lib/phronomy/agent/context/capability/scope_policy.rb +54 -0
- data/lib/phronomy/agent/context/knowledge/base.rb +58 -0
- data/lib/phronomy/agent/context/knowledge/entity_knowledge.rb +102 -0
- data/lib/phronomy/agent/context/knowledge/static_knowledge.rb +58 -0
- data/lib/phronomy/agent/invocation_pipeline.rb +10 -1
- data/lib/phronomy/agent/react_agent.rb +24 -23
- data/lib/phronomy/agent/shared_state.rb +2 -2
- data/lib/phronomy/agent/tool_executor.rb +1 -1
- data/lib/phronomy/concurrency/gate_registry.rb +0 -1
- data/lib/phronomy/configuration.rb +0 -6
- data/lib/phronomy/llm_context_window/assembler.rb +77 -44
- data/lib/phronomy/multi_agent/handoff.rb +4 -4
- data/lib/phronomy/multi_agent/orchestrator.rb +1 -1
- data/lib/phronomy/multi_agent/team_coordinator.rb +2 -2
- data/lib/phronomy/runtime/runtime_metrics.rb +0 -1
- data/lib/phronomy/runtime.rb +1 -2
- data/lib/phronomy/tool.rb +3 -4
- data/lib/phronomy/{tool/agent_tool.rb → tools/agent.rb} +6 -6
- data/lib/phronomy/{tool/mcp_tool.rb → tools/mcp.rb} +9 -9
- data/lib/phronomy/tools/vector_search.rb +70 -0
- data/lib/phronomy/vector_store/async_backend.rb +110 -0
- data/lib/phronomy/vector_store/base.rb +89 -0
- data/lib/phronomy/vector_store/embeddings/base.rb +41 -0
- data/lib/phronomy/vector_store/embeddings/ruby_llm_embeddings.rb +47 -0
- data/lib/phronomy/vector_store/in_memory.rb +103 -0
- data/lib/phronomy/vector_store/loader/base.rb +27 -0
- data/lib/phronomy/vector_store/loader/csv_loader.rb +58 -0
- data/lib/phronomy/vector_store/loader/markdown_loader.rb +78 -0
- data/lib/phronomy/vector_store/loader/plain_text_loader.rb +24 -0
- data/lib/phronomy/vector_store/pgvector.rb +127 -0
- data/lib/phronomy/vector_store/redis_search.rb +192 -0
- data/lib/phronomy/vector_store/splitter/base.rb +49 -0
- data/lib/phronomy/vector_store/splitter/fixed_size_splitter.rb +53 -0
- data/lib/phronomy/vector_store/splitter/recursive_splitter.rb +107 -0
- data/lib/phronomy/vector_store.rb +16 -4
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy.rb +2 -1
- data/scripts/api_snapshot.rb +11 -9
- metadata +28 -32
- data/lib/phronomy/agent/context/conversation/compaction_context.rb +0 -117
- data/lib/phronomy/agent/context/conversation/trigger_context.rb +0 -43
- data/lib/phronomy/agent/context/conversation/trim_context.rb +0 -82
- data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +0 -45
- data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +0 -51
- data/lib/phronomy/agent/context/knowledge/loader/base.rb +0 -31
- data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +0 -62
- data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +0 -82
- data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +0 -28
- data/lib/phronomy/agent/context/knowledge/source/base.rb +0 -60
- data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +0 -102
- data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +0 -63
- data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +0 -58
- data/lib/phronomy/agent/context/knowledge/splitter/base.rb +0 -53
- data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +0 -57
- data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +0 -111
- data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +0 -116
- data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +0 -95
- data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +0 -109
- data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +0 -133
- data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +0 -198
- data/lib/phronomy/embeddings.rb +0 -11
- data/lib/phronomy/loader.rb +0 -13
- data/lib/phronomy/splitter.rb +0 -12
- data/lib/phronomy/tool/base.rb +0 -685
- data/lib/phronomy/tool/scope_policy.rb +0 -50
data/lib/phronomy/agent/base.rb
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "digest"
|
|
4
3
|
require "securerandom"
|
|
5
4
|
require_relative "concerns/retryable"
|
|
6
5
|
require_relative "concerns/guardrailable"
|
|
@@ -255,10 +254,10 @@ module Phronomy
|
|
|
255
254
|
# the first time +invoke+ is called. The cache persists for the lifetime
|
|
256
255
|
# of the process; call {.static_knowledge_refresh!} to force a reload.
|
|
257
256
|
#
|
|
258
|
-
# @param sources [Array<Phronomy::Agent::Context::Knowledge::
|
|
257
|
+
# @param sources [Array<Phronomy::Agent::Context::Knowledge::Base>]
|
|
259
258
|
# @example
|
|
260
259
|
# class PolicyAgent < Phronomy::Agent::Base
|
|
261
|
-
# static_knowledge Phronomy::Agent::Context::Knowledge::
|
|
260
|
+
# static_knowledge Phronomy::Agent::Context::Knowledge::StaticKnowledge.new(POLICY_TEXT)
|
|
262
261
|
# end
|
|
263
262
|
# @api public
|
|
264
263
|
def static_knowledge(*sources)
|
|
@@ -269,7 +268,7 @@ module Phronomy
|
|
|
269
268
|
end
|
|
270
269
|
|
|
271
270
|
# Returns the registered static knowledge sources.
|
|
272
|
-
# @return [Array<Phronomy::Agent::Context::Knowledge::
|
|
271
|
+
# @return [Array<Phronomy::Agent::Context::Knowledge::Base>]
|
|
273
272
|
# @api public
|
|
274
273
|
def static_knowledge_sources
|
|
275
274
|
@static_knowledge_sources || []
|
|
@@ -302,80 +301,6 @@ module Phronomy
|
|
|
302
301
|
@static_knowledge_chunks = nil
|
|
303
302
|
end
|
|
304
303
|
|
|
305
|
-
# Registers a callback that is invoked before every LLM call so the
|
|
306
|
-
# application can remove stale or irrelevant messages from the
|
|
307
|
-
# conversation history.
|
|
308
|
-
#
|
|
309
|
-
# The block receives a {Phronomy::Agent::Context::Conversation::TrimContext} and may call
|
|
310
|
-
# +ctx.remove(seqs)+ to drop messages by seq number. Changes affect
|
|
311
|
-
# only the current invocation; the underlying memory store is unchanged.
|
|
312
|
-
#
|
|
313
|
-
# @yield [ctx] Phronomy::Agent::Context::Conversation::TrimContext
|
|
314
|
-
# @example Drop the oldest message when over 80% of budget is used
|
|
315
|
-
# on_trim do |ctx|
|
|
316
|
-
# limit = ctx.budget&.available(used: 0) || Float::INFINITY
|
|
317
|
-
# ctx.remove(ctx.message_elements.first[:seq]) if ctx.total_tokens > limit * 0.8
|
|
318
|
-
# end
|
|
319
|
-
# @api public
|
|
320
|
-
def on_trim(&block)
|
|
321
|
-
@on_trim_callback = block
|
|
322
|
-
end
|
|
323
|
-
|
|
324
|
-
# @return [Proc, nil]
|
|
325
|
-
# @api private
|
|
326
|
-
def _on_trim_callback
|
|
327
|
-
@on_trim_callback
|
|
328
|
-
end
|
|
329
|
-
|
|
330
|
-
# Registers a callback that decides whether compaction should run.
|
|
331
|
-
# Evaluated before every LLM call (after on_trim). If the block returns
|
|
332
|
-
# truthy AND an +on_compact+ callback is also registered, the compact
|
|
333
|
-
# pipeline is executed.
|
|
334
|
-
#
|
|
335
|
-
# The block receives a read-only {Phronomy::Agent::Context::Conversation::TriggerContext}.
|
|
336
|
-
#
|
|
337
|
-
# @yield [ctx] Phronomy::Agent::Context::Conversation::TriggerContext
|
|
338
|
-
# @return [Boolean] truthy → run on_compact; falsy → skip
|
|
339
|
-
# @example Trigger when messages exceed 70% of token budget
|
|
340
|
-
# on_compaction_trigger do |ctx|
|
|
341
|
-
# limit = ctx.budget&.available(used: 0) || Float::INFINITY
|
|
342
|
-
# ctx.total_tokens > limit * 0.7
|
|
343
|
-
# end
|
|
344
|
-
# @api public
|
|
345
|
-
def on_compaction_trigger(&block)
|
|
346
|
-
@on_compaction_trigger_callback = block
|
|
347
|
-
end
|
|
348
|
-
|
|
349
|
-
# @return [Proc, nil]
|
|
350
|
-
# @api private
|
|
351
|
-
def _on_compaction_trigger_callback
|
|
352
|
-
@on_compaction_trigger_callback
|
|
353
|
-
end
|
|
354
|
-
|
|
355
|
-
# Registers a callback that performs the actual compaction when the
|
|
356
|
-
# +on_compaction_trigger+ callback fires. The block receives a
|
|
357
|
-
# {Phronomy::Agent::Context::Conversation::CompactionContext} and should call +ctx.compact+
|
|
358
|
-
# to specify which messages to summarise.
|
|
359
|
-
#
|
|
360
|
-
# @yield [ctx] Phronomy::Agent::Context::Conversation::CompactionContext
|
|
361
|
-
# @example Replace the first 4 messages with a short summary
|
|
362
|
-
# on_compact do |ctx|
|
|
363
|
-
# ctx.compact(0..3) do |elements|
|
|
364
|
-
# texts = elements.map { |e| e[:message].content }.join(" | ")
|
|
365
|
-
# "Earlier conversation summary: #{texts}"
|
|
366
|
-
# end
|
|
367
|
-
# end
|
|
368
|
-
# @api public
|
|
369
|
-
def on_compact(&block)
|
|
370
|
-
@on_compact_callback = block
|
|
371
|
-
end
|
|
372
|
-
|
|
373
|
-
# @return [Proc, nil]
|
|
374
|
-
# @api private
|
|
375
|
-
def _on_compact_callback
|
|
376
|
-
@on_compact_callback
|
|
377
|
-
end
|
|
378
|
-
|
|
379
304
|
# When enabled, attaches Anthropic prompt-cache markers to the system
|
|
380
305
|
# message so that the fixed instructions are served from cache on
|
|
381
306
|
# subsequent turns, reducing input-token costs.
|
|
@@ -453,7 +378,7 @@ module Phronomy
|
|
|
453
378
|
|
|
454
379
|
# Registers an anonymous handoff tool class on this agent instance.
|
|
455
380
|
# Called by Runner during construction when routes are configured.
|
|
456
|
-
# @param tool_class [Class<Phronomy::
|
|
381
|
+
# @param tool_class [Class<Phronomy::Agent::Context::Capability::Base>]
|
|
457
382
|
# @return [self]
|
|
458
383
|
# @api private
|
|
459
384
|
def _add_handoff_tool(tool_class)
|
|
@@ -482,7 +407,6 @@ module Phronomy
|
|
|
482
407
|
# @param thread_id [String, nil] conversation thread identifier, forwarded
|
|
483
408
|
# to the compaction context when on_compact is configured.
|
|
484
409
|
# @param config [Hash] additional runtime options:
|
|
485
|
-
# +:knowledge_sources+ (Array) — dynamic knowledge sources for this turn
|
|
486
410
|
# +:user_id+ (+String+, optional) — caller identity forwarded to the tracer
|
|
487
411
|
# +:session_id+ (+String+, optional) — session identity forwarded to the tracer
|
|
488
412
|
# @param invocation_context [Phronomy::InvocationContext, nil] optional first-class context
|
|
@@ -519,60 +443,9 @@ module Phronomy
|
|
|
519
443
|
thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
|
|
520
444
|
end
|
|
521
445
|
if Phronomy.configuration.event_loop
|
|
522
|
-
|
|
523
|
-
if Phronomy::EventLoop.current?
|
|
524
|
-
raise Phronomy::Error,
|
|
525
|
-
"Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
|
|
526
|
-
"entry action. Use agent.run_as_child(input, ctx: ctx) instead."
|
|
527
|
-
end
|
|
528
|
-
|
|
529
|
-
# Build an effective config that includes the invoke_timeout scope's
|
|
530
|
-
# CancellationToken before constructing the FSM. This ensures that
|
|
531
|
-
# every LLM, tool, and RAG call made inside _invoke_impl observes
|
|
532
|
-
# cancellation when the deadline fires.
|
|
533
|
-
timeout_sec = self.class.invoke_timeout
|
|
534
|
-
effective_config, scope = if timeout_sec
|
|
535
|
-
s = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
|
|
536
|
-
s.deadline_in(timeout_sec)
|
|
537
|
-
[config.merge(cancellation_token: s.token), s]
|
|
538
|
-
else
|
|
539
|
-
[config, nil]
|
|
540
|
-
end
|
|
541
|
-
|
|
542
|
-
fsm = Agent::FSM.new(
|
|
543
|
-
agent: self,
|
|
544
|
-
input: input,
|
|
545
|
-
messages: messages,
|
|
546
|
-
thread_id: thread_id || SecureRandom.uuid,
|
|
547
|
-
config: effective_config
|
|
548
|
-
)
|
|
549
|
-
completion_queue = Phronomy::EventLoop.instance.register(fsm)
|
|
550
|
-
result = if scope
|
|
551
|
-
scope.pop_queue(completion_queue) do
|
|
552
|
-
raise Phronomy::TimeoutError,
|
|
553
|
-
"Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
|
|
554
|
-
end
|
|
555
|
-
else
|
|
556
|
-
completion_queue.pop
|
|
557
|
-
end
|
|
558
|
-
raise result if result.is_a?(Exception)
|
|
559
|
-
result
|
|
446
|
+
_invoke_via_event_loop(input, messages: messages, thread_id: thread_id, config: config)
|
|
560
447
|
else
|
|
561
|
-
|
|
562
|
-
# against itself when using a cooperative backend. Use invoke_async
|
|
563
|
-
# instead to compose agents without introducing a blocking wait.
|
|
564
|
-
if Phronomy::Task.current
|
|
565
|
-
msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
|
|
566
|
-
"This blocks the scheduler until the inner invocation completes, preventing " \
|
|
567
|
-
"other tasks from making progress. Use invoke_async + await instead."
|
|
568
|
-
if Phronomy.configuration.strict_runtime_guards
|
|
569
|
-
raise Phronomy::SchedulerReentrancyError, msg
|
|
570
|
-
elsif Phronomy.configuration.logger
|
|
571
|
-
Phronomy.configuration.logger.warn(msg)
|
|
572
|
-
else
|
|
573
|
-
Kernel.warn("[phronomy] WARNING: #{msg}")
|
|
574
|
-
end
|
|
575
|
-
end
|
|
448
|
+
_check_scheduler_reentrancy
|
|
576
449
|
invoke_async(input, messages: messages, thread_id: thread_id, config: config).await
|
|
577
450
|
end
|
|
578
451
|
end
|
|
@@ -687,19 +560,11 @@ module Phronomy
|
|
|
687
560
|
raise
|
|
688
561
|
end
|
|
689
562
|
|
|
690
|
-
#
|
|
691
|
-
#
|
|
692
|
-
# cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
|
|
693
|
-
# in +@last_context_version_cache+ so callers can inspect it after invoke
|
|
694
|
-
# returns.
|
|
695
|
-
#
|
|
696
|
-
# NOTE: Not thread-safe. When the same Agent instance is used concurrently,
|
|
697
|
-
# +@last_context_version_cache+ reflects the most recent +invoke+ on *any*
|
|
698
|
-
# thread. For per-invocation isolation, use a separate Agent instance per
|
|
699
|
-
# thread.
|
|
563
|
+
# @deprecated The context version cache has been removed. Returns nil.
|
|
564
|
+
# Retained for backward compatibility with callers using safe navigation (+&.reset+).
|
|
700
565
|
# @api private
|
|
701
566
|
def context_version_cache
|
|
702
|
-
|
|
567
|
+
nil
|
|
703
568
|
end
|
|
704
569
|
|
|
705
570
|
private
|
|
@@ -722,29 +587,75 @@ module Phronomy
|
|
|
722
587
|
[effective_thread_id, effective_config]
|
|
723
588
|
end
|
|
724
589
|
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
590
|
+
def _invoke_via_event_loop(input, messages:, thread_id:, config:)
|
|
591
|
+
if Phronomy::EventLoop.current?
|
|
592
|
+
raise Phronomy::Error,
|
|
593
|
+
"Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
|
|
594
|
+
"entry action. Use agent.run_as_child(input, ctx: ctx) instead."
|
|
595
|
+
end
|
|
596
|
+
|
|
597
|
+
timeout_sec = self.class.invoke_timeout
|
|
598
|
+
effective_config, scope = if timeout_sec
|
|
599
|
+
s = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
|
|
600
|
+
s.deadline_in(timeout_sec)
|
|
601
|
+
[config.merge(cancellation_token: s.token), s]
|
|
602
|
+
else
|
|
603
|
+
[config, nil]
|
|
604
|
+
end
|
|
605
|
+
|
|
606
|
+
fsm = Agent::FSM.new(
|
|
607
|
+
agent: self,
|
|
608
|
+
input: input,
|
|
609
|
+
messages: messages,
|
|
610
|
+
thread_id: thread_id || SecureRandom.uuid,
|
|
611
|
+
config: effective_config
|
|
612
|
+
)
|
|
613
|
+
completion_queue = Phronomy::EventLoop.instance.register(fsm)
|
|
614
|
+
result = if scope
|
|
615
|
+
scope.pop_queue(completion_queue) do
|
|
616
|
+
raise Phronomy::TimeoutError,
|
|
617
|
+
"Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
|
|
618
|
+
end
|
|
619
|
+
else
|
|
620
|
+
completion_queue.pop
|
|
733
621
|
end
|
|
622
|
+
raise result if result.is_a?(Exception)
|
|
623
|
+
result
|
|
624
|
+
end
|
|
734
625
|
|
|
735
|
-
|
|
626
|
+
def _check_scheduler_reentrancy
|
|
627
|
+
return unless Phronomy::Task.current
|
|
628
|
+
|
|
629
|
+
msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
|
|
630
|
+
"This blocks the scheduler until the inner invocation completes, preventing " \
|
|
631
|
+
"other tasks from making progress. Use invoke_async + await instead."
|
|
632
|
+
if Phronomy.configuration.strict_runtime_guards
|
|
633
|
+
raise Phronomy::SchedulerReentrancyError, msg
|
|
634
|
+
elsif Phronomy.configuration.logger
|
|
635
|
+
Phronomy.configuration.logger.warn(msg)
|
|
636
|
+
else
|
|
637
|
+
Kernel.warn("[phronomy] WARNING: #{msg}")
|
|
638
|
+
end
|
|
639
|
+
end
|
|
640
|
+
|
|
641
|
+
# Streaming implementation for #stream.
|
|
642
|
+
def _stream_impl(input, messages: [], thread_id: nil, config: {}, &block)
|
|
643
|
+
trace("agent.invoke", input: input, **_build_caller_meta(config)) do |_span|
|
|
736
644
|
run_input_guardrails!(input)
|
|
737
645
|
|
|
738
646
|
chat = build_chat
|
|
739
647
|
user_message = extract_message(input)
|
|
648
|
+
context = build_context(
|
|
649
|
+
input,
|
|
650
|
+
messages: messages,
|
|
651
|
+
thread_id: thread_id,
|
|
652
|
+
config: config,
|
|
653
|
+
budget: build_token_budget,
|
|
654
|
+
instruction: build_instructions(input),
|
|
655
|
+
tools: self.class.tools + _handoff_tools
|
|
656
|
+
)
|
|
657
|
+
_apply_context_to_chat(chat, context)
|
|
740
658
|
|
|
741
|
-
# Assemble context (system prompt + history). Override #build_context to
|
|
742
|
-
# inject custom context editing logic at the Agent subclass level.
|
|
743
|
-
context = build_context(input, messages: messages, thread_id: thread_id, config: config)
|
|
744
|
-
apply_instructions(chat, context[:system]) if context[:system]
|
|
745
|
-
context[:messages].each { |msg| chat.messages << msg }
|
|
746
|
-
|
|
747
|
-
# Wire per-event callbacks to yield StreamEvents.
|
|
748
659
|
current_tool_call = nil
|
|
749
660
|
chat.on_tool_call do |tool_call|
|
|
750
661
|
current_tool_call = tool_call
|
|
@@ -758,32 +669,9 @@ module Phronomy
|
|
|
758
669
|
}))
|
|
759
670
|
end
|
|
760
671
|
|
|
761
|
-
# Run before_completion hooks (global → class → instance) before the LLM call.
|
|
762
672
|
run_before_completion_hooks!(chat, config)
|
|
763
673
|
|
|
764
|
-
|
|
765
|
-
# Chunks are pushed into a token queue by the pool worker thread and
|
|
766
|
-
# drained here (on the caller's side) so that the user block is never
|
|
767
|
-
# executed on a BlockingAdapterPool worker thread.
|
|
768
|
-
# The queue capacity is bounded by Configuration#stream_queue_max_size
|
|
769
|
-
# (nil = unbounded) to provide backpressure against a fast LLM producer.
|
|
770
|
-
adapter = Phronomy.configuration.llm_adapter
|
|
771
|
-
chunk_queue = Phronomy::Concurrency::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
|
|
772
|
-
pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
|
|
773
|
-
|
|
774
|
-
# Drain the chunk queue on this side (scheduler task / caller thread).
|
|
775
|
-
loop do
|
|
776
|
-
chunk = chunk_queue.pop
|
|
777
|
-
break if chunk.nil? # queue closed — LLM streaming complete
|
|
778
|
-
block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
|
|
779
|
-
check_cancellation!(config, "invocation cancelled during streaming")
|
|
780
|
-
end
|
|
781
|
-
|
|
782
|
-
response = pending.await
|
|
783
|
-
|
|
784
|
-
output = response.content
|
|
785
|
-
usage = Phronomy::TokenUsage.from_tokens(response.tokens)
|
|
786
|
-
|
|
674
|
+
output, usage = _drain_stream(chat, user_message, config, &block)
|
|
787
675
|
run_output_guardrails!(output)
|
|
788
676
|
|
|
789
677
|
result = {output: output, messages: chat.messages, usage: usage}
|
|
@@ -797,139 +685,172 @@ module Phronomy
|
|
|
797
685
|
# inject custom context editing logic without having to override
|
|
798
686
|
# the full #invoke_once pipeline.
|
|
799
687
|
#
|
|
800
|
-
#
|
|
801
|
-
#
|
|
802
|
-
#
|
|
803
|
-
#
|
|
804
|
-
# @
|
|
688
|
+
# The keyword arguments +budget+, +instruction+, +tools+, and +knowledge+
|
|
689
|
+
# carry pre-computed values. Override them in a subclass call to +super+
|
|
690
|
+
# to inject custom context without recomputing the defaults.
|
|
691
|
+
#
|
|
692
|
+
# @param input [String, Hash] the user's input for this turn
|
|
693
|
+
# @param messages [Array<RubyLLM::Message>] raw conversation history
|
|
694
|
+
# @param thread_id [String, nil] conversation thread identifier
|
|
695
|
+
# @param config [Hash] the invocation config (see #invoke)
|
|
696
|
+
# @param budget [LlmContextWindow::TokenBudget, nil] pre-computed token budget
|
|
697
|
+
# @param instruction [String, nil] pre-computed system instruction
|
|
698
|
+
# @param tools [Array<Class>] tool classes to expose
|
|
699
|
+
# @param knowledge [Array<Hash>] knowledge chunks ({ content:, type:, source: })
|
|
700
|
+
# @return [Hash] { system: String|nil, messages: Array, tool_classes: Array }
|
|
805
701
|
# @api public
|
|
806
|
-
def build_context(
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
702
|
+
def build_context(
|
|
703
|
+
input,
|
|
704
|
+
messages: [],
|
|
705
|
+
thread_id: nil,
|
|
706
|
+
config: {},
|
|
707
|
+
budget: build_token_budget,
|
|
708
|
+
instruction: build_instructions(input),
|
|
709
|
+
tools: self.class.tools + _handoff_tools,
|
|
710
|
+
knowledge: self.class.static_knowledge_chunks + instance_knowledge_chunks
|
|
711
|
+
)
|
|
812
712
|
assembler = LlmContextWindow::Assembler.new(budget: budget)
|
|
813
|
-
assembler.add_instruction(
|
|
814
|
-
|
|
815
|
-
|
|
713
|
+
assembler.add_instruction(instruction) if instruction
|
|
714
|
+
assembler.add_capability(tools)
|
|
715
|
+
knowledge.each { |chunk| assembler.add_knowledge(chunk[:content], type: chunk[:type] || :static, trusted: true, source: chunk[:source]) }
|
|
716
|
+
|
|
717
|
+
msgs = Array(messages)
|
|
718
|
+
|
|
719
|
+
if budget && budget_exceeded?(msgs)
|
|
720
|
+
# Default strategy when the token budget is tight:
|
|
721
|
+
# 1. Compact: keep the most recent half of the messages verbatim and
|
|
722
|
+
# replace the older half with a brief omission marker.
|
|
723
|
+
# 2. Trim: if the compacted history still exceeds the budget, call
|
|
724
|
+
# trim_to_budget with the :safe strategy, which discards the oldest
|
|
725
|
+
# message one at a time until the history fits.
|
|
726
|
+
# Subclasses can override build_context to apply a different strategy
|
|
727
|
+
# (e.g. LLM-based summarisation) before calling super.
|
|
728
|
+
keep = [msgs.size / 2, 2].max
|
|
729
|
+
msgs = compact_messages(msgs, keep_tail: keep) do |dropped|
|
|
730
|
+
"[#{dropped.size} earlier messages omitted]"
|
|
731
|
+
end
|
|
732
|
+
remaining = assembler.available_for_messages
|
|
733
|
+
msgs = trim_to_budget(msgs, remaining: remaining, strategy: :safe)
|
|
816
734
|
end
|
|
817
|
-
|
|
818
|
-
assembler.
|
|
735
|
+
|
|
736
|
+
assembler.add_messages(msgs)
|
|
737
|
+
@last_context = assembler.build
|
|
819
738
|
end
|
|
820
739
|
protected :build_context
|
|
821
740
|
|
|
822
|
-
#
|
|
823
|
-
#
|
|
824
|
-
# Each source is spawned as a separate task within a {Phronomy::TaskGroup};
|
|
825
|
-
# the RAG concurrency gate enforces the +max_concurrent_rag_fetches+ cap.
|
|
826
|
-
# Results are returned in registration order (spawn order) as a flat array.
|
|
741
|
+
# Keeps the last +keep+ messages from +messages+, discarding older ones.
|
|
742
|
+
# Use this inside a +build_context+ override to trim conversation history.
|
|
827
743
|
#
|
|
828
|
-
#
|
|
829
|
-
#
|
|
830
|
-
#
|
|
831
|
-
#
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
# @api private
|
|
837
|
-
def fetch_knowledge_chunks(query, config)
|
|
838
|
-
sources = Array(config[:knowledge_sources])
|
|
839
|
-
return [] if sources.empty?
|
|
840
|
-
|
|
841
|
-
check_cancellation!(config, "invocation cancelled before RAG fetch")
|
|
842
|
-
|
|
843
|
-
# :skip (default) — ignore per-source failures so the agent can still
|
|
844
|
-
# answer with partial context. :fail surfaces the first error immediately.
|
|
845
|
-
failure_policy =
|
|
846
|
-
case config[:rag_failure_policy]
|
|
847
|
-
when :fail then :fail_fast
|
|
848
|
-
else :skip_failed
|
|
849
|
-
end
|
|
744
|
+
# @param messages [Array<RubyLLM::Message>] conversation history
|
|
745
|
+
# @param keep [Integer] number of messages to retain (from the tail)
|
|
746
|
+
# @return [Array<RubyLLM::Message>]
|
|
747
|
+
# @api public
|
|
748
|
+
def trim_messages(messages, keep:)
|
|
749
|
+
Array(messages).last(keep)
|
|
750
|
+
end
|
|
751
|
+
protected :trim_messages
|
|
850
752
|
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
query: query,
|
|
864
|
-
cancellation_token: config[:cancellation_token],
|
|
865
|
-
timeout: config[:rag_timeout]
|
|
866
|
-
).await
|
|
867
|
-
end
|
|
868
|
-
Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{elapsed_ms}ms" }
|
|
869
|
-
result
|
|
870
|
-
end
|
|
871
|
-
end
|
|
872
|
-
end
|
|
753
|
+
# Removes the oldest messages one at a time until the count is within +limit+.
|
|
754
|
+
#
|
|
755
|
+
# @param messages [Array<RubyLLM::Message>] conversation history
|
|
756
|
+
# @param limit [Integer] maximum number of messages to retain
|
|
757
|
+
# @return [Array<RubyLLM::Message>]
|
|
758
|
+
# @api public
|
|
759
|
+
def drop_messages_over(messages, limit:)
|
|
760
|
+
msgs = Array(messages).dup
|
|
761
|
+
msgs.shift while msgs.size > limit
|
|
762
|
+
msgs
|
|
763
|
+
end
|
|
764
|
+
protected :drop_messages_over
|
|
873
765
|
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
766
|
+
# Replaces all but the last +keep_tail+ messages with a single system summary.
|
|
767
|
+
# The block receives the dropped messages and must return a summary String.
|
|
768
|
+
#
|
|
769
|
+
# @param messages [Array<RubyLLM::Message>] conversation history
|
|
770
|
+
# @param keep_tail [Integer] number of recent messages to preserve verbatim
|
|
771
|
+
# @yield [Array<RubyLLM::Message>] the messages being summarised
|
|
772
|
+
# @yieldreturn [String] summary text
|
|
773
|
+
# @return [Array<RubyLLM::Message>]
|
|
774
|
+
# @api public
|
|
775
|
+
def compact_messages(messages, keep_tail:, &summariser)
|
|
776
|
+
msgs = Array(messages)
|
|
777
|
+
return msgs if msgs.size <= keep_tail
|
|
778
|
+
tail = msgs.last(keep_tail)
|
|
779
|
+
dropped = msgs.first(msgs.size - keep_tail)
|
|
780
|
+
summary_text = summariser.call(dropped)
|
|
781
|
+
[RubyLLM::Message.new(role: :system, content: summary_text)] + tail
|
|
877
782
|
end
|
|
878
|
-
protected :
|
|
783
|
+
protected :compact_messages
|
|
879
784
|
|
|
880
|
-
#
|
|
881
|
-
#
|
|
882
|
-
#
|
|
785
|
+
# Trims +messages+ to fit within +remaining+ tokens using the given
|
|
786
|
+
# +strategy+. Returns the trimmed message array without touching the
|
|
787
|
+
# assembler. The caller is responsible for passing the result to
|
|
788
|
+
# +assembler.add_messages+ and calling +assembler.build+.
|
|
883
789
|
#
|
|
884
|
-
#
|
|
885
|
-
#
|
|
790
|
+
# Supported strategies:
|
|
791
|
+
# +:safe+ — discard the oldest message one at a time (default)
|
|
886
792
|
#
|
|
887
|
-
# @param messages [Array<RubyLLM::Message>]
|
|
888
|
-
# @param
|
|
889
|
-
#
|
|
890
|
-
# @
|
|
793
|
+
# @param messages [Array<RubyLLM::Message>] conversation history
|
|
794
|
+
# @param remaining [Integer, nil] token allowance for messages; when +nil+
|
|
795
|
+
# the messages are returned unchanged
|
|
796
|
+
# @param strategy [Symbol] trim strategy (default +:safe+)
|
|
797
|
+
# @return [Array<RubyLLM::Message>]
|
|
891
798
|
# @api public
|
|
892
|
-
def
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
799
|
+
def trim_to_budget(messages, remaining:, strategy: :safe)
|
|
800
|
+
return Array(messages) unless remaining
|
|
801
|
+
msgs = Array(messages)
|
|
802
|
+
loop do
|
|
803
|
+
used = msgs.sum { |m| LlmContextWindow::TokenEstimator.estimate(m.content.to_s) }
|
|
804
|
+
return msgs if used <= remaining
|
|
805
|
+
break if msgs.empty?
|
|
806
|
+
msgs = trim_messages(msgs, keep: msgs.size - 1)
|
|
900
807
|
end
|
|
808
|
+
msgs
|
|
809
|
+
end
|
|
810
|
+
protected :trim_to_budget
|
|
901
811
|
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
812
|
+
# Returns +true+ when the estimated token usage of +messages+ exceeds
|
|
813
|
+
# +threshold+ times the available context budget.
|
|
814
|
+
# Always returns +false+ when no token budget is available.
|
|
815
|
+
#
|
|
816
|
+
# @param messages [Array<RubyLLM::Message>] conversation history
|
|
817
|
+
# @param threshold [Float] fraction of the available budget (default 0.8)
|
|
818
|
+
# @return [Boolean]
|
|
819
|
+
# @api public
|
|
820
|
+
def budget_exceeded?(messages, threshold: 0.8)
|
|
821
|
+
return false unless (b = build_token_budget)
|
|
822
|
+
total = Array(messages).sum { |m| LlmContextWindow::TokenEstimator.estimate(m.content.to_s) }
|
|
823
|
+
limit = b.available(used: 0)
|
|
824
|
+
total > limit * threshold
|
|
825
|
+
end
|
|
826
|
+
protected :budget_exceeded?
|
|
916
827
|
|
|
917
|
-
|
|
828
|
+
# Registers a per-instance knowledge source. Knowledge chunks from all
|
|
829
|
+
# registered sources are included in every LLM call via +build_context+.
|
|
830
|
+
#
|
|
831
|
+
# @param source [#fetch] any object responding to +fetch(query:)+
|
|
832
|
+
# @return [void]
|
|
833
|
+
# @api public
|
|
834
|
+
def add_knowledge_source(source)
|
|
835
|
+
@instance_knowledge_sources ||= []
|
|
836
|
+
@instance_knowledge_sources << source
|
|
918
837
|
end
|
|
919
|
-
protected :
|
|
838
|
+
protected :add_knowledge_source
|
|
839
|
+
|
|
840
|
+
# Returns knowledge chunks fetched from all instance-level knowledge sources.
|
|
841
|
+
#
|
|
842
|
+
# @return [Array<Hash>]
|
|
843
|
+
# @api private
|
|
844
|
+
def instance_knowledge_chunks
|
|
845
|
+
return [] unless @instance_knowledge_sources
|
|
846
|
+
@instance_knowledge_sources.flat_map { |ks| ks.fetch(query: nil) }
|
|
847
|
+
end
|
|
848
|
+
protected :instance_knowledge_chunks
|
|
920
849
|
|
|
921
850
|
# Performs a single (non-retried) invocation. Extracted so that #invoke can
|
|
922
851
|
# wrap it in a retry loop without duplicating the LLM interaction logic.
|
|
923
852
|
def invoke_once(input, messages: [], thread_id: nil, config: {})
|
|
924
|
-
|
|
925
|
-
caller_meta[:user_id] = config[:user_id] if config[:user_id]
|
|
926
|
-
caller_meta[:session_id] = config[:session_id] if config[:session_id]
|
|
927
|
-
if (ic = config[:invocation_context])
|
|
928
|
-
caller_meta[:task_id] = ic.task_id if ic.task_id
|
|
929
|
-
caller_meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
|
|
930
|
-
end
|
|
931
|
-
|
|
932
|
-
trace("agent.invoke", input: input, **caller_meta) do |_span|
|
|
853
|
+
trace("agent.invoke", input: input, **_build_caller_meta(config)) do |_span|
|
|
933
854
|
Agent::InvocationPipeline.new(self).run(
|
|
934
855
|
input,
|
|
935
856
|
messages: messages,
|
|
@@ -939,6 +860,39 @@ module Phronomy
|
|
|
939
860
|
end
|
|
940
861
|
end
|
|
941
862
|
|
|
863
|
+
def _build_caller_meta(config)
|
|
864
|
+
meta = {}
|
|
865
|
+
meta[:user_id] = config[:user_id] if config[:user_id]
|
|
866
|
+
meta[:session_id] = config[:session_id] if config[:session_id]
|
|
867
|
+
if (ic = config[:invocation_context])
|
|
868
|
+
meta[:task_id] = ic.task_id if ic.task_id
|
|
869
|
+
meta[:parent_task_id] = ic.parent_task_id if ic.parent_task_id
|
|
870
|
+
end
|
|
871
|
+
meta
|
|
872
|
+
end
|
|
873
|
+
|
|
874
|
+
def _apply_context_to_chat(chat, context)
|
|
875
|
+
apply_instructions(chat, context[:system]) if context[:system]
|
|
876
|
+
(context[:tool_classes] || []).each { |tc| chat.with_tool(prepare_tool_class(tc)) }
|
|
877
|
+
context[:messages].each { |msg| chat.messages << msg }
|
|
878
|
+
end
|
|
879
|
+
|
|
880
|
+
def _drain_stream(chat, user_message, config, &block)
|
|
881
|
+
adapter = Phronomy.configuration.llm_adapter
|
|
882
|
+
chunk_queue = Phronomy::Concurrency::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
|
|
883
|
+
pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
|
|
884
|
+
|
|
885
|
+
loop do
|
|
886
|
+
chunk = chunk_queue.pop
|
|
887
|
+
break if chunk.nil?
|
|
888
|
+
block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
|
|
889
|
+
check_cancellation!(config, "invocation cancelled during streaming")
|
|
890
|
+
end
|
|
891
|
+
|
|
892
|
+
response = pending.await
|
|
893
|
+
[response.content, Phronomy::TokenUsage.from_tokens(response.tokens)]
|
|
894
|
+
end
|
|
895
|
+
|
|
942
896
|
# Builds a TokenBudget for this agent's model if possible.
|
|
943
897
|
# When context_window is set at the class level, that value is used directly
|
|
944
898
|
# (bypassing the RubyLLM catalogue) — useful for locally-hosted models where
|
|
@@ -965,54 +919,6 @@ module Phronomy
|
|
|
965
919
|
nil
|
|
966
920
|
end
|
|
967
921
|
|
|
968
|
-
# Converts a flat Array of message objects into the internal message_elements
|
|
969
|
-
# format used by TrimContext, TriggerContext, and CompactionContext.
|
|
970
|
-
# Each element receives a 0-based synthetic seq number.
|
|
971
|
-
#
|
|
972
|
-
# @param messages [Array] message-like objects with #role and #content
|
|
973
|
-
# @return [Array<Hash>]
|
|
974
|
-
# @api public
|
|
975
|
-
def build_message_elements(messages)
|
|
976
|
-
Array(messages).each_with_index.map do |msg, idx|
|
|
977
|
-
tokens = LlmContextWindow::TokenEstimator.estimate(msg.content.to_s)
|
|
978
|
-
{seq: idx, message: msg, tokens: tokens, role: msg.role}
|
|
979
|
-
end
|
|
980
|
-
end
|
|
981
|
-
|
|
982
|
-
# Builds (or returns a cached) system prompt text.
|
|
983
|
-
# The fingerprint is a SHA-256 digest of the instruction text concatenated
|
|
984
|
-
# with the content of every registered static knowledge source.
|
|
985
|
-
# When the fingerprint is unchanged the ContextVersionCache returns the
|
|
986
|
-
# previously assembled text without re-fetching any sources.
|
|
987
|
-
#
|
|
988
|
-
# @param input [String, Hash] the agent's current input (used for template evaluation)
|
|
989
|
-
# @return [String, nil] assembled system text, or nil when empty
|
|
990
|
-
# @api public
|
|
991
|
-
def build_cached_system_text(input)
|
|
992
|
-
instruction = build_instructions(input)
|
|
993
|
-
|
|
994
|
-
static_chunks = self.class.static_knowledge_chunks
|
|
995
|
-
|
|
996
|
-
fingerprint = Digest::SHA256.hexdigest(
|
|
997
|
-
[instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
|
|
998
|
-
)
|
|
999
|
-
|
|
1000
|
-
cache = (@context_version_cache ||= LlmContextWindow::ContextVersionCache.new)
|
|
1001
|
-
unless cache.valid?(fingerprint)
|
|
1002
|
-
parts = [instruction]
|
|
1003
|
-
static_chunks.each do |chunk|
|
|
1004
|
-
parts << LlmContextWindow::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
|
|
1005
|
-
end
|
|
1006
|
-
cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
|
|
1007
|
-
end
|
|
1008
|
-
|
|
1009
|
-
# Persist a reference on the instance so that context_version_cache
|
|
1010
|
-
# remains accessible after invoke completes.
|
|
1011
|
-
@last_context_version_cache = cache
|
|
1012
|
-
|
|
1013
|
-
cache.system_text.empty? ? nil : cache.system_text
|
|
1014
|
-
end
|
|
1015
|
-
|
|
1016
922
|
# Returns the chat class to instantiate for this invocation.
|
|
1017
923
|
# When EventLoop mode is enabled ({Phronomy.configuration.event_loop}),
|
|
1018
924
|
# returns {ParallelToolChat} so that concurrent tool dispatch is enabled.
|
|
@@ -1039,10 +945,6 @@ module Phronomy
|
|
|
1039
945
|
RubyLLM.chat(**opts)
|
|
1040
946
|
end
|
|
1041
947
|
chat.with_temperature(t) if t
|
|
1042
|
-
self.class.tools.each do |tool_class|
|
|
1043
|
-
chat.with_tool(prepare_tool_class(tool_class))
|
|
1044
|
-
end
|
|
1045
|
-
_handoff_tools.each { |tc| chat.with_tool(tc) }
|
|
1046
948
|
chat
|
|
1047
949
|
end
|
|
1048
950
|
|
|
@@ -1102,7 +1004,7 @@ module Phronomy
|
|
|
1102
1004
|
# Builds the final tool class to register with the chat.
|
|
1103
1005
|
#
|
|
1104
1006
|
# When an already-instantiated tool object is passed (e.g. a
|
|
1105
|
-
# {Phronomy::
|
|
1007
|
+
# {Phronomy::Tools::Mcp} returned by +Phronomy::Tools::Mcp.from_server+), it is
|
|
1106
1008
|
# returned as-is. RubyLLM's +with_tool+ accepts both classes and
|
|
1107
1009
|
# instances, so no wrapping is needed.
|
|
1108
1010
|
#
|
|
@@ -1110,7 +1012,7 @@ module Phronomy
|
|
|
1110
1012
|
# 1. Alias override — when the Hash form of .tools maps this class to an
|
|
1111
1013
|
# explicit name, an anonymous subclass with that tool_name is returned.
|
|
1112
1014
|
# 2. Scope policy — when a scope is declared on the tool, the configured
|
|
1113
|
-
# {Phronomy::
|
|
1015
|
+
# {Phronomy::Agent::Context::Capability::ScopePolicy} (or the default) is evaluated.
|
|
1114
1016
|
# +:reject+ wraps the tool to return a denial message without executing.
|
|
1115
1017
|
# +:approve+ behaves like requiring approval (same as step 3 when the
|
|
1116
1018
|
# tool does not already have +requires_approval+).
|
|
@@ -1120,7 +1022,7 @@ module Phronomy
|
|
|
1120
1022
|
# (tool_name, args) and, if it returns falsy, the tool returns a denial
|
|
1121
1023
|
# message instead of executing.
|
|
1122
1024
|
def prepare_tool_class(tool_class)
|
|
1123
|
-
# When an instantiated tool object is passed (e.g.
|
|
1025
|
+
# When an instantiated tool object is passed (e.g. Phronomy::Tools::Mcp.from_server
|
|
1124
1026
|
# returns an instance, not a class), skip class-level processing and
|
|
1125
1027
|
# return it directly. RubyLLM#with_tool handles both forms.
|
|
1126
1028
|
return tool_class unless tool_class.is_a?(Class)
|
|
@@ -1139,7 +1041,7 @@ module Phronomy
|
|
|
1139
1041
|
# Step 2: evaluate scope policy.
|
|
1140
1042
|
scope = resolved.scope
|
|
1141
1043
|
if scope
|
|
1142
|
-
policy = @scope_policy || Phronomy::
|
|
1044
|
+
policy = @scope_policy || Phronomy::Agent::Context::Capability::ScopePolicy::DEFAULT
|
|
1143
1045
|
decision = policy.call(resolved, scope, self)
|
|
1144
1046
|
case decision
|
|
1145
1047
|
when :reject
|