phronomy 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +77 -0
- data/README.md +19 -15
- data/lib/phronomy/agent/base.rb +109 -379
- data/lib/phronomy/agent/checkpoint.rb +12 -5
- data/lib/phronomy/agent/concerns/before_completion.rb +105 -0
- data/lib/phronomy/agent/concerns/guardrailable.rb +42 -0
- data/lib/phronomy/agent/concerns/retryable.rb +88 -0
- data/lib/phronomy/agent/concerns/suspendable.rb +116 -0
- data/lib/phronomy/agent/react_agent.rb +37 -16
- data/lib/phronomy/agent/team_coordinator.rb +4 -4
- data/lib/phronomy/ruby_llm_patches.rb +15 -11
- data/lib/phronomy/tool/mcp_tool.rb +21 -7
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy.rb +0 -3
- metadata +6 -7
- data/lib/generators/phronomy/install/install_generator.rb +0 -41
- data/lib/generators/phronomy/install/templates/create_phronomy_messages.rb.tt +0 -15
- data/lib/generators/phronomy/install/templates/initializer.rb.tt +0 -18
- data/lib/generators/phronomy/install/templates/message_model.rb.tt +0 -8
- data/lib/phronomy/railtie.rb +0 -39
data/lib/phronomy/agent/base.rb
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "digest"
|
|
4
|
+
require_relative "concerns/retryable"
|
|
5
|
+
require_relative "concerns/guardrailable"
|
|
6
|
+
require_relative "concerns/before_completion"
|
|
7
|
+
require_relative "concerns/suspendable"
|
|
4
8
|
|
|
5
9
|
module Phronomy
|
|
6
10
|
module Agent
|
|
@@ -27,6 +31,10 @@ module Phronomy
|
|
|
27
31
|
# end
|
|
28
32
|
class Base
|
|
29
33
|
include Phronomy::Runnable
|
|
34
|
+
include Concerns::Retryable
|
|
35
|
+
include Concerns::Guardrailable
|
|
36
|
+
include Concerns::BeforeCompletion
|
|
37
|
+
include Concerns::Suspendable
|
|
30
38
|
|
|
31
39
|
class << self
|
|
32
40
|
# Sets or reads the LLM model identifier for this agent.
|
|
@@ -165,35 +173,6 @@ module Phronomy
|
|
|
165
173
|
end
|
|
166
174
|
end
|
|
167
175
|
|
|
168
|
-
# Configures a retry policy that wraps the full #invoke call.
|
|
169
|
-
# GuardrailError is never retried regardless of this setting.
|
|
170
|
-
#
|
|
171
|
-
# @param times [Integer] maximum retry attempts (default: 0)
|
|
172
|
-
# @param wait [Symbol, Numeric] :exponential, :linear, or a fixed Float
|
|
173
|
-
# @param base [Float] base wait time in seconds (default: 1.0)
|
|
174
|
-
#
|
|
175
|
-
# @example
|
|
176
|
-
# class MyAgent < Phronomy::Agent::Base
|
|
177
|
-
# retry_policy times: 2, wait: :exponential, base: 1.0
|
|
178
|
-
# end
|
|
179
|
-
def retry_policy(times: 0, wait: 0, base: 1.0)
|
|
180
|
-
@_retry_policy = {times: times, wait: wait, base: base}
|
|
181
|
-
end
|
|
182
|
-
|
|
183
|
-
# Returns the configured retry policy, or nil when none is set.
|
|
184
|
-
# @return [Hash, nil]
|
|
185
|
-
attr_reader :_retry_policy
|
|
186
|
-
|
|
187
|
-
# Injectable sleep callable for testing (shared with Tool::Base pattern).
|
|
188
|
-
# @return [#call]
|
|
189
|
-
def _sleep_proc
|
|
190
|
-
@_sleep_proc || method(:sleep)
|
|
191
|
-
end
|
|
192
|
-
|
|
193
|
-
# Overrides the sleep callable used between retries.
|
|
194
|
-
# @param proc [#call]
|
|
195
|
-
attr_writer :_sleep_proc
|
|
196
|
-
|
|
197
176
|
# Registers one or more static knowledge sources on the agent class.
|
|
198
177
|
# Static sources are fetched once per agent instance and their content
|
|
199
178
|
# is cached in ContextVersionCache keyed by a fingerprint of the
|
|
@@ -352,37 +331,8 @@ module Phronomy
|
|
|
352
331
|
@context_overhead = val.to_i
|
|
353
332
|
end
|
|
354
333
|
end
|
|
355
|
-
|
|
356
|
-
# Sets or reads the class-level before_completion hook.
|
|
357
|
-
# The hook is called before every LLM request for instances of this class.
|
|
358
|
-
# Receives a {Phronomy::Agent::BeforeCompletionContext}; must return a Hash
|
|
359
|
-
# of params to merge into the LLM call, or nil to pass through unchanged.
|
|
360
|
-
#
|
|
361
|
-
# @param callable [#call, nil] lambda/proc to register, or nil to clear
|
|
362
|
-
# @return [#call, nil]
|
|
363
|
-
# @example
|
|
364
|
-
# class MyAgent < Phronomy::Agent::Base
|
|
365
|
-
# before_completion ->(ctx) { { temperature: 0.2 } }
|
|
366
|
-
# end
|
|
367
|
-
def before_completion(callable = nil)
|
|
368
|
-
if callable.nil? && !block_given?
|
|
369
|
-
@before_completion
|
|
370
|
-
else
|
|
371
|
-
@before_completion = callable
|
|
372
|
-
end
|
|
373
|
-
end
|
|
374
|
-
|
|
375
|
-
# @return [#call, nil]
|
|
376
|
-
def _before_completion
|
|
377
|
-
@before_completion
|
|
378
|
-
end
|
|
379
334
|
end
|
|
380
335
|
|
|
381
|
-
# Instance-level before_completion hook. When set, takes precedence over
|
|
382
|
-
# the class-level hook for this specific agent instance only.
|
|
383
|
-
# @return [#call, nil]
|
|
384
|
-
attr_accessor :before_completion
|
|
385
|
-
|
|
386
336
|
# Registers an anonymous handoff tool class on this agent instance.
|
|
387
337
|
# Called by Runner during construction when routes are configured.
|
|
388
338
|
# @param tool_class [Class<Phronomy::Tool::Base>]
|
|
@@ -403,14 +353,18 @@ module Phronomy
|
|
|
403
353
|
# Applies the retry policy configured via {.retry_policy} when transient
|
|
404
354
|
# errors occur. {Phronomy::GuardrailError} is never retried.
|
|
405
355
|
#
|
|
406
|
-
# @param input
|
|
356
|
+
# @param input [String, Hash] the user message; a Hash may supply
|
|
407
357
|
# +:message+, +:query+, or +:user+ as the text key, plus any template
|
|
408
358
|
# variables consumed by the configured instructions template.
|
|
409
|
-
# @param
|
|
410
|
-
#
|
|
411
|
-
#
|
|
412
|
-
#
|
|
413
|
-
#
|
|
359
|
+
# @param messages [Array<RubyLLM::Message>] conversation history from a
|
|
360
|
+
# previous invocation. The application owns and persists this array;
|
|
361
|
+
# pass it on every turn to maintain multi-turn context.
|
|
362
|
+
# @param thread_id [String, nil] conversation thread identifier, forwarded
|
|
363
|
+
# to the compaction context when on_compact is configured.
|
|
364
|
+
# @param config [Hash] additional runtime options:
|
|
365
|
+
# +:knowledge_sources+ (Array) — dynamic knowledge sources for this turn
|
|
366
|
+
# +:user_id+ (+String+, optional) — caller identity forwarded to the tracer
|
|
367
|
+
# +:session_id+ (+String+, optional) — session identity forwarded to the tracer
|
|
414
368
|
# @return [Hash] +{ output: String, messages: Array, usage: Phronomy::TokenUsage }+,
|
|
415
369
|
# or +{ output: nil, suspended: true, checkpoint: Phronomy::Agent::Checkpoint,
|
|
416
370
|
# messages: Array }+ when the invocation was suspended awaiting tool approval.
|
|
@@ -418,14 +372,17 @@ module Phronomy
|
|
|
418
372
|
# @example Normal invocation
|
|
419
373
|
# result = MyAgent.new.invoke("What is Ruby?")
|
|
420
374
|
# puts result[:output]
|
|
375
|
+
# @example Multi-turn conversation
|
|
376
|
+
# result1 = agent.invoke("Hi, I'm Alice.")
|
|
377
|
+
# result2 = agent.invoke("What's my name?", messages: result1[:messages])
|
|
421
378
|
# @example Suspend / resume flow
|
|
422
379
|
# result = agent.invoke("Perform task X")
|
|
423
380
|
# if result[:suspended]
|
|
424
381
|
# result = agent.resume(result[:checkpoint], approved: true)
|
|
425
382
|
# end
|
|
426
383
|
# puts result[:output]
|
|
427
|
-
def invoke(input, config: {})
|
|
428
|
-
_invoke_impl(input, config: config)
|
|
384
|
+
def invoke(input, messages: [], thread_id: nil, config: {})
|
|
385
|
+
_invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
|
|
429
386
|
end
|
|
430
387
|
|
|
431
388
|
# Streaming version of #invoke. Yields {Phronomy::Agent::StreamEvent} objects
|
|
@@ -438,104 +395,21 @@ module Phronomy
|
|
|
438
395
|
# :done — final event carrying output, messages, and usage
|
|
439
396
|
# :error — if an unrecoverable error occurs
|
|
440
397
|
#
|
|
441
|
-
# @param input
|
|
442
|
-
# @param
|
|
398
|
+
# @param input [String, Hash] same as #invoke
|
|
399
|
+
# @param messages [Array<RubyLLM::Message>] same as #invoke
|
|
400
|
+
# @param thread_id [String, nil] same as #invoke
|
|
401
|
+
# @param config [Hash] same as #invoke
|
|
443
402
|
# @yield [Phronomy::Agent::StreamEvent]
|
|
444
403
|
# @return [Hash] { output:, messages:, usage: } — same as #invoke
|
|
445
|
-
def stream(input, config: {}, &block)
|
|
446
|
-
return invoke(input, config: config) unless block
|
|
404
|
+
def stream(input, messages: [], thread_id: nil, config: {}, &block)
|
|
405
|
+
return invoke(input, messages: messages, thread_id: thread_id, config: config) unless block
|
|
447
406
|
|
|
448
|
-
_stream_impl(input, config: config, &block)
|
|
407
|
+
_stream_impl(input, messages: messages, thread_id: thread_id, config: config, &block)
|
|
449
408
|
rescue => e
|
|
450
409
|
block&.call(StreamEvent.new(type: :error, payload: {error: e}))
|
|
451
410
|
raise
|
|
452
411
|
end
|
|
453
412
|
|
|
454
|
-
# Resumes a previously suspended invocation from a {Phronomy::Agent::Checkpoint}.
|
|
455
|
-
#
|
|
456
|
-
# This method reconstructs the conversation state captured at suspension
|
|
457
|
-
# time, injects the tool result (executed or denied), and continues the
|
|
458
|
-
# LLM loop until it produces a final answer.
|
|
459
|
-
#
|
|
460
|
-
# @param checkpoint [Phronomy::Agent::Checkpoint] the checkpoint returned by
|
|
461
|
-
# the suspended #invoke call
|
|
462
|
-
# @param approved [Boolean] +true+ to execute the pending tool; +false+
|
|
463
|
-
# to inject a denial message and let the LLM handle it gracefully
|
|
464
|
-
# @param config [Hash] same runtime options as #invoke
|
|
465
|
-
# @return [Hash] +{ output: String, suspended: false, messages: Array, usage: Phronomy::TokenUsage }+
|
|
466
|
-
# @raise [Phronomy::GuardrailError] when an output guardrail rejects the value
|
|
467
|
-
def resume(checkpoint, approved:, config: {})
|
|
468
|
-
checkpoint.thread_id
|
|
469
|
-
|
|
470
|
-
# Build a fresh chat with all tools registered.
|
|
471
|
-
chat = build_chat
|
|
472
|
-
|
|
473
|
-
# Restore the full conversation (system + history + user + assistant).
|
|
474
|
-
checkpoint.messages.each { |msg| chat.messages << msg }
|
|
475
|
-
|
|
476
|
-
# Determine the tool result: execute it or inject a denial string.
|
|
477
|
-
tool_result =
|
|
478
|
-
if approved
|
|
479
|
-
tool_instance = chat.tools[checkpoint.pending_tool_name.to_sym]
|
|
480
|
-
tool_instance ? tool_instance.call(checkpoint.pending_tool_args) : "Tool not found."
|
|
481
|
-
else
|
|
482
|
-
"Tool execution denied."
|
|
483
|
-
end
|
|
484
|
-
|
|
485
|
-
# Inject the tool result so the LLM can continue.
|
|
486
|
-
chat.add_message(
|
|
487
|
-
role: :tool,
|
|
488
|
-
content: tool_result.to_s,
|
|
489
|
-
tool_call_id: checkpoint.pending_tool_call_id
|
|
490
|
-
)
|
|
491
|
-
|
|
492
|
-
# Continue the React loop.
|
|
493
|
-
response = chat.complete
|
|
494
|
-
|
|
495
|
-
output = response.content
|
|
496
|
-
usage = Phronomy::TokenUsage.from_tokens(response.tokens)
|
|
497
|
-
|
|
498
|
-
run_output_guardrails!(output)
|
|
499
|
-
|
|
500
|
-
{output: output, suspended: false, messages: chat.messages, usage: usage}
|
|
501
|
-
end
|
|
502
|
-
|
|
503
|
-
# Registers a callback that is invoked before executing any tool that has
|
|
504
|
-
# +requires_approval true+ set. The block receives the tool name (String)
|
|
505
|
-
# and the arguments Hash, and must return a truthy value to allow execution.
|
|
506
|
-
# Returning a falsy value causes the tool to return a denial message instead
|
|
507
|
-
# of executing.
|
|
508
|
-
#
|
|
509
|
-
# When no handler is registered and a tool with +requires_approval+ is
|
|
510
|
-
# called, #invoke returns a suspended result hash containing a
|
|
511
|
-
# {Phronomy::Agent::Checkpoint}. Call #resume to continue execution after
|
|
512
|
-
# obtaining an approval decision from the user or an external system.
|
|
513
|
-
#
|
|
514
|
-
# @example Synchronous handler
|
|
515
|
-
# agent = MyAgent.new
|
|
516
|
-
# agent.on_approval_required { |tool_name, args| prompt_user(tool_name, args) }
|
|
517
|
-
# @return [self]
|
|
518
|
-
def on_approval_required(&block)
|
|
519
|
-
@approval_handler = block
|
|
520
|
-
self
|
|
521
|
-
end
|
|
522
|
-
|
|
523
|
-
# Attach a guardrail that validates input before every #invoke call.
|
|
524
|
-
# @param guardrail [Phronomy::Guardrail::InputGuardrail]
|
|
525
|
-
def add_input_guardrail(guardrail)
|
|
526
|
-
@input_guardrails ||= []
|
|
527
|
-
@input_guardrails << guardrail
|
|
528
|
-
self
|
|
529
|
-
end
|
|
530
|
-
|
|
531
|
-
# Attach a guardrail that validates output before it is returned.
|
|
532
|
-
# @param guardrail [Phronomy::Guardrail::OutputGuardrail]
|
|
533
|
-
def add_output_guardrail(guardrail)
|
|
534
|
-
@output_guardrails ||= []
|
|
535
|
-
@output_guardrails << guardrail
|
|
536
|
-
self
|
|
537
|
-
end
|
|
538
|
-
|
|
539
413
|
# Returns the {Context::ContextVersionCache} for the current thread.
|
|
540
414
|
# @api private
|
|
541
415
|
def context_version_cache
|
|
@@ -544,27 +418,8 @@ module Phronomy
|
|
|
544
418
|
|
|
545
419
|
private
|
|
546
420
|
|
|
547
|
-
# Retry loop for #invoke. Separated so that ReactAgent can override #invoke_once.
|
|
548
|
-
def _invoke_impl(input, config: {})
|
|
549
|
-
policy = self.class._retry_policy
|
|
550
|
-
attempt = 0
|
|
551
|
-
begin
|
|
552
|
-
invoke_once(input, config: config)
|
|
553
|
-
rescue Phronomy::GuardrailError
|
|
554
|
-
raise
|
|
555
|
-
rescue
|
|
556
|
-
if policy && attempt < policy[:times]
|
|
557
|
-
wait = compute_agent_retry_wait(policy[:wait], policy[:base], attempt)
|
|
558
|
-
self.class._sleep_proc.call(wait) if wait > 0
|
|
559
|
-
attempt += 1
|
|
560
|
-
retry
|
|
561
|
-
end
|
|
562
|
-
raise
|
|
563
|
-
end
|
|
564
|
-
end
|
|
565
|
-
|
|
566
421
|
# Streaming implementation for #stream.
|
|
567
|
-
def _stream_impl(input, config: {}, &block)
|
|
422
|
+
def _stream_impl(input, messages: [], thread_id: nil, config: {}, &block)
|
|
568
423
|
caller_meta = {}
|
|
569
424
|
caller_meta[:user_id] = config[:user_id] if config[:user_id]
|
|
570
425
|
caller_meta[:session_id] = config[:session_id] if config[:session_id]
|
|
@@ -572,54 +427,12 @@ module Phronomy
|
|
|
572
427
|
trace("agent.invoke", input: input, **caller_meta) do |_span|
|
|
573
428
|
run_input_guardrails!(input)
|
|
574
429
|
|
|
575
|
-
thread_id = config[:thread_id]
|
|
576
|
-
|
|
577
430
|
chat = build_chat
|
|
578
431
|
user_message = extract_message(input)
|
|
579
|
-
budget = build_token_budget
|
|
580
|
-
|
|
581
|
-
# Assemble context via Assembler (same as invoke_once).
|
|
582
|
-
assembler = Context::Assembler.new(budget: budget)
|
|
583
|
-
system_msg = build_instructions(input)
|
|
584
|
-
assembler.add_instruction(system_msg) if system_msg
|
|
585
|
-
|
|
586
|
-
Array(config[:knowledge_sources]).each do |ks|
|
|
587
|
-
ks.fetch(query: user_message).each do |chunk|
|
|
588
|
-
assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
|
|
589
|
-
end
|
|
590
|
-
end
|
|
591
|
-
|
|
592
|
-
msgs = Array(config[:messages])
|
|
593
|
-
unless msgs.empty?
|
|
594
|
-
message_elements = build_message_elements(msgs)
|
|
595
|
-
|
|
596
|
-
# Run on_trim: app may call ctx.remove(seqs) to drop messages this turn.
|
|
597
|
-
if (trim_cb = self.class._on_trim_callback)
|
|
598
|
-
trim_ctx = Context::TrimContext.new(message_elements: message_elements, budget: budget)
|
|
599
|
-
trim_cb.call(trim_ctx)
|
|
600
|
-
message_elements = trim_ctx.message_elements
|
|
601
|
-
end
|
|
602
|
-
|
|
603
|
-
# Run on_compaction_trigger → on_compact pipeline before calling the LLM.
|
|
604
|
-
if (trigger_cb = self.class._on_compaction_trigger_callback)
|
|
605
|
-
trigger_ctx = Context::TriggerContext.new(message_elements: message_elements, budget: budget)
|
|
606
|
-
if trigger_cb.call(trigger_ctx)
|
|
607
|
-
if (compact_cb = self.class._on_compact_callback)
|
|
608
|
-
compact_ctx = Context::CompactionContext.new(
|
|
609
|
-
message_elements: message_elements,
|
|
610
|
-
budget: budget,
|
|
611
|
-
thread_id: thread_id
|
|
612
|
-
)
|
|
613
|
-
compact_cb.call(compact_ctx)
|
|
614
|
-
message_elements = build_message_elements(compact_ctx.result_messages)
|
|
615
|
-
end
|
|
616
|
-
end
|
|
617
|
-
end
|
|
618
|
-
|
|
619
|
-
assembler.add_messages(message_elements.map { |e| e[:message] })
|
|
620
|
-
end
|
|
621
432
|
|
|
622
|
-
context = assembler.build
|
|
433
|
+
# Assemble context (system prompt + history). Override #build_context to
|
|
434
|
+
# inject custom context editing logic at the Agent subclass level.
|
|
435
|
+
context = build_context(input, messages: messages, thread_id: thread_id, config: config)
|
|
623
436
|
apply_instructions(chat, context[:system]) if context[:system]
|
|
624
437
|
context[:messages].each { |msg| chat.messages << msg }
|
|
625
438
|
|
|
@@ -655,9 +468,79 @@ module Phronomy
|
|
|
655
468
|
end
|
|
656
469
|
end
|
|
657
470
|
|
|
471
|
+
# Assembles the LLM context (system prompt + conversation messages)
|
|
472
|
+
# for a single invocation. Subclasses may override this method to
|
|
473
|
+
# inject custom context editing logic without having to override
|
|
474
|
+
# the full #invoke_once pipeline.
|
|
475
|
+
#
|
|
476
|
+
# @param input [String, Hash] the user's input for this turn
|
|
477
|
+
# @param messages [Array<RubyLLM::Message>] raw conversation history
|
|
478
|
+
# @param thread_id [String, nil] conversation thread identifier
|
|
479
|
+
# @param config [Hash] the invocation config (see #invoke)
|
|
480
|
+
# @return [Hash] { system: String|nil, messages: Array }
|
|
481
|
+
def build_context(input, messages: [], thread_id: nil, config: {})
|
|
482
|
+
history = prepare_history(messages: messages, thread_id: thread_id, config: config)
|
|
483
|
+
budget = build_token_budget
|
|
484
|
+
system_text = build_cached_system_text(input)
|
|
485
|
+
user_message = extract_message(input)
|
|
486
|
+
|
|
487
|
+
assembler = Context::Assembler.new(budget: budget)
|
|
488
|
+
assembler.add_instruction(system_text) if system_text
|
|
489
|
+
|
|
490
|
+
Array(config[:knowledge_sources]).each do |ks|
|
|
491
|
+
ks.fetch(query: user_message).each do |chunk|
|
|
492
|
+
assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
|
|
493
|
+
end
|
|
494
|
+
end
|
|
495
|
+
|
|
496
|
+
assembler.add_messages(history)
|
|
497
|
+
assembler.build
|
|
498
|
+
end
|
|
499
|
+
protected :build_context
|
|
500
|
+
|
|
501
|
+
# Runs the on_trim / on_compaction_trigger / on_compact pipeline on the
|
|
502
|
+
# supplied message array and returns the final Array of message objects
|
|
503
|
+
# ready to pass to the Assembler.
|
|
504
|
+
#
|
|
505
|
+
# Override this method in a subclass to customize how conversation
|
|
506
|
+
# history is filtered or compressed before context assembly.
|
|
507
|
+
#
|
|
508
|
+
# @param messages [Array<RubyLLM::Message>] raw conversation history
|
|
509
|
+
# @param thread_id [String, nil] conversation thread identifier
|
|
510
|
+
# @param config [Hash] additional invocation options
|
|
511
|
+
# @return [Array] filtered and/or compacted message objects
|
|
512
|
+
def prepare_history(messages: [], thread_id: nil, config: {})
|
|
513
|
+
budget = build_token_budget
|
|
514
|
+
elements = build_message_elements(Array(messages))
|
|
515
|
+
|
|
516
|
+
if (trim_cb = self.class._on_trim_callback)
|
|
517
|
+
trim_ctx = Context::TrimContext.new(message_elements: elements, budget: budget)
|
|
518
|
+
trim_cb.call(trim_ctx)
|
|
519
|
+
elements = trim_ctx.message_elements
|
|
520
|
+
end
|
|
521
|
+
|
|
522
|
+
if (trigger_cb = self.class._on_compaction_trigger_callback)
|
|
523
|
+
trigger_ctx = Context::TriggerContext.new(message_elements: elements, budget: budget)
|
|
524
|
+
if trigger_cb.call(trigger_ctx)
|
|
525
|
+
if (compact_cb = self.class._on_compact_callback)
|
|
526
|
+
compact_ctx = Context::CompactionContext.new(
|
|
527
|
+
message_elements: elements,
|
|
528
|
+
budget: budget,
|
|
529
|
+
thread_id: thread_id
|
|
530
|
+
)
|
|
531
|
+
compact_cb.call(compact_ctx)
|
|
532
|
+
elements = build_message_elements(compact_ctx.result_messages)
|
|
533
|
+
end
|
|
534
|
+
end
|
|
535
|
+
end
|
|
536
|
+
|
|
537
|
+
elements.map { |e| e[:message] }
|
|
538
|
+
end
|
|
539
|
+
protected :prepare_history
|
|
540
|
+
|
|
658
541
|
# Performs a single (non-retried) invocation. Extracted so that #invoke can
|
|
659
542
|
# wrap it in a retry loop without duplicating the LLM interaction logic.
|
|
660
|
-
def invoke_once(input, config: {})
|
|
543
|
+
def invoke_once(input, messages: [], thread_id: nil, config: {})
|
|
661
544
|
caller_meta = {}
|
|
662
545
|
caller_meta[:user_id] = config[:user_id] if config[:user_id]
|
|
663
546
|
caller_meta[:session_id] = config[:session_id] if config[:session_id]
|
|
@@ -666,62 +549,12 @@ module Phronomy
|
|
|
666
549
|
# Run input guardrails before touching the LLM.
|
|
667
550
|
run_input_guardrails!(input)
|
|
668
551
|
|
|
669
|
-
thread_id = config[:thread_id]
|
|
670
552
|
user_message = extract_message(input)
|
|
671
553
|
chat = build_chat
|
|
672
|
-
budget = build_token_budget
|
|
673
|
-
|
|
674
|
-
# Load conversation history from config[:messages] (app-managed).
|
|
675
|
-
raw_messages = Array(config[:messages])
|
|
676
|
-
|
|
677
|
-
# Assign synthetic 0-based seq numbers for use by trim/compaction callbacks.
|
|
678
|
-
message_elements = build_message_elements(raw_messages)
|
|
679
|
-
|
|
680
|
-
# Run on_trim: app may call ctx.remove(seqs) to drop messages this turn.
|
|
681
|
-
if (trim_cb = self.class._on_trim_callback)
|
|
682
|
-
trim_ctx = Context::TrimContext.new(message_elements: message_elements, budget: budget)
|
|
683
|
-
trim_cb.call(trim_ctx)
|
|
684
|
-
message_elements = trim_ctx.message_elements
|
|
685
|
-
end
|
|
686
|
-
|
|
687
|
-
# Run on_compaction_trigger → on_compact pipeline before calling the LLM.
|
|
688
|
-
if (trigger_cb = self.class._on_compaction_trigger_callback)
|
|
689
|
-
trigger_ctx = Context::TriggerContext.new(
|
|
690
|
-
message_elements: message_elements, budget: budget
|
|
691
|
-
)
|
|
692
|
-
if trigger_cb.call(trigger_ctx)
|
|
693
|
-
if (compact_cb = self.class._on_compact_callback)
|
|
694
|
-
compact_ctx = Context::CompactionContext.new(
|
|
695
|
-
message_elements: message_elements,
|
|
696
|
-
budget: budget,
|
|
697
|
-
thread_id: thread_id
|
|
698
|
-
)
|
|
699
|
-
compact_cb.call(compact_ctx)
|
|
700
|
-
message_elements = build_message_elements(compact_ctx.result_messages)
|
|
701
|
-
end
|
|
702
|
-
end
|
|
703
|
-
end
|
|
704
|
-
|
|
705
|
-
# Build the system prompt via the fingerprint-keyed ContextVersionCache.
|
|
706
|
-
# Static knowledge is fetched and concatenated once; the result is reused
|
|
707
|
-
# on subsequent calls as long as the fingerprint remains valid.
|
|
708
|
-
system_text = build_cached_system_text(input)
|
|
709
|
-
|
|
710
|
-
# Assemble context regions 1 (Instruction+Static Knowledge) + 3 (Dynamic Knowledge)
|
|
711
|
-
# + 4 (Conversation).
|
|
712
|
-
assembler = Context::Assembler.new(budget: budget)
|
|
713
|
-
assembler.add_instruction(system_text) if system_text
|
|
714
|
-
|
|
715
|
-
# Dynamic knowledge from config[:knowledge_sources] (backward compatible).
|
|
716
|
-
Array(config[:knowledge_sources]).each do |ks|
|
|
717
|
-
ks.fetch(query: user_message).each do |chunk|
|
|
718
|
-
assembler.add_knowledge(chunk[:content], type: chunk[:type], source: chunk[:source])
|
|
719
|
-
end
|
|
720
|
-
end
|
|
721
554
|
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
context = assembler.build
|
|
555
|
+
# Assemble context (system prompt + history). Override #build_context to
|
|
556
|
+
# inject custom context editing logic at the Agent subclass level.
|
|
557
|
+
context = build_context(input, messages: messages, thread_id: thread_id, config: config)
|
|
725
558
|
apply_instructions(chat, context[:system]) if context[:system]
|
|
726
559
|
context[:messages].each { |msg| chat.messages << msg }
|
|
727
560
|
|
|
@@ -737,6 +570,7 @@ module Phronomy
|
|
|
737
570
|
rescue SuspendSignal => signal
|
|
738
571
|
checkpoint = Checkpoint.new(
|
|
739
572
|
thread_id: thread_id,
|
|
573
|
+
original_input: input,
|
|
740
574
|
messages: chat.messages.dup,
|
|
741
575
|
pending_tool_name: signal.tool_name,
|
|
742
576
|
pending_tool_args: signal.args,
|
|
@@ -757,77 +591,6 @@ module Phronomy
|
|
|
757
591
|
end
|
|
758
592
|
end
|
|
759
593
|
|
|
760
|
-
# Computes the agent-level retry wait duration.
|
|
761
|
-
# @param strategy [Symbol, Numeric]
|
|
762
|
-
# @param base [Float]
|
|
763
|
-
# @param attempt [Integer]
|
|
764
|
-
# @return [Float]
|
|
765
|
-
def compute_agent_retry_wait(strategy, base, attempt)
|
|
766
|
-
case strategy
|
|
767
|
-
when :exponential
|
|
768
|
-
(2**attempt) * base
|
|
769
|
-
when :linear
|
|
770
|
-
(attempt + 1) * base
|
|
771
|
-
when Numeric
|
|
772
|
-
strategy.to_f
|
|
773
|
-
else
|
|
774
|
-
base.to_f
|
|
775
|
-
end
|
|
776
|
-
end
|
|
777
|
-
|
|
778
|
-
# Collects and runs all registered before_completion hooks in order
|
|
779
|
-
# (global → class → instance) and applies the merged params to the chat.
|
|
780
|
-
#
|
|
781
|
-
# @param chat [RubyLLM::Chat] the assembled chat object
|
|
782
|
-
# @param config [Hash] the invocation config hash
|
|
783
|
-
# @return [Hash] the merged params applied to the chat
|
|
784
|
-
def run_before_completion_hooks!(chat, config)
|
|
785
|
-
hooks = [
|
|
786
|
-
Phronomy.configuration.before_completion,
|
|
787
|
-
self.class._before_completion,
|
|
788
|
-
@before_completion
|
|
789
|
-
].compact
|
|
790
|
-
|
|
791
|
-
return {} if hooks.empty?
|
|
792
|
-
|
|
793
|
-
ctx = BeforeCompletionContext.new(
|
|
794
|
-
agent: self,
|
|
795
|
-
messages: chat.messages,
|
|
796
|
-
config: config,
|
|
797
|
-
params: {}
|
|
798
|
-
)
|
|
799
|
-
|
|
800
|
-
merged = {}
|
|
801
|
-
hooks.each do |hook|
|
|
802
|
-
result = hook.call(ctx)
|
|
803
|
-
merged.merge!(result) if result.is_a?(Hash)
|
|
804
|
-
end
|
|
805
|
-
|
|
806
|
-
apply_before_completion_params!(chat, merged)
|
|
807
|
-
merged
|
|
808
|
-
end
|
|
809
|
-
|
|
810
|
-
# Applies a merged param hash returned by before_completion hooks to
|
|
811
|
-
# the chat object using the appropriate RubyLLM::Chat API methods.
|
|
812
|
-
# When overriding the model, reuses the agent's configured provider and
|
|
813
|
-
# assume_exists setting so that local/namespaced models continue to work.
|
|
814
|
-
#
|
|
815
|
-
# @param chat [RubyLLM::Chat]
|
|
816
|
-
# @param params [Hash]
|
|
817
|
-
def apply_before_completion_params!(chat, params)
|
|
818
|
-
params.each do |key, value|
|
|
819
|
-
case key
|
|
820
|
-
when :model
|
|
821
|
-
prov = self.class.provider
|
|
822
|
-
chat.with_model(value, provider: prov, assume_exists: !prov.nil?)
|
|
823
|
-
when :temperature
|
|
824
|
-
chat.with_temperature(value)
|
|
825
|
-
else
|
|
826
|
-
chat.with_params(key => value)
|
|
827
|
-
end
|
|
828
|
-
end
|
|
829
|
-
end
|
|
830
|
-
|
|
831
594
|
# Builds a TokenBudget for this agent's model if possible.
|
|
832
595
|
# When context_window is set at the class level, that value is used directly
|
|
833
596
|
# (bypassing the RubyLLM catalogue) — useful for locally-hosted models where
|
|
@@ -962,39 +725,6 @@ module Phronomy
|
|
|
962
725
|
end
|
|
963
726
|
end
|
|
964
727
|
|
|
965
|
-
def run_input_guardrails!(input)
|
|
966
|
-
(@input_guardrails || []).each { |g| g.run!(input) }
|
|
967
|
-
end
|
|
968
|
-
|
|
969
|
-
def run_output_guardrails!(output)
|
|
970
|
-
(@output_guardrails || []).each { |g| g.run!(output) }
|
|
971
|
-
end
|
|
972
|
-
|
|
973
|
-
# Registers an on_tool_call hook on the chat object that raises SuspendSignal
|
|
974
|
-
# when an approval-required tool is about to be executed and no synchronous
|
|
975
|
-
# on_approval_required handler has been registered.
|
|
976
|
-
#
|
|
977
|
-
# Does nothing when:
|
|
978
|
-
# - a synchronous handler is already registered (@approval_handler is set), or
|
|
979
|
-
# - none of the agent's tools have requires_approval set.
|
|
980
|
-
#
|
|
981
|
-
# @param chat [RubyLLM::Chat]
|
|
982
|
-
def _register_suspension_hook!(chat)
|
|
983
|
-
return if @approval_handler
|
|
984
|
-
return if self.class.tools.none? { |tc| tc.requires_approval }
|
|
985
|
-
|
|
986
|
-
chat.on_tool_call do |tool_call|
|
|
987
|
-
tool_instance = chat.tools[tool_call.name.to_sym]
|
|
988
|
-
if tool_instance&.requires_approval
|
|
989
|
-
raise SuspendSignal.new(
|
|
990
|
-
tool_name: tool_call.name,
|
|
991
|
-
args: tool_call.arguments,
|
|
992
|
-
tool_call_id: tool_call.id
|
|
993
|
-
)
|
|
994
|
-
end
|
|
995
|
-
end
|
|
996
|
-
end
|
|
997
|
-
|
|
998
728
|
# Builds the final tool class to register with the chat.
|
|
999
729
|
#
|
|
1000
730
|
# Two transformations are applied in order:
|
|
data/lib/phronomy/agent/checkpoint.rb
CHANGED
|
@@ -22,6 +22,11 @@ module Phronomy
|
|
|
22
22
|
# @return [String, nil] the thread_id from the invocation config
|
|
23
23
|
attr_reader :thread_id
|
|
24
24
|
|
|
25
|
+
# @return [String, Hash] the original input passed to #invoke; stored so
|
|
26
|
+
# that #resume can re-apply dynamic system instructions (e.g. Proc or
|
|
27
|
+
# PromptTemplate-based instructions that depend on the input value).
|
|
28
|
+
attr_reader :original_input
|
|
29
|
+
|
|
25
30
|
# @return [Array<RubyLLM::Message>] conversation messages up to and including
|
|
26
31
|
# the assistant message that requested the pending tool call
|
|
27
32
|
attr_reader :messages
|
|
@@ -36,13 +41,15 @@ module Phronomy
|
|
|
36
41
|
# inject the tool result message on resume)
|
|
37
42
|
attr_reader :pending_tool_call_id
|
|
38
43
|
|
|
39
|
-
# @param thread_id
|
|
40
|
-
# @param
|
|
41
|
-
# @param
|
|
42
|
-
# @param
|
|
44
|
+
# @param thread_id [String, nil]
|
|
45
|
+
# @param original_input [String, Hash] the input passed to the original #invoke call
|
|
46
|
+
# @param messages [Array<RubyLLM::Message>]
|
|
47
|
+
# @param pending_tool_name [String]
|
|
48
|
+
# @param pending_tool_args [Hash]
|
|
43
49
|
# @param pending_tool_call_id [String]
|
|
44
|
-
def initialize(thread_id:, messages:, pending_tool_name:, pending_tool_args:, pending_tool_call_id:)
|
|
50
|
+
def initialize(thread_id:, original_input:, messages:, pending_tool_name:, pending_tool_args:, pending_tool_call_id:)
|
|
45
51
|
@thread_id = thread_id
|
|
52
|
+
@original_input = original_input
|
|
46
53
|
@messages = messages.dup.freeze
|
|
47
54
|
@pending_tool_name = pending_tool_name
|
|
48
55
|
@pending_tool_args = pending_tool_args
|