phronomy 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -4
- data/README.md +32 -41
- data/benchmark/baseline.json +1 -1
- data/benchmark/bench_agent_invoke.rb +1 -1
- data/benchmark/bench_context_assembler.rb +9 -1
- data/benchmark/bench_regression.rb +8 -8
- data/benchmark/bench_tool_schema.rb +2 -2
- data/benchmark/bench_vector_store.rb +1 -1
- data/docs/decisions/011-build-context-as-single-llm-input-authority.md +224 -0
- data/lib/phronomy/agent/base.rb +328 -366
- data/lib/phronomy/agent/checkpoint.rb +30 -1
- data/lib/phronomy/agent/checkpoint_store.rb +97 -0
- data/lib/phronomy/agent/concerns/retryable.rb +1 -1
- data/lib/phronomy/agent/concerns/suspendable.rb +63 -8
- data/lib/phronomy/agent/context/capability/base.rb +689 -0
- data/lib/phronomy/agent/context/capability/scope_policy.rb +54 -0
- data/lib/phronomy/agent/context/knowledge/base.rb +58 -0
- data/lib/phronomy/agent/context/knowledge/entity_knowledge.rb +102 -0
- data/lib/phronomy/agent/context/knowledge/static_knowledge.rb +58 -0
- data/lib/phronomy/agent/shared_state.rb +2 -2
- data/lib/phronomy/agent/tool_executor.rb +1 -1
- data/lib/phronomy/concurrency/gate_registry.rb +0 -1
- data/lib/phronomy/configuration.rb +13 -6
- data/lib/phronomy/event_loop.rb +1 -18
- data/lib/phronomy/llm_context_window/assembler.rb +77 -44
- data/lib/phronomy/multi_agent/handoff.rb +4 -4
- data/lib/phronomy/multi_agent/orchestrator.rb +1 -1
- data/lib/phronomy/multi_agent/team_coordinator.rb +2 -2
- data/lib/phronomy/runtime/runtime_metrics.rb +0 -1
- data/lib/phronomy/runtime.rb +1 -2
- data/lib/phronomy/tool.rb +3 -4
- data/lib/phronomy/{tool/agent_tool.rb → tools/agent.rb} +8 -9
- data/lib/phronomy/{tool/mcp_tool.rb → tools/mcp.rb} +9 -9
- data/lib/phronomy/tools/vector_search.rb +70 -0
- data/lib/phronomy/vector_store/async_backend.rb +110 -0
- data/lib/phronomy/vector_store/base.rb +89 -0
- data/lib/phronomy/vector_store/embeddings/base.rb +41 -0
- data/lib/phronomy/vector_store/embeddings/ruby_llm_embeddings.rb +47 -0
- data/lib/phronomy/vector_store/in_memory.rb +103 -0
- data/lib/phronomy/vector_store/loader/base.rb +27 -0
- data/lib/phronomy/vector_store/loader/csv_loader.rb +58 -0
- data/lib/phronomy/vector_store/loader/markdown_loader.rb +78 -0
- data/lib/phronomy/vector_store/loader/plain_text_loader.rb +24 -0
- data/lib/phronomy/vector_store/pgvector.rb +127 -0
- data/lib/phronomy/vector_store/redis_search.rb +192 -0
- data/lib/phronomy/vector_store/splitter/base.rb +49 -0
- data/lib/phronomy/vector_store/splitter/fixed_size_splitter.rb +53 -0
- data/lib/phronomy/vector_store/splitter/recursive_splitter.rb +107 -0
- data/lib/phronomy/vector_store.rb +16 -4
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow/fsm_session.rb +249 -0
- data/lib/phronomy/workflow/phase_machine_builder.rb +247 -0
- data/lib/phronomy/workflow_runner.rb +2 -2
- data/lib/phronomy.rb +10 -3
- data/scripts/api_snapshot.rb +11 -10
- metadata +31 -37
- data/lib/phronomy/agent/context/conversation/compaction_context.rb +0 -117
- data/lib/phronomy/agent/context/conversation/trigger_context.rb +0 -43
- data/lib/phronomy/agent/context/conversation/trim_context.rb +0 -82
- data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +0 -45
- data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +0 -51
- data/lib/phronomy/agent/context/knowledge/loader/base.rb +0 -31
- data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +0 -62
- data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +0 -82
- data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +0 -28
- data/lib/phronomy/agent/context/knowledge/source/base.rb +0 -60
- data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +0 -102
- data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +0 -63
- data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +0 -58
- data/lib/phronomy/agent/context/knowledge/splitter/base.rb +0 -53
- data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +0 -57
- data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +0 -111
- data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +0 -116
- data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +0 -95
- data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +0 -109
- data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +0 -133
- data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +0 -198
- data/lib/phronomy/agent/fsm.rb +0 -157
- data/lib/phronomy/agent/invocation_pipeline.rb +0 -99
- data/lib/phronomy/agent/lifecycle/fsm_session.rb +0 -251
- data/lib/phronomy/agent/lifecycle/phase_machine_builder.rb +0 -249
- data/lib/phronomy/agent/react_agent.rb +0 -204
- data/lib/phronomy/embeddings.rb +0 -11
- data/lib/phronomy/loader.rb +0 -13
- data/lib/phronomy/splitter.rb +0 -12
- data/lib/phronomy/tool/base.rb +0 -685
- data/lib/phronomy/tool/scope_policy.rb +0 -50
data/lib/phronomy/agent/base.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "digest"
|
|
4
3
|
require "securerandom"
|
|
4
|
+
require_relative "checkpoint_store"
|
|
5
5
|
require_relative "concerns/retryable"
|
|
6
6
|
require_relative "concerns/guardrailable"
|
|
7
7
|
require_relative "concerns/before_completion"
|
|
@@ -255,10 +255,10 @@ module Phronomy
|
|
|
255
255
|
# the first time +invoke+ is called. The cache persists for the lifetime
|
|
256
256
|
# of the process; call {.static_knowledge_refresh!} to force a reload.
|
|
257
257
|
#
|
|
258
|
-
# @param sources [Array<Phronomy::Agent::Context::Knowledge::
|
|
258
|
+
# @param sources [Array<Phronomy::Agent::Context::Knowledge::Base>]
|
|
259
259
|
# @example
|
|
260
260
|
# class PolicyAgent < Phronomy::Agent::Base
|
|
261
|
-
# static_knowledge Phronomy::Agent::Context::Knowledge::
|
|
261
|
+
# static_knowledge Phronomy::Agent::Context::Knowledge::StaticKnowledge.new(POLICY_TEXT)
|
|
262
262
|
# end
|
|
263
263
|
# @api public
|
|
264
264
|
def static_knowledge(*sources)
|
|
@@ -269,7 +269,7 @@ module Phronomy
|
|
|
269
269
|
end
|
|
270
270
|
|
|
271
271
|
# Returns the registered static knowledge sources.
|
|
272
|
-
# @return [Array<Phronomy::Agent::Context::Knowledge::
|
|
272
|
+
# @return [Array<Phronomy::Agent::Context::Knowledge::Base>]
|
|
273
273
|
# @api public
|
|
274
274
|
def static_knowledge_sources
|
|
275
275
|
@static_knowledge_sources || []
|
|
@@ -302,80 +302,6 @@ module Phronomy
|
|
|
302
302
|
@static_knowledge_chunks = nil
|
|
303
303
|
end
|
|
304
304
|
|
|
305
|
-
# Registers a callback that is invoked before every LLM call so the
|
|
306
|
-
# application can remove stale or irrelevant messages from the
|
|
307
|
-
# conversation history.
|
|
308
|
-
#
|
|
309
|
-
# The block receives a {Phronomy::Agent::Context::Conversation::TrimContext} and may call
|
|
310
|
-
# +ctx.remove(seqs)+ to drop messages by seq number. Changes affect
|
|
311
|
-
# only the current invocation; the underlying memory store is unchanged.
|
|
312
|
-
#
|
|
313
|
-
# @yield [ctx] Phronomy::Agent::Context::Conversation::TrimContext
|
|
314
|
-
# @example Drop the oldest message when over 80% of budget is used
|
|
315
|
-
# on_trim do |ctx|
|
|
316
|
-
# limit = ctx.budget&.available(used: 0) || Float::INFINITY
|
|
317
|
-
# ctx.remove(ctx.message_elements.first[:seq]) if ctx.total_tokens > limit * 0.8
|
|
318
|
-
# end
|
|
319
|
-
# @api public
|
|
320
|
-
def on_trim(&block)
|
|
321
|
-
@on_trim_callback = block
|
|
322
|
-
end
|
|
323
|
-
|
|
324
|
-
# @return [Proc, nil]
|
|
325
|
-
# @api private
|
|
326
|
-
def _on_trim_callback
|
|
327
|
-
@on_trim_callback
|
|
328
|
-
end
|
|
329
|
-
|
|
330
|
-
# Registers a callback that decides whether compaction should run.
|
|
331
|
-
# Evaluated before every LLM call (after on_trim). If the block returns
|
|
332
|
-
# truthy AND an +on_compact+ callback is also registered, the compact
|
|
333
|
-
# pipeline is executed.
|
|
334
|
-
#
|
|
335
|
-
# The block receives a read-only {Phronomy::Agent::Context::Conversation::TriggerContext}.
|
|
336
|
-
#
|
|
337
|
-
# @yield [ctx] Phronomy::Agent::Context::Conversation::TriggerContext
|
|
338
|
-
# @return [Boolean] truthy → run on_compact; falsy → skip
|
|
339
|
-
# @example Trigger when messages exceed 70% of token budget
|
|
340
|
-
# on_compaction_trigger do |ctx|
|
|
341
|
-
# limit = ctx.budget&.available(used: 0) || Float::INFINITY
|
|
342
|
-
# ctx.total_tokens > limit * 0.7
|
|
343
|
-
# end
|
|
344
|
-
# @api public
|
|
345
|
-
def on_compaction_trigger(&block)
|
|
346
|
-
@on_compaction_trigger_callback = block
|
|
347
|
-
end
|
|
348
|
-
|
|
349
|
-
# @return [Proc, nil]
|
|
350
|
-
# @api private
|
|
351
|
-
def _on_compaction_trigger_callback
|
|
352
|
-
@on_compaction_trigger_callback
|
|
353
|
-
end
|
|
354
|
-
|
|
355
|
-
# Registers a callback that performs the actual compaction when the
|
|
356
|
-
# +on_compaction_trigger+ callback fires. The block receives a
|
|
357
|
-
# {Phronomy::Agent::Context::Conversation::CompactionContext} and should call +ctx.compact+
|
|
358
|
-
# to specify which messages to summarise.
|
|
359
|
-
#
|
|
360
|
-
# @yield [ctx] Phronomy::Agent::Context::Conversation::CompactionContext
|
|
361
|
-
# @example Replace the first 4 messages with a short summary
|
|
362
|
-
# on_compact do |ctx|
|
|
363
|
-
# ctx.compact(0..3) do |elements|
|
|
364
|
-
# texts = elements.map { |e| e[:message].content }.join(" | ")
|
|
365
|
-
# "Earlier conversation summary: #{texts}"
|
|
366
|
-
# end
|
|
367
|
-
# end
|
|
368
|
-
# @api public
|
|
369
|
-
def on_compact(&block)
|
|
370
|
-
@on_compact_callback = block
|
|
371
|
-
end
|
|
372
|
-
|
|
373
|
-
# @return [Proc, nil]
|
|
374
|
-
# @api private
|
|
375
|
-
def _on_compact_callback
|
|
376
|
-
@on_compact_callback
|
|
377
|
-
end
|
|
378
|
-
|
|
379
305
|
# When enabled, attaches Anthropic prompt-cache markers to the system
|
|
380
306
|
# message so that the fixed instructions are served from cache on
|
|
381
307
|
# subsequent turns, reducing input-token costs.
|
|
@@ -449,11 +375,32 @@ module Phronomy
|
|
|
449
375
|
@context_overhead = val.to_i
|
|
450
376
|
end
|
|
451
377
|
end
|
|
378
|
+
|
|
379
|
+
# Resumes a suspended invocation identified by +checkpoint+ without
|
|
380
|
+
# requiring the original agent instance to be kept in memory.
|
|
381
|
+
#
|
|
382
|
+
# Validates that the checkpoint was created by this agent class, then
|
|
383
|
+
# instantiates a fresh agent and delegates to {Suspendable#resume}.
|
|
384
|
+
#
|
|
385
|
+
# @param checkpoint [Phronomy::Agent::Checkpoint]
|
|
386
|
+
# @param approved [Boolean] +true+ to execute the pending tool; +false+ to deny
|
|
387
|
+
# @param config [Hash] same runtime options as {#invoke}
|
|
388
|
+
# @return [Hash] same shape as {#invoke} — may contain +suspended: true+ if
|
|
389
|
+
# another approval-required tool is encountered during continuation
|
|
390
|
+
# @raise [ArgumentError] when +checkpoint.agent_class+ does not match this class
|
|
391
|
+
# @api public
|
|
392
|
+
def resume(checkpoint, approved:, config: {})
|
|
393
|
+
if checkpoint.agent_class && checkpoint.agent_class != name
|
|
394
|
+
raise ArgumentError,
|
|
395
|
+
"checkpoint belongs to #{checkpoint.agent_class}, cannot resume with #{name}"
|
|
396
|
+
end
|
|
397
|
+
new.resume(checkpoint, approved: approved, config: config)
|
|
398
|
+
end
|
|
452
399
|
end
|
|
453
400
|
|
|
454
401
|
# Registers an anonymous handoff tool class on this agent instance.
|
|
455
402
|
# Called by Runner during construction when routes are configured.
|
|
456
|
-
# @param tool_class [Class<Phronomy::
|
|
403
|
+
# @param tool_class [Class<Phronomy::Agent::Context::Capability::Base>]
|
|
457
404
|
# @return [self]
|
|
458
405
|
# @api private
|
|
459
406
|
def _add_handoff_tool(tool_class)
|
|
@@ -482,7 +429,6 @@ module Phronomy
|
|
|
482
429
|
# @param thread_id [String, nil] conversation thread identifier, forwarded
|
|
483
430
|
# to the compaction context when on_compact is configured.
|
|
484
431
|
# @param config [Hash] additional runtime options:
|
|
485
|
-
# +:knowledge_sources+ (Array) — dynamic knowledge sources for this turn
|
|
486
432
|
# +:user_id+ (+String+, optional) — caller identity forwarded to the tracer
|
|
487
433
|
# +:session_id+ (+String+, optional) — session identity forwarded to the tracer
|
|
488
434
|
# @param invocation_context [Phronomy::InvocationContext, nil] optional first-class context
|
|
@@ -518,63 +464,35 @@ module Phronomy
|
|
|
518
464
|
if invocation_context
|
|
519
465
|
thread_id, config = _apply_invocation_context(thread_id, config, invocation_context)
|
|
520
466
|
end
|
|
521
|
-
|
|
522
|
-
# Protect against blocking the EventLoop thread itself.
|
|
523
|
-
if Phronomy::EventLoop.current?
|
|
524
|
-
raise Phronomy::Error,
|
|
525
|
-
"Cannot call Agent#invoke (EventLoop mode) from within an EventLoop " \
|
|
526
|
-
"entry action. Use agent.run_as_child(input, ctx: ctx) instead."
|
|
527
|
-
end
|
|
467
|
+
_check_scheduler_reentrancy
|
|
528
468
|
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
timeout_sec = self.class.invoke_timeout
|
|
534
|
-
effective_config, scope = if timeout_sec
|
|
535
|
-
s = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
|
|
536
|
-
s.deadline_in(timeout_sec)
|
|
537
|
-
[config.merge(cancellation_token: s.token), s]
|
|
538
|
-
else
|
|
539
|
-
[config, nil]
|
|
540
|
-
end
|
|
469
|
+
timeout_sec = self.class.invoke_timeout
|
|
470
|
+
unless timeout_sec
|
|
471
|
+
return invoke_async(input, messages: messages, thread_id: thread_id, config: config).await
|
|
472
|
+
end
|
|
541
473
|
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
completion_queue.pop
|
|
557
|
-
end
|
|
558
|
-
raise result if result.is_a?(Exception)
|
|
559
|
-
result
|
|
560
|
-
else
|
|
561
|
-
# Guard: calling invoke from inside a scheduler task would block the task
|
|
562
|
-
# against itself when using a cooperative backend. Use invoke_async
|
|
563
|
-
# instead to compose agents without introducing a blocking wait.
|
|
564
|
-
if Phronomy::Task.current
|
|
565
|
-
msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
|
|
566
|
-
"This blocks the scheduler until the inner invocation completes, preventing " \
|
|
567
|
-
"other tasks from making progress. Use invoke_async + await instead."
|
|
568
|
-
if Phronomy.configuration.strict_runtime_guards
|
|
569
|
-
raise Phronomy::SchedulerReentrancyError, msg
|
|
570
|
-
elsif Phronomy.configuration.logger
|
|
571
|
-
Phronomy.configuration.logger.warn(msg)
|
|
572
|
-
else
|
|
573
|
-
Kernel.warn("[phronomy] WARNING: #{msg}")
|
|
574
|
-
end
|
|
575
|
-
end
|
|
576
|
-
invoke_async(input, messages: messages, thread_id: thread_id, config: config).await
|
|
474
|
+
# invoke_timeout: create a CancellationScope with deadline, pass its token
|
|
475
|
+
# to the async invocation, and use scope.pop_queue so the calling thread
|
|
476
|
+
# unblocks as soon as either the result arrives or the deadline fires.
|
|
477
|
+
scope = Phronomy::Concurrency::CancellationScope.new(parent_token: config[:cancellation_token])
|
|
478
|
+
scope.deadline_in(timeout_sec)
|
|
479
|
+
effective_config = config.merge(cancellation_token: scope.token)
|
|
480
|
+
task = invoke_async(input, messages: messages, thread_id: thread_id, config: effective_config)
|
|
481
|
+
|
|
482
|
+
# Bridge the task result to an AsyncQueue so scope.pop_queue can observe the deadline.
|
|
483
|
+
completion_queue = Phronomy::Concurrency::AsyncQueue.new
|
|
484
|
+
Phronomy::Runtime.instance.spawn(name: "invoke-timeout-bridge:#{(self.class.name || "agent").downcase}") do
|
|
485
|
+
completion_queue.push(task.await)
|
|
486
|
+
rescue => e
|
|
487
|
+
completion_queue.push(e)
|
|
577
488
|
end
|
|
489
|
+
|
|
490
|
+
result = scope.pop_queue(completion_queue) do
|
|
491
|
+
raise Phronomy::TimeoutError,
|
|
492
|
+
"Agent #{self.class.name} invoke timed out after #{timeout_sec}s"
|
|
493
|
+
end
|
|
494
|
+
raise result if result.is_a?(Exception)
|
|
495
|
+
result
|
|
578
496
|
end
|
|
579
497
|
|
|
580
498
|
# Invokes this agent asynchronously and returns a {Phronomy::Task}.
|
|
@@ -649,15 +567,18 @@ module Phronomy
|
|
|
649
567
|
"Enable with: Phronomy.configure { |c| c.event_loop = true }"
|
|
650
568
|
end
|
|
651
569
|
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
messages: messages,
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
570
|
+
parent_id = ctx.thread_id
|
|
571
|
+
thread_id = "#{parent_id}_agent_#{SecureRandom.uuid}"
|
|
572
|
+
Phronomy::Runtime.instance.spawn(name: "agent-child:#{thread_id}") do
|
|
573
|
+
result = _invoke_impl(input, messages: messages, thread_id: thread_id, config: config)
|
|
574
|
+
Phronomy::EventLoop.instance.post(
|
|
575
|
+
Phronomy::Event.new(type: :child_completed, target_id: parent_id, payload: result)
|
|
576
|
+
)
|
|
577
|
+
rescue => e
|
|
578
|
+
Phronomy::EventLoop.instance.post(
|
|
579
|
+
Phronomy::Event.new(type: :child_failed, target_id: parent_id, payload: e)
|
|
580
|
+
)
|
|
581
|
+
end
|
|
661
582
|
nil
|
|
662
583
|
end
|
|
663
584
|
|
|
@@ -666,8 +587,8 @@ module Phronomy
|
|
|
666
587
|
#
|
|
667
588
|
# Events emitted (in order):
|
|
668
589
|
# :token — each content delta from the LLM
|
|
669
|
-
# :tool_call — when the LLM requests a tool
|
|
670
|
-
# :tool_result — after a tool completes
|
|
590
|
+
# :tool_call — when the LLM requests a tool
|
|
591
|
+
# :tool_result — after a tool completes
|
|
671
592
|
# :done — final event carrying output, messages, and usage
|
|
672
593
|
# :error — if an unrecoverable error occurs
|
|
673
594
|
#
|
|
@@ -687,19 +608,11 @@ module Phronomy
|
|
|
687
608
|
raise
|
|
688
609
|
end
|
|
689
610
|
|
|
690
|
-
#
|
|
691
|
-
#
|
|
692
|
-
# cleaned up in the +ensure+ block of {#invoke}, but a reference is kept
|
|
693
|
-
# in +@last_context_version_cache+ so callers can inspect it after invoke
|
|
694
|
-
# returns.
|
|
695
|
-
#
|
|
696
|
-
# NOTE: Not thread-safe. When the same Agent instance is used concurrently,
|
|
697
|
-
# +@last_context_version_cache+ reflects the most recent +invoke+ on *any*
|
|
698
|
-
# thread. For per-invocation isolation, use a separate Agent instance per
|
|
699
|
-
# thread.
|
|
611
|
+
# @deprecated The context version cache has been removed. Returns nil.
|
|
612
|
+
# Retained for backward compatibility with callers using safe navigation (+&.reset+).
|
|
700
613
|
# @api private
|
|
701
614
|
def context_version_cache
|
|
702
|
-
|
|
615
|
+
nil
|
|
703
616
|
end
|
|
704
617
|
|
|
705
618
|
private
|
|
@@ -722,29 +635,39 @@ module Phronomy
|
|
|
722
635
|
[effective_thread_id, effective_config]
|
|
723
636
|
end
|
|
724
637
|
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
638
|
+
def _check_scheduler_reentrancy
|
|
639
|
+
return unless Phronomy::Task.current
|
|
640
|
+
|
|
641
|
+
msg = "#{self.class.name}#invoke called from inside a scheduler task. " \
|
|
642
|
+
"This blocks the scheduler until the inner invocation completes, preventing " \
|
|
643
|
+
"other tasks from making progress. Use invoke_async + await instead."
|
|
644
|
+
if Phronomy.configuration.strict_runtime_guards
|
|
645
|
+
raise Phronomy::SchedulerReentrancyError, msg
|
|
646
|
+
elsif Phronomy.configuration.logger
|
|
647
|
+
Phronomy.configuration.logger.warn(msg)
|
|
648
|
+
else
|
|
649
|
+
Kernel.warn("[phronomy] WARNING: #{msg}")
|
|
733
650
|
end
|
|
651
|
+
end
|
|
734
652
|
|
|
735
|
-
|
|
653
|
+
# Streaming implementation for #stream.
|
|
654
|
+
def _stream_impl(input, messages: [], thread_id: nil, config: {}, &block)
|
|
655
|
+
trace("agent.invoke", input: input, **_build_caller_meta(config)) do |_span|
|
|
736
656
|
run_input_guardrails!(input)
|
|
737
657
|
|
|
738
658
|
chat = build_chat
|
|
739
659
|
user_message = extract_message(input)
|
|
660
|
+
context = build_context(
|
|
661
|
+
input,
|
|
662
|
+
messages: messages,
|
|
663
|
+
thread_id: thread_id,
|
|
664
|
+
config: config,
|
|
665
|
+
budget: build_token_budget,
|
|
666
|
+
instruction: build_instructions(input),
|
|
667
|
+
tools: self.class.tools + _handoff_tools
|
|
668
|
+
)
|
|
669
|
+
_apply_context_to_chat(chat, context)
|
|
740
670
|
|
|
741
|
-
# Assemble context (system prompt + history). Override #build_context to
|
|
742
|
-
# inject custom context editing logic at the Agent subclass level.
|
|
743
|
-
context = build_context(input, messages: messages, thread_id: thread_id, config: config)
|
|
744
|
-
apply_instructions(chat, context[:system]) if context[:system]
|
|
745
|
-
context[:messages].each { |msg| chat.messages << msg }
|
|
746
|
-
|
|
747
|
-
# Wire per-event callbacks to yield StreamEvents.
|
|
748
671
|
current_tool_call = nil
|
|
749
672
|
chat.on_tool_call do |tool_call|
|
|
750
673
|
current_tool_call = tool_call
|
|
@@ -758,32 +681,9 @@ module Phronomy
|
|
|
758
681
|
}))
|
|
759
682
|
end
|
|
760
683
|
|
|
761
|
-
# Run before_completion hooks (global → class → instance) before the LLM call.
|
|
762
684
|
run_before_completion_hooks!(chat, config)
|
|
763
685
|
|
|
764
|
-
|
|
765
|
-
# Chunks are pushed into a token queue by the pool worker thread and
|
|
766
|
-
# drained here (on the caller's side) so that the user block is never
|
|
767
|
-
# executed on a BlockingAdapterPool worker thread.
|
|
768
|
-
# The queue capacity is bounded by Configuration#stream_queue_max_size
|
|
769
|
-
# (nil = unbounded) to provide backpressure against a fast LLM producer.
|
|
770
|
-
adapter = Phronomy.configuration.llm_adapter
|
|
771
|
-
chunk_queue = Phronomy::Concurrency::AsyncQueue.new(max_size: Phronomy.configuration.stream_queue_max_size)
|
|
772
|
-
pending = adapter.stream_async(chat, user_message, config: config, enqueue_to: chunk_queue)
|
|
773
|
-
|
|
774
|
-
# Drain the chunk queue on this side (scheduler task / caller thread).
|
|
775
|
-
loop do
|
|
776
|
-
chunk = chunk_queue.pop
|
|
777
|
-
break if chunk.nil? # queue closed — LLM streaming complete
|
|
778
|
-
block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
|
|
779
|
-
check_cancellation!(config, "invocation cancelled during streaming")
|
|
780
|
-
end
|
|
781
|
-
|
|
782
|
-
response = pending.await
|
|
783
|
-
|
|
784
|
-
output = response.content
|
|
785
|
-
usage = Phronomy::TokenUsage.from_tokens(response.tokens)
|
|
786
|
-
|
|
686
|
+
output, usage = _drain_stream(chat, user_message, config, &block)
|
|
787
687
|
run_output_guardrails!(output)
|
|
788
688
|
|
|
789
689
|
result = {output: output, messages: chat.messages, usage: usage}
|
|
@@ -797,146 +697,260 @@ module Phronomy
|
|
|
797
697
|
# inject custom context editing logic without having to override
|
|
798
698
|
# the full #invoke_once pipeline.
|
|
799
699
|
#
|
|
800
|
-
#
|
|
801
|
-
#
|
|
802
|
-
#
|
|
803
|
-
#
|
|
804
|
-
# @
|
|
700
|
+
# The keyword arguments +budget+, +instruction+, +tools+, and +knowledge+
|
|
701
|
+
# carry pre-computed values. Override them in a subclass call to +super+
|
|
702
|
+
# to inject custom context without recomputing the defaults.
|
|
703
|
+
#
|
|
704
|
+
# @param input [String, Hash] the user's input for this turn
|
|
705
|
+
# @param messages [Array<RubyLLM::Message>] raw conversation history
|
|
706
|
+
# @param thread_id [String, nil] conversation thread identifier
|
|
707
|
+
# @param config [Hash] the invocation config (see #invoke)
|
|
708
|
+
# @param budget [LlmContextWindow::TokenBudget, nil] pre-computed token budget
|
|
709
|
+
# @param instruction [String, nil] pre-computed system instruction
|
|
710
|
+
# @param tools [Array<Class>] tool classes to expose
|
|
711
|
+
# @param knowledge [Array<Hash>] knowledge chunks ({ content:, type:, source: })
|
|
712
|
+
# @return [Hash] { system: String|nil, messages: Array, tool_classes: Array }
|
|
805
713
|
# @api public
|
|
806
|
-
def build_context(
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
714
|
+
def build_context(
|
|
715
|
+
input,
|
|
716
|
+
messages: [],
|
|
717
|
+
thread_id: nil,
|
|
718
|
+
config: {},
|
|
719
|
+
budget: build_token_budget,
|
|
720
|
+
instruction: build_instructions(input),
|
|
721
|
+
tools: self.class.tools + _handoff_tools,
|
|
722
|
+
knowledge: self.class.static_knowledge_chunks + instance_knowledge_chunks
|
|
723
|
+
)
|
|
812
724
|
assembler = LlmContextWindow::Assembler.new(budget: budget)
|
|
813
|
-
assembler.add_instruction(
|
|
814
|
-
|
|
815
|
-
|
|
725
|
+
assembler.add_instruction(instruction) if instruction
|
|
726
|
+
assembler.add_capability(tools)
|
|
727
|
+
knowledge.each { |chunk| assembler.add_knowledge(chunk[:content], type: chunk[:type] || :static, trusted: true, source: chunk[:source]) }
|
|
728
|
+
|
|
729
|
+
msgs = Array(messages)
|
|
730
|
+
|
|
731
|
+
if budget && budget_exceeded?(msgs)
|
|
732
|
+
# Default strategy when the token budget is tight:
|
|
733
|
+
# 1. Compact: keep the most recent half of the messages verbatim and
|
|
734
|
+
# replace the older half with a brief omission marker.
|
|
735
|
+
# 2. Trim: if the compacted history still exceeds the budget, call
|
|
736
|
+
# trim_to_budget with the :safe strategy, which discards the oldest
|
|
737
|
+
# message one at a time until the history fits.
|
|
738
|
+
# Subclasses can override build_context to apply a different strategy
|
|
739
|
+
# (e.g. LLM-based summarisation) before calling super.
|
|
740
|
+
keep = [msgs.size / 2, 2].max
|
|
741
|
+
msgs = compact_messages(msgs, keep_tail: keep) do |dropped|
|
|
742
|
+
"[#{dropped.size} earlier messages omitted]"
|
|
743
|
+
end
|
|
744
|
+
remaining = assembler.available_for_messages
|
|
745
|
+
msgs = trim_to_budget(msgs, remaining: remaining, strategy: :safe)
|
|
816
746
|
end
|
|
817
|
-
|
|
818
|
-
assembler.
|
|
747
|
+
|
|
748
|
+
assembler.add_messages(msgs)
|
|
749
|
+
@last_context = assembler.build
|
|
819
750
|
end
|
|
820
751
|
protected :build_context
|
|
821
752
|
|
|
822
|
-
#
|
|
823
|
-
#
|
|
824
|
-
# Each source is spawned as a separate task within a {Phronomy::TaskGroup};
|
|
825
|
-
# the RAG concurrency gate enforces the +max_concurrent_rag_fetches+ cap.
|
|
826
|
-
# Results are returned in registration order (spawn order) as a flat array.
|
|
827
|
-
#
|
|
828
|
-
# This method is available to subclasses as a building block when
|
|
829
|
-
# overriding {#build_context}. Pass a custom +query+ to implement
|
|
830
|
-
# multi-hop RAG or other retrieval strategies.
|
|
753
|
+
# Keeps the last +keep+ messages from +messages+, discarding older ones.
|
|
754
|
+
# Use this inside a +build_context+ override to trim conversation history.
|
|
831
755
|
#
|
|
832
|
-
# @param
|
|
833
|
-
# @param
|
|
834
|
-
#
|
|
835
|
-
# @
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
check_cancellation!(config, "invocation cancelled before RAG fetch")
|
|
842
|
-
|
|
843
|
-
# :skip (default) — ignore per-source failures so the agent can still
|
|
844
|
-
# answer with partial context. :fail surfaces the first error immediately.
|
|
845
|
-
failure_policy =
|
|
846
|
-
case config[:rag_failure_policy]
|
|
847
|
-
when :fail then :fail_fast
|
|
848
|
-
else :skip_failed
|
|
849
|
-
end
|
|
756
|
+
# @param messages [Array<RubyLLM::Message>] conversation history
|
|
757
|
+
# @param keep [Integer] number of messages to retain (from the tail)
|
|
758
|
+
# @return [Array<RubyLLM::Message>]
|
|
759
|
+
# @api public
|
|
760
|
+
def trim_messages(messages, keep:)
|
|
761
|
+
Array(messages).last(keep)
|
|
762
|
+
end
|
|
763
|
+
protected :trim_messages
|
|
850
764
|
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
query: query,
|
|
864
|
-
cancellation_token: config[:cancellation_token],
|
|
865
|
-
timeout: config[:rag_timeout]
|
|
866
|
-
).await
|
|
867
|
-
end
|
|
868
|
-
Phronomy.configuration.logger&.debug { "RAG fetch from #{ks.class.name} completed in #{elapsed_ms}ms" }
|
|
869
|
-
result
|
|
870
|
-
end
|
|
871
|
-
end
|
|
872
|
-
end
|
|
765
|
+
# Removes the oldest messages one at a time until the count is within +limit+.
|
|
766
|
+
#
|
|
767
|
+
# @param messages [Array<RubyLLM::Message>] conversation history
|
|
768
|
+
# @param limit [Integer] maximum number of messages to retain
|
|
769
|
+
# @return [Array<RubyLLM::Message>]
|
|
770
|
+
# @api public
|
|
771
|
+
def drop_messages_over(messages, limit:)
|
|
772
|
+
msgs = Array(messages).dup
|
|
773
|
+
msgs.shift while msgs.size > limit
|
|
774
|
+
msgs
|
|
775
|
+
end
|
|
776
|
+
protected :drop_messages_over
|
|
873
777
|
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
778
|
+
# Replaces all but the last +keep_tail+ messages with a single system summary.
|
|
779
|
+
# The block receives the dropped messages and must return a summary String.
|
|
780
|
+
#
|
|
781
|
+
# @param messages [Array<RubyLLM::Message>] conversation history
|
|
782
|
+
# @param keep_tail [Integer] number of recent messages to preserve verbatim
|
|
783
|
+
# @yield [Array<RubyLLM::Message>] the messages being summarised
|
|
784
|
+
# @yieldreturn [String] summary text
|
|
785
|
+
# @return [Array<RubyLLM::Message>]
|
|
786
|
+
# @api public
|
|
787
|
+
def compact_messages(messages, keep_tail:, &summariser)
|
|
788
|
+
msgs = Array(messages)
|
|
789
|
+
return msgs if msgs.size <= keep_tail
|
|
790
|
+
tail = msgs.last(keep_tail)
|
|
791
|
+
dropped = msgs.first(msgs.size - keep_tail)
|
|
792
|
+
summary_text = summariser.call(dropped)
|
|
793
|
+
[RubyLLM::Message.new(role: :system, content: summary_text)] + tail
|
|
877
794
|
end
|
|
878
|
-
protected :
|
|
795
|
+
protected :compact_messages
|
|
879
796
|
|
|
880
|
-
#
|
|
881
|
-
#
|
|
882
|
-
#
|
|
797
|
+
# Trims +messages+ to fit within +remaining+ tokens using the given
|
|
798
|
+
# +strategy+. Returns the trimmed message array without touching the
|
|
799
|
+
# assembler. The caller is responsible for passing the result to
|
|
800
|
+
# +assembler.add_messages+ and calling +assembler.build+.
|
|
883
801
|
#
|
|
884
|
-
#
|
|
885
|
-
#
|
|
802
|
+
# Supported strategies:
|
|
803
|
+
# +:safe+ — discard the oldest message one at a time (default)
|
|
886
804
|
#
|
|
887
|
-
# @param messages [Array<RubyLLM::Message>]
|
|
888
|
-
# @param
|
|
889
|
-
#
|
|
890
|
-
# @
|
|
805
|
+
# @param messages [Array<RubyLLM::Message>] conversation history
|
|
806
|
+
# @param remaining [Integer, nil] token allowance for messages; when +nil+
|
|
807
|
+
# the messages are returned unchanged
|
|
808
|
+
# @param strategy [Symbol] trim strategy (default +:safe+)
|
|
809
|
+
# @return [Array<RubyLLM::Message>]
|
|
891
810
|
# @api public
|
|
892
|
-
def
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
811
|
+
def trim_to_budget(messages, remaining:, strategy: :safe)
|
|
812
|
+
return Array(messages) unless remaining
|
|
813
|
+
msgs = Array(messages)
|
|
814
|
+
loop do
|
|
815
|
+
used = msgs.sum { |m| LlmContextWindow::TokenEstimator.estimate(m.content.to_s) }
|
|
816
|
+
return msgs if used <= remaining
|
|
817
|
+
break if msgs.empty?
|
|
818
|
+
msgs = trim_messages(msgs, keep: msgs.size - 1)
|
|
900
819
|
end
|
|
820
|
+
msgs
|
|
821
|
+
end
|
|
822
|
+
protected :trim_to_budget
|
|
901
823
|
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
824
|
+
# Returns +true+ when the estimated token usage of +messages+ exceeds
|
|
825
|
+
# +threshold+ times the available context budget.
|
|
826
|
+
# Always returns +false+ when no token budget is available.
|
|
827
|
+
#
|
|
828
|
+
# @param messages [Array<RubyLLM::Message>] conversation history
|
|
829
|
+
# @param threshold [Float] fraction of the available budget (default 0.8)
|
|
830
|
+
# @return [Boolean]
|
|
831
|
+
# @api public
|
|
832
|
+
def budget_exceeded?(messages, threshold: 0.8)
  budget = build_token_budget
  # Without a budget there is nothing to compare against.
  return false unless budget

  estimated = Array(messages).sum do |message|
    LlmContextWindow::TokenEstimator.estimate(message.content.to_s)
  end
  # Compare against the given fraction of what the budget reports as
  # available when nothing has been used yet.
  estimated > budget.available(used: 0) * threshold
end
|
|
838
|
+
protected :budget_exceeded?
|
|
916
839
|
|
|
917
|
-
|
|
840
|
+
# Registers a per-instance knowledge source. Knowledge chunks from all
|
|
841
|
+
# registered sources are included in every LLM call via +build_context+.
|
|
842
|
+
#
|
|
843
|
+
# @param source [#fetch] any object responding to +fetch(query:)+
|
|
844
|
+
# @return [void]
|
|
845
|
+
# @api public
|
|
846
|
+
def add_knowledge_source(source)
  # Fail at registration time rather than later, when
  # instance_knowledge_chunks calls +fetch+ on every registered source.
  unless source.respond_to?(:fetch)
    raise ArgumentError, "knowledge source must respond to #fetch(query:), got #{source.class}"
  end

  # The list is created lazily on first registration.
  @instance_knowledge_sources ||= []
  @instance_knowledge_sources << source
end
|
|
850
|
+
protected :add_knowledge_source
|
|
851
|
+
|
|
852
|
+
# Returns knowledge chunks fetched from all instance-level knowledge sources.
|
|
853
|
+
#
|
|
854
|
+
# @return [Array<Hash>]
|
|
855
|
+
# @api private
|
|
856
|
+
def instance_knowledge_chunks
  return [] unless @instance_knowledge_sources

  # Each source is asked with a nil query. A source that returns nil must not
  # leak nil into the chunk list, so nils are dropped after flattening.
  @instance_knowledge_sources.flat_map { |ks| ks.fetch(query: nil) }.compact
end
|
|
919
|
-
protected :
|
|
860
|
+
protected :instance_knowledge_chunks
|
|
920
861
|
|
|
921
862
|
# Performs a single (non-retried) invocation. Extracted so that #invoke can
|
|
922
863
|
# wrap it in a retry loop without duplicating the LLM interaction logic.
|
|
923
864
|
def invoke_once(input, messages: [], thread_id: nil, config: {})
  trace("agent.invoke", input: input, **_build_caller_meta(config)) do |_span|
    # Input guardrails run before any chat or context is constructed.
    run_input_guardrails!(input)

    user_message = extract_message(input)
    chat = build_chat

    # Gather the pieces handed to build_context, then copy the resulting
    # context onto the chat.
    budget = build_token_budget
    instruction = build_instructions(input)
    tool_classes = self.class.tools + _handoff_tools
    context = build_context(
      input,
      messages: messages, thread_id: thread_id, config: config,
      budget: budget, instruction: instruction, tools: tool_classes
    )
    _apply_context_to_chat(chat, context)

    run_before_completion_hooks!(chat, config)
    _register_suspension_hook!(chat)
    check_cancellation!(config, "invocation cancelled before LLM call")

    result, usage = _complete_with_suspension_guard(
      chat, user_message, config,
      thread_id: thread_id, original_input: input
    )
    # A suspended result is returned as-is, without running output guardrails.
    run_output_guardrails!(result[:output]) unless result[:suspended]

    [result, usage]
  end
end
|
|
892
|
+
|
|
893
|
+
# Collects caller-identifying metadata from +config+: +:user_id+ and
# +:session_id+ are read from +config+ itself, +:task_id+ and
# +:parent_task_id+ from +config[:invocation_context]+ when one is present.
# Only truthy values are included.
#
# @param config [Hash] invocation config
# @return [Hash] subset of :user_id, :session_id, :task_id, :parent_task_id
def _build_caller_meta(config)
  meta = %i[user_id session_id].each_with_object({}) do |key, acc|
    value = config[key]
    acc[key] = value if value
  end

  ic = config[:invocation_context]
  return meta unless ic

  %i[task_id parent_task_id].each do |attr|
    value = ic.public_send(attr)
    meta[attr] = value if value
  end
  meta
end
|
|
931
903
|
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
904
|
+
# Copies an assembled context onto +chat+: applies +context[:system]+ through
# apply_instructions when present, registers every entry of
# +context[:tool_classes]+ (after prepare_tool_class), and appends each entry
# of +context[:messages]+ to +chat.messages+.
#
# @param chat [Object] chat responding to +with_tool+ and +messages+
# @param context [Hash] may contain :system, :tool_classes and :messages
def _apply_context_to_chat(chat, context)
  apply_instructions(chat, context[:system]) if context[:system]
  (context[:tool_classes] || []).each { |tc| chat.with_tool(prepare_tool_class(tc)) }
  # A missing :messages entry is treated like an empty history, matching the
  # nil handling of :tool_classes above.
  (context[:messages] || []).each { |msg| chat.messages << msg }
end
|
|
909
|
+
|
|
910
|
+
# Submits the LLM call via LLMAdapter and handles SuspendSignal.
|
|
911
|
+
# Sets/clears the chat cancellation token around the call so that
|
|
912
|
+
# ParallelToolChat can observe cancellation without Thread.current.
|
|
913
|
+
# Returns [result_hash, usage_or_nil].
|
|
914
|
+
def _complete_with_suspension_guard(chat, user_message, config, thread_id:, original_input:)
  # Only chats exposing a cancellation_token writer get the token set/cleared.
  token_aware = chat.respond_to?(:cancellation_token=)
  chat.cancellation_token = config[:cancellation_token] if token_aware

  response =
    begin
      Phronomy.configuration.llm_adapter
        .complete_async(chat, user_message, config: config)
        .await
    rescue SuspendSignal => signal
      # Capture what the checkpoint records about the interrupted run: a copy
      # of the chat messages plus the pending tool call carried by the signal.
      checkpoint = Checkpoint.new(
        checkpoint_id: SecureRandom.uuid,
        agent_class: self.class.name,
        requested_at: Time.now.utc,
        thread_id: thread_id,
        original_input: original_input,
        messages: chat.messages.dup,
        pending_tool_name: signal.tool_name,
        pending_tool_args: signal.args,
        pending_tool_call_id: signal.tool_call_id
      )
      return [{output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}, nil]
    ensure
      # Runs on success, suspension and error alike.
      chat.cancellation_token = nil if token_aware
    end

  output = response.content
  usage = Phronomy::TokenUsage.from_tokens(response.tokens)
  [{output: output, messages: chat.messages, usage: usage}, usage]
end
|
|
939
|
+
|
|
940
|
+
# Starts a streaming completion through the configured LLM adapter and
# forwards every chunk popped from the queue to +block+ as a +:token+
# StreamEvent, checking for cancellation after each one. A +nil+ chunk ends
# the drain; the pending response is then awaited.
#
# @return [Array] +[response.content, token usage]+
def _drain_stream(chat, user_message, config, &block)
  configuration = Phronomy.configuration
  adapter = configuration.llm_adapter
  chunk_queue = Phronomy::Concurrency::AsyncQueue.new(
    max_size: Phronomy.configuration.stream_queue_max_size
  )
  pending = adapter.stream_async(
    chat, user_message,
    config: config, enqueue_to: chunk_queue
  )

  loop do
    chunk = chunk_queue.pop
    # nil ends the drain.
    break if chunk.nil?

    event = StreamEvent.new(type: :token, payload: {content: chunk.content})
    block.call(event)
    check_cancellation!(config, "invocation cancelled during streaming")
  end

  response = pending.await
  usage = Phronomy::TokenUsage.from_tokens(response.tokens)
  [response.content, usage]
end
|
|
941
955
|
|
|
942
956
|
# Builds a TokenBudget for this agent's model if possible.
|
|
@@ -965,61 +979,13 @@ module Phronomy
|
|
|
965
979
|
nil
|
|
966
980
|
end
|
|
967
981
|
|
|
968
|
-
# Converts a flat Array of message objects into the internal message_elements
|
|
969
|
-
# format used by TrimContext, TriggerContext, and CompactionContext.
|
|
970
|
-
# Each element receives a 0-based synthetic seq number.
|
|
971
|
-
#
|
|
972
|
-
# @param messages [Array] message-like objects with #role and #content
|
|
973
|
-
# @return [Array<Hash>]
|
|
974
|
-
# @api public
|
|
975
|
-
def build_message_elements(messages)
|
|
976
|
-
Array(messages).each_with_index.map do |msg, idx|
|
|
977
|
-
tokens = LlmContextWindow::TokenEstimator.estimate(msg.content.to_s)
|
|
978
|
-
{seq: idx, message: msg, tokens: tokens, role: msg.role}
|
|
979
|
-
end
|
|
980
|
-
end
|
|
981
|
-
|
|
982
|
-
# Builds (or returns a cached) system prompt text.
|
|
983
|
-
# The fingerprint is a SHA-256 digest of the instruction text concatenated
|
|
984
|
-
# with the content of every registered static knowledge source.
|
|
985
|
-
# When the fingerprint is unchanged the ContextVersionCache returns the
|
|
986
|
-
# previously assembled text without re-fetching any sources.
|
|
987
|
-
#
|
|
988
|
-
# @param input [String, Hash] the agent's current input (used for template evaluation)
|
|
989
|
-
# @return [String, nil] assembled system text, or nil when empty
|
|
990
|
-
# @api public
|
|
991
|
-
def build_cached_system_text(input)
|
|
992
|
-
instruction = build_instructions(input)
|
|
993
|
-
|
|
994
|
-
static_chunks = self.class.static_knowledge_chunks
|
|
995
|
-
|
|
996
|
-
fingerprint = Digest::SHA256.hexdigest(
|
|
997
|
-
[instruction.to_s, *static_chunks.map { |c| c[:content] }].join("\0")
|
|
998
|
-
)
|
|
999
|
-
|
|
1000
|
-
cache = (@context_version_cache ||= LlmContextWindow::ContextVersionCache.new)
|
|
1001
|
-
unless cache.valid?(fingerprint)
|
|
1002
|
-
parts = [instruction]
|
|
1003
|
-
static_chunks.each do |chunk|
|
|
1004
|
-
parts << LlmContextWindow::Assembler.xml_tag(chunk[:content], type: chunk[:type], trusted: true)
|
|
1005
|
-
end
|
|
1006
|
-
cache.update(fingerprint: fingerprint, system_text: parts.compact.join("\n\n"))
|
|
1007
|
-
end
|
|
1008
|
-
|
|
1009
|
-
# Persist a reference on the instance so that context_version_cache
|
|
1010
|
-
# remains accessible after invoke completes.
|
|
1011
|
-
@last_context_version_cache = cache
|
|
1012
|
-
|
|
1013
|
-
cache.system_text.empty? ? nil : cache.system_text
|
|
1014
|
-
end
|
|
1015
|
-
|
|
1016
982
|
# Returns the chat class to instantiate for this invocation.
|
|
1017
|
-
# When
|
|
983
|
+
# When {Phronomy.configuration.parallel_tool_execution} is true,
|
|
1018
984
|
# returns {ParallelToolChat} so that concurrent tool dispatch is enabled.
|
|
1019
985
|
# Falls back to +nil+ otherwise, signalling {#build_chat} to use the
|
|
1020
986
|
# standard +RubyLLM.chat+ factory.
|
|
1021
987
|
def build_chat_class
  # Evaluates to nil when parallel tool execution is disabled.
  Phronomy::MultiAgent::ParallelToolChat if Phronomy.configuration.parallel_tool_execution
end
|
|
1024
990
|
|
|
1025
991
|
def build_chat
|
|
@@ -1039,10 +1005,6 @@ module Phronomy
|
|
|
1039
1005
|
RubyLLM.chat(**opts)
|
|
1040
1006
|
end
|
|
1041
1007
|
chat.with_temperature(t) if t
|
|
1042
|
-
self.class.tools.each do |tool_class|
|
|
1043
|
-
chat.with_tool(prepare_tool_class(tool_class))
|
|
1044
|
-
end
|
|
1045
|
-
_handoff_tools.each { |tc| chat.with_tool(tc) }
|
|
1046
1008
|
chat
|
|
1047
1009
|
end
|
|
1048
1010
|
|
|
@@ -1102,7 +1064,7 @@ module Phronomy
|
|
|
1102
1064
|
# Builds the final tool class to register with the chat.
|
|
1103
1065
|
#
|
|
1104
1066
|
# When an already-instantiated tool object is passed (e.g. a
|
|
1105
|
-
# {Phronomy::
|
|
1067
|
+
# {Phronomy::Tools::Mcp} returned by +Phronomy::Tools::Mcp.from_server+), it is
|
|
1106
1068
|
# returned as-is. RubyLLM's +with_tool+ accepts both classes and
|
|
1107
1069
|
# instances, so no wrapping is needed.
|
|
1108
1070
|
#
|
|
@@ -1110,7 +1072,7 @@ module Phronomy
|
|
|
1110
1072
|
# 1. Alias override — when the Hash form of .tools maps this class to an
|
|
1111
1073
|
# explicit name, an anonymous subclass with that tool_name is returned.
|
|
1112
1074
|
# 2. Scope policy — when a scope is declared on the tool, the configured
|
|
1113
|
-
# {Phronomy::
|
|
1075
|
+
# {Phronomy::Agent::Context::Capability::ScopePolicy} (or the default) is evaluated.
|
|
1114
1076
|
# +:reject+ wraps the tool to return a denial message without executing.
|
|
1115
1077
|
# +:approve+ behaves like requiring approval (same as step 3 when the
|
|
1116
1078
|
# tool does not already have +requires_approval+).
|
|
@@ -1120,7 +1082,7 @@ module Phronomy
|
|
|
1120
1082
|
# (tool_name, args) and, if it returns falsy, the tool returns a denial
|
|
1121
1083
|
# message instead of executing.
|
|
1122
1084
|
def prepare_tool_class(tool_class)
|
|
1123
|
-
# When an instantiated tool object is passed (e.g.
|
|
1085
|
+
# When an instantiated tool object is passed (e.g. Phronomy::Tools::Mcp.from_server
|
|
1124
1086
|
# returns an instance, not a class), skip class-level processing and
|
|
1125
1087
|
# return it directly. RubyLLM#with_tool handles both forms.
|
|
1126
1088
|
return tool_class unless tool_class.is_a?(Class)
|
|
@@ -1139,7 +1101,7 @@ module Phronomy
|
|
|
1139
1101
|
# Step 2: evaluate scope policy.
|
|
1140
1102
|
scope = resolved.scope
|
|
1141
1103
|
if scope
|
|
1142
|
-
policy = @scope_policy || Phronomy::
|
|
1104
|
+
policy = @scope_policy || Phronomy::Agent::Context::Capability::ScopePolicy::DEFAULT
|
|
1143
1105
|
decision = policy.call(resolved, scope, self)
|
|
1144
1106
|
case decision
|
|
1145
1107
|
when :reject
|