phronomy 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mutant.yml +8 -7
- data/CHANGELOG.md +151 -1
- data/README.md +170 -47
- data/Rakefile +33 -0
- data/benchmark/baseline.json +1 -1
- data/benchmark/bench_context_assembler.rb +2 -2
- data/benchmark/bench_regression.rb +6 -5
- data/benchmark/bench_token_estimator.rb +5 -5
- data/benchmark/bench_tool_schema.rb +1 -1
- data/benchmark/bench_vector_store.rb +1 -1
- data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +24 -0
- data/docs/decisions/006-no-built-in-guardrails.md +20 -2
- data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
- data/lib/phronomy/agent/base.rb +285 -137
- data/lib/phronomy/agent/checkpoint.rb +118 -0
- data/lib/phronomy/agent/concerns/suspendable.rb +15 -0
- data/lib/phronomy/agent/context/conversation/compaction_context.rb +117 -0
- data/lib/phronomy/agent/context/conversation/trigger_context.rb +43 -0
- data/lib/phronomy/agent/context/conversation/trim_context.rb +82 -0
- data/lib/phronomy/agent/context/instruction/prompt_template.rb +102 -0
- data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +45 -0
- data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +51 -0
- data/lib/phronomy/agent/context/knowledge/loader/base.rb +31 -0
- data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +62 -0
- data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +82 -0
- data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +28 -0
- data/lib/phronomy/agent/context/knowledge/source/base.rb +60 -0
- data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +102 -0
- data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +63 -0
- data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +58 -0
- data/lib/phronomy/agent/context/knowledge/splitter/base.rb +53 -0
- data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +57 -0
- data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +111 -0
- data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +116 -0
- data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +95 -0
- data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +109 -0
- data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +133 -0
- data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +198 -0
- data/lib/phronomy/agent/fsm.rb +42 -65
- data/lib/phronomy/agent/invocation_pipeline.rb +99 -0
- data/lib/phronomy/agent/lifecycle/fsm_session.rb +251 -0
- data/lib/phronomy/agent/lifecycle/phase_machine_builder.rb +249 -0
- data/lib/phronomy/agent/react_agent.rb +27 -14
- data/lib/phronomy/agent/runner.rb +2 -2
- data/lib/phronomy/agent/tool_executor.rb +108 -0
- data/lib/phronomy/concurrency/async_queue.rb +157 -0
- data/lib/phronomy/concurrency/blocking_adapter_pool.rb +443 -0
- data/lib/phronomy/concurrency/cancellation_scope.rb +125 -0
- data/lib/phronomy/concurrency/cancellation_token.rb +140 -0
- data/lib/phronomy/concurrency/concurrency_gate.rb +157 -0
- data/lib/phronomy/concurrency/deadline.rb +65 -0
- data/lib/phronomy/concurrency/gate_registry.rb +52 -0
- data/lib/phronomy/concurrency/pool_registry.rb +57 -0
- data/lib/phronomy/configuration.rb +142 -0
- data/lib/phronomy/context.rb +2 -8
- data/lib/phronomy/diagnostics.rb +62 -0
- data/lib/phronomy/embeddings.rb +2 -2
- data/lib/phronomy/eval/runner.rb +13 -9
- data/lib/phronomy/eval/scorer/llm_judge.rb +12 -1
- data/lib/phronomy/event_loop.rb +184 -46
- data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
- data/lib/phronomy/invocation_context.rb +152 -0
- data/lib/phronomy/knowledge_source.rb +0 -5
- data/lib/phronomy/llm_adapter/base.rb +104 -0
- data/lib/phronomy/llm_adapter/ruby_llm.rb +47 -0
- data/lib/phronomy/llm_adapter.rb +20 -0
- data/lib/phronomy/{context → llm_context_window}/assembler.rb +18 -3
- data/lib/phronomy/{context → llm_context_window}/context_version_cache.rb +1 -1
- data/lib/phronomy/{context → llm_context_window}/token_budget.rb +7 -4
- data/lib/phronomy/{context → llm_context_window}/token_estimator.rb +3 -3
- data/lib/phronomy/loader.rb +4 -4
- data/lib/phronomy/metrics.rb +38 -0
- data/lib/phronomy/{agent → multi_agent}/handoff.rb +2 -2
- data/lib/phronomy/{agent → multi_agent}/orchestrator.rb +151 -126
- data/lib/phronomy/multi_agent/parallel_tool_chat.rb +149 -0
- data/lib/phronomy/{agent → multi_agent}/team_coordinator.rb +2 -2
- data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
- data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
- data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
- data/lib/phronomy/runtime/scheduler.rb +98 -0
- data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
- data/lib/phronomy/runtime/task_registry.rb +48 -0
- data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
- data/lib/phronomy/runtime/timer_queue.rb +106 -0
- data/lib/phronomy/runtime/timer_service.rb +42 -0
- data/lib/phronomy/runtime.rb +389 -0
- data/lib/phronomy/splitter.rb +3 -3
- data/lib/phronomy/task/backend.rb +80 -0
- data/lib/phronomy/task/fiber_backend.rb +157 -0
- data/lib/phronomy/task/immediate_backend.rb +89 -0
- data/lib/phronomy/task/thread_backend.rb +84 -0
- data/lib/phronomy/task.rb +275 -0
- data/lib/phronomy/task_group.rb +265 -0
- data/lib/phronomy/testing/fake_clock.rb +109 -0
- data/lib/phronomy/testing/fake_scheduler.rb +104 -0
- data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
- data/lib/phronomy/testing.rb +12 -0
- data/lib/phronomy/tool/base.rb +156 -7
- data/lib/phronomy/tool/mcp_tool.rb +47 -16
- data/lib/phronomy/tool/scope_policy.rb +50 -0
- data/lib/phronomy/tracing/null_tracer.rb +3 -1
- data/lib/phronomy/tracing/open_telemetry_tracer.rb +34 -0
- data/lib/phronomy/vector_store.rb +2 -2
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow.rb +52 -5
- data/lib/phronomy/workflow_context.rb +37 -2
- data/lib/phronomy/workflow_runner.rb +28 -77
- data/lib/phronomy.rb +43 -0
- metadata +73 -33
- data/lib/phronomy/agent/parallel_tool_chat.rb +0 -92
- data/lib/phronomy/cancellation_token.rb +0 -92
- data/lib/phronomy/context/compaction_context.rb +0 -111
- data/lib/phronomy/context/trigger_context.rb +0 -39
- data/lib/phronomy/context/trim_context.rb +0 -75
- data/lib/phronomy/embeddings/base.rb +0 -22
- data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +0 -45
- data/lib/phronomy/fsm_session.rb +0 -201
- data/lib/phronomy/knowledge_source/base.rb +0 -36
- data/lib/phronomy/knowledge_source/entity_knowledge.rb +0 -96
- data/lib/phronomy/knowledge_source/rag_knowledge.rb +0 -57
- data/lib/phronomy/knowledge_source/static_knowledge.rb +0 -52
- data/lib/phronomy/loader/base.rb +0 -25
- data/lib/phronomy/loader/csv_loader.rb +0 -56
- data/lib/phronomy/loader/markdown_loader.rb +0 -76
- data/lib/phronomy/loader/plain_text_loader.rb +0 -22
- data/lib/phronomy/prompt_template.rb +0 -96
- data/lib/phronomy/splitter/base.rb +0 -47
- data/lib/phronomy/splitter/fixed_size_splitter.rb +0 -51
- data/lib/phronomy/splitter/recursive_splitter.rb +0 -105
- data/lib/phronomy/vector_store/base.rb +0 -82
- data/lib/phronomy/vector_store/in_memory.rb +0 -93
- data/lib/phronomy/vector_store/pgvector.rb +0 -127
- data/lib/phronomy/vector_store/redis_search.rb +0 -192
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
# Carries all per-invocation context values through the call stack.
|
|
5
|
+
#
|
|
6
|
+
# +InvocationContext+ is a plain value object (struct-like, frozen on
|
|
7
|
+
# creation) that replaces ad-hoc +Thread.current[...]+ propagation.
|
|
8
|
+
# Pass it explicitly wherever context needs to cross a method boundary
|
|
9
|
+
# or be handed to a child {Task} / {TaskGroup}.
|
|
10
|
+
#
|
|
11
|
+
# @example Build a context for a new agent invocation
|
|
12
|
+
# ctx = Phronomy::InvocationContext.new(
|
|
13
|
+
# thread_id: "conv-123",
|
|
14
|
+
# cancellation_token: Phronomy::Concurrency::CancellationToken.timeout_after(30),
|
|
15
|
+
# max_parallel_tools: 5
|
|
16
|
+
# )
|
|
17
|
+
# agent.invoke("Hello", invocation_context: ctx)
|
|
18
|
+
class InvocationContext
|
|
19
|
+
# @return [String, nil] conversation / workflow thread identifier
|
|
20
|
+
attr_reader :thread_id
|
|
21
|
+
|
|
22
|
+
# @return [String, nil] session identifier (e.g. Rails session id)
|
|
23
|
+
attr_reader :session_id
|
|
24
|
+
|
|
25
|
+
# @return [String, nil] end-user identifier for tracing / audit
|
|
26
|
+
attr_reader :user_id
|
|
27
|
+
|
|
28
|
+
# @return [CancellationToken, nil]
|
|
29
|
+
attr_reader :cancellation_token
|
|
30
|
+
|
|
31
|
+
# @return [Deadline, nil]
|
|
32
|
+
attr_reader :deadline
|
|
33
|
+
|
|
34
|
+
# @return [Object, nil] OpenTelemetry / tracing span
|
|
35
|
+
attr_reader :tracer_span
|
|
36
|
+
|
|
37
|
+
# @return [Integer, nil] max tokens the agent may consume this invocation
|
|
38
|
+
attr_reader :token_budget
|
|
39
|
+
|
|
40
|
+
# @return [Integer] maximum simultaneous tool calls (default: 10)
|
|
41
|
+
attr_reader :max_parallel_tools
|
|
42
|
+
|
|
43
|
+
# @return [Object, nil] approval policy applied before write-scope tools
|
|
44
|
+
attr_reader :approval_policy
|
|
45
|
+
|
|
46
|
+
# @return [Object, nil] redaction policy applied to tool args / results
|
|
47
|
+
attr_reader :redaction_policy
|
|
48
|
+
|
|
49
|
+
# @return [Hash, nil] per-provider concurrency / rate-limit overrides
|
|
50
|
+
attr_reader :provider_limits
|
|
51
|
+
|
|
52
|
+
# @return [String, nil] unique identifier for this task in the trace tree
|
|
53
|
+
attr_reader :task_id
|
|
54
|
+
|
|
55
|
+
# @return [String, nil] task_id of the parent span / task
|
|
56
|
+
attr_reader :parent_task_id
|
|
57
|
+
|
|
58
|
+
# @param thread_id [String, nil]
|
|
59
|
+
# @param session_id [String, nil]
|
|
60
|
+
# @param user_id [String, nil]
|
|
61
|
+
# @param cancellation_token [CancellationToken, nil]
|
|
62
|
+
# @param deadline [Deadline, nil]
|
|
63
|
+
# @param tracer_span [Object, nil]
|
|
64
|
+
# @param token_budget [Integer, nil]
|
|
65
|
+
# @param max_parallel_tools [Integer]
|
|
66
|
+
# @param approval_policy [Object, nil]
|
|
67
|
+
# @param redaction_policy [Object, nil]
|
|
68
|
+
# @param provider_limits [Hash, nil]
|
|
69
|
+
# @param task_id [String, nil]
|
|
70
|
+
# @param parent_task_id [String, nil]
|
|
71
|
+
# @api private
|
|
72
|
+
def initialize(
|
|
73
|
+
thread_id: nil,
|
|
74
|
+
session_id: nil,
|
|
75
|
+
user_id: nil,
|
|
76
|
+
cancellation_token: nil,
|
|
77
|
+
deadline: nil,
|
|
78
|
+
tracer_span: nil,
|
|
79
|
+
token_budget: nil,
|
|
80
|
+
max_parallel_tools: 10,
|
|
81
|
+
approval_policy: nil,
|
|
82
|
+
redaction_policy: nil,
|
|
83
|
+
provider_limits: nil,
|
|
84
|
+
task_id: nil,
|
|
85
|
+
parent_task_id: nil
|
|
86
|
+
)
|
|
87
|
+
@thread_id = thread_id
|
|
88
|
+
@session_id = session_id
|
|
89
|
+
@user_id = user_id
|
|
90
|
+
@cancellation_token = cancellation_token
|
|
91
|
+
@deadline = deadline
|
|
92
|
+
@tracer_span = tracer_span
|
|
93
|
+
@token_budget = token_budget
|
|
94
|
+
@max_parallel_tools = max_parallel_tools
|
|
95
|
+
@approval_policy = approval_policy
|
|
96
|
+
@redaction_policy = redaction_policy
|
|
97
|
+
@provider_limits = provider_limits
|
|
98
|
+
@task_id = task_id
|
|
99
|
+
@parent_task_id = parent_task_id
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# Returns a new +InvocationContext+ with the given attributes merged in.
|
|
103
|
+
# All other attributes are carried over unchanged.
|
|
104
|
+
#
|
|
105
|
+
# @param overrides [Hash] keyword arguments to override
|
|
106
|
+
# @return [InvocationContext]
|
|
107
|
+
# @api private
|
|
108
|
+
def merge(**overrides)
|
|
109
|
+
InvocationContext.new(
|
|
110
|
+
thread_id: overrides.fetch(:thread_id, @thread_id),
|
|
111
|
+
session_id: overrides.fetch(:session_id, @session_id),
|
|
112
|
+
user_id: overrides.fetch(:user_id, @user_id),
|
|
113
|
+
cancellation_token: overrides.fetch(:cancellation_token, @cancellation_token),
|
|
114
|
+
deadline: overrides.fetch(:deadline, @deadline),
|
|
115
|
+
tracer_span: overrides.fetch(:tracer_span, @tracer_span),
|
|
116
|
+
token_budget: overrides.fetch(:token_budget, @token_budget),
|
|
117
|
+
max_parallel_tools: overrides.fetch(:max_parallel_tools, @max_parallel_tools),
|
|
118
|
+
approval_policy: overrides.fetch(:approval_policy, @approval_policy),
|
|
119
|
+
redaction_policy: overrides.fetch(:redaction_policy, @redaction_policy),
|
|
120
|
+
provider_limits: overrides.fetch(:provider_limits, @provider_limits),
|
|
121
|
+
task_id: overrides.fetch(:task_id, @task_id),
|
|
122
|
+
parent_task_id: overrides.fetch(:parent_task_id, @parent_task_id)
|
|
123
|
+
)
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Convenience: returns the cancellation token or a new never-cancelled token.
|
|
127
|
+
# @return [CancellationToken]
|
|
128
|
+
# @api private
|
|
129
|
+
def effective_cancellation_token
|
|
130
|
+
@cancellation_token || Phronomy::Concurrency::CancellationToken.new
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# Returns the cancellation token to use for an invocation, taking both the
|
|
134
|
+
# explicit +cancellation_token+ and the +deadline+ into account.
|
|
135
|
+
#
|
|
136
|
+
# - When +cancellation_token+ is set, it is returned unchanged.
|
|
137
|
+
# - When only +deadline+ is set, a new {CancellationToken} is created and
|
|
138
|
+
# the deadline is attached to it via {Deadline#attach_to}.
|
|
139
|
+
# - When neither is set, returns +nil+.
|
|
140
|
+
#
|
|
141
|
+
# @return [CancellationToken, nil]
|
|
142
|
+
# @api private
|
|
143
|
+
def effective_timeout_token
|
|
144
|
+
return @cancellation_token if @cancellation_token
|
|
145
|
+
return nil if @deadline.nil?
|
|
146
|
+
|
|
147
|
+
token = Phronomy::Concurrency::CancellationToken.new
|
|
148
|
+
@deadline.attach_to(token)
|
|
149
|
+
token
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
@@ -1,10 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "knowledge_source/base"
|
|
4
|
-
require_relative "knowledge_source/static_knowledge"
|
|
5
|
-
require_relative "knowledge_source/rag_knowledge"
|
|
6
|
-
require_relative "knowledge_source/entity_knowledge"
|
|
7
|
-
|
|
8
3
|
module Phronomy
|
|
9
4
|
# KnowledgeSource provides the interface for supplying context region 3 (Knowledge)
|
|
10
5
|
# to the Context::Assembler.
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
module LLMAdapter
|
|
5
|
+
# Abstract base class for LLM adapters.
|
|
6
|
+
#
|
|
7
|
+
# Subclasses must implement {#complete} and {#stream}.
|
|
8
|
+
# The agent pipeline calls {#complete_async} / {#stream_async} which wrap
|
|
9
|
+
# those methods in a {BlockingAdapterPool} submission.
|
|
10
|
+
class Base
|
|
11
|
+
# Performs a blocking (non-streaming) LLM completion.
|
|
12
|
+
# Implementors must call +chat.ask(message)+ (or equivalent) and
|
|
13
|
+
# return the response object.
|
|
14
|
+
#
|
|
15
|
+
# @param chat [Object] the configured chat session object
|
|
16
|
+
# @param message [String] the user message
|
|
17
|
+
# @param config [Hash] the invocation config (e.g. +:cancellation_token+)
|
|
18
|
+
# @return [Object] LLM response object
|
|
19
|
+
# @raise [NotImplementedError]
|
|
20
|
+
# @api private
|
|
21
|
+
def complete(chat, message, config: {})
|
|
22
|
+
raise NotImplementedError, "#{self.class}#complete is not implemented"
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Performs a blocking streaming LLM completion.
|
|
26
|
+
# Implementors must call +chat.ask(message) { |chunk| block.call(chunk) }+
|
|
27
|
+
# (or equivalent) and return the response object.
|
|
28
|
+
#
|
|
29
|
+
# @param chat [Object] the configured chat session object
|
|
30
|
+
# @param message [String] the user message
|
|
31
|
+
# @param config [Hash] the invocation config
|
|
32
|
+
# @yield [chunk] streaming chunk from the LLM
|
|
33
|
+
# @return [Object] LLM response object
|
|
34
|
+
# @raise [NotImplementedError]
|
|
35
|
+
# @api private
|
|
36
|
+
def stream(chat, message, config: {}, &block)
|
|
37
|
+
raise NotImplementedError, "#{self.class}#stream is not implemented"
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Submits a non-streaming LLM call to {BlockingAdapterPool} and returns
|
|
41
|
+
# a {BlockingAdapterPool::PendingOperation}.
|
|
42
|
+
#
|
|
43
|
+
# @param chat [Object] configured chat session
|
|
44
|
+
# @param message [String] user message
|
|
45
|
+
# @param config [Hash] invocation config
|
|
46
|
+
# @param pool [BlockingAdapterPool] pool to submit to
|
|
47
|
+
# @return [BlockingAdapterPool::PendingOperation]
|
|
48
|
+
# @api private
|
|
49
|
+
def complete_async(chat, message, config: {}, pool: default_pool)
|
|
50
|
+
token = config[:cancellation_token]
|
|
51
|
+
timeout = config[:llm_timeout]
|
|
52
|
+
pool.submit(timeout: timeout, cancellation_token: token) do
|
|
53
|
+
complete(chat, message, config: config)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Submits a streaming LLM call to {BlockingAdapterPool} and returns
|
|
58
|
+
# a {BlockingAdapterPool::PendingOperation}.
|
|
59
|
+
#
|
|
60
|
+
# When +enqueue_to:+ is given, streaming chunks are pushed into that
|
|
61
|
+
# {AsyncQueue} from the worker thread instead of being passed directly
|
|
62
|
+
# to the caller's block. The queue is closed (via +ensure+) after the
|
|
63
|
+
# LLM call finishes so the consumer's drain loop terminates naturally.
|
|
64
|
+
# This keeps user-supplied blocks off the blocking-pool worker thread.
|
|
65
|
+
#
|
|
66
|
+
# When +enqueue_to:+ is nil and a block is given, the block is invoked
|
|
67
|
+
# directly from the worker thread (legacy behaviour, preserved for
|
|
68
|
+
# backward compatibility).
|
|
69
|
+
#
|
|
70
|
+
# @param chat [Object] configured chat session
|
|
71
|
+
# @param message [String] user message
|
|
72
|
+
# @param config [Hash] invocation config
|
|
73
|
+
# @param pool [BlockingAdapterPool] pool to submit to
|
|
74
|
+
# @param enqueue_to [AsyncQueue, nil] when set, push chunks here instead of
|
|
75
|
+
# calling the block on the worker thread
|
|
76
|
+
# @yield [chunk] streaming chunk — only used when +enqueue_to:+ is nil
|
|
77
|
+
# @return [BlockingAdapterPool::PendingOperation]
|
|
78
|
+
# @api private
|
|
79
|
+
def stream_async(chat, message, config: {}, pool: default_pool, enqueue_to: nil, &block)
|
|
80
|
+
token = config[:cancellation_token]
|
|
81
|
+
timeout = config[:llm_timeout]
|
|
82
|
+
if enqueue_to
|
|
83
|
+
pool.submit(timeout: timeout, cancellation_token: token) do
|
|
84
|
+
stream(chat, message, config: config) do |chunk|
|
|
85
|
+
enqueue_to.push(chunk)
|
|
86
|
+
end
|
|
87
|
+
ensure
|
|
88
|
+
enqueue_to.close
|
|
89
|
+
end
|
|
90
|
+
else
|
|
91
|
+
pool.submit(timeout: timeout, cancellation_token: token) do
|
|
92
|
+
stream(chat, message, config: config, &block)
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
private
|
|
98
|
+
|
|
99
|
+
def default_pool
|
|
100
|
+
Phronomy::Runtime.instance.blocking_io
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
module LLMAdapter
|
|
5
|
+
# LLM adapter that delegates to the RubyLLM blocking client.
|
|
6
|
+
#
|
|
7
|
+
# This is the default adapter used by Phronomy agents. It wraps
|
|
8
|
+
# +chat.ask+ (and its streaming variant) so that the blocking HTTP
|
|
9
|
+
# call runs inside {BlockingAdapterPool} rather than on the EventLoop
|
|
10
|
+
# thread or the caller's thread directly.
|
|
11
|
+
#
|
|
12
|
+
# @example Explicitly configuring this adapter
|
|
13
|
+
# Phronomy.configure do |c|
|
|
14
|
+
# c.llm_adapter = Phronomy::LLMAdapter::RubyLLM.new
|
|
15
|
+
# end
|
|
16
|
+
class RubyLLM < Base
|
|
17
|
+
# Delegates to +chat.ask(message)+ or +chat.complete+ when message is nil.
|
|
18
|
+
#
|
|
19
|
+
# Passing +nil+ for +message+ is used by the ReAct loop for continuation
|
|
20
|
+
# turns where the user message has already been added to the chat history
|
|
21
|
+
# (e.g. after a tool result) and the LLM should continue without a new
|
|
22
|
+
# user turn.
|
|
23
|
+
#
|
|
24
|
+
# @param chat [Object] RubyLLM chat session
|
|
25
|
+
# @param message [String, nil] user message, or nil to continue the chat
|
|
26
|
+
# @param config [Hash] invocation config (not used directly by this impl)
|
|
27
|
+
# @return [Object] RubyLLM response
|
|
28
|
+
# @api private
|
|
29
|
+
def complete(chat, message, config: {})
|
|
30
|
+
message ? chat.ask(message) : chat.complete
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Delegates to +chat.ask(message) { |chunk| block.call(chunk) }+ or
|
|
34
|
+
# +chat.complete(&block)+ when message is nil.
|
|
35
|
+
#
|
|
36
|
+
# @param chat [Object] RubyLLM chat session
|
|
37
|
+
# @param message [String, nil] user message, or nil to continue the chat
|
|
38
|
+
# @param config [Hash] invocation config
|
|
39
|
+
# @yield [chunk] streaming chunk forwarded from +chat.ask+ / +chat.complete+
|
|
40
|
+
# @return [Object] RubyLLM response
|
|
41
|
+
# @api private
|
|
42
|
+
def stream(chat, message, config: {}, &block)
|
|
43
|
+
message ? chat.ask(message, &block) : chat.complete(&block)
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
# Namespace for LLM adapter implementations.
|
|
5
|
+
#
|
|
6
|
+
# An LLMAdapter decouples Phronomy's agent pipeline from direct
|
|
7
|
+
# dependency on the RubyLLM blocking client. All LLM calls in
|
|
8
|
+
# {Agent::Base} are routed through the adapter so that:
|
|
9
|
+
#
|
|
10
|
+
# - Blocking HTTP can be submitted to {BlockingAdapterPool} for bounded
|
|
11
|
+
# concurrency and per-operation timeouts.
|
|
12
|
+
# - Alternative LLM clients can be swapped in without changing agent code.
|
|
13
|
+
#
|
|
14
|
+
# @example Configuring a custom adapter
|
|
15
|
+
# Phronomy.configure do |c|
|
|
16
|
+
# c.llm_adapter = MyCustomAdapter.new
|
|
17
|
+
# end
|
|
18
|
+
module LLMAdapter
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
require "cgi"
|
|
4
4
|
|
|
5
5
|
module Phronomy
|
|
6
|
-
module Context
|
|
6
|
+
module LlmContextWindow
|
|
7
7
|
# Assembler collects all four context regions and produces the final
|
|
8
8
|
# {system:, messages:} hash consumed by Agent::Base.
|
|
9
9
|
#
|
|
@@ -20,7 +20,7 @@ module Phronomy
|
|
|
20
20
|
# messages are passed through unchanged.
|
|
21
21
|
#
|
|
22
22
|
# @example
|
|
23
|
-
# assembler = Phronomy::Context::Assembler.new(budget: budget)
|
|
23
|
+
# assembler = Phronomy::LlmContextWindow::Assembler.new(budget: budget)
|
|
24
24
|
# assembler.add_instruction("You are a helpful assistant.")
|
|
25
25
|
# assembler.add_knowledge("The user lives in Tokyo.", type: :entity, trusted: false)
|
|
26
26
|
# assembler.add_messages(manager.load(thread_id: "t1", query: user_input))
|
|
@@ -36,13 +36,15 @@ module Phronomy
|
|
|
36
36
|
# @param trusted [Boolean]
|
|
37
37
|
# @return [String]
|
|
38
38
|
# @api private
|
|
39
|
+
# mutant:disable - text.to_str and plain text (no to_s) are genuine equivalents when text is a String; type.to_str is genuine equivalent when type is a String
|
|
39
40
|
def self.xml_tag(text, type:, trusted: false)
|
|
40
41
|
"<context type=\"#{CGI.escapeHTML(type.to_s)}\" trusted=\"#{trusted}\">\n#{CGI.escapeHTML(text.to_s)}\n</context>"
|
|
41
42
|
end
|
|
42
43
|
|
|
43
|
-
# @param budget [Phronomy::Context::TokenBudget, nil]
|
|
44
|
+
# @param budget [Phronomy::LlmContextWindow::TokenBudget, nil]
|
|
44
45
|
# when nil no token trimming is performed
|
|
45
46
|
# @api private
|
|
47
|
+
# mutant:disable - @instruction = nil deletion is a genuine equivalent (uninitialized Ruby instance variables return nil)
|
|
46
48
|
def initialize(budget: nil)
|
|
47
49
|
@budget = budget
|
|
48
50
|
@instruction = nil
|
|
@@ -56,6 +58,7 @@ module Phronomy
|
|
|
56
58
|
# @param text [String]
|
|
57
59
|
# @return [self]
|
|
58
60
|
# @api private
|
|
61
|
+
# mutant:disable - text.to_str and plain text (no .to_s) are genuine equivalents when callers always pass a String
|
|
59
62
|
def add_instruction(text)
|
|
60
63
|
@instruction = text.to_s
|
|
61
64
|
self
|
|
@@ -71,6 +74,7 @@ module Phronomy
|
|
|
71
74
|
# XML tag so the LLM can produce grounded citations. Omitted when nil.
|
|
72
75
|
# @return [self]
|
|
73
76
|
# @api private
|
|
77
|
+
# mutant:disable - {text:} (shorthand, no .to_s) and text.to_str are genuine equivalents when text is a String; {type:} shorthand is genuine equivalent because xml_context_tag always calls .to_s on chunk[:type]
|
|
74
78
|
def add_knowledge(text, type:, trusted: false, source: nil)
|
|
75
79
|
@knowledge_chunks << {text: text.to_s, type: type.to_s, trusted: trusted, source: source}
|
|
76
80
|
self
|
|
@@ -81,6 +85,7 @@ module Phronomy
|
|
|
81
85
|
# @param messages [Array] message-like objects with #role and #content
|
|
82
86
|
# @return [self]
|
|
83
87
|
# @api private
|
|
88
|
+
# mutant:disable - @messages = messages (no Array()) is a genuine equivalent when callers always pass an Array
|
|
84
89
|
def add_messages(messages)
|
|
85
90
|
@messages = Array(messages)
|
|
86
91
|
self
|
|
@@ -92,6 +97,7 @@ module Phronomy
|
|
|
92
97
|
# :system [String, nil] combined system prompt (instruction + knowledge XML tags)
|
|
93
98
|
# :messages [Array] conversation messages, trimmed to budget if set
|
|
94
99
|
# @api private
|
|
100
|
+
# mutant:disable - multiple genuine equivalent mutations: map{}.join("\n\n") → map{} is genuine because Ruby Array#join recursively joins nested arrays with the same separator (so [outer_array].join("\n\n") == original String); `unless knowledge_text.empty?` vs ternary is genuine (same conditional logic); `{ system: unless system_text.empty? }` vs ternary is genuine; `messages:` shorthand vs `messages: messages` is genuine
|
|
95
101
|
def build
|
|
96
102
|
knowledge_text = @knowledge_chunks.map { |c| xml_context_tag(c) }.join("\n\n")
|
|
97
103
|
system_parts = [@instruction, knowledge_text.empty? ? nil : knowledge_text].compact
|
|
@@ -111,11 +117,20 @@ module Phronomy
|
|
|
111
117
|
|
|
112
118
|
private
|
|
113
119
|
|
|
120
|
+
# mutant:disable - multiple genuine equivalent mutations: chunk.fetch(key) vs chunk[key] (key always present); chunk[:text] no .to_s / .to_str are genuine (stored as String); chunk[:type] no .to_s / .to_str are genuine (stored as String); chunk[:source] no .to_s / .to_str are genuine (truthy branch, always String); src_attr chunk.fetch(:source) is genuine (source key always present)
|
|
114
121
|
def xml_context_tag(chunk)
|
|
115
122
|
src_attr = chunk[:source] ? " source=\"#{CGI.escapeHTML(chunk[:source].to_s)}\"" : ""
|
|
116
123
|
"<context type=\"#{CGI.escapeHTML(chunk[:type].to_s)}\"#{src_attr} trusted=\"#{chunk[:trusted]}\">\n#{CGI.escapeHTML(chunk[:text].to_s)}\n</context>"
|
|
117
124
|
end
|
|
118
125
|
|
|
126
|
+
# mutant:disable - multiple genuine equivalent mutations on the early-return guard:
|
|
127
|
+
# `remaining <= 0 && false/nil`, `if false`, `if nil`, `if remaining && messages.empty?`,
|
|
128
|
+
# `if remaining < 0 && messages.empty?`, `if remaining <= -1 && messages.empty?`,
|
|
129
|
+
# `if remaining <= 1 && messages.empty?`, `if remaining == 0 && messages.empty?`,
|
|
130
|
+
# `if remaining.eql?(0) && messages.empty?`, `if remaining.equal?(0) && messages.empty?`,
|
|
131
|
+
# `if 0 && messages.empty?`, `if nil && messages.empty?` —
|
|
132
|
+
# all are genuine equivalents because when messages.empty? the loop produces [] anyway,
|
|
133
|
+
# and remaining is always >= 0 (clamp(0..)) so `remaining < 0` / `<= -1` are never true.
|
|
119
134
|
def trim_messages_to_budget(messages, system_text)
|
|
120
135
|
used = TokenEstimator.estimate(system_text)
|
|
121
136
|
remaining = @budget.available(used: used)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Phronomy
|
|
4
|
-
module Context
|
|
4
|
+
module LlmContextWindow
|
|
5
5
|
# Caches the assembled static system prompt text keyed by a SHA-256
|
|
6
6
|
# fingerprint of the agent's instructions + static knowledge content.
|
|
7
7
|
# Each instance is owned by one thread (stored in +Thread.current+).
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Phronomy
|
|
4
|
-
module Context
|
|
4
|
+
module LlmContextWindow
|
|
5
5
|
# Raised when a model name is not found in the RubyLLM model registry and
|
|
6
6
|
# no explicit context_window was provided.
|
|
7
7
|
class UnknownModelError < Phronomy::Error; end
|
|
@@ -17,16 +17,16 @@ module Phronomy
|
|
|
17
17
|
# └─ effective_input_limit (available for memory + knowledge)
|
|
18
18
|
#
|
|
19
19
|
# @example Auto-derive from RubyLLM model registry
|
|
20
|
-
# budget = Phronomy::Context::TokenBudget.new(model: "claude-3-5-sonnet-20241022")
|
|
20
|
+
# budget = Phronomy::LlmContextWindow::TokenBudget.new(model: "claude-3-5-sonnet-20241022")
|
|
21
21
|
#
|
|
22
22
|
# @example Explicit values (useful for local / unknown models)
|
|
23
|
-
# budget = Phronomy::Context::TokenBudget.new(
|
|
23
|
+
# budget = Phronomy::LlmContextWindow::TokenBudget.new(
|
|
24
24
|
# context_window: 32_768,
|
|
25
25
|
# max_output_tokens: 4_096
|
|
26
26
|
# )
|
|
27
27
|
#
|
|
28
28
|
# @example With overhead for instructions + tool definitions
|
|
29
|
-
# budget = Phronomy::Context::TokenBudget.new(
|
|
29
|
+
# budget = Phronomy::LlmContextWindow::TokenBudget.new(
|
|
30
30
|
# model: "gpt-4o",
|
|
31
31
|
# overhead: 800
|
|
32
32
|
# )
|
|
@@ -46,6 +46,7 @@ module Phronomy
|
|
|
46
46
|
# and model is given, uses max_output_tokens
|
|
47
47
|
# @param overhead [Integer] tokens reserved for instructions/tools
|
|
48
48
|
# @api private
|
|
49
|
+
# mutant:disable - multiple genuine equivalent mutations: overhead/context_window/max_output_tokens .to_i vs .to_int vs Integer() vs omitted are equivalent for Integer inputs; (max_output_tokens||0).to_i vs (max_output_tokens).to_i and (||nil).to_i are genuine because nil.to_i==0; overhead:nil default is genuine because nil.to_i==0
|
|
49
50
|
def initialize(model: nil, context_window: nil, max_output_tokens: nil, overhead: 0)
|
|
50
51
|
@overhead = overhead.to_i
|
|
51
52
|
|
|
@@ -76,12 +77,14 @@ module Phronomy
|
|
|
76
77
|
# @param used [Integer] tokens already committed (e.g. from knowledge injection)
|
|
77
78
|
# @return [Integer] remaining tokens (always >= 0)
|
|
78
79
|
# @api private
|
|
80
|
+
# mutant:disable - used.to_i vs used vs used.to_int vs Integer(used) are genuine equivalents when used is an Integer; used:nil default is genuine because nil.to_i==0==default 0
|
|
79
81
|
def available(used: 0)
|
|
80
82
|
[effective_input_limit - used.to_i, 0].max
|
|
81
83
|
end
|
|
82
84
|
|
|
83
85
|
private
|
|
84
86
|
|
|
87
|
+
# mutant:disable - raise(UnknownModelError) and raise(UnknownModelError,nil) and raise(UnknownModelError,"Model '#{nil}' not found") in both branches are genuine equivalents (spec checks exception class only, not message text)
|
|
85
88
|
def lookup_model!(model_name)
|
|
86
89
|
found = RubyLLM.models.find(model_name)
|
|
87
90
|
raise UnknownModelError, "Model '#{model_name}' not found in RubyLLM registry" unless found
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Phronomy
|
|
4
|
-
module Context
|
|
4
|
+
module LlmContextWindow
|
|
5
5
|
# Central, stateless token estimation utility.
|
|
6
6
|
#
|
|
7
7
|
# All token counting in the framework passes through this module so that the
|
|
@@ -21,10 +21,10 @@ module Phronomy
|
|
|
21
21
|
# @example Use tiktoken_ruby for accurate GPT token counts
|
|
22
22
|
# require "tiktoken_ruby"
|
|
23
23
|
# enc = Tiktoken.encoding_for_model("gpt-4o")
|
|
24
|
-
# Phronomy::Context::TokenEstimator.tokenizer = ->(text) { enc.encode(text).length }
|
|
24
|
+
# Phronomy::LlmContextWindow::TokenEstimator.tokenizer = ->(text) { enc.encode(text).length }
|
|
25
25
|
#
|
|
26
26
|
# @example Reset to built-in heuristic
|
|
27
|
-
# Phronomy::Context::TokenEstimator.tokenizer = nil
|
|
27
|
+
# Phronomy::LlmContextWindow::TokenEstimator.tokenizer = nil
|
|
28
28
|
module TokenEstimator
|
|
29
29
|
@tokenizer = nil
|
|
30
30
|
@tokenizer_mutex = Mutex.new
|
data/lib/phronomy/loader.rb
CHANGED
|
@@ -4,10 +4,10 @@ module Phronomy
|
|
|
4
4
|
# Document loader implementations for ingesting files into a RAG pipeline.
|
|
5
5
|
#
|
|
6
6
|
# Sub-classes are auto-loaded by Zeitwerk:
|
|
7
|
-
# Phronomy::Loader::Base
|
|
8
|
-
# Phronomy::Loader::PlainTextLoader
|
|
9
|
-
# Phronomy::Loader::MarkdownLoader
|
|
10
|
-
# Phronomy::Loader::CsvLoader
|
|
7
|
+
# Phronomy::Agent::Context::Knowledge::Loader::Base
|
|
8
|
+
# Phronomy::Agent::Context::Knowledge::Loader::PlainTextLoader
|
|
9
|
+
# Phronomy::Agent::Context::Knowledge::Loader::MarkdownLoader
|
|
10
|
+
# Phronomy::Agent::Context::Knowledge::Loader::CsvLoader
|
|
11
11
|
module Loader
|
|
12
12
|
end
|
|
13
13
|
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
# Task-centric observability snapshot (Issue #276, extended in #307).
|
|
5
|
+
#
|
|
6
|
+
# Collects live metrics from the shared Runtime components
|
|
7
|
+
# (BlockingAdapterPool, EventLoop, and Runtime task registry) and returns
|
|
8
|
+
# them as a plain Hash so they can be forwarded to any monitoring backend
|
|
9
|
+
# (Prometheus, OpenTelemetry, StatsD, etc.).
|
|
10
|
+
#
|
|
11
|
+
# All metrics are read at the moment {.snapshot} is called; no
|
|
12
|
+
# persistent state is held here.
|
|
13
|
+
#
|
|
14
|
+
# @example Exporting to a metrics endpoint
|
|
15
|
+
# data = Phronomy::Metrics.snapshot
|
|
16
|
+
# # => { blocking_pool_active: 2, active_agent_tasks: 1, ... }
|
|
17
|
+
module Metrics
|
|
18
|
+
# Returns a Hash of current observability metrics.
|
|
19
|
+
#
|
|
20
|
+
# @return [Hash{Symbol => Numeric}]
|
|
21
|
+
# @api public
|
|
22
|
+
def self.snapshot
|
|
23
|
+
pool = Runtime.instance.blocking_io
|
|
24
|
+
el = EventLoop.instance
|
|
25
|
+
task_snap = Runtime.instance.task_snapshot
|
|
26
|
+
|
|
27
|
+
{
|
|
28
|
+
blocking_pool_active: pool.active_count,
|
|
29
|
+
blocking_pool_queue_length: pool.queue_depth,
|
|
30
|
+
blocking_pool_abandoned_total: pool.abandoned_count,
|
|
31
|
+
blocking_pool_size: pool.pool_size,
|
|
32
|
+
event_loop_lag_last_ms: (el.last_lag_seconds * 1000).round(3),
|
|
33
|
+
event_loop_lag_max_ms: (el.max_lag_seconds * 1000).round(3),
|
|
34
|
+
event_loop_lag_average_ms: (el.average_lag_seconds * 1000).round(3)
|
|
35
|
+
}.merge(task_snap)
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
require "securerandom"
|
|
4
4
|
|
|
5
5
|
module Phronomy
|
|
6
|
-
module Agent
|
|
6
|
+
module MultiAgent
|
|
7
7
|
# Represents a transfer edge from one agent to another.
|
|
8
8
|
# Creates an anonymous Phronomy::Tool::Base subclass that the source agent
|
|
9
9
|
# exposes to the LLM as a +transfer_to_<name>+ function.
|
|
@@ -12,7 +12,7 @@ module Phronomy
|
|
|
12
12
|
#
|
|
13
13
|
# @example
|
|
14
14
|
# billing = BillingAgent.new
|
|
15
|
-
# handoff = Phronomy::Agent::Handoff.new(target_agent: billing)
|
|
15
|
+
# handoff = Phronomy::MultiAgent::Handoff.new(target_agent: billing)
|
|
16
16
|
# tool_class = handoff.to_tool_class
|
|
17
17
|
class Handoff
|
|
18
18
|
# Prefix embedded in tool results so Runner can detect handoffs.
|