phronomy 0.5.4 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mutant.yml +21 -0
- data/CHANGELOG.md +379 -0
- data/CONTRIBUTING.md +102 -0
- data/README.md +262 -48
- data/RELEASE_CHECKLIST.md +86 -0
- data/SECURITY.md +80 -0
- data/benchmark/baseline.json +9 -0
- data/benchmark/bench_agent_invoke.rb +105 -0
- data/benchmark/bench_context_assembler.rb +46 -0
- data/benchmark/bench_regression.rb +171 -0
- data/benchmark/bench_token_estimator.rb +44 -0
- data/benchmark/bench_tool_schema.rb +69 -0
- data/benchmark/bench_vector_store.rb +39 -0
- data/benchmark/bench_workflow.rb +55 -0
- data/benchmark/run_all.rb +118 -0
- data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
- data/docs/decisions/002-workflow-context-immutability.md +42 -0
- data/docs/decisions/003-event-loop-singleton.md +48 -0
- data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +51 -0
- data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
- data/docs/decisions/006-no-built-in-guardrails.md +48 -0
- data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
- data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
- data/docs/decisions/009-state-store-abstraction.md +141 -0
- data/lib/phronomy/agent/base.rb +281 -13
- data/lib/phronomy/agent/before_completion_context.rb +1 -0
- data/lib/phronomy/agent/checkpoint.rb +1 -0
- data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
- data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
- data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
- data/lib/phronomy/agent/concerns/retryable.rb +12 -1
- data/lib/phronomy/agent/concerns/suspendable.rb +4 -0
- data/lib/phronomy/agent/fsm.rb +180 -0
- data/lib/phronomy/agent/handoff.rb +3 -0
- data/lib/phronomy/agent/orchestrator.rb +123 -11
- data/lib/phronomy/agent/parallel_tool_chat.rb +92 -0
- data/lib/phronomy/agent/react_agent.rb +8 -6
- data/lib/phronomy/agent/runner.rb +2 -0
- data/lib/phronomy/agent/shared_state.rb +11 -0
- data/lib/phronomy/agent/suspend_signal.rb +2 -0
- data/lib/phronomy/agent/team_coordinator.rb +17 -5
- data/lib/phronomy/cancellation_token.rb +92 -0
- data/lib/phronomy/configuration.rb +32 -2
- data/lib/phronomy/context/assembler.rb +6 -0
- data/lib/phronomy/context/compaction_context.rb +2 -0
- data/lib/phronomy/context/context_version_cache.rb +2 -0
- data/lib/phronomy/context/token_budget.rb +3 -0
- data/lib/phronomy/context/token_estimator.rb +9 -2
- data/lib/phronomy/context/trigger_context.rb +1 -0
- data/lib/phronomy/context/trim_context.rb +4 -0
- data/lib/phronomy/context.rb +0 -1
- data/lib/phronomy/embeddings/base.rb +5 -2
- data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
- data/lib/phronomy/eval/comparison.rb +2 -0
- data/lib/phronomy/eval/dataset.rb +4 -0
- data/lib/phronomy/eval/metrics.rb +6 -0
- data/lib/phronomy/eval/runner.rb +2 -0
- data/lib/phronomy/eval/scorer/base.rb +1 -0
- data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
- data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
- data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
- data/lib/phronomy/event.rb +14 -0
- data/lib/phronomy/event_loop.rb +254 -0
- data/lib/phronomy/fsm_session.rb +201 -0
- data/lib/phronomy/generator_verifier.rb +24 -22
- data/lib/phronomy/guardrail/base.rb +3 -0
- data/lib/phronomy/guardrail.rb +0 -1
- data/lib/phronomy/knowledge_source/base.rb +6 -2
- data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
- data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
- data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
- data/lib/phronomy/loader/base.rb +1 -0
- data/lib/phronomy/loader/csv_loader.rb +2 -0
- data/lib/phronomy/loader/markdown_loader.rb +2 -0
- data/lib/phronomy/loader/plain_text_loader.rb +1 -0
- data/lib/phronomy/output_parser/base.rb +1 -0
- data/lib/phronomy/output_parser/json_parser.rb +22 -3
- data/lib/phronomy/output_parser/structured_parser.rb +2 -0
- data/lib/phronomy/prompt_template.rb +5 -0
- data/lib/phronomy/runnable.rb +20 -3
- data/lib/phronomy/splitter/base.rb +2 -0
- data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
- data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
- data/lib/phronomy/state_store/base.rb +48 -0
- data/lib/phronomy/state_store/in_memory.rb +62 -0
- data/lib/phronomy/tool/agent_tool.rb +1 -0
- data/lib/phronomy/tool/base.rb +189 -27
- data/lib/phronomy/tool/mcp_tool.rb +68 -13
- data/lib/phronomy/tracing/base.rb +3 -0
- data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
- data/lib/phronomy/tracing/open_telemetry_tracer.rb +2 -0
- data/lib/phronomy/vector_store/base.rb +33 -7
- data/lib/phronomy/vector_store/in_memory.rb +16 -7
- data/lib/phronomy/vector_store/pgvector.rb +40 -9
- data/lib/phronomy/vector_store/redis_search.rb +29 -8
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow.rb +175 -74
- data/lib/phronomy/workflow_context.rb +55 -5
- data/lib/phronomy/workflow_runner.rb +197 -114
- data/lib/phronomy.rb +74 -1
- data/scripts/api_snapshot.rb +91 -0
- data/scripts/check_api_annotations.rb +68 -0
- data/scripts/check_private_enforcement.rb +93 -0
- data/scripts/check_readme_runnable.rb +98 -0
- data/scripts/run_mutation.sh +46 -0
- metadata +50 -6
- data/lib/phronomy/context/builder.rb +0 -92
- data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +0 -100
- data/lib/phronomy/guardrail/builtin/prompt_injection_detector.rb +0 -67
- data/lib/phronomy/guardrail/builtin.rb +0 -16
|
@@ -57,6 +57,7 @@ module Phronomy
|
|
|
57
57
|
|
|
58
58
|
# Returns a shallow copy of all findings in insertion order.
|
|
59
59
|
# @return [Array<Hash>]
|
|
60
|
+
# @api public
|
|
60
61
|
def read_all
|
|
61
62
|
@findings.dup
|
|
62
63
|
end
|
|
@@ -66,6 +67,7 @@ module Phronomy
|
|
|
66
67
|
# @param content [String] the finding text
|
|
67
68
|
# @param cycle [Integer] the current cycle number
|
|
68
69
|
# @return [nil]
|
|
70
|
+
# @api public
|
|
69
71
|
def write(agent:, content:, cycle:)
|
|
70
72
|
@findings << {agent: agent, content: content, cycle: cycle}
|
|
71
73
|
nil
|
|
@@ -73,6 +75,7 @@ module Phronomy
|
|
|
73
75
|
|
|
74
76
|
# Returns the number of findings recorded so far.
|
|
75
77
|
# @return [Integer]
|
|
78
|
+
# @api public
|
|
76
79
|
def size
|
|
77
80
|
@findings.size
|
|
78
81
|
end
|
|
@@ -85,6 +88,7 @@ module Phronomy
|
|
|
85
88
|
# @param klass [Class] an Agent::Base subclass
|
|
86
89
|
# @param instruction [String, nil] optional per-agent coordination instruction
|
|
87
90
|
# appended to the team coordination text in this agent's prompt
|
|
91
|
+
# @api public
|
|
88
92
|
def member(klass, instruction: nil)
|
|
89
93
|
@members ||= []
|
|
90
94
|
@members << {klass: klass, instruction: instruction}
|
|
@@ -94,6 +98,7 @@ module Phronomy
|
|
|
94
98
|
# per-agent instruction. Prefer {.member} for new code.
|
|
95
99
|
#
|
|
96
100
|
# @param classes [Array<Class>] Agent::Base subclasses
|
|
101
|
+
# @api public
|
|
97
102
|
def researchers(*classes)
|
|
98
103
|
classes.flatten.each { |klass| member(klass) }
|
|
99
104
|
end
|
|
@@ -104,6 +109,7 @@ module Phronomy
|
|
|
104
109
|
# workflow. Override this when you need a different protocol or tone.
|
|
105
110
|
#
|
|
106
111
|
# @param text [String, nil] the coordination instructions
|
|
112
|
+
# @api public
|
|
107
113
|
def coordination(text = nil)
|
|
108
114
|
text ? @coordination = text : @coordination
|
|
109
115
|
end
|
|
@@ -112,6 +118,7 @@ module Phronomy
|
|
|
112
118
|
# At least one of +max_cycles+ or +timeout+ must be configured.
|
|
113
119
|
#
|
|
114
120
|
# @param value [Integer, nil]
|
|
121
|
+
# @api public
|
|
115
122
|
def max_cycles(value = nil)
|
|
116
123
|
value ? @max_cycles = Integer(value) : @max_cycles
|
|
117
124
|
end
|
|
@@ -120,6 +127,7 @@ module Phronomy
|
|
|
120
127
|
# At least one of +max_cycles+ or +timeout+ must be configured.
|
|
121
128
|
#
|
|
122
129
|
# @param value [Numeric, nil]
|
|
130
|
+
# @api public
|
|
123
131
|
def timeout(value = nil)
|
|
124
132
|
value ? @timeout = value.to_f : @timeout
|
|
125
133
|
end
|
|
@@ -128,6 +136,7 @@ module Phronomy
|
|
|
128
136
|
# cycle; when it returns +true+ the loop terminates early.
|
|
129
137
|
#
|
|
130
138
|
# @yield [KnowledgeStore] receives the store; return +true+ to stop
|
|
139
|
+
# @api public
|
|
131
140
|
def terminate_when(&block)
|
|
132
141
|
block ? @terminate_when = block : @terminate_when
|
|
133
142
|
end
|
|
@@ -136,6 +145,7 @@ module Phronomy
|
|
|
136
145
|
# When omitted, +store.read_all+ is used as-is.
|
|
137
146
|
#
|
|
138
147
|
# @yield [KnowledgeStore] receives the final store; return value becomes +:output+
|
|
148
|
+
# @api public
|
|
139
149
|
def aggregate(&block)
|
|
140
150
|
block ? @aggregator = block : @aggregator
|
|
141
151
|
end
|
|
@@ -162,6 +172,7 @@ module Phronomy
|
|
|
162
172
|
# @param config [Hash] reserved for future use
|
|
163
173
|
# @return [Hash] +:output+, +:cycles+, +:terminated_by+
|
|
164
174
|
# @raise [ArgumentError] when neither +max_cycles+ nor +timeout+ is configured
|
|
175
|
+
# @api public
|
|
165
176
|
def invoke(input, config: {})
|
|
166
177
|
validate_termination!
|
|
167
178
|
|
|
@@ -8,6 +8,7 @@ module Phronomy
|
|
|
8
8
|
# suspended result hash containing a Checkpoint.
|
|
9
9
|
#
|
|
10
10
|
# This class is intentionally NOT part of the public API. Callers should
|
|
11
|
+
# @api private
|
|
11
12
|
# inspect the +:suspended+ key in the result hash returned by #invoke.
|
|
12
13
|
#
|
|
13
14
|
# @api private
|
|
@@ -24,6 +25,7 @@ module Phronomy
|
|
|
24
25
|
# @param tool_name [String]
|
|
25
26
|
# @param args [Hash]
|
|
26
27
|
# @param tool_call_id [String]
|
|
28
|
+
# @api private
|
|
27
29
|
def initialize(tool_name:, args:, tool_call_id:)
|
|
28
30
|
super("Agent suspended waiting for approval of tool: #{tool_name}")
|
|
29
31
|
@tool_name = tool_name
|
|
@@ -7,9 +7,13 @@ module Phronomy
|
|
|
7
7
|
# @see https://claude.com/blog/multi-agent-coordination-patterns
|
|
8
8
|
#
|
|
9
9
|
# A coordinator LLM agent decomposes work into tasks and enqueues them
|
|
10
|
-
# dynamically via built-in tools. A fixed
|
|
11
|
-
#
|
|
12
|
-
# assignments to accumulate domain context over time.
|
|
10
|
+
# dynamically via built-in tools. A fixed set of worker agents processes tasks
|
|
11
|
+
# sequentially — one task per worker per turn — carrying forward their
|
|
12
|
+
# conversation history across assignments to accumulate domain context over time.
|
|
13
|
+
#
|
|
14
|
+
# Workers are selected in sequence (the worker with the fewest accumulated
|
|
15
|
+
# messages is chosen by default). Task dispatch is synchronous; there is no
|
|
16
|
+
# concurrent or parallel execution.
|
|
13
17
|
#
|
|
14
18
|
# The coordinator is an {Agent::Base} subclass that has two built-in tools:
|
|
15
19
|
# - +enqueue_task+ — adds a task description to the queue
|
|
@@ -56,6 +60,7 @@ module Phronomy
|
|
|
56
60
|
# Falls back to +Phronomy.configuration.default_model+ when not set.
|
|
57
61
|
#
|
|
58
62
|
# @param value [String, nil]
|
|
63
|
+
# @api public
|
|
59
64
|
def coordinator_model(value = nil)
|
|
60
65
|
value ? @coordinator_model = value : @coordinator_model
|
|
61
66
|
end
|
|
@@ -65,6 +70,7 @@ module Phronomy
|
|
|
65
70
|
# and then call +finalize+ when all tasks are enqueued.
|
|
66
71
|
#
|
|
67
72
|
# @param value [String, nil]
|
|
73
|
+
# @api public
|
|
68
74
|
def coordinator_instructions(value = nil)
|
|
69
75
|
value ? @coordinator_instructions = value : @coordinator_instructions
|
|
70
76
|
end
|
|
@@ -75,16 +81,18 @@ module Phronomy
|
|
|
75
81
|
# Pass the same value as +LLMConfig::PROVIDER+ in your examples.
|
|
76
82
|
#
|
|
77
83
|
# @param value [Symbol, nil]
|
|
84
|
+
# @api public
|
|
78
85
|
def coordinator_provider(value = nil)
|
|
79
86
|
value ? @coordinator_provider = value : @coordinator_provider
|
|
80
87
|
end
|
|
81
88
|
|
|
82
|
-
# Configures the
|
|
89
|
+
# Configures the set of workers.
|
|
83
90
|
#
|
|
84
|
-
# @param size [Integer] number of persistent worker instances
|
|
91
|
+
# @param size [Integer] number of persistent worker instances (tasks are assigned sequentially)
|
|
85
92
|
# @param agent [Class] Agent::Base subclass used for all workers
|
|
86
93
|
# @param on_error [Symbol] +:raise+ (default) propagates worker exceptions;
|
|
87
94
|
# +:skip+ records the failure and continues with remaining tasks
|
|
95
|
+
# @api public
|
|
88
96
|
def pool(size:, agent:, on_error: :raise)
|
|
89
97
|
@pool_size = Integer(size)
|
|
90
98
|
@worker_agent = agent
|
|
@@ -98,6 +106,7 @@ module Phronomy
|
|
|
98
106
|
#
|
|
99
107
|
# @yield [Array<WorkerState>] available workers
|
|
100
108
|
# @yieldreturn [WorkerState] the chosen worker
|
|
109
|
+
# @api public
|
|
101
110
|
def schedule(&block)
|
|
102
111
|
@scheduler = block
|
|
103
112
|
end
|
|
@@ -108,6 +117,7 @@ module Phronomy
|
|
|
108
117
|
# When omitted, the raw assignments array is returned.
|
|
109
118
|
#
|
|
110
119
|
# @yield [Array<Hash>] all completed (and skipped) task assignments
|
|
120
|
+
# @api public
|
|
111
121
|
def aggregate(&block)
|
|
112
122
|
@aggregator = block
|
|
113
123
|
end
|
|
@@ -137,6 +147,7 @@ module Phronomy
|
|
|
137
147
|
# @param config [Hash] reserved for future use
|
|
138
148
|
# @return [Object] the return value of the aggregate block, or the raw assignments Array
|
|
139
149
|
# @raise [ArgumentError] when +pool :agent+ has not been configured
|
|
150
|
+
# @api public
|
|
140
151
|
def invoke(team_input, config: {})
|
|
141
152
|
raise ArgumentError, "pool :agent must be configured before invoking" unless self.class._worker_agent
|
|
142
153
|
|
|
@@ -161,6 +172,7 @@ module Phronomy
|
|
|
161
172
|
# @yield [Hash] one event per completed/failed task
|
|
162
173
|
# @return [Object] same as +invoke+
|
|
163
174
|
# @raise [ArgumentError] when +pool :agent+ has not been configured
|
|
175
|
+
# @api public
|
|
164
176
|
def stream(team_input, config: {}, &block)
|
|
165
177
|
return invoke(team_input, config: config) unless block
|
|
166
178
|
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
# Provides cooperative cancellation for agent invocations.
|
|
5
|
+
#
|
|
6
|
+
# Pass a token to an agent via +config: { cancellation_token: token }+.
|
|
7
|
+
# The agent checks the token before each LLM call and raises
|
|
8
|
+
# {Phronomy::CancellationError} when the token is cancelled or the
|
|
9
|
+
# optional deadline has passed.
|
|
10
|
+
#
|
|
11
|
+
# A token may be shared across multiple agent invocations and across threads;
|
|
12
|
+
# the mutable cancelled flag is guarded by a Mutex, and deadlines are fixed at construction.
|
|
13
|
+
#
|
|
14
|
+
# @example Explicit cancel from another thread
|
|
15
|
+
# token = Phronomy::CancellationToken.new
|
|
16
|
+
# Thread.new { sleep 5; token.cancel! }
|
|
17
|
+
# result = agent.invoke("...", config: { cancellation_token: token })
|
|
18
|
+
#
|
|
19
|
+
# @example Hard deadline via monotonic clock (recommended)
|
|
20
|
+
# token = Phronomy::CancellationToken.timeout_after(30)
|
|
21
|
+
# result = agent.invoke("...", config: { cancellation_token: token })
|
|
22
|
+
#
|
|
23
|
+
# @example Hard deadline via wall-clock (legacy)
|
|
24
|
+
# token = Phronomy::CancellationToken.new(deadline: Time.now + 30)
|
|
25
|
+
# result = agent.invoke("...", config: { cancellation_token: token })
|
|
26
|
+
#
|
|
27
|
+
# @example Propagate to parallel workers
|
|
28
|
+
# token = Phronomy::CancellationToken.new
|
|
29
|
+
# orchestrator.dispatch_parallel(task1, task2, cancellation_token: token)
|
|
30
|
+
class CancellationToken
|
|
31
|
+
# Returns a new token that will expire after +seconds+ seconds, measured
|
|
32
|
+
# with the monotonic clock (+Process::CLOCK_MONOTONIC+). Unlike constructing
|
|
33
|
+
# a token with +deadline: Time.now + seconds+, this factory is immune to NTP
|
|
34
|
+
# adjustments and other system clock changes.
|
|
35
|
+
#
|
|
36
|
+
# @param seconds [Numeric] duration in seconds until the token expires.
|
|
37
|
+
# @return [CancellationToken]
|
|
38
|
+
# @api public
|
|
39
|
+
def self.timeout_after(seconds)
|
|
40
|
+
monotonic_deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + seconds
|
|
41
|
+
new(monotonic_deadline: monotonic_deadline)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# @param deadline [Time, nil] optional wall-clock deadline; the token reports
|
|
45
|
+
# +cancelled?+ as +true+ once +Time.now >= deadline+. Prefer
|
|
46
|
+
# {.timeout_after} for duration-based cancellation.
|
|
47
|
+
# @param monotonic_deadline [Float, nil] internal monotonic timestamp set by
|
|
48
|
+
# {.timeout_after}; prefer that factory method over passing this directly.
|
|
49
|
+
# @api public
|
|
50
|
+
def initialize(deadline: nil, monotonic_deadline: nil)
|
|
51
|
+
@cancelled = false
|
|
52
|
+
@deadline = deadline
|
|
53
|
+
@monotonic_deadline = monotonic_deadline
|
|
54
|
+
@mutex = Mutex.new
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# @return [Time, nil] the wall-clock deadline passed to {#initialize}, or +nil+.
|
|
58
|
+
attr_reader :deadline
|
|
59
|
+
|
|
60
|
+
# Mark the token as cancelled. Thread-safe; may be called from any thread.
|
|
61
|
+
# @return [self]
|
|
62
|
+
# @api public
|
|
63
|
+
def cancel!
|
|
64
|
+
@mutex.synchronize { @cancelled = true }
|
|
65
|
+
self
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Returns +true+ when the token has been explicitly cancelled via {#cancel!},
|
|
69
|
+
# when the wall-clock deadline has passed, or when the monotonic deadline
|
|
70
|
+
# (set by {.timeout_after}) has elapsed. Thread-safe.
|
|
71
|
+
# @return [Boolean]
|
|
72
|
+
# @api public
|
|
73
|
+
def cancelled?
|
|
74
|
+
return true if @mutex.synchronize { @cancelled }
|
|
75
|
+
return true if !@deadline.nil? && Time.now >= @deadline
|
|
76
|
+
!@monotonic_deadline.nil? &&
|
|
77
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC) >= @monotonic_deadline
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Raises {Phronomy::CancellationError} if the token is cancelled.
|
|
81
|
+
# A convenience method for cooperative cancellation checks inside tools,
|
|
82
|
+
# RAG loaders, and hooks, replacing the +if cancelled? then raise+ pattern.
|
|
83
|
+
#
|
|
84
|
+
# @param message [String] optional error message
|
|
85
|
+
# @return [nil] when the token is not cancelled
|
|
86
|
+
# @raise [Phronomy::CancellationError] when the token is cancelled
|
|
87
|
+
# @api public
|
|
88
|
+
def raise_if_cancelled!(message = "invocation cancelled")
|
|
89
|
+
raise Phronomy::CancellationError, message if cancelled?
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
@@ -28,15 +28,45 @@ module Phronomy
|
|
|
28
28
|
# Recursion limit for graph execution (default: 25)
|
|
29
29
|
attr_accessor :recursion_limit
|
|
30
30
|
|
|
31
|
-
# When true
|
|
31
|
+
# When true, workflow execution is driven by EventLoop instead of a
|
|
32
|
+
# synchronous loop in the calling thread. Defaults to false (sync mode).
|
|
33
|
+
# @see Phronomy::EventLoop
|
|
34
|
+
attr_accessor :event_loop
|
|
35
|
+
|
|
36
|
+
# When true, user input and LLM output are recorded in trace spans.
|
|
37
|
+
# Defaults to false; set to true only in environments where PII capture is acceptable.
|
|
32
38
|
# Leave it false in privacy-sensitive environments to prevent PII from reaching
|
|
33
39
|
# the tracing backend (OTel, Langfuse, etc.).
|
|
34
40
|
attr_accessor :trace_pii
|
|
35
41
|
|
|
42
|
+
# Optional logger for framework diagnostic messages (e.g. unreachable-state warnings).
|
|
43
|
+
# Must respond to +#warn(message)+. When nil (default), messages are written to +$stderr+
|
|
44
|
+
# via +Kernel#warn+.
|
|
45
|
+
# @example
|
|
46
|
+
# Phronomy.configure { |c| c.logger = Rails.logger }
|
|
47
|
+
attr_accessor :logger
|
|
48
|
+
|
|
49
|
+
# Grace period (in seconds) before the EventLoop background thread is force-killed
|
|
50
|
+
# after a cooperative stop request. Applies both to the overall thread join
|
|
51
|
+
# and to the drain-and-cancel phase when +stop(drain: true)+ is used.
|
|
52
|
+
# Default: 5 seconds.
|
|
53
|
+
# @see Phronomy::EventLoop#stop
|
|
54
|
+
attr_accessor :event_loop_stop_grace_seconds
|
|
55
|
+
|
|
56
|
+
# Global state store for workflow persistence.
|
|
57
|
+
# When set, WorkflowRunner routes all state reads and writes through this store.
|
|
58
|
+
# Must be an instance of a class that inherits from Phronomy::StateStore::Base.
|
|
59
|
+
# Defaults to +nil+ (no persistence — state lives only for the duration of invoke).
|
|
60
|
+
# @example
|
|
61
|
+
# Phronomy.configure { |c| c.state_store = Phronomy::StateStore::InMemory.new }
|
|
62
|
+
attr_accessor :state_store
|
|
63
|
+
|
|
36
64
|
def initialize
|
|
37
65
|
@recursion_limit = 25
|
|
38
66
|
@tracer = Phronomy::Tracing::NullTracer.new
|
|
39
|
-
@trace_pii =
|
|
67
|
+
@trace_pii = false
|
|
68
|
+
@event_loop = false
|
|
69
|
+
@event_loop_stop_grace_seconds = 5
|
|
40
70
|
end
|
|
41
71
|
end
|
|
42
72
|
end
|
|
@@ -35,12 +35,14 @@ module Phronomy
|
|
|
35
35
|
# @param type [Symbol, String]
|
|
36
36
|
# @param trusted [Boolean]
|
|
37
37
|
# @return [String]
|
|
38
|
+
# @api private
|
|
38
39
|
def self.xml_tag(text, type:, trusted: false)
|
|
39
40
|
"<context type=\"#{CGI.escapeHTML(type.to_s)}\" trusted=\"#{trusted}\">\n#{CGI.escapeHTML(text.to_s)}\n</context>"
|
|
40
41
|
end
|
|
41
42
|
|
|
42
43
|
# @param budget [Phronomy::Context::TokenBudget, nil]
|
|
43
44
|
# when nil no token trimming is performed
|
|
45
|
+
# @api private
|
|
44
46
|
def initialize(budget: nil)
|
|
45
47
|
@budget = budget
|
|
46
48
|
@instruction = nil
|
|
@@ -53,6 +55,7 @@ module Phronomy
|
|
|
53
55
|
#
|
|
54
56
|
# @param text [String]
|
|
55
57
|
# @return [self]
|
|
58
|
+
# @api private
|
|
56
59
|
def add_instruction(text)
|
|
57
60
|
@instruction = text.to_s
|
|
58
61
|
self
|
|
@@ -67,6 +70,7 @@ module Phronomy
|
|
|
67
70
|
# @param source [String, nil] optional source label (e.g. filename); included in the
|
|
68
71
|
# XML tag so the LLM can produce grounded citations. Omitted when nil.
|
|
69
72
|
# @return [self]
|
|
73
|
+
# @api private
|
|
70
74
|
def add_knowledge(text, type:, trusted: false, source: nil)
|
|
71
75
|
@knowledge_chunks << {text: text.to_s, type: type.to_s, trusted: trusted, source: source}
|
|
72
76
|
self
|
|
@@ -76,6 +80,7 @@ module Phronomy
|
|
|
76
80
|
#
|
|
77
81
|
# @param messages [Array] message-like objects with #role and #content
|
|
78
82
|
# @return [self]
|
|
83
|
+
# @api private
|
|
79
84
|
def add_messages(messages)
|
|
80
85
|
@messages = Array(messages)
|
|
81
86
|
self
|
|
@@ -86,6 +91,7 @@ module Phronomy
|
|
|
86
91
|
# @return [Hash{Symbol => Object}]
|
|
87
92
|
# :system [String, nil] combined system prompt (instruction + knowledge XML tags)
|
|
88
93
|
# :messages [Array] conversation messages, trimmed to budget if set
|
|
94
|
+
# @api private
|
|
89
95
|
def build
|
|
90
96
|
knowledge_text = @knowledge_chunks.map { |c| xml_context_tag(c) }.join("\n\n")
|
|
91
97
|
system_parts = [@instruction, knowledge_text.empty? ? nil : knowledge_text].compact
|
|
@@ -45,6 +45,7 @@ module Phronomy
|
|
|
45
45
|
# @param thread_id [String, nil] used when saving compaction records
|
|
46
46
|
# @param memory [Object, nil] memory object; must respond to #save_compaction
|
|
47
47
|
# for compaction records to be persisted
|
|
48
|
+
# @api private
|
|
48
49
|
def initialize(message_elements:, budget:, thread_id: nil, memory: nil)
|
|
49
50
|
@message_elements = message_elements.dup
|
|
50
51
|
@budget = budget
|
|
@@ -67,6 +68,7 @@ module Phronomy
|
|
|
67
68
|
# @yieldparam elements [Array<Hash>] the selected message elements
|
|
68
69
|
# @yieldreturn [String] summary text to replace the selected messages
|
|
69
70
|
# @return [Array] the updated result_messages array
|
|
71
|
+
# @api private
|
|
70
72
|
def compact(range)
|
|
71
73
|
# Normalise: Integer index → single-element Array; Range → Array slice.
|
|
72
74
|
raw = @message_elements[range]
|
|
@@ -25,6 +25,7 @@ module Phronomy
|
|
|
25
25
|
#
|
|
26
26
|
# @param fingerprint [String] SHA-256 hex digest to compare
|
|
27
27
|
# @return [Boolean]
|
|
28
|
+
# @api private
|
|
28
29
|
def valid?(fingerprint)
|
|
29
30
|
!@fingerprint.nil? && !@system_text.nil? && @fingerprint == fingerprint
|
|
30
31
|
end
|
|
@@ -33,6 +34,7 @@ module Phronomy
|
|
|
33
34
|
#
|
|
34
35
|
# @param fingerprint [String] new SHA-256 hex digest
|
|
35
36
|
# @param system_text [String] fully assembled system prompt text
|
|
37
|
+
# @api private
|
|
36
38
|
def update(fingerprint:, system_text:)
|
|
37
39
|
@fingerprint = fingerprint
|
|
38
40
|
@system_text = system_text.to_s
|
|
@@ -45,6 +45,7 @@ module Phronomy
|
|
|
45
45
|
# @param max_output_tokens [Integer, nil] explicit output reservation; when nil
|
|
46
46
|
# and model is given, uses max_output_tokens
|
|
47
47
|
# @param overhead [Integer] tokens reserved for instructions/tools
|
|
48
|
+
# @api private
|
|
48
49
|
def initialize(model: nil, context_window: nil, max_output_tokens: nil, overhead: 0)
|
|
49
50
|
@overhead = overhead.to_i
|
|
50
51
|
|
|
@@ -65,6 +66,7 @@ module Phronomy
|
|
|
65
66
|
# Always >= 0.
|
|
66
67
|
#
|
|
67
68
|
# @return [Integer]
|
|
69
|
+
# @api private
|
|
68
70
|
def effective_input_limit
|
|
69
71
|
[@context_window - @max_output_tokens - @overhead, 0].max
|
|
70
72
|
end
|
|
@@ -73,6 +75,7 @@ module Phronomy
|
|
|
73
75
|
#
|
|
74
76
|
# @param used [Integer] tokens already committed (e.g. from knowledge injection)
|
|
75
77
|
# @return [Integer] remaining tokens (always >= 0)
|
|
78
|
+
# @api private
|
|
76
79
|
def available(used: 0)
|
|
77
80
|
[effective_input_limit - used.to_i, 0].max
|
|
78
81
|
end
|
|
@@ -9,8 +9,12 @@ module Phronomy
|
|
|
9
9
|
# any other class.
|
|
10
10
|
#
|
|
11
11
|
# Default approximation: ceil(char_count / 4).
|
|
12
|
-
#
|
|
13
|
-
#
|
|
12
|
+
# This heuristic is calibrated for ASCII/Latin text (~4 chars/token).
|
|
13
|
+
# For CJK languages (Chinese, Japanese, Korean) the actual token count is
|
|
14
|
+
# approximately 4× higher than the estimate because CJK characters are
|
|
15
|
+
# typically 1 token each in GPT-4/Claude tokenizers (~1 char/token vs the
|
|
16
|
+
# 4 char/token assumed here). Use a tokenizer-backed callable via
|
|
17
|
+
# +.tokenizer=+ for accurate CJK token counting.
|
|
14
18
|
#
|
|
15
19
|
# Replace the built-in heuristic with any callable via .tokenizer=:
|
|
16
20
|
#
|
|
@@ -33,11 +37,13 @@ module Phronomy
|
|
|
33
37
|
# In tests, call +TokenEstimator.reset_tokenizer!+ after each test to
|
|
34
38
|
# prevent cross-test contamination.
|
|
35
39
|
# @param callable [#call, nil]
|
|
40
|
+
# @api private
|
|
36
41
|
def tokenizer=(callable)
|
|
37
42
|
@tokenizer_mutex.synchronize { @tokenizer = callable }
|
|
38
43
|
end
|
|
39
44
|
|
|
40
45
|
# @return [#call, nil]
|
|
46
|
+
# @api private
|
|
41
47
|
def tokenizer
|
|
42
48
|
@tokenizer_mutex.synchronize { @tokenizer }
|
|
43
49
|
end
|
|
@@ -52,6 +58,7 @@ module Phronomy
|
|
|
52
58
|
# @param input [String, Array, #content] a string, a message-like object,
|
|
53
59
|
# or an Array of message-like objects (each must respond to #content).
|
|
54
60
|
# @return [Integer] estimated token count (>= 0)
|
|
61
|
+
# @api private
|
|
55
62
|
def estimate(input)
|
|
56
63
|
tok = @tokenizer_mutex.synchronize { @tokenizer }
|
|
57
64
|
case input
|
|
@@ -28,6 +28,7 @@ module Phronomy
|
|
|
28
28
|
# @param message_elements [Array<Hash>]
|
|
29
29
|
# each element: { seq: Integer, message: Object, tokens: Integer, role: Symbol }
|
|
30
30
|
# @param budget [Phronomy::Context::TokenBudget, nil]
|
|
31
|
+
# @api private
|
|
31
32
|
def initialize(message_elements:, budget:)
|
|
32
33
|
@message_elements = message_elements.dup
|
|
33
34
|
@budget = budget
|
|
@@ -38,6 +39,7 @@ module Phronomy
|
|
|
38
39
|
# Each element is a Hash with +:seq+, +:message+, +:tokens+, and +:role+.
|
|
39
40
|
#
|
|
40
41
|
# @return [Array<Hash>]
|
|
42
|
+
# @api private
|
|
41
43
|
def message_elements
|
|
42
44
|
@message_elements.dup
|
|
43
45
|
end
|
|
@@ -47,6 +49,7 @@ module Phronomy
|
|
|
47
49
|
#
|
|
48
50
|
# @param seqs [Integer, Array<Integer>] seq number(s) to remove
|
|
49
51
|
# @return [self]
|
|
52
|
+
# @api private
|
|
50
53
|
def remove(seqs)
|
|
51
54
|
seqs_set = Array(seqs).to_set
|
|
52
55
|
@message_elements.reject! { |e| seqs_set.include?(e[:seq]) }
|
|
@@ -57,6 +60,7 @@ module Phronomy
|
|
|
57
60
|
# Convenience: returns the plain message objects (without element metadata).
|
|
58
61
|
#
|
|
59
62
|
# @return [Array]
|
|
63
|
+
# @api private
|
|
60
64
|
def messages
|
|
61
65
|
@message_elements.map { |e| e[:message] }
|
|
62
66
|
end
|
data/lib/phronomy/context.rb
CHANGED
|
@@ -9,9 +9,12 @@ module Phronomy
|
|
|
9
9
|
class Base
|
|
10
10
|
# Embed the given text and return a vector representation.
|
|
11
11
|
#
|
|
12
|
-
# @param text
|
|
12
|
+
# @param text [String] the text to embed
|
|
13
|
+
# @param cancellation_token [Phronomy::CancellationToken, nil] optional; raises CancellationError when cancelled
|
|
13
14
|
# @return [Array<Float>] the embedding vector
|
|
14
|
-
|
|
15
|
+
# @api public
|
|
16
|
+
def embed(text, cancellation_token = nil)
|
|
17
|
+
cancellation_token&.raise_if_cancelled!
|
|
15
18
|
raise NotImplementedError, "#{self.class}#embed is not implemented"
|
|
16
19
|
end
|
|
17
20
|
end
|
|
@@ -19,6 +19,7 @@ module Phronomy
|
|
|
19
19
|
# @param provider [Symbol, nil] provider override (e.g. :openai); nil uses the RubyLLM default
|
|
20
20
|
# @param assume_model_exists [Boolean] when true, skips RubyLLM model-registry validation
|
|
21
21
|
# (useful for locally hosted models not in the registry)
|
|
22
|
+
# @api public
|
|
22
23
|
def initialize(model: nil, provider: nil, assume_model_exists: false)
|
|
23
24
|
@model = model
|
|
24
25
|
@provider = provider
|
|
@@ -27,9 +28,12 @@ module Phronomy
|
|
|
27
28
|
|
|
28
29
|
# Embed text via RubyLLM.
|
|
29
30
|
#
|
|
30
|
-
# @param text
|
|
31
|
+
# @param text [String]
|
|
32
|
+
# @param cancellation_token [Phronomy::CancellationToken, nil] optional; raises CancellationError when cancelled
|
|
31
33
|
# @return [Array<Float>]
|
|
32
|
-
|
|
34
|
+
# @api public
|
|
35
|
+
def embed(text, cancellation_token = nil)
|
|
36
|
+
cancellation_token&.raise_if_cancelled!
|
|
33
37
|
opts = {}
|
|
34
38
|
opts[:model] = @model if @model
|
|
35
39
|
opts[:provider] = @provider if @provider
|
|
@@ -19,6 +19,7 @@ module Phronomy
|
|
|
19
19
|
ComparisonPair = Data.define(:eval_case, :result_a, :result_b)
|
|
20
20
|
|
|
21
21
|
# @param scorer [Scorer::Base]
|
|
22
|
+
# @api public
|
|
22
23
|
def initialize(scorer: Scorer::ExactMatch.new)
|
|
23
24
|
@scorer = scorer
|
|
24
25
|
end
|
|
@@ -29,6 +30,7 @@ module Phronomy
|
|
|
29
30
|
# @param callable_a [#call]
|
|
30
31
|
# @param callable_b [#call]
|
|
31
32
|
# @return [Array<ComparisonPair>]
|
|
33
|
+
# @api public
|
|
32
34
|
def compare(dataset, callable_a, callable_b)
|
|
33
35
|
runner_a = Runner.new(scorer: @scorer)
|
|
34
36
|
runner_b = Runner.new(scorer: @scorer)
|
|
@@ -13,6 +13,7 @@ module Phronomy
|
|
|
13
13
|
include Enumerable
|
|
14
14
|
|
|
15
15
|
# @param cases [Array<EvalCase>]
|
|
16
|
+
# @api public
|
|
16
17
|
def initialize(cases = [])
|
|
17
18
|
@cases = cases.freeze
|
|
18
19
|
end
|
|
@@ -23,16 +24,19 @@ module Phronomy
|
|
|
23
24
|
#
|
|
24
25
|
# @param pairs [Array<Hash>]
|
|
25
26
|
# @return [Dataset]
|
|
27
|
+
# @api public
|
|
26
28
|
def self.from_array(pairs)
|
|
27
29
|
new(pairs.map { |h| EvalCase.new(**h) })
|
|
28
30
|
end
|
|
29
31
|
|
|
30
32
|
# @yield [EvalCase]
|
|
33
|
+
# @api public
|
|
31
34
|
def each(&block)
|
|
32
35
|
@cases.each(&block)
|
|
33
36
|
end
|
|
34
37
|
|
|
35
38
|
# @return [Integer]
|
|
39
|
+
# @api public
|
|
36
40
|
def size
|
|
37
41
|
@cases.size
|
|
38
42
|
end
|
|
@@ -11,12 +11,14 @@ module Phronomy
|
|
|
11
11
|
# puts metrics.to_h
|
|
12
12
|
class Metrics
|
|
13
13
|
# @param results [Array<EvalResult>]
|
|
14
|
+
# @api public
|
|
14
15
|
def initialize(results)
|
|
15
16
|
@results = results
|
|
16
17
|
end
|
|
17
18
|
|
|
18
19
|
# Fraction of results that passed (score == 1.0).
|
|
19
20
|
# @return [Float] in [0.0, 1.0]
|
|
21
|
+
# @api public
|
|
20
22
|
def pass_rate
|
|
21
23
|
return 0.0 if @results.empty?
|
|
22
24
|
@results.count(&:pass?).to_f / @results.size
|
|
@@ -24,6 +26,7 @@ module Phronomy
|
|
|
24
26
|
|
|
25
27
|
# Arithmetic mean of all scores.
|
|
26
28
|
# @return [Float]
|
|
29
|
+
# @api public
|
|
27
30
|
def average_score
|
|
28
31
|
return 0.0 if @results.empty?
|
|
29
32
|
@results.sum(&:score) / @results.size
|
|
@@ -32,12 +35,14 @@ module Phronomy
|
|
|
32
35
|
# Sum of all TokenUsage objects present in the results.
|
|
33
36
|
# Results without usage are skipped.
|
|
34
37
|
# @return [Phronomy::TokenUsage]
|
|
38
|
+
# @api public
|
|
35
39
|
def total_usage
|
|
36
40
|
@results.map(&:usage).compact.reduce(TokenUsage.zero, :+)
|
|
37
41
|
end
|
|
38
42
|
|
|
39
43
|
# Arithmetic mean of latency_ms across all results.
|
|
40
44
|
# @return [Float]
|
|
45
|
+
# @api public
|
|
41
46
|
def average_latency_ms
|
|
42
47
|
return 0.0 if @results.empty?
|
|
43
48
|
@results.sum(&:latency_ms).to_f / @results.size
|
|
@@ -45,6 +50,7 @@ module Phronomy
|
|
|
45
50
|
|
|
46
51
|
# Returns a plain Hash summary suitable for logging or serialisation.
|
|
47
52
|
# @return [Hash]
|
|
53
|
+
# @api public
|
|
48
54
|
def to_h
|
|
49
55
|
{
|
|
50
56
|
total: @results.size,
|
data/lib/phronomy/eval/runner.rb
CHANGED
|
@@ -18,6 +18,7 @@ module Phronomy
|
|
|
18
18
|
# results = runner.run(dataset, ->(input) { agent.invoke(input) })
|
|
19
19
|
class Runner
|
|
20
20
|
# @param scorer [Scorer::Base] scorer used to evaluate each result
|
|
21
|
+
# @api public
|
|
21
22
|
def initialize(scorer: Scorer::ExactMatch.new)
|
|
22
23
|
@scorer = scorer
|
|
23
24
|
end
|
|
@@ -26,6 +27,7 @@ module Phronomy
|
|
|
26
27
|
# @param callable [#call] accepts a single String argument
|
|
27
28
|
# @param concurrency [Integer] number of parallel threads (default: 1, sequential)
|
|
28
29
|
# @return [Array<EvalResult>]
|
|
30
|
+
# @api public
|
|
29
31
|
def run(dataset, callable, concurrency: 1)
|
|
30
32
|
cases = dataset.to_a
|
|
31
33
|
return cases.map { |eval_case| run_one(eval_case, callable) } if concurrency <= 1
|
|
@@ -12,6 +12,7 @@ module Phronomy
|
|
|
12
12
|
# @param expected [String] the ground-truth value from the EvalCase
|
|
13
13
|
# @param input [String, nil] the original input (used by LLM scorers)
|
|
14
14
|
# @return [Float] a value in [0.0, 1.0]
|
|
15
|
+
# @api public
|
|
15
16
|
def score(actual:, expected:, input: nil)
|
|
16
17
|
raise NotImplementedError, "#{self.class}#score is not implemented"
|
|
17
18
|
end
|