phronomy 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -0
- data/README.md +80 -12
- data/lib/phronomy/agent/base.rb +8 -3
- data/lib/phronomy/agent/orchestrator.rb +119 -0
- data/lib/phronomy/agent/shared_state.rb +303 -0
- data/lib/phronomy/agent/team_coordinator.rb +285 -0
- data/lib/phronomy/{trust_pipeline.rb → generator_verifier.rb} +95 -108
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow_runner.rb +41 -22
- data/lib/phronomy.rb +17 -0
- metadata +8 -6
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
module Agent
|
|
5
|
+
# Implements the "Agent teams" coordination pattern (Anthropic blog, Pattern 3).
|
|
6
|
+
#
|
|
7
|
+
# @see https://claude.com/blog/multi-agent-coordination-patterns
|
|
8
|
+
#
|
|
9
|
+
# A coordinator LLM agent decomposes work into tasks and enqueues them
|
|
10
|
+
# dynamically via built-in tools. A fixed pool of worker agents claims tasks
|
|
11
|
+
# from the shared queue, carrying forward their conversation history across
|
|
12
|
+
# assignments to accumulate domain context over time.
|
|
13
|
+
#
|
|
14
|
+
# The coordinator is an {Agent::Base} subclass that has two built-in tools:
|
|
15
|
+
# - +enqueue_task+ — adds a task description to the queue
|
|
16
|
+
# - +finalize+ — signals that all tasks have been enqueued
|
|
17
|
+
#
|
|
18
|
+
# Worker persistence is implemented by passing each worker's accumulated
|
|
19
|
+
# +messages+ array back via +config[:messages]+ on every subsequent +invoke+
|
|
20
|
+
# call, so the LLM retains context across multiple task assignments.
|
|
21
|
+
#
|
|
22
|
+
# @example Basic usage
|
|
23
|
+
# class MigrationTeam < Phronomy::Agent::TeamCoordinator
|
|
24
|
+
# coordinator_model "claude-3-5-sonnet-20241022"
|
|
25
|
+
# coordinator_instructions <<~INST
|
|
26
|
+
# Analyze the request and enqueue one migration task per service.
|
|
27
|
+
# Call enqueue_task for each service, then call finalize.
|
|
28
|
+
# INST
|
|
29
|
+
#
|
|
30
|
+
# pool size: 3, agent: MigrationAgent
|
|
31
|
+
#
|
|
32
|
+
# aggregate do |assignments|
|
|
33
|
+
# { reports: assignments.map { |a| { task: a[:task][:description], result: a[:result] } } }
|
|
34
|
+
# end
|
|
35
|
+
# end
|
|
36
|
+
#
|
|
37
|
+
# result = MigrationTeam.new.invoke("Migrate all services to Rails 8")
|
|
38
|
+
class TeamCoordinator
|
|
39
|
+
# Holds per-worker context between task invocations.
|
|
40
|
+
# Worker persistence is implemented by carrying +messages+ forward on each
|
|
41
|
+
# successive +agent#invoke+ call via +config[:messages]+.
|
|
42
|
+
WorkerState = Struct.new(
|
|
43
|
+
:index, # Integer — 0-based worker index
|
|
44
|
+
:agent, # Agent::Base instance
|
|
45
|
+
:messages, # Array — accumulated conversation history
|
|
46
|
+
:status, # Symbol — :idle | :available | :done
|
|
47
|
+
keyword_init: true
|
|
48
|
+
) do
|
|
49
|
+
# Returns true when this worker is ready to accept the next task.
|
|
50
|
+
def available? = [:idle, :available].include?(status)
|
|
51
|
+
end
|
|
52
|
+
private_constant :WorkerState
|
|
53
|
+
|
|
54
|
+
class << self
|
|
55
|
+
# Sets the LLM model for the coordinator agent.
|
|
56
|
+
# Falls back to +Phronomy.configuration.default_model+ when not set.
|
|
57
|
+
#
|
|
58
|
+
# @param value [String, nil]
|
|
59
|
+
def coordinator_model(value = nil)
|
|
60
|
+
value ? @coordinator_model = value : @coordinator_model
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Sets the system instructions for the coordinator agent.
|
|
64
|
+
# The prompt should direct the LLM to call +enqueue_task+ for each task
|
|
65
|
+
# and then call +finalize+ when all tasks are enqueued.
|
|
66
|
+
#
|
|
67
|
+
# @param value [String, nil]
|
|
68
|
+
def coordinator_instructions(value = nil)
|
|
69
|
+
value ? @coordinator_instructions = value : @coordinator_instructions
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Sets the LLM provider for the coordinator agent.
|
|
73
|
+
# Required when using a custom +BASE_URL+ (e.g. LM Studio, Ollama, vLLM)
|
|
74
|
+
# so that RubyLLM does not attempt to resolve an unknown model name.
|
|
75
|
+
# Pass the same value as +LLMConfig::PROVIDER+ in your examples.
|
|
76
|
+
#
|
|
77
|
+
# @param value [Symbol, nil]
|
|
78
|
+
def coordinator_provider(value = nil)
|
|
79
|
+
value ? @coordinator_provider = value : @coordinator_provider
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# Configures the worker pool.
|
|
83
|
+
#
|
|
84
|
+
# @param size [Integer] number of persistent worker instances
|
|
85
|
+
# @param agent [Class] Agent::Base subclass used for all workers
|
|
86
|
+
# @param on_error [Symbol] +:raise+ (default) propagates worker exceptions;
|
|
87
|
+
# +:skip+ records the failure and continues with remaining tasks
|
|
88
|
+
def pool(size:, agent:, on_error: :raise)
|
|
89
|
+
@pool_size = Integer(size)
|
|
90
|
+
@worker_agent = agent
|
|
91
|
+
@on_error = on_error
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# Customises the worker selection algorithm.
|
|
95
|
+
# The block receives an Array of available WorkerState objects and must
|
|
96
|
+
# return the one to assign the next task to.
|
|
97
|
+
# Default: worker with the fewest accumulated messages (round-robin-like).
|
|
98
|
+
#
|
|
99
|
+
# @yield [Array<WorkerState>] available workers
|
|
100
|
+
# @yieldreturn [WorkerState] the chosen worker
|
|
101
|
+
def schedule(&block)
|
|
102
|
+
@scheduler = block
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# Defines how task assignments are merged into the final return value.
|
|
106
|
+
# The block receives an Array of assignment Hashes:
|
|
107
|
+
# { task: Hash, result: String|nil, worker: Integer, error: Exception|nil }
|
|
108
|
+
# When omitted, the raw assignments array is returned.
|
|
109
|
+
#
|
|
110
|
+
# @yield [Array<Hash>] all completed (and skipped) task assignments
|
|
111
|
+
def aggregate(&block)
|
|
112
|
+
@aggregator = block
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# @!visibility private
|
|
116
|
+
def _coordinator_model = @coordinator_model
|
|
117
|
+
# @!visibility private
|
|
118
|
+
def _coordinator_instructions = @coordinator_instructions
|
|
119
|
+
# @!visibility private
|
|
120
|
+
def _coordinator_provider = @coordinator_provider
|
|
121
|
+
# @!visibility private
|
|
122
|
+
def _pool_size = @pool_size || 1
|
|
123
|
+
# @!visibility private
|
|
124
|
+
def _worker_agent = @worker_agent
|
|
125
|
+
# @!visibility private
|
|
126
|
+
def _on_error = @on_error || :raise
|
|
127
|
+
# @!visibility private
|
|
128
|
+
def _scheduler = @scheduler
|
|
129
|
+
# @!visibility private
|
|
130
|
+
def _aggregator = @aggregator
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# Runs the full team coordination: coordinator generates tasks, workers
|
|
134
|
+
# process them sequentially, and the aggregate block merges the results.
|
|
135
|
+
#
|
|
136
|
+
# @param team_input [String, Hash] the high-level objective given to the coordinator
|
|
137
|
+
# @param config [Hash] reserved for future use
|
|
138
|
+
# @return [Object] the return value of the aggregate block, or the raw assignments Array
|
|
139
|
+
# @raise [ArgumentError] when +pool :agent+ has not been configured
|
|
140
|
+
def invoke(team_input, config: {})
|
|
141
|
+
raise ArgumentError, "pool :agent must be configured before invoking" unless self.class._worker_agent
|
|
142
|
+
|
|
143
|
+
task_queue = []
|
|
144
|
+
run_coordinator(team_input, task_queue)
|
|
145
|
+
assignments = run_workers(task_queue)
|
|
146
|
+
finalize_result(assignments)
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
# Streaming version of +invoke+. Yields a Hash event for each completed or
|
|
150
|
+
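# failed task assignment (failures are yielded only when the pool uses +on_error: :skip+; with +:raise+ the exception propagates instead).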
|
|
151
|
+
#
|
|
152
|
+
# Yielded Hash keys:
|
|
153
|
+
# :type — +:task_completed+ or +:task_failed+
|
|
154
|
+
# :worker — worker index (Integer)
|
|
155
|
+
# :task — the task Hash from the queue ({ id:, description:, metadata:, enqueued_at: })
|
|
156
|
+
# :result — output string, or +nil+ on failure
|
|
157
|
+
# :error — Exception, or +nil+ on success
|
|
158
|
+
#
|
|
159
|
+
# @param team_input [String, Hash]
|
|
160
|
+
# @param config [Hash]
|
|
161
|
+
# @yield [Hash] one event per completed/failed task
|
|
162
|
+
# @return [Object] same as +invoke+
|
|
163
|
+
# @raise [ArgumentError] when +pool :agent+ has not been configured
|
|
164
|
+
def stream(team_input, config: {}, &block)
|
|
165
|
+
return invoke(team_input, config: config) unless block
|
|
166
|
+
|
|
167
|
+
raise ArgumentError, "pool :agent must be configured before invoking" unless self.class._worker_agent
|
|
168
|
+
|
|
169
|
+
task_queue = []
|
|
170
|
+
run_coordinator(team_input, task_queue)
|
|
171
|
+
assignments = run_workers(task_queue, &block)
|
|
172
|
+
finalize_result(assignments)
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
private
|
|
176
|
+
|
|
177
|
+
# Phase 1: Run the coordinator LLM agent to populate task_queue.
|
|
178
|
+
def run_coordinator(team_input, task_queue)
|
|
179
|
+
coordinator = build_coordinator_agent(task_queue)
|
|
180
|
+
input = team_input.is_a?(String) ? team_input : team_input.to_s
|
|
181
|
+
coordinator.invoke(input)
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
# Phase 2: Process tasks from the queue using the worker pool.
|
|
185
|
+
# Workers accumulate message history across assignments.
|
|
186
|
+
def run_workers(task_queue, &event_block)
|
|
187
|
+
pool_size = self.class._pool_size
|
|
188
|
+
agent_class = self.class._worker_agent
|
|
189
|
+
on_error = self.class._on_error
|
|
190
|
+
scheduler = self.class._scheduler
|
|
191
|
+
|
|
192
|
+
workers = Array.new(pool_size) do |i|
|
|
193
|
+
WorkerState.new(index: i, agent: agent_class.new, messages: [], status: :idle)
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
assignments = []
|
|
197
|
+
|
|
198
|
+
until task_queue.empty?
|
|
199
|
+
task = task_queue.shift
|
|
200
|
+
available = workers.select(&:available?)
|
|
201
|
+
worker = scheduler ? scheduler.call(available) : default_scheduler(available)
|
|
202
|
+
|
|
203
|
+
begin
|
|
204
|
+
result = worker.agent.invoke(task[:description], config: {messages: worker.messages})
|
|
205
|
+
worker.messages = result[:messages]
|
|
206
|
+
worker.status = :available
|
|
207
|
+
entry = {task: task, result: result[:output], worker: worker.index, error: nil}
|
|
208
|
+
assignments << entry
|
|
209
|
+
event_block&.call(entry.merge(type: :task_completed))
|
|
210
|
+
rescue => e
|
|
211
|
+
worker.status = :available
|
|
212
|
+
raise unless on_error == :skip
|
|
213
|
+
|
|
214
|
+
entry = {task: task, result: nil, worker: worker.index, error: e}
|
|
215
|
+
assignments << entry
|
|
216
|
+
event_block&.call(entry.merge(type: :task_failed))
|
|
217
|
+
end
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
workers.each { |w| w.status = :done }
|
|
221
|
+
assignments
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
# Phase 3: Apply the aggregate block (or return raw assignments).
|
|
225
|
+
def finalize_result(assignments)
|
|
226
|
+
aggregator = self.class._aggregator
|
|
227
|
+
aggregator ? aggregator.call(assignments) : assignments
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
# Default scheduler: assign to the worker with the fewest accumulated
|
|
231
|
+
# messages (promotes round-robin-like distribution across the pool).
|
|
232
|
+
def default_scheduler(available_workers)
|
|
233
|
+
available_workers.min_by { |w| w.messages.size }
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
# Build an anonymous coordinator Agent::Base with the two built-in tools.
|
|
237
|
+
def build_coordinator_agent(task_queue)
|
|
238
|
+
coordinator_model_val = self.class._coordinator_model
|
|
239
|
+
coordinator_instructions_val = self.class._coordinator_instructions
|
|
240
|
+
coordinator_provider_val = self.class._coordinator_provider
|
|
241
|
+
enqueue_tool = build_enqueue_tool(task_queue)
|
|
242
|
+
finalize_tool = build_finalize_tool(task_queue)
|
|
243
|
+
|
|
244
|
+
coordinator_class = Class.new(Phronomy::Agent::Base) do
|
|
245
|
+
model coordinator_model_val
|
|
246
|
+
provider coordinator_provider_val if coordinator_provider_val
|
|
247
|
+
instructions coordinator_instructions_val
|
|
248
|
+
tools enqueue_tool, finalize_tool
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
coordinator_class.new
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
# Builds the +enqueue_task+ tool. Each call appends a task Hash to task_queue.
|
|
255
|
+
def build_enqueue_tool(task_queue)
|
|
256
|
+
Class.new(Phronomy::Tool::Base) do
|
|
257
|
+
tool_name "enqueue_task"
|
|
258
|
+
description "Add a task to the worker queue."
|
|
259
|
+
param :description, type: :string, desc: "What the worker agent should do"
|
|
260
|
+
param :metadata, type: :string, desc: "Optional metadata", required: false
|
|
261
|
+
|
|
262
|
+
define_method(:execute) do |description:, metadata: nil|
|
|
263
|
+
task = {id: task_queue.size + 1, description: description, metadata: metadata, enqueued_at: Time.now}
|
|
264
|
+
task_queue << task
|
|
265
|
+
"Task ##{task[:id]} enqueued: #{description}"
|
|
266
|
+
end
|
|
267
|
+
end
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
# Builds the +finalize+ tool. Signals to the coordinator LLM that all tasks
|
|
271
|
+
# have been enqueued; returns a confirmation string.
|
|
272
|
+
def build_finalize_tool(task_queue)
|
|
273
|
+
Class.new(Phronomy::Tool::Base) do
|
|
274
|
+
tool_name "finalize"
|
|
275
|
+
description "Signal that task generation is complete. Call this after all tasks have been enqueued."
|
|
276
|
+
param :summary, type: :string, desc: "Brief summary of what was enqueued", required: false
|
|
277
|
+
|
|
278
|
+
define_method(:execute) do |summary: ""|
|
|
279
|
+
"Finalized. #{task_queue.size} task(s) enqueued. #{summary}".strip
|
|
280
|
+
end
|
|
281
|
+
end
|
|
282
|
+
end
|
|
283
|
+
end
|
|
284
|
+
end
|
|
285
|
+
end
|
|
@@ -1,40 +1,53 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Phronomy
|
|
4
|
-
#
|
|
4
|
+
# Implements the Generator-Verifier multi-agent coordination pattern
|
|
5
|
+
# (Anthropic blog, Pattern 1): a generator agent produces an
|
|
6
|
+
# answer while a verifier agent evaluates its quality.
|
|
5
7
|
#
|
|
6
|
-
#
|
|
7
|
-
# sources it relied on. Citations are extracted and attached to the result.
|
|
8
|
+
# @see https://claude.com/blog/multi-agent-coordination-patterns
|
|
8
9
|
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
10
|
+
# All prompt construction and result parsing are provided by the caller,
|
|
11
|
+
# giving full control over the LLM dialogue.
|
|
12
|
+
# The generator and verifier agents are configurable, and the pipeline
|
|
13
|
+
# retries until confidence passes the threshold or max iterations are reached.
|
|
12
14
|
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
#
|
|
18
|
-
#
|
|
19
|
-
# @example
|
|
20
|
-
# pipeline = Phronomy::TrustPipeline.new(
|
|
21
|
-
# draft_agent: PolicyDraftAgent,
|
|
22
|
-
# review_agent: PolicyReviewAgent,
|
|
23
|
-
# confidence_threshold: 0.7,
|
|
24
|
-
# max_iterations: 3
|
|
15
|
+
# @example Basic usage with custom prompt builders
|
|
16
|
+
# pipeline = Phronomy::GeneratorVerifier.new(
|
|
17
|
+
# draft_agent: MyDraftAgent,
|
|
18
|
+
# review_agent: MyReviewAgent,
|
|
19
|
+
# draft_prompt_builder: ->(input, feedback) { "Question: #{input}" },
|
|
20
|
+
# review_prompt_builder: ->(input, draft, citations) { "Review: #{draft}" }
|
|
25
21
|
# )
|
|
26
22
|
# result = pipeline.invoke("What is the refund policy?")
|
|
27
23
|
# puts result.output # the final answer string
|
|
28
24
|
# puts result.trusted? # true when confidence >= threshold
|
|
29
|
-
#
|
|
30
|
-
|
|
25
|
+
#
|
|
26
|
+
# @example Custom result parsers
|
|
27
|
+
# pipeline = Phronomy::GeneratorVerifier.new(
|
|
28
|
+
# ...,
|
|
29
|
+
# draft_result_parser: ->(text) { my_parse_draft(text) },
|
|
30
|
+
# review_result_parser: ->(text) { my_parse_review(text) }
|
|
31
|
+
# )
|
|
32
|
+
#
|
|
33
|
+
# @example Raising on low confidence
|
|
34
|
+
# pipeline = Phronomy::GeneratorVerifier.new(
|
|
35
|
+
# ...,
|
|
36
|
+
# raise_if_untrusted: true
|
|
37
|
+
# )
|
|
38
|
+
# begin
|
|
39
|
+
# result = pipeline.invoke("question")
|
|
40
|
+
# rescue Phronomy::LowConfidenceError => e
|
|
41
|
+
# puts "Untrusted: #{e.result.confidence}"
|
|
42
|
+
# end
|
|
43
|
+
class GeneratorVerifier
|
|
31
44
|
# Default confidence threshold for trusting an answer.
|
|
32
45
|
DEFAULT_CONFIDENCE_THRESHOLD = 0.7
|
|
33
46
|
|
|
34
47
|
# Default maximum draft-review cycles before returning best effort.
|
|
35
48
|
DEFAULT_MAX_ITERATIONS = 3
|
|
36
49
|
|
|
37
|
-
# Immutable value object returned by {
|
|
50
|
+
# Immutable value object returned by {GeneratorVerifier#invoke}.
|
|
38
51
|
#
|
|
39
52
|
# @!attribute [r] output
|
|
40
53
|
# @return [String] the final answer text
|
|
@@ -43,17 +56,19 @@ module Phronomy
|
|
|
43
56
|
# @!attribute [r] citations
|
|
44
57
|
# @return [Array<Hash>] [{source:, excerpt:}, ...]
|
|
45
58
|
#
|
|
46
|
-
# **WARNING**: These citations are extracted from the LLM's own response
|
|
47
|
-
#
|
|
48
|
-
#
|
|
49
|
-
# independent verification.
|
|
59
|
+
# **WARNING**: These citations are extracted from the LLM's own response
|
|
60
|
+
# and are **not** verified against any external knowledge base or URL.
|
|
61
|
+
# Do not treat them as authoritative without independent verification.
|
|
50
62
|
# @!attribute [r] iterations
|
|
51
63
|
# @return [Integer] number of draft-review cycles executed
|
|
52
64
|
# @!attribute [r] review_notes
|
|
53
65
|
# @return [Array<String>] reviewer feedback for each cycle
|
|
54
66
|
# @!attribute [r] trusted
|
|
55
67
|
# @return [Boolean] true when confidence >= threshold
|
|
56
|
-
Result = Struct.new(
|
|
68
|
+
Result = Struct.new(
|
|
69
|
+
:output, :confidence, :citations, :iterations, :review_notes, :trusted,
|
|
70
|
+
keyword_init: true
|
|
71
|
+
) do
|
|
57
72
|
# @return [Boolean] true when confidence >= threshold
|
|
58
73
|
alias_method :trusted?, :trusted
|
|
59
74
|
end
|
|
@@ -76,44 +91,73 @@ module Phronomy
|
|
|
76
91
|
|
|
77
92
|
private_constant :PipelineState
|
|
78
93
|
|
|
79
|
-
# @param draft_agent
|
|
80
|
-
#
|
|
81
|
-
# @param
|
|
82
|
-
#
|
|
83
|
-
# @param
|
|
84
|
-
#
|
|
85
|
-
#
|
|
86
|
-
#
|
|
87
|
-
#
|
|
88
|
-
|
|
94
|
+
# @param draft_agent [Class] subclass of Phronomy::Agent::Base
|
|
95
|
+
# used to generate answer drafts
|
|
96
|
+
# @param review_agent [Class] subclass of Phronomy::Agent::Base
|
|
97
|
+
# used to evaluate each draft
|
|
98
|
+
# @param draft_prompt_builder [#call] +call(input, feedback)+ → String
|
|
99
|
+
# prompt for the generator. +feedback+ is nil on the first iteration and
|
|
100
|
+
# contains the reviewer's feedback string on subsequent iterations.
|
|
101
|
+
# @param review_prompt_builder [#call] +call(input, draft, citations)+ → String
|
|
102
|
+
# prompt for the verifier. +citations+ is an Array of Hashes.
|
|
103
|
+
# @param draft_result_parser [#call, nil] +call(text)+ → Hash with
|
|
104
|
+
# +:answer+, +:confidence+, and +:citations+ keys. Defaults to JSON parsing
|
|
105
|
+
# with a safe fallback when the response cannot be parsed.
|
|
106
|
+
# @param review_result_parser [#call, nil] +call(text)+ → Hash with
|
|
107
|
+
# +:approved+, +:score+, and +:feedback+ keys. Defaults to JSON parsing
|
|
108
|
+
# with a safe fallback.
|
|
109
|
+
# @param confidence_threshold [Float] minimum combined confidence to
|
|
110
|
+
# trust an answer (default: 0.7)
|
|
111
|
+
# @param max_iterations [Integer] maximum draft-review cycles
|
|
112
|
+
# before returning the best-effort answer (default: 3)
|
|
113
|
+
# @param raise_if_untrusted [Boolean] when +true+, raises
|
|
114
|
+
# {Phronomy::LowConfidenceError} if the final result does not meet the
|
|
115
|
+
# confidence threshold (default: false)
|
|
116
|
+
def initialize(
|
|
117
|
+
draft_agent:,
|
|
118
|
+
review_agent:,
|
|
119
|
+
draft_prompt_builder:,
|
|
120
|
+
review_prompt_builder:,
|
|
121
|
+
draft_result_parser: nil,
|
|
122
|
+
review_result_parser: nil,
|
|
89
123
|
confidence_threshold: DEFAULT_CONFIDENCE_THRESHOLD,
|
|
90
124
|
max_iterations: DEFAULT_MAX_ITERATIONS,
|
|
91
|
-
|
|
125
|
+
raise_if_untrusted: false
|
|
126
|
+
)
|
|
92
127
|
@draft_agent_class = draft_agent
|
|
93
128
|
@review_agent_class = review_agent
|
|
129
|
+
@draft_prompt_builder = draft_prompt_builder
|
|
130
|
+
@review_prompt_builder = review_prompt_builder
|
|
131
|
+
@draft_result_parser = draft_result_parser || method(:default_parse_draft)
|
|
132
|
+
@review_result_parser = review_result_parser || method(:default_parse_review)
|
|
94
133
|
@threshold = confidence_threshold.to_f
|
|
95
134
|
@max_iterations = max_iterations.to_i
|
|
96
|
-
@
|
|
135
|
+
@raise_if_untrusted = raise_if_untrusted
|
|
97
136
|
@compiled_graph = nil
|
|
98
137
|
end
|
|
99
138
|
|
|
100
|
-
# Run the pipeline.
|
|
139
|
+
# Run the generator-verifier pipeline.
|
|
101
140
|
#
|
|
102
141
|
# @param input [String] the user question or task description
|
|
103
142
|
# @param config [Hash] forwarded to the compiled workflow's +invoke+ (e.g. thread_id)
|
|
104
143
|
# @return [Result]
|
|
144
|
+
# @raise [Phronomy::LowConfidenceError] when +raise_if_untrusted:+ is +true+
|
|
145
|
+
# and the result does not meet the confidence threshold
|
|
105
146
|
def invoke(input, config: {})
|
|
106
147
|
app = compiled_graph
|
|
107
148
|
state = app.invoke({input: input}, config: config)
|
|
108
149
|
confidence = combined_confidence(state)
|
|
109
|
-
|
|
150
|
+
trusted = confidence >= @threshold
|
|
151
|
+
result = Result.new(
|
|
110
152
|
output: state.output || state.draft.to_s,
|
|
111
153
|
confidence: confidence,
|
|
112
154
|
citations: state.citations,
|
|
113
155
|
iterations: state.iteration,
|
|
114
156
|
review_notes: state.review_notes,
|
|
115
|
-
trusted:
|
|
157
|
+
trusted: trusted
|
|
116
158
|
)
|
|
159
|
+
raise LowConfidenceError.new(result) if @raise_if_untrusted && !trusted
|
|
160
|
+
result
|
|
117
161
|
end
|
|
118
162
|
|
|
119
163
|
private
|
|
@@ -122,7 +166,6 @@ module Phronomy
|
|
|
122
166
|
[(state.self_score || 0.0).to_f, (state.review_score || 0.0).to_f].min
|
|
123
167
|
end
|
|
124
168
|
|
|
125
|
-
# Returns the compiled workflow, building and caching it on first call.
|
|
126
169
|
def compiled_graph
|
|
127
170
|
@compiled_graph ||= build_workflow
|
|
128
171
|
end
|
|
@@ -132,6 +175,10 @@ module Phronomy
|
|
|
132
175
|
review_agent = @review_agent_class.new
|
|
133
176
|
threshold = @threshold
|
|
134
177
|
max_iter = @max_iterations
|
|
178
|
+
dpb = @draft_prompt_builder
|
|
179
|
+
rpb = @review_prompt_builder
|
|
180
|
+
drp = @draft_result_parser
|
|
181
|
+
rrp = @review_result_parser
|
|
135
182
|
pipeline = self
|
|
136
183
|
|
|
137
184
|
Phronomy::Workflow.define(PipelineState) do
|
|
@@ -139,9 +186,9 @@ module Phronomy
|
|
|
139
186
|
|
|
140
187
|
state :draft, action: ->(state) {
|
|
141
188
|
feedback = state.review_notes.last
|
|
142
|
-
prompt =
|
|
189
|
+
prompt = dpb.call(state.input, feedback)
|
|
143
190
|
result = draft_agent.invoke(prompt)
|
|
144
|
-
parsed =
|
|
191
|
+
parsed = drp.call(result[:output])
|
|
145
192
|
state.merge(
|
|
146
193
|
draft: parsed[:answer].to_s,
|
|
147
194
|
self_score: pipeline.__send__(:clamp, parsed[:confidence]),
|
|
@@ -151,9 +198,9 @@ module Phronomy
|
|
|
151
198
|
}
|
|
152
199
|
|
|
153
200
|
state :review, action: ->(state) {
|
|
154
|
-
prompt =
|
|
201
|
+
prompt = rpb.call(state.input, state.draft, state.citations)
|
|
155
202
|
result = review_agent.invoke(prompt)
|
|
156
|
-
parsed =
|
|
203
|
+
parsed = rrp.call(result[:output])
|
|
157
204
|
state.merge(
|
|
158
205
|
review_score: pipeline.__send__(:clamp, parsed[:score]),
|
|
159
206
|
approved: parsed[:approved] == true,
|
|
@@ -176,73 +223,13 @@ module Phronomy
|
|
|
176
223
|
end
|
|
177
224
|
end
|
|
178
225
|
|
|
179
|
-
|
|
180
|
-
# When no delimiter is configured the input is returned unchanged.
|
|
181
|
-
def wrap_input(input)
|
|
182
|
-
return input unless @input_delimiter
|
|
183
|
-
|
|
184
|
-
start_tag, end_tag = @input_delimiter
|
|
185
|
-
"#{start_tag}\n#{input}\n#{end_tag}"
|
|
186
|
-
end
|
|
187
|
-
|
|
188
|
-
# Builds the prompt sent to the DraftAgent for each iteration.
|
|
189
|
-
def draft_prompt(input, feedback)
|
|
190
|
-
lines = [
|
|
191
|
-
"Answer the following question as accurately as possible.",
|
|
192
|
-
"Use any knowledge provided in <context> tags and cite your sources."
|
|
193
|
-
]
|
|
194
|
-
if feedback && !feedback.strip.empty?
|
|
195
|
-
lines << ""
|
|
196
|
-
lines << "Your previous draft was reviewed and rejected. Address ALL of this feedback:"
|
|
197
|
-
lines << feedback.strip
|
|
198
|
-
end
|
|
199
|
-
lines += [
|
|
200
|
-
"",
|
|
201
|
-
"Question: #{wrap_input(input)}",
|
|
202
|
-
"",
|
|
203
|
-
"RESPOND ONLY WITH VALID JSON (no text outside the JSON block):",
|
|
204
|
-
'{"answer":"<full answer>","confidence":<0.0-1.0>,' \
|
|
205
|
-
'"citations":[{"source":"<doc name>","excerpt":"<exact quote>"}]}'
|
|
206
|
-
]
|
|
207
|
-
lines.join("\n")
|
|
208
|
-
end
|
|
209
|
-
|
|
210
|
-
# Builds the prompt sent to the ReviewAgent.
|
|
211
|
-
def review_prompt(input, draft, citations)
|
|
212
|
-
citation_text = if citations.empty?
|
|
213
|
-
" (none)"
|
|
214
|
-
else
|
|
215
|
-
citations.map { |c| " - #{c[:source]}: \"#{c[:excerpt]}\"" }.join("\n")
|
|
216
|
-
end
|
|
217
|
-
[
|
|
218
|
-
"You are a rigorous quality reviewer. Evaluate the draft answer below.",
|
|
219
|
-
"",
|
|
220
|
-
"Question: #{wrap_input(input)}",
|
|
221
|
-
"",
|
|
222
|
-
"Draft answer:",
|
|
223
|
-
draft.to_s,
|
|
224
|
-
"",
|
|
225
|
-
"Citations provided:",
|
|
226
|
-
citation_text,
|
|
227
|
-
"",
|
|
228
|
-
"Evaluation criteria:",
|
|
229
|
-
" 1. Is the answer factually accurate and complete?",
|
|
230
|
-
" 2. Is every significant claim backed by a citation?",
|
|
231
|
-
" 3. Is the self-reported confidence realistic?",
|
|
232
|
-
"",
|
|
233
|
-
"RESPOND ONLY WITH VALID JSON (no text outside the JSON block):",
|
|
234
|
-
'{"approved":<true|false>,"score":<0.0-1.0>,' \
|
|
235
|
-
'"feedback":"<specific actionable feedback, or empty string if approved>"}'
|
|
236
|
-
].join("\n")
|
|
237
|
-
end
|
|
238
|
-
|
|
239
|
-
def safe_parse_draft(text)
|
|
226
|
+
def default_parse_draft(text)
|
|
240
227
|
json_parser.parse(text)
|
|
241
228
|
rescue Phronomy::ParseError
|
|
242
229
|
{answer: text.to_s, confidence: 0.0, citations: []}
|
|
243
230
|
end
|
|
244
231
|
|
|
245
|
-
def
|
|
232
|
+
def default_parse_review(text)
|
|
246
233
|
json_parser.parse(text)
|
|
247
234
|
rescue Phronomy::ParseError
|
|
248
235
|
{approved: false, score: 0.0, feedback: "Review output could not be parsed: #{text}"}
|
data/lib/phronomy/version.rb
CHANGED