phronomy 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,40 +1,53 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Phronomy
4
- # Orchestrates three trust mechanisms in a single pipeline:
4
+ # Implements the Generator-Verifier multi-agent coordination pattern
5
+ # (Anthropic blog, Pattern 1): a generator agent produces an
6
+ # answer while a verifier agent evaluates its quality.
5
7
  #
6
- # 1. **Citation Tracking** — the DraftAgent is prompted to list the knowledge
7
- # sources it relied on. Citations are extracted and attached to the result.
8
+ # @see https://claude.com/blog/multi-agent-coordination-patterns
8
9
  #
9
- # 2. **Self-Review Loop** — a dedicated ReviewAgent evaluates each draft,
10
- # assigns a quality score, and provides actionable feedback. Rejected drafts
11
- # are retried with the reviewer's feedback embedded in the next prompt.
10
+ # All prompt construction and result parsing are provided by the caller,
11
+ # giving full control over the LLM dialogue.
12
+ # The generator and verifier agents are configurable, and the pipeline
13
+ # retries until confidence passes the threshold or max iterations are reached.
12
14
  #
13
- # 3. **Confidence Gate** — a combined confidence score (the minimum of the
14
- # DraftAgent's self-reported confidence and the ReviewAgent's score) is
15
- # compared against a threshold. The pipeline finishes early when the gate
16
- # passes; after +max_iterations+ cycles it finishes regardless and marks
17
- # the result as untrusted when the threshold was not reached.
18
- #
19
- # @example
20
- # pipeline = Phronomy::TrustPipeline.new(
21
- # draft_agent: PolicyDraftAgent,
22
- # review_agent: PolicyReviewAgent,
23
- # confidence_threshold: 0.7,
24
- # max_iterations: 3
15
+ # @example Basic usage with custom prompt builders
16
+ # pipeline = Phronomy::GeneratorVerifier.new(
17
+ # draft_agent: MyDraftAgent,
18
+ # review_agent: MyReviewAgent,
19
+ # draft_prompt_builder: ->(input, feedback) { "Question: #{input}" },
20
+ # review_prompt_builder: ->(input, draft, citations) { "Review: #{draft}" }
25
21
  # )
26
22
  # result = pipeline.invoke("What is the refund policy?")
27
23
  # puts result.output # the final answer string
28
24
  # puts result.trusted? # true when confidence >= threshold
29
- # result.citations.each { |c| puts "#{c[:source]}: #{c[:excerpt]}" }
30
- class TrustPipeline
25
+ #
26
+ # @example Custom result parsers
27
+ # pipeline = Phronomy::GeneratorVerifier.new(
28
+ # ...,
29
+ # draft_result_parser: ->(text) { my_parse_draft(text) },
30
+ # review_result_parser: ->(text) { my_parse_review(text) }
31
+ # )
32
+ #
33
+ # @example Raising on low confidence
34
+ # pipeline = Phronomy::GeneratorVerifier.new(
35
+ # ...,
36
+ # raise_if_untrusted: true
37
+ # )
38
+ # begin
39
+ # result = pipeline.invoke("question")
40
+ # rescue Phronomy::LowConfidenceError => e
41
+ # puts "Untrusted: #{e.result.confidence}"
42
+ # end
43
+ class GeneratorVerifier
31
44
  # Default confidence threshold for trusting an answer.
32
45
  DEFAULT_CONFIDENCE_THRESHOLD = 0.7
33
46
 
34
47
  # Default maximum draft-review cycles before returning best effort.
35
48
  DEFAULT_MAX_ITERATIONS = 3
36
49
 
37
- # Immutable value object returned by {TrustPipeline#invoke}.
50
+ # Immutable value object returned by {GeneratorVerifier#invoke}.
38
51
  #
39
52
  # @!attribute [r] output
40
53
  # @return [String] the final answer text
@@ -43,17 +56,19 @@ module Phronomy
43
56
  # @!attribute [r] citations
44
57
  # @return [Array<Hash>] [{source:, excerpt:}, ...]
45
58
  #
46
- # **WARNING**: These citations are extracted from the LLM's own response via
47
- # the ReviewAgent and are **not** verified against any external knowledge base,
48
- # document store, or URL. Do not treat them as authoritative without
49
- # independent verification.
59
+ # **WARNING**: These citations are extracted from the LLM's own response
60
+ # and are **not** verified against any external knowledge base or URL.
61
+ # Do not treat them as authoritative without independent verification.
50
62
  # @!attribute [r] iterations
51
63
  # @return [Integer] number of draft-review cycles executed
52
64
  # @!attribute [r] review_notes
53
65
  # @return [Array<String>] reviewer feedback for each cycle
54
66
  # @!attribute [r] trusted
55
67
  # @return [Boolean] true when confidence >= threshold
56
- Result = Struct.new(:output, :confidence, :citations, :iterations, :review_notes, :trusted, keyword_init: true) do
68
+ Result = Struct.new(
69
+ :output, :confidence, :citations, :iterations, :review_notes, :trusted,
70
+ keyword_init: true
71
+ ) do
57
72
  # @return [Boolean] true when confidence >= threshold
58
73
  alias_method :trusted?, :trusted
59
74
  end
@@ -76,44 +91,73 @@ module Phronomy
76
91
 
77
92
  private_constant :PipelineState
78
93
 
79
- # @param draft_agent [Class] subclass of Phronomy::Agent::Base
80
- # @param review_agent [Class] subclass of Phronomy::Agent::Base
81
- # @param confidence_threshold [Float] answers below this are retried (default: 0.7)
82
- # @param max_iterations [Integer] maximum draft-review cycles (default: 3)
83
- # @param input_delimiter [Array<String>, nil] optional two-element array
84
- # [start_tag, end_tag] used to wrap user input in prompts, e.g.
85
- # ["<user_input>", "</user_input>"] or
86
- # ["=== user input start ===", "=== user input end ==="].
87
- # When nil (default), input is embedded as-is for backward compatibility.
88
- def initialize(draft_agent:, review_agent:,
94
+ # @param draft_agent [Class] subclass of Phronomy::Agent::Base
95
+ # used to generate answer drafts
96
+ # @param review_agent [Class] subclass of Phronomy::Agent::Base
97
+ # used to evaluate each draft
98
+ # @param draft_prompt_builder [#call] +call(input, feedback)+ → String
99
+ # prompt for the generator. +feedback+ is nil on the first iteration and
100
+ # contains the reviewer's feedback string on subsequent iterations.
101
+ # @param review_prompt_builder [#call] +call(input, draft, citations)+ → String
102
+ # prompt for the verifier. +citations+ is an Array of Hashes.
103
+ # @param draft_result_parser [#call, nil] +call(text)+ → Hash with
104
+ # +:answer+, +:confidence+, and +:citations+ keys. Defaults to JSON parsing
105
+ # with a safe fallback when the response cannot be parsed.
106
+ # @param review_result_parser [#call, nil] +call(text)+ → Hash with
107
+ # +:approved+, +:score+, and +:feedback+ keys. Defaults to JSON parsing
108
+ # with a safe fallback.
109
+ # @param confidence_threshold [Float] minimum combined confidence to
110
+ # trust an answer (default: 0.7)
111
+ # @param max_iterations [Integer] maximum draft-review cycles
112
+ # before returning the best-effort answer (default: 3)
113
+ # @param raise_if_untrusted [Boolean] when +true+, raises
114
+ # {Phronomy::LowConfidenceError} if the final result does not meet the
115
+ # confidence threshold (default: false)
116
+ def initialize(
117
+ draft_agent:,
118
+ review_agent:,
119
+ draft_prompt_builder:,
120
+ review_prompt_builder:,
121
+ draft_result_parser: nil,
122
+ review_result_parser: nil,
89
123
  confidence_threshold: DEFAULT_CONFIDENCE_THRESHOLD,
90
124
  max_iterations: DEFAULT_MAX_ITERATIONS,
91
- input_delimiter: nil)
125
+ raise_if_untrusted: false
126
+ )
92
127
  @draft_agent_class = draft_agent
93
128
  @review_agent_class = review_agent
129
+ @draft_prompt_builder = draft_prompt_builder
130
+ @review_prompt_builder = review_prompt_builder
131
+ @draft_result_parser = draft_result_parser || method(:default_parse_draft)
132
+ @review_result_parser = review_result_parser || method(:default_parse_review)
94
133
  @threshold = confidence_threshold.to_f
95
134
  @max_iterations = max_iterations.to_i
96
- @input_delimiter = input_delimiter
135
+ @raise_if_untrusted = raise_if_untrusted
97
136
  @compiled_graph = nil
98
137
  end
99
138
 
100
- # Run the pipeline.
139
+ # Run the generator-verifier pipeline.
101
140
  #
102
141
  # @param input [String] the user question or task description
103
142
  # @param config [Hash] forwarded to the underlying agents (e.g. thread_id)
104
143
  # @return [Result]
144
+ # @raise [Phronomy::LowConfidenceError] when +raise_if_untrusted:+ is +true+
145
+ # and the result does not meet the confidence threshold
105
146
  def invoke(input, config: {})
106
147
  app = compiled_graph
107
148
  state = app.invoke({input: input}, config: config)
108
149
  confidence = combined_confidence(state)
109
- Result.new(
150
+ trusted = confidence >= @threshold
151
+ result = Result.new(
110
152
  output: state.output || state.draft.to_s,
111
153
  confidence: confidence,
112
154
  citations: state.citations,
113
155
  iterations: state.iteration,
114
156
  review_notes: state.review_notes,
115
- trusted: confidence >= @threshold
157
+ trusted: trusted
116
158
  )
159
+ raise LowConfidenceError.new(result) if @raise_if_untrusted && !trusted
160
+ result
117
161
  end
118
162
 
119
163
  private
@@ -122,7 +166,6 @@ module Phronomy
122
166
  [(state.self_score || 0.0).to_f, (state.review_score || 0.0).to_f].min
123
167
  end
124
168
 
125
- # Returns the compiled workflow, building and caching it on first call.
126
169
  def compiled_graph
127
170
  @compiled_graph ||= build_workflow
128
171
  end
@@ -132,6 +175,10 @@ module Phronomy
132
175
  review_agent = @review_agent_class.new
133
176
  threshold = @threshold
134
177
  max_iter = @max_iterations
178
+ dpb = @draft_prompt_builder
179
+ rpb = @review_prompt_builder
180
+ drp = @draft_result_parser
181
+ rrp = @review_result_parser
135
182
  pipeline = self
136
183
 
137
184
  Phronomy::Workflow.define(PipelineState) do
@@ -139,9 +186,9 @@ module Phronomy
139
186
 
140
187
  state :draft, action: ->(state) {
141
188
  feedback = state.review_notes.last
142
- prompt = pipeline.__send__(:draft_prompt, state.input, feedback)
189
+ prompt = dpb.call(state.input, feedback)
143
190
  result = draft_agent.invoke(prompt)
144
- parsed = pipeline.__send__(:safe_parse_draft, result[:output])
191
+ parsed = drp.call(result[:output])
145
192
  state.merge(
146
193
  draft: parsed[:answer].to_s,
147
194
  self_score: pipeline.__send__(:clamp, parsed[:confidence]),
@@ -151,9 +198,9 @@ module Phronomy
151
198
  }
152
199
 
153
200
  state :review, action: ->(state) {
154
- prompt = pipeline.__send__(:review_prompt, state.input, state.draft, state.citations)
201
+ prompt = rpb.call(state.input, state.draft, state.citations)
155
202
  result = review_agent.invoke(prompt)
156
- parsed = pipeline.__send__(:safe_parse_review, result[:output])
203
+ parsed = rrp.call(result[:output])
157
204
  state.merge(
158
205
  review_score: pipeline.__send__(:clamp, parsed[:score]),
159
206
  approved: parsed[:approved] == true,
@@ -176,73 +223,13 @@ module Phronomy
176
223
  end
177
224
  end
178
225
 
179
- # Wraps +input+ with the configured delimiter pair when +input_delimiter+ is set.
180
- # When no delimiter is configured the input is returned unchanged.
181
- def wrap_input(input)
182
- return input unless @input_delimiter
183
-
184
- start_tag, end_tag = @input_delimiter
185
- "#{start_tag}\n#{input}\n#{end_tag}"
186
- end
187
-
188
- # Builds the prompt sent to the DraftAgent for each iteration.
189
- def draft_prompt(input, feedback)
190
- lines = [
191
- "Answer the following question as accurately as possible.",
192
- "Use any knowledge provided in <context> tags and cite your sources."
193
- ]
194
- if feedback && !feedback.strip.empty?
195
- lines << ""
196
- lines << "Your previous draft was reviewed and rejected. Address ALL of this feedback:"
197
- lines << feedback.strip
198
- end
199
- lines += [
200
- "",
201
- "Question: #{wrap_input(input)}",
202
- "",
203
- "RESPOND ONLY WITH VALID JSON (no text outside the JSON block):",
204
- '{"answer":"<full answer>","confidence":<0.0-1.0>,' \
205
- '"citations":[{"source":"<doc name>","excerpt":"<exact quote>"}]}'
206
- ]
207
- lines.join("\n")
208
- end
209
-
210
- # Builds the prompt sent to the ReviewAgent.
211
- def review_prompt(input, draft, citations)
212
- citation_text = if citations.empty?
213
- " (none)"
214
- else
215
- citations.map { |c| " - #{c[:source]}: \"#{c[:excerpt]}\"" }.join("\n")
216
- end
217
- [
218
- "You are a rigorous quality reviewer. Evaluate the draft answer below.",
219
- "",
220
- "Question: #{wrap_input(input)}",
221
- "",
222
- "Draft answer:",
223
- draft.to_s,
224
- "",
225
- "Citations provided:",
226
- citation_text,
227
- "",
228
- "Evaluation criteria:",
229
- " 1. Is the answer factually accurate and complete?",
230
- " 2. Is every significant claim backed by a citation?",
231
- " 3. Is the self-reported confidence realistic?",
232
- "",
233
- "RESPOND ONLY WITH VALID JSON (no text outside the JSON block):",
234
- '{"approved":<true|false>,"score":<0.0-1.0>,' \
235
- '"feedback":"<specific actionable feedback, or empty string if approved>"}'
236
- ].join("\n")
237
- end
238
-
239
- def safe_parse_draft(text)
226
+ def default_parse_draft(text)
240
227
  json_parser.parse(text)
241
228
  rescue Phronomy::ParseError
242
229
  {answer: text.to_s, confidence: 0.0, citations: []}
243
230
  end
244
231
 
245
- def safe_parse_review(text)
232
+ def default_parse_review(text)
246
233
  json_parser.parse(text)
247
234
  rescue Phronomy::ParseError
248
235
  {approved: false, score: 0.0, feedback: "Review output could not be parsed: #{text}"}
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Phronomy
4
- VERSION = "0.3.0"
4
+ VERSION = "0.5.0"
5
5
  end
@@ -137,20 +137,42 @@ module Phronomy
137
137
  current_node = from_node || @entry_point
138
138
  tracker = new_phase_machine(current_node)
139
139
  tracker.context = state
140
+ # Event queue: decouple node execution from transition firing.
141
+ # Events are enqueued after a node completes and processed at the top
142
+ # of the next iteration so that guards always see the freshest context.
143
+ event_queue = []
140
144
  step = 0
141
145
 
142
- while current_node && current_node != FINISH
143
- if step >= recursion_limit
144
- raise Phronomy::RecursionLimitError,
145
- "Recursion limit (#{recursion_limit}) exceeded"
146
+ loop do
147
+ break if current_node == FINISH
148
+
149
+ # -- Process next pending event -----------------------------------------
150
+ # Dequeue one event and fire it against the state machine. Guards are
151
+ # evaluated here (at fire time) so they see the context written by the
152
+ # node that enqueued the event.
153
+ if (event = event_queue.shift)
154
+ if step >= recursion_limit
155
+ raise Phronomy::RecursionLimitError,
156
+ "Recursion limit (#{recursion_limit}) exceeded"
157
+ end
158
+
159
+ fire_event!(tracker, event, current_node)
160
+ next_phase = tracker.phase.to_sym
161
+ # When next_phase == current_node no transition matched → terminal node.
162
+ current_node = (next_phase == current_node) ? FINISH : next_phase
163
+ step += 1
164
+ next
146
165
  end
147
166
 
148
- # Auto-halt at wait states: save context and return to caller.
167
+ # -- Queue empty: check for halt -----------------------------------------
168
+ # Auto-halt at wait states: persist phase in context and return to caller.
169
+ # The caller resumes via send_event, which starts a fresh run_graph call.
149
170
  if @wait_state_names.include?(current_node)
150
171
  state.set_graph_metadata(thread_id: state.thread_id, phase: current_node)
151
172
  return state
152
173
  end
153
174
 
175
+ # -- Execute node action ------------------------------------------------
154
176
  node_fn = @nodes[current_node]
155
177
  raise ArgumentError, "Node #{current_node.inspect} is not defined" unless node_fn
156
178
 
@@ -165,27 +187,22 @@ module Phronomy
165
187
  "expected Hash, #{@state_class}, or nil"
166
188
  end
167
189
 
168
- # Update tracker so guards see the freshest context.
190
+ # Update tracker so guards see the freshest context when the event fires.
169
191
  tracker.context = state
170
192
 
171
193
  event_block&.call({node: current_node, state: state})
172
194
 
173
- # Delegate transition decision to state_machines.
195
+ # -- Enqueue transition event -------------------------------------------
196
+ # node_completed: generic event for all after-transitions (unconditional).
197
+ # route event: user-named event carrying guarded conditional branches.
198
+ # No enqueue: terminal node — next iteration exits via FINISH check.
174
199
  if @after_transitions.key?(current_node)
175
- fire_event!(tracker, :"advance_#{current_node}", current_node)
200
+ event_queue << :node_completed
176
201
  elsif @route_transitions.key?(current_node)
177
- ev_name = @route_transitions[current_node][:event_name]
178
- fire_event!(tracker, ev_name, current_node)
202
+ event_queue << @route_transitions[current_node][:event_name]
203
+ else
204
+ current_node = FINISH
179
205
  end
180
- # Nodes with no declared outgoing transition are treated as terminal:
181
- # next_phase == current_node triggers the FINISH assignment below.
182
-
183
- next_phase = tracker.phase.to_sym
184
- # When next_phase == current_node: no transition fired (terminal node) → end.
185
- # When next_phase == :__end__ (== FINISH): route led to finish → exit loop.
186
- current_node = (next_phase == current_node) ? FINISH : next_phase
187
-
188
- step += 1
189
206
  end
190
207
 
191
208
  state.set_graph_metadata(thread_id: state.thread_id, phase: :__end__)
@@ -225,9 +242,11 @@ module Phronomy
225
242
  state_machine :phase, initial: entry do
226
243
  all_states.each { |s| state s }
227
244
 
228
- # 1. After-transitions: unconditional, fire on action completion.
229
- after_trans.each do |from, to|
230
- event :"advance_#{from}" do
245
+ # 1. After-transitions: one generic :node_completed event covers all
246
+ # unconditional transitions. This keeps event names independent of
247
+ # source state names and matches standard state machine semantics.
248
+ event :node_completed do
249
+ after_trans.each do |from, to|
231
250
  transition from => to
232
251
  end
233
252
  end
data/lib/phronomy.rb CHANGED
@@ -27,6 +27,23 @@ module Phronomy
27
27
 
28
28
  class HandoffError < Error; end
29
29
 
30
+ # Raised by {Phronomy::GeneratorVerifier#invoke} when +raise_if_untrusted: true+
31
+ # and the pipeline's combined confidence score falls below the configured threshold.
32
+ #
33
+ # @example
34
+ # rescue Phronomy::LowConfidenceError => e
35
+ # puts e.result.confidence # => e.g. 0.45
36
+ # puts e.result.output # best-effort answer despite low confidence
37
+ class LowConfidenceError < Error
38
+ # @return [Phronomy::GeneratorVerifier::Result] the untrusted result
39
+ attr_reader :result
40
+
41
+ def initialize(result)
42
+ @result = result
43
+ super("Answer confidence #{result.confidence} is below the required threshold")
44
+ end
45
+ end
46
+
30
47
  class GuardrailError < Error
31
48
  attr_reader :guardrail
32
49
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phronomy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Raizo T.C.S
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-05-17 00:00:00.000000000 Z
11
+ date: 2026-05-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby_llm
@@ -52,9 +52,8 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.6'
55
- description: Phronomy provides Agent, Workflow, Memory, Tool, Guardrail, RAG, and
56
- Multi-agent capabilities for building AI agents in Ruby and Rails. Powered by RubyLLM
57
- for LLM abstraction.
55
+ description: Phronomy provides Agent, Workflow, Tool, Guardrail, RAG, and Multi-agent
56
+ capabilities for building AI agents in Ruby. Powered by RubyLLM for LLM abstraction.
58
57
  email:
59
58
  - raizo.tcs@gmail.com
60
59
  executables: []
@@ -74,10 +73,17 @@ files:
74
73
  - lib/phronomy/agent/base.rb
75
74
  - lib/phronomy/agent/before_completion_context.rb
76
75
  - lib/phronomy/agent/checkpoint.rb
76
+ - lib/phronomy/agent/concerns/before_completion.rb
77
+ - lib/phronomy/agent/concerns/guardrailable.rb
78
+ - lib/phronomy/agent/concerns/retryable.rb
79
+ - lib/phronomy/agent/concerns/suspendable.rb
77
80
  - lib/phronomy/agent/handoff.rb
81
+ - lib/phronomy/agent/orchestrator.rb
78
82
  - lib/phronomy/agent/react_agent.rb
79
83
  - lib/phronomy/agent/runner.rb
84
+ - lib/phronomy/agent/shared_state.rb
80
85
  - lib/phronomy/agent/suspend_signal.rb
86
+ - lib/phronomy/agent/team_coordinator.rb
81
87
  - lib/phronomy/configuration.rb
82
88
  - lib/phronomy/context.rb
83
89
  - lib/phronomy/context/assembler.rb
@@ -103,6 +109,7 @@ files:
103
109
  - lib/phronomy/eval/scorer/exact_match.rb
104
110
  - lib/phronomy/eval/scorer/includes_scorer.rb
105
111
  - lib/phronomy/eval/scorer/llm_judge.rb
112
+ - lib/phronomy/generator_verifier.rb
106
113
  - lib/phronomy/guardrail.rb
107
114
  - lib/phronomy/guardrail/base.rb
108
115
  - lib/phronomy/guardrail/builtin.rb
@@ -142,7 +149,6 @@ files:
142
149
  - lib/phronomy/tracing/langfuse_tracer.rb
143
150
  - lib/phronomy/tracing/null_tracer.rb
144
151
  - lib/phronomy/tracing/open_telemetry_tracer.rb
145
- - lib/phronomy/trust_pipeline.rb
146
152
  - lib/phronomy/vector_store.rb
147
153
  - lib/phronomy/vector_store/base.rb
148
154
  - lib/phronomy/vector_store/in_memory.rb