RubyGems - phronomy - Versions diffs - 0.3.0 → 0.5.0 - Mend

phronomy 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

checksums.yaml +4 -4
data/CHANGELOG.md +83 -0
data/README.md +85 -16
data/lib/phronomy/agent/base.rb +117 -382
data/lib/phronomy/agent/checkpoint.rb +12 -5
data/lib/phronomy/agent/concerns/before_completion.rb +105 -0
data/lib/phronomy/agent/concerns/guardrailable.rb +42 -0
data/lib/phronomy/agent/concerns/retryable.rb +88 -0
data/lib/phronomy/agent/concerns/suspendable.rb +116 -0
data/lib/phronomy/agent/orchestrator.rb +119 -0
data/lib/phronomy/agent/react_agent.rb +37 -16
data/lib/phronomy/agent/shared_state.rb +303 -0
data/lib/phronomy/agent/team_coordinator.rb +285 -0
data/lib/phronomy/{trust_pipeline.rb → generator_verifier.rb} +95 -108
data/lib/phronomy/version.rb +1 -1
data/lib/phronomy/workflow_runner.rb +41 -22
data/lib/phronomy.rb +17 -0
metadata +12 -6

data/lib/phronomy/{trust_pipeline.rb → generator_verifier.rb} RENAMED

@@ -1,40 +1,53 @@
 # frozen_string_literal: true
 module Phronomy
-  # Orchestrates three trust mechanisms in a single pipeline:
+  # Implements the Generator-Verifier multi-agent coordination pattern
+  # (Anthropic blog, Pattern 1): a generator agent produces an
+  # answer while a verifier agent evaluates its quality.
   #
-  # 1. **Citation Tracking** — the DraftAgent is prompted to list the knowledge
-  #    sources it relied on. Citations are extracted and attached to the result.
+  # @see https://claude.com/blog/multi-agent-coordination-patterns
   #
-  # 2. **Self-Review Loop** — a dedicated ReviewAgent evaluates each draft,
-  #    assigns a quality score, and provides actionable feedback. Rejected drafts
-  #    are retried with the reviewer's feedback embedded in the next prompt.
+  # All prompt construction and result parsing are provided by the caller,
+  # giving full control over the LLM dialogue.
+  # The generator and verifier agents are configurable, and the pipeline
+  # retries until confidence passes the threshold or max iterations are reached.
   #
-  # 3. **Confidence Gate** — a combined confidence score (the minimum of the
-  #    DraftAgent's self-reported confidence and the ReviewAgent's score) is
-  #    compared against a threshold. The pipeline finishes early when the gate
-  #    passes; after +max_iterations+ cycles it finishes regardless and marks
-  #    the result as untrusted when the threshold was not reached.
-  #
-  # @example
-  #   pipeline = Phronomy::TrustPipeline.new(
-  #     draft_agent:          PolicyDraftAgent,
-  #     review_agent:         PolicyReviewAgent,
-  #     confidence_threshold: 0.7,
-  #     max_iterations:       3
+  # @example Basic usage with custom prompt builders
+  #   pipeline = Phronomy::GeneratorVerifier.new(
+  #     draft_agent:           MyDraftAgent,
+  #     review_agent:          MyReviewAgent,
+  #     draft_prompt_builder:  ->(input, feedback) { "Question: #{input}" },
+  #     review_prompt_builder: ->(input, draft, citations) { "Review: #{draft}" }
   #   )
   #   result = pipeline.invoke("What is the refund policy?")
   #   puts result.output      # the final answer string
   #   puts result.trusted?    # true when confidence >= threshold
-  #   result.citations.each { |c| puts "#{c[:source]}: #{c[:excerpt]}" }
-  class TrustPipeline
+  #
+  # @example Custom result parsers
+  #   pipeline = Phronomy::GeneratorVerifier.new(
+  #     ...,
+  #     draft_result_parser:  ->(text) { my_parse_draft(text) },
+  #     review_result_parser: ->(text) { my_parse_review(text) }
+  #   )
+  #
+  # @example Raising on low confidence
+  #   pipeline = Phronomy::GeneratorVerifier.new(
+  #     ...,
+  #     raise_if_untrusted: true
+  #   )
+  #   begin
+  #     result = pipeline.invoke("question")
+  #   rescue Phronomy::LowConfidenceError => e
+  #     puts "Untrusted: #{e.result.confidence}"
+  #   end
+  class GeneratorVerifier
     # Default confidence threshold for trusting an answer.
     DEFAULT_CONFIDENCE_THRESHOLD = 0.7
     # Default maximum draft-review cycles before returning best effort.
     DEFAULT_MAX_ITERATIONS = 3
-    # Immutable value object returned by {TrustPipeline#invoke}.
+    # Immutable value object returned by {GeneratorVerifier#invoke}.
     #
     # @!attribute [r] output
     #   @return [String] the final answer text
@@ -43,17 +56,19 @@ module Phronomy
     # @!attribute [r] citations
     #   @return [Array<Hash>] [{source:, excerpt:}, ...]
     #
-    #   **WARNING**: These citations are extracted from the LLM's own response via
-    #   the ReviewAgent and are **not** verified against any external knowledge base,
-    #   document store, or URL.  Do not treat them as authoritative without
-    #   independent verification.
+    #   **WARNING**: These citations are extracted from the LLM's own response
+    #   and are **not** verified against any external knowledge base or URL.
+    #   Do not treat them as authoritative without independent verification.
     # @!attribute [r] iterations
     #   @return [Integer] number of draft-review cycles executed
     # @!attribute [r] review_notes
     #   @return [Array<String>] reviewer feedback for each cycle
     # @!attribute [r] trusted
     #   @return [Boolean] true when confidence >= threshold
-    Result = Struct.new(:output, :confidence, :citations, :iterations, :review_notes, :trusted, keyword_init: true) do
+    Result = Struct.new(
+      :output, :confidence, :citations, :iterations, :review_notes, :trusted,
+      keyword_init: true
+    ) do
       # @return [Boolean] true when confidence >= threshold
       alias_method :trusted?, :trusted
     end
@@ -76,44 +91,73 @@ module Phronomy
     private_constant :PipelineState
-    # @param draft_agent          [Class]   subclass of Phronomy::Agent::Base
-    # @param review_agent         [Class]   subclass of Phronomy::Agent::Base
-    # @param confidence_threshold [Float]   answers below this are retried (default: 0.7)
-    # @param max_iterations       [Integer] maximum draft-review cycles (default: 3)
-    # @param input_delimiter      [Array<String>, nil] optional two-element array
-    #   [start_tag, end_tag] used to wrap user input in prompts, e.g.
-    #   ["<user_input>", "</user_input>"] or
-    #   ["=== user input start ===", "=== user input end ==="].
-    #   When nil (default), input is embedded as-is for backward compatibility.
-    def initialize(draft_agent:, review_agent:,
+    # @param draft_agent           [Class]   subclass of Phronomy::Agent::Base
+    #   used to generate answer drafts
+    # @param review_agent          [Class]   subclass of Phronomy::Agent::Base
+    #   used to evaluate each draft
+    # @param draft_prompt_builder  [#call]   +call(input, feedback)+ → String
+    #   prompt for the generator. +feedback+ is nil on the first iteration and
+    #   contains the reviewer's feedback string on subsequent iterations.
+    # @param review_prompt_builder [#call]   +call(input, draft, citations)+ → String
+    #   prompt for the verifier. +citations+ is an Array of Hashes.
+    # @param draft_result_parser   [#call, nil]  +call(text)+ → Hash with
+    #   +:answer+, +:confidence+, and +:citations+ keys. Defaults to JSON parsing
+    #   with a safe fallback when the response cannot be parsed.
+    # @param review_result_parser  [#call, nil]  +call(text)+ → Hash with
+    #   +:approved+, +:score+, and +:feedback+ keys. Defaults to JSON parsing
+    #   with a safe fallback.
+    # @param confidence_threshold  [Float]   minimum combined confidence to
+    #   trust an answer (default: 0.7)
+    # @param max_iterations        [Integer] maximum draft-review cycles
+    #   before returning the best-effort answer (default: 3)
+    # @param raise_if_untrusted    [Boolean] when +true+, raises
+    #   {Phronomy::LowConfidenceError} if the final result does not meet the
+    #   confidence threshold (default: false)
+    def initialize(
+      draft_agent:,
+      review_agent:,
+      draft_prompt_builder:,
+      review_prompt_builder:,
+      draft_result_parser: nil,
+      review_result_parser: nil,
       confidence_threshold: DEFAULT_CONFIDENCE_THRESHOLD,
       max_iterations: DEFAULT_MAX_ITERATIONS,
-      input_delimiter: nil)
+      raise_if_untrusted: false
+    )
       @draft_agent_class = draft_agent
       @review_agent_class = review_agent
+      @draft_prompt_builder = draft_prompt_builder
+      @review_prompt_builder = review_prompt_builder
+      @draft_result_parser = draft_result_parser || method(:default_parse_draft)
+      @review_result_parser = review_result_parser || method(:default_parse_review)
       @threshold = confidence_threshold.to_f
       @max_iterations = max_iterations.to_i
-      @input_delimiter = input_delimiter
+      @raise_if_untrusted = raise_if_untrusted
       @compiled_graph = nil
     end
-    # Run the pipeline.
+    # Run the generator-verifier pipeline.
     #
     # @param input  [String] the user question or task description
     # @param config [Hash]   forwarded to the underlying agents (e.g. thread_id)
     # @return [Result]
+    # @raise [Phronomy::LowConfidenceError] when +raise_if_untrusted:+ is +true+
+    #   and the result does not meet the confidence threshold
     def invoke(input, config: {})
       app = compiled_graph
       state = app.invoke({input: input}, config: config)
       confidence = combined_confidence(state)
-      Result.new(
+      trusted = confidence >= @threshold
+      result = Result.new(
         output: state.output || state.draft.to_s,
         confidence: confidence,
         citations: state.citations,
         iterations: state.iteration,
         review_notes: state.review_notes,
-        trusted: confidence >= @threshold
+        trusted: trusted
       )
+      raise LowConfidenceError.new(result) if @raise_if_untrusted && !trusted
+      result
     end
     private
@@ -122,7 +166,6 @@ module Phronomy
       [(state.self_score || 0.0).to_f, (state.review_score || 0.0).to_f].min
     end
-    # Returns the compiled workflow, building and caching it on first call.
     def compiled_graph
       @compiled_graph ||= build_workflow
     end
@@ -132,6 +175,10 @@ module Phronomy
       review_agent = @review_agent_class.new
       threshold = @threshold
       max_iter = @max_iterations
+      dpb = @draft_prompt_builder
+      rpb = @review_prompt_builder
+      drp = @draft_result_parser
+      rrp = @review_result_parser
       pipeline = self
       Phronomy::Workflow.define(PipelineState) do
@@ -139,9 +186,9 @@ module Phronomy
         state :draft, action: ->(state) {
           feedback = state.review_notes.last
-          prompt = pipeline.__send__(:draft_prompt, state.input, feedback)
+          prompt = dpb.call(state.input, feedback)
           result = draft_agent.invoke(prompt)
-          parsed = pipeline.__send__(:safe_parse_draft, result[:output])
+          parsed = drp.call(result[:output])
           state.merge(
             draft: parsed[:answer].to_s,
             self_score: pipeline.__send__(:clamp, parsed[:confidence]),
@@ -151,9 +198,9 @@ module Phronomy
         }
         state :review, action: ->(state) {
-          prompt = pipeline.__send__(:review_prompt, state.input, state.draft, state.citations)
+          prompt = rpb.call(state.input, state.draft, state.citations)
           result = review_agent.invoke(prompt)
-          parsed = pipeline.__send__(:safe_parse_review, result[:output])
+          parsed = rrp.call(result[:output])
           state.merge(
             review_score: pipeline.__send__(:clamp, parsed[:score]),
             approved: parsed[:approved] == true,
@@ -176,73 +223,13 @@ module Phronomy
       end
     end
-    # Wraps +input+ with the configured delimiter pair when +input_delimiter+ is set.
-    # When no delimiter is configured the input is returned unchanged.
-    def wrap_input(input)
-      return input unless @input_delimiter
-      start_tag, end_tag = @input_delimiter
-      "#{start_tag}\n#{input}\n#{end_tag}"
-    end
-    # Builds the prompt sent to the DraftAgent for each iteration.
-    def draft_prompt(input, feedback)
-      lines = [
-        "Answer the following question as accurately as possible.",
-        "Use any knowledge provided in <context> tags and cite your sources."
-      ]
-      if feedback && !feedback.strip.empty?
-        lines << ""
-        lines << "Your previous draft was reviewed and rejected. Address ALL of this feedback:"
-        lines << feedback.strip
-      end
-      lines += [
-        "",
-        "Question: #{wrap_input(input)}",
-        "",
-        "RESPOND ONLY WITH VALID JSON (no text outside the JSON block):",
-        '{"answer":"<full answer>","confidence":<0.0-1.0>,' \
-          '"citations":[{"source":"<doc name>","excerpt":"<exact quote>"}]}'
-      ]
-      lines.join("\n")
-    end
-    # Builds the prompt sent to the ReviewAgent.
-    def review_prompt(input, draft, citations)
-      citation_text = if citations.empty?
-        "  (none)"
-      else
-        citations.map { |c| "  - #{c[:source]}: \"#{c[:excerpt]}\"" }.join("\n")
-      end
-      [
-        "You are a rigorous quality reviewer. Evaluate the draft answer below.",
-        "",
-        "Question: #{wrap_input(input)}",
-        "",
-        "Draft answer:",
-        draft.to_s,
-        "",
-        "Citations provided:",
-        citation_text,
-        "",
-        "Evaluation criteria:",
-        "  1. Is the answer factually accurate and complete?",
-        "  2. Is every significant claim backed by a citation?",
-        "  3. Is the self-reported confidence realistic?",
-        "",
-        "RESPOND ONLY WITH VALID JSON (no text outside the JSON block):",
-        '{"approved":<true|false>,"score":<0.0-1.0>,' \
-          '"feedback":"<specific actionable feedback, or empty string if approved>"}'
-      ].join("\n")
-    end
-    def safe_parse_draft(text)
+    def default_parse_draft(text)
       json_parser.parse(text)
     rescue Phronomy::ParseError
       {answer: text.to_s, confidence: 0.0, citations: []}
     end
-    def safe_parse_review(text)
+    def default_parse_review(text)
       json_parser.parse(text)
     rescue Phronomy::ParseError
       {approved: false, score: 0.0, feedback: "Review output could not be parsed: #{text}"}

data/lib/phronomy/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Phronomy
-  VERSION = "0.3.0"
+  VERSION = "0.5.0"
 end

data/lib/phronomy/workflow_runner.rb CHANGED

@@ -137,20 +137,42 @@ module Phronomy
       current_node = from_node || @entry_point
       tracker = new_phase_machine(current_node)
       tracker.context = state
+      # Event queue: decouple node execution from transition firing.
+      # Events are enqueued after a node completes and processed at the top
+      # of the next iteration so that guards always see the freshest context.
+      event_queue = []
       step = 0
-      while current_node && current_node != FINISH
-        if step >= recursion_limit
-          raise Phronomy::RecursionLimitError,
-            "Recursion limit (#{recursion_limit}) exceeded"
+      loop do
+        break if current_node == FINISH
+        # -- Process next pending event -----------------------------------------
+        # Dequeue one event and fire it against the state machine. Guards are
+        # evaluated here (at fire time) so they see the context written by the
+        # node that enqueued the event.
+        if (event = event_queue.shift)
+          if step >= recursion_limit
+            raise Phronomy::RecursionLimitError,
+              "Recursion limit (#{recursion_limit}) exceeded"
+          end
+          fire_event!(tracker, event, current_node)
+          next_phase = tracker.phase.to_sym
+          # When next_phase == current_node no transition matched → terminal node.
+          current_node = (next_phase == current_node) ? FINISH : next_phase
+          step += 1
+          next
         end
-        # Auto-halt at wait states: save context and return to caller.
+        # -- Queue empty: check for halt -----------------------------------------
+        # Auto-halt at wait states: persist phase in context and return to caller.
+        # The caller resumes via send_event, which starts a fresh run_graph call.
         if @wait_state_names.include?(current_node)
           state.set_graph_metadata(thread_id: state.thread_id, phase: current_node)
           return state
         end
+        # -- Execute node action ------------------------------------------------
         node_fn = @nodes[current_node]
         raise ArgumentError, "Node #{current_node.inspect} is not defined" unless node_fn
@@ -165,27 +187,22 @@ module Phronomy
             "expected Hash, #{@state_class}, or nil"
         end
-        # Update tracker so guards see the freshest context.
+        # Update tracker so guards see the freshest context when the event fires.
         tracker.context = state
         event_block&.call({node: current_node, state: state})
-        # Delegate transition decision to state_machines.
+        # -- Enqueue transition event -------------------------------------------
+        # node_completed: generic event for all after-transitions (unconditional).
+        # route event:    user-named event carrying guarded conditional branches.
+        # No enqueue:     terminal node — next iteration exits via FINISH check.
         if @after_transitions.key?(current_node)
-          fire_event!(tracker, :"advance_#{current_node}", current_node)
+          event_queue << :node_completed
         elsif @route_transitions.key?(current_node)
-          ev_name = @route_transitions[current_node][:event_name]
-          fire_event!(tracker, ev_name, current_node)
+          event_queue << @route_transitions[current_node][:event_name]
+        else
+          current_node = FINISH
         end
-        # Nodes with no declared outgoing transition are treated as terminal:
-        # next_phase == current_node triggers the FINISH assignment below.
-        next_phase = tracker.phase.to_sym
-        # When next_phase == current_node: no transition fired (terminal node) → end.
-        # When next_phase == :__end__ (== FINISH): route led to finish → exit loop.
-        current_node = (next_phase == current_node) ? FINISH : next_phase
-        step += 1
       end
       state.set_graph_metadata(thread_id: state.thread_id, phase: :__end__)
@@ -225,9 +242,11 @@ module Phronomy
         state_machine :phase, initial: entry do
           all_states.each { |s| state s }
-          # 1. After-transitions: unconditional, fire on action completion.
-          after_trans.each do |from, to|
-            event :"advance_#{from}" do
+          # 1. After-transitions: one generic :node_completed event covers all
+          #    unconditional transitions. This keeps event names independent of
+          #    source state names and matches standard state machine semantics.
+          event :node_completed do
+            after_trans.each do |from, to|
               transition from => to
             end
           end

data/lib/phronomy.rb CHANGED

@@ -27,6 +27,23 @@ module Phronomy
   class HandoffError < Error; end
+  # Raised by {Phronomy::GeneratorVerifier#invoke} when +raise_if_untrusted: true+
+  # and the pipeline's combined confidence score falls below the configured threshold.
+  #
+  # @example
+  #   rescue Phronomy::LowConfidenceError => e
+  #     puts e.result.confidence   # => e.g. 0.45
+  #     puts e.result.output       # best-effort answer despite low confidence
+  class LowConfidenceError < Error
+    # @return [Phronomy::GeneratorVerifier::Result] the untrusted result
+    attr_reader :result
+    def initialize(result)
+      @result = result
+      super("Answer confidence #{result.confidence} is below the required threshold")
+    end
+  end
   class GuardrailError < Error
     attr_reader :guardrail

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: phronomy
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.5.0
 platform: ruby
 authors:
 - Raizo T.C.S
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2026-05-17 00:00:00.000000000 Z
+date: 2026-05-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ruby_llm
@@ -52,9 +52,8 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.6'
-description: Phronomy provides Agent, Workflow, Memory, Tool, Guardrail, RAG, and
-  Multi-agent capabilities for building AI agents in Ruby and Rails. Powered by RubyLLM
-  for LLM abstraction.
+description: Phronomy provides Agent, Workflow, Tool, Guardrail, RAG, and Multi-agent
+  capabilities for building AI agents in Ruby. Powered by RubyLLM for LLM abstraction.
 email:
 - raizo.tcs@gmail.com
 executables: []
@@ -74,10 +73,17 @@ files:
 - lib/phronomy/agent/base.rb
 - lib/phronomy/agent/before_completion_context.rb
 - lib/phronomy/agent/checkpoint.rb
+- lib/phronomy/agent/concerns/before_completion.rb
+- lib/phronomy/agent/concerns/guardrailable.rb
+- lib/phronomy/agent/concerns/retryable.rb
+- lib/phronomy/agent/concerns/suspendable.rb
 - lib/phronomy/agent/handoff.rb
+- lib/phronomy/agent/orchestrator.rb
 - lib/phronomy/agent/react_agent.rb
 - lib/phronomy/agent/runner.rb
+- lib/phronomy/agent/shared_state.rb
 - lib/phronomy/agent/suspend_signal.rb
+- lib/phronomy/agent/team_coordinator.rb
 - lib/phronomy/configuration.rb
 - lib/phronomy/context.rb
 - lib/phronomy/context/assembler.rb
@@ -103,6 +109,7 @@ files:
 - lib/phronomy/eval/scorer/exact_match.rb
 - lib/phronomy/eval/scorer/includes_scorer.rb
 - lib/phronomy/eval/scorer/llm_judge.rb
+- lib/phronomy/generator_verifier.rb
 - lib/phronomy/guardrail.rb
 - lib/phronomy/guardrail/base.rb
 - lib/phronomy/guardrail/builtin.rb
@@ -142,7 +149,6 @@ files:
 - lib/phronomy/tracing/langfuse_tracer.rb
 - lib/phronomy/tracing/null_tracer.rb
 - lib/phronomy/tracing/open_telemetry_tracer.rb
-- lib/phronomy/trust_pipeline.rb
 - lib/phronomy/vector_store.rb
 - lib/phronomy/vector_store/base.rb
 - lib/phronomy/vector_store/in_memory.rb