phronomy 0.2.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -30
- data/README.md +106 -122
- data/lib/phronomy/agent/base.rb +135 -57
- data/lib/phronomy/agent/checkpoint.rb +53 -0
- data/lib/phronomy/agent/orchestrator.rb +119 -0
- data/lib/phronomy/agent/react_agent.rb +18 -28
- data/lib/phronomy/agent/shared_state.rb +303 -0
- data/lib/phronomy/agent/suspend_signal.rb +35 -0
- data/lib/phronomy/agent/team_coordinator.rb +285 -0
- data/lib/phronomy/agent.rb +2 -1
- data/lib/phronomy/configuration.rb +0 -24
- data/lib/phronomy/generator_verifier.rb +250 -0
- data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +10 -27
- data/lib/phronomy/railtie.rb +0 -6
- data/lib/phronomy/ruby_llm_patches.rb +20 -0
- data/lib/phronomy/tool/mcp_tool.rb +23 -26
- data/lib/phronomy/tracing/langfuse_tracer.rb +3 -6
- data/lib/phronomy/vector_store/redis_search.rb +4 -4
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow.rb +4 -7
- data/lib/phronomy/workflow_runner.rb +42 -30
- data/lib/phronomy.rb +18 -0
- data/scripts/check_readme_ruby.rb +38 -0
- metadata +12 -38
- data/docs/trustworthy_ai_enhancements.md +0 -332
- data/lib/phronomy/active_record/acts_as.rb +0 -48
- data/lib/phronomy/active_record/checkpoint.rb +0 -20
- data/lib/phronomy/active_record/extensions.rb +0 -14
- data/lib/phronomy/active_record/message.rb +0 -20
- data/lib/phronomy/actor.rb +0 -68
- data/lib/phronomy/memory/compression/base.rb +0 -37
- data/lib/phronomy/memory/compression/summary.rb +0 -107
- data/lib/phronomy/memory/compression/tool_output_pruner.rb +0 -67
- data/lib/phronomy/memory/compression.rb +0 -11
- data/lib/phronomy/memory/conversation_manager.rb +0 -213
- data/lib/phronomy/memory/retrieval/base.rb +0 -22
- data/lib/phronomy/memory/retrieval/composite.rb +0 -76
- data/lib/phronomy/memory/retrieval/recent.rb +0 -35
- data/lib/phronomy/memory/retrieval/semantic.rb +0 -114
- data/lib/phronomy/memory/retrieval.rb +0 -12
- data/lib/phronomy/memory/storage/active_record.rb +0 -248
- data/lib/phronomy/memory/storage/base.rb +0 -155
- data/lib/phronomy/memory/storage/in_memory.rb +0 -152
- data/lib/phronomy/memory/storage.rb +0 -11
- data/lib/phronomy/memory.rb +0 -21
- data/lib/phronomy/rails/agent_job.rb +0 -75
- data/lib/phronomy/state_store/active_record.rb +0 -76
- data/lib/phronomy/state_store/base.rb +0 -112
- data/lib/phronomy/state_store/encryptor/active_support.rb +0 -49
- data/lib/phronomy/state_store/encryptor/base.rb +0 -34
- data/lib/phronomy/state_store/encryptor.rb +0 -16
- data/lib/phronomy/state_store/file.rb +0 -85
- data/lib/phronomy/state_store/in_memory.rb +0 -53
- data/lib/phronomy/state_store/redis.rb +0 -70
- data/lib/phronomy/state_store.rb +0 -9
- data/lib/phronomy/thread_actor_registry.rb +0 -85
- data/lib/phronomy/trust_pipeline.rb +0 -264
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "json"
|
|
4
|
-
|
|
5
|
-
module Phronomy
|
|
6
|
-
module StateStore
|
|
7
|
-
# Redis-backed state store.
|
|
8
|
-
# Persists graph state as a JSON string under the key
|
|
9
|
-
# "phronomy:state:<thread_id>" in Redis.
|
|
10
|
-
#
|
|
11
|
-
# The Redis client must be compatible with the redis-rb gem interface:
|
|
12
|
-
# client.set(key, value)
|
|
13
|
-
# client.get(key)
|
|
14
|
-
# client.del(key)
|
|
15
|
-
#
|
|
16
|
-
# @example
|
|
17
|
-
# require "redis"
|
|
18
|
-
# redis = Redis.new(url: ENV["REDIS_URL"])
|
|
19
|
-
# Phronomy.configure do |c|
|
|
20
|
-
# c.default_state_store = Phronomy::StateStore::Redis.new(client: redis)
|
|
21
|
-
# end
|
|
22
|
-
#
|
|
23
|
-
# @example with TTL
|
|
24
|
-
# Phronomy::StateStore::Redis.new(client: redis, ttl: 3600)
|
|
25
|
-
class Redis < Base
|
|
26
|
-
KEY_PREFIX = "phronomy:state:"
|
|
27
|
-
private_constant :KEY_PREFIX
|
|
28
|
-
|
|
29
|
-
# @param client [#set, #get, #del] Redis-compatible client
|
|
30
|
-
# @param ttl [Integer, nil] optional key expiry in seconds
|
|
31
|
-
def initialize(client:, ttl: nil)
|
|
32
|
-
@client = client
|
|
33
|
-
@ttl = ttl
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# @param state [Object] includes Phronomy::WorkflowContext
|
|
37
|
-
# @return [self]
|
|
38
|
-
def save(state)
|
|
39
|
-
serialized = serialize_state(state)
|
|
40
|
-
if @ttl
|
|
41
|
-
@client.set(key(state.thread_id), serialized, ex: @ttl)
|
|
42
|
-
else
|
|
43
|
-
@client.set(key(state.thread_id), serialized)
|
|
44
|
-
end
|
|
45
|
-
self
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
# @param thread_id [String]
|
|
49
|
-
# @return [Object, nil] state instance or nil
|
|
50
|
-
def load(thread_id)
|
|
51
|
-
raw = @client.get(key(thread_id))
|
|
52
|
-
return nil unless raw
|
|
53
|
-
|
|
54
|
-
deserialize_state(raw)
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
# @return [self]
|
|
58
|
-
def clear(thread_id)
|
|
59
|
-
@client.del(key(thread_id))
|
|
60
|
-
self
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
private
|
|
64
|
-
|
|
65
|
-
def key(thread_id)
|
|
66
|
-
"#{KEY_PREFIX}#{thread_id}"
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
end
|
|
70
|
-
end
|
data/lib/phronomy/state_store.rb
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
# Namespace for state persistence backends.
|
|
5
|
-
# A StateStore saves and loads graph State objects keyed by thread_id.
|
|
6
|
-
# The thread_id is embedded in the State itself (state.thread_id).
|
|
7
|
-
module StateStore
|
|
8
|
-
end
|
|
9
|
-
end
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
# Global per-thread-id {Actor} registry.
|
|
5
|
-
#
|
|
6
|
-
# Maps the +:thread_id+ key from the +config:+ argument passed to
|
|
7
|
-
# {Phronomy::Agent::Base#invoke} to a {Phronomy::Actor} instance.
|
|
8
|
-
# Each thread_id gets exactly one Actor so that all operations for the same
|
|
9
|
-
# conversation are serialised automatically.
|
|
10
|
-
#
|
|
11
|
-
# @example
|
|
12
|
-
# Phronomy::ThreadActorRegistry.for("user-42").call do
|
|
13
|
-
# # runs sequentially on the Actor's thread
|
|
14
|
-
# end
|
|
15
|
-
module ThreadActorRegistry
|
|
16
|
-
@actors = {}
|
|
17
|
-
@registry_actor = Actor.new
|
|
18
|
-
|
|
19
|
-
class << self
|
|
20
|
-
# Returns (or lazily creates) the {Actor} for +thread_id+.
|
|
21
|
-
#
|
|
22
|
-
# When +Phronomy.configuration.max_actors+ is set, the registry evicts the
|
|
23
|
-
# least-recently-used Actor (by stopping it) before inserting a new one.
|
|
24
|
-
# Accessing an existing Actor moves it to the most-recently-used position.
|
|
25
|
-
#
|
|
26
|
-
# @param thread_id [String]
|
|
27
|
-
# @return [Phronomy::Actor]
|
|
28
|
-
def for(thread_id)
|
|
29
|
-
@registry_actor.call do
|
|
30
|
-
if @actors.key?(thread_id)
|
|
31
|
-
# LRU touch: move to end (most-recently used)
|
|
32
|
-
actor = @actors.delete(thread_id)
|
|
33
|
-
@actors[thread_id] = actor
|
|
34
|
-
else
|
|
35
|
-
evict_lru_if_needed!
|
|
36
|
-
@actors[thread_id] = Actor.new
|
|
37
|
-
end
|
|
38
|
-
@actors[thread_id]
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
# Returns the current number of registered Actors.
|
|
43
|
-
#
|
|
44
|
-
# @return [Integer]
|
|
45
|
-
def actor_count
|
|
46
|
-
@registry_actor.call { @actors.size }
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
# Gracefully stops the Actor for +thread_id+ and removes it from the
|
|
50
|
-
# registry. The next call to {.for} with the same id creates a fresh Actor.
|
|
51
|
-
#
|
|
52
|
-
# @param thread_id [String]
|
|
53
|
-
def stop(thread_id)
|
|
54
|
-
@registry_actor.call { @actors.delete(thread_id) }&.stop
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
# Stops and removes every registered Actor.
|
|
58
|
-
# Intended for test teardown and process shutdown.
|
|
59
|
-
def clear_all
|
|
60
|
-
actors = @registry_actor.call { @actors.values.tap { @actors.clear } }
|
|
61
|
-
actors.each(&:stop)
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
# Yields each currently registered Actor.
|
|
65
|
-
# A snapshot is taken so the registry cannot change while callers iterate.
|
|
66
|
-
#
|
|
67
|
-
# @yield [Phronomy::Actor]
|
|
68
|
-
def each_actor(&block)
|
|
69
|
-
@registry_actor.call { @actors.values.dup }.each(&block)
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
private
|
|
73
|
-
|
|
74
|
-
# Evicts the least-recently-used Actor when the registry is at capacity.
|
|
75
|
-
# Must be called from within @registry_actor.call { } to be thread-safe.
|
|
76
|
-
def evict_lru_if_needed!
|
|
77
|
-
max = Phronomy.configuration.max_actors
|
|
78
|
-
return unless max && @actors.size >= max
|
|
79
|
-
|
|
80
|
-
_lru_id, lru_actor = @actors.shift
|
|
81
|
-
lru_actor.stop
|
|
82
|
-
end
|
|
83
|
-
end
|
|
84
|
-
end
|
|
85
|
-
end
|
|
@@ -1,264 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
# Orchestrates three trust mechanisms in a single pipeline:
|
|
5
|
-
#
|
|
6
|
-
# 1. **Citation Tracking** — the DraftAgent is prompted to list the knowledge
|
|
7
|
-
# sources it relied on. Citations are extracted and attached to the result.
|
|
8
|
-
#
|
|
9
|
-
# 2. **Self-Review Loop** — a dedicated ReviewAgent evaluates each draft,
|
|
10
|
-
# assigns a quality score, and provides actionable feedback. Rejected drafts
|
|
11
|
-
# are retried with the reviewer's feedback embedded in the next prompt.
|
|
12
|
-
#
|
|
13
|
-
# 3. **Confidence Gate** — a combined confidence score (the minimum of the
|
|
14
|
-
# DraftAgent's self-reported confidence and the ReviewAgent's score) is
|
|
15
|
-
# compared against a threshold. The pipeline finishes early when the gate
|
|
16
|
-
# passes; after +max_iterations+ cycles it finishes regardless and marks
|
|
17
|
-
# the result as untrusted when the threshold was not reached.
|
|
18
|
-
#
|
|
19
|
-
# @example
|
|
20
|
-
# pipeline = Phronomy::TrustPipeline.new(
|
|
21
|
-
# draft_agent: PolicyDraftAgent,
|
|
22
|
-
# review_agent: PolicyReviewAgent,
|
|
23
|
-
# confidence_threshold: 0.7,
|
|
24
|
-
# max_iterations: 3
|
|
25
|
-
# )
|
|
26
|
-
# result = pipeline.invoke("What is the refund policy?")
|
|
27
|
-
# puts result.output # the final answer string
|
|
28
|
-
# puts result.trusted? # true when confidence >= threshold
|
|
29
|
-
# result.citations.each { |c| puts "#{c[:source]}: #{c[:excerpt]}" }
|
|
30
|
-
class TrustPipeline
|
|
31
|
-
# Default confidence threshold for trusting an answer.
|
|
32
|
-
DEFAULT_CONFIDENCE_THRESHOLD = 0.7
|
|
33
|
-
|
|
34
|
-
# Default maximum draft-review cycles before returning best effort.
|
|
35
|
-
DEFAULT_MAX_ITERATIONS = 3
|
|
36
|
-
|
|
37
|
-
# Immutable value object returned by {TrustPipeline#invoke}.
|
|
38
|
-
#
|
|
39
|
-
# @!attribute [r] output
|
|
40
|
-
# @return [String] the final answer text
|
|
41
|
-
# @!attribute [r] confidence
|
|
42
|
-
# @return [Float] combined confidence score (0.0–1.0)
|
|
43
|
-
# @!attribute [r] citations
|
|
44
|
-
# @return [Array<Hash>] [{source:, excerpt:}, ...]
|
|
45
|
-
#
|
|
46
|
-
# **WARNING**: These citations are extracted from the LLM's own response via
|
|
47
|
-
# the ReviewAgent and are **not** verified against any external knowledge base,
|
|
48
|
-
# document store, or URL. Do not treat them as authoritative without
|
|
49
|
-
# independent verification.
|
|
50
|
-
# @!attribute [r] iterations
|
|
51
|
-
# @return [Integer] number of draft-review cycles executed
|
|
52
|
-
# @!attribute [r] review_notes
|
|
53
|
-
# @return [Array<String>] reviewer feedback for each cycle
|
|
54
|
-
# @!attribute [r] trusted
|
|
55
|
-
# @return [Boolean] true when confidence >= threshold
|
|
56
|
-
Result = Struct.new(:output, :confidence, :citations, :iterations, :review_notes, :trusted, keyword_init: true) do
|
|
57
|
-
# @return [Boolean] true when confidence >= threshold
|
|
58
|
-
alias_method :trusted?, :trusted
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# Internal graph state — not part of the public API.
|
|
62
|
-
# @private
|
|
63
|
-
class PipelineState
|
|
64
|
-
include Phronomy::WorkflowContext
|
|
65
|
-
|
|
66
|
-
field :input, type: :replace, default: -> { "" }
|
|
67
|
-
field :draft, type: :replace, default: -> {}
|
|
68
|
-
field :self_score, type: :replace, default: -> { 0.0 }
|
|
69
|
-
field :review_score, type: :replace, default: -> { 0.0 }
|
|
70
|
-
field :citations, type: :replace, default: -> { [] }
|
|
71
|
-
field :review_notes, type: :append, default: -> { [] }
|
|
72
|
-
field :iteration, type: :replace, default: -> { 0 }
|
|
73
|
-
field :approved, type: :replace, default: -> { false }
|
|
74
|
-
field :output, type: :replace, default: -> {}
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
private_constant :PipelineState
|
|
78
|
-
|
|
79
|
-
# @param draft_agent [Class] subclass of Phronomy::Agent::Base
|
|
80
|
-
# @param review_agent [Class] subclass of Phronomy::Agent::Base
|
|
81
|
-
# @param confidence_threshold [Float] answers below this are retried (default: 0.7)
|
|
82
|
-
# @param max_iterations [Integer] maximum draft-review cycles (default: 3)
|
|
83
|
-
# @param input_delimiter [Array<String>, nil] optional two-element array
|
|
84
|
-
# [start_tag, end_tag] used to wrap user input in prompts, e.g.
|
|
85
|
-
# ["<user_input>", "</user_input>"] or
|
|
86
|
-
# ["=== user input start ===", "=== user input end ==="].
|
|
87
|
-
# When nil (default), input is embedded as-is for backward compatibility.
|
|
88
|
-
def initialize(draft_agent:, review_agent:,
|
|
89
|
-
confidence_threshold: DEFAULT_CONFIDENCE_THRESHOLD,
|
|
90
|
-
max_iterations: DEFAULT_MAX_ITERATIONS,
|
|
91
|
-
input_delimiter: nil)
|
|
92
|
-
@draft_agent_class = draft_agent
|
|
93
|
-
@review_agent_class = review_agent
|
|
94
|
-
@threshold = confidence_threshold.to_f
|
|
95
|
-
@max_iterations = max_iterations.to_i
|
|
96
|
-
@input_delimiter = input_delimiter
|
|
97
|
-
@actor = Phronomy::Actor.new
|
|
98
|
-
@compiled_graph = nil
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
# Run the pipeline.
|
|
102
|
-
#
|
|
103
|
-
# @param input [String] the user question or task description
|
|
104
|
-
# @param config [Hash] forwarded to the underlying agents (e.g. thread_id)
|
|
105
|
-
# @return [Result]
|
|
106
|
-
def invoke(input, config: {})
|
|
107
|
-
app = compiled_graph
|
|
108
|
-
state = app.invoke({input: input}, config: config)
|
|
109
|
-
confidence = combined_confidence(state)
|
|
110
|
-
Result.new(
|
|
111
|
-
output: state.output || state.draft.to_s,
|
|
112
|
-
confidence: confidence,
|
|
113
|
-
citations: state.citations,
|
|
114
|
-
iterations: state.iteration,
|
|
115
|
-
review_notes: state.review_notes,
|
|
116
|
-
trusted: confidence >= @threshold
|
|
117
|
-
)
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
private
|
|
121
|
-
|
|
122
|
-
def combined_confidence(state)
|
|
123
|
-
[(state.self_score || 0.0).to_f, (state.review_score || 0.0).to_f].min
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
# Returns the compiled workflow, building and caching it on first call.
|
|
127
|
-
def compiled_graph
|
|
128
|
-
@actor.call { @compiled_graph ||= build_workflow }
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
def build_workflow
|
|
132
|
-
draft_agent = @draft_agent_class.new
|
|
133
|
-
review_agent = @review_agent_class.new
|
|
134
|
-
threshold = @threshold
|
|
135
|
-
max_iter = @max_iterations
|
|
136
|
-
pipeline = self
|
|
137
|
-
|
|
138
|
-
Phronomy::Workflow.define(PipelineState) do
|
|
139
|
-
initial :draft
|
|
140
|
-
|
|
141
|
-
state :draft, action: ->(state) {
|
|
142
|
-
feedback = state.review_notes.last
|
|
143
|
-
prompt = pipeline.__send__(:draft_prompt, state.input, feedback)
|
|
144
|
-
result = draft_agent.invoke(prompt)
|
|
145
|
-
parsed = pipeline.__send__(:safe_parse_draft, result[:output])
|
|
146
|
-
state.merge(
|
|
147
|
-
draft: parsed[:answer].to_s,
|
|
148
|
-
self_score: pipeline.__send__(:clamp, parsed[:confidence]),
|
|
149
|
-
citations: pipeline.__send__(:normalize_citations, parsed[:citations]),
|
|
150
|
-
iteration: state.iteration + 1
|
|
151
|
-
)
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
state :review, action: ->(state) {
|
|
155
|
-
prompt = pipeline.__send__(:review_prompt, state.input, state.draft, state.citations)
|
|
156
|
-
result = review_agent.invoke(prompt)
|
|
157
|
-
parsed = pipeline.__send__(:safe_parse_review, result[:output])
|
|
158
|
-
state.merge(
|
|
159
|
-
review_score: pipeline.__send__(:clamp, parsed[:score]),
|
|
160
|
-
approved: parsed[:approved] == true,
|
|
161
|
-
review_notes: parsed[:feedback].to_s
|
|
162
|
-
)
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
state :finalize, action: ->(state) { state.merge(output: state.draft) }
|
|
166
|
-
|
|
167
|
-
after :draft, to: :review
|
|
168
|
-
after :finalize, to: :__finish__
|
|
169
|
-
|
|
170
|
-
event :route_review, from: :review,
|
|
171
|
-
guard: ->(state) {
|
|
172
|
-
confidence = [state.self_score || 0.0, state.review_score || 0.0].min
|
|
173
|
-
(confidence >= threshold && state.approved) || state.iteration >= max_iter
|
|
174
|
-
},
|
|
175
|
-
to: :finalize
|
|
176
|
-
event :route_review, from: :review, to: :draft
|
|
177
|
-
end
|
|
178
|
-
end
|
|
179
|
-
|
|
180
|
-
# Wraps +input+ with the configured delimiter pair when +input_delimiter+ is set.
|
|
181
|
-
# When no delimiter is configured the input is returned unchanged.
|
|
182
|
-
def wrap_input(input)
|
|
183
|
-
return input unless @input_delimiter
|
|
184
|
-
|
|
185
|
-
start_tag, end_tag = @input_delimiter
|
|
186
|
-
"#{start_tag}\n#{input}\n#{end_tag}"
|
|
187
|
-
end
|
|
188
|
-
|
|
189
|
-
# Builds the prompt sent to the DraftAgent for each iteration.
|
|
190
|
-
def draft_prompt(input, feedback)
|
|
191
|
-
lines = [
|
|
192
|
-
"Answer the following question as accurately as possible.",
|
|
193
|
-
"Use any knowledge provided in <context> tags and cite your sources."
|
|
194
|
-
]
|
|
195
|
-
if feedback && !feedback.strip.empty?
|
|
196
|
-
lines << ""
|
|
197
|
-
lines << "Your previous draft was reviewed and rejected. Address ALL of this feedback:"
|
|
198
|
-
lines << feedback.strip
|
|
199
|
-
end
|
|
200
|
-
lines += [
|
|
201
|
-
"",
|
|
202
|
-
"Question: #{wrap_input(input)}",
|
|
203
|
-
"",
|
|
204
|
-
"RESPOND ONLY WITH VALID JSON (no text outside the JSON block):",
|
|
205
|
-
'{"answer":"<full answer>","confidence":<0.0-1.0>,' \
|
|
206
|
-
'"citations":[{"source":"<doc name>","excerpt":"<exact quote>"}]}'
|
|
207
|
-
]
|
|
208
|
-
lines.join("\n")
|
|
209
|
-
end
|
|
210
|
-
|
|
211
|
-
# Builds the prompt sent to the ReviewAgent.
|
|
212
|
-
def review_prompt(input, draft, citations)
|
|
213
|
-
citation_text = if citations.empty?
|
|
214
|
-
" (none)"
|
|
215
|
-
else
|
|
216
|
-
citations.map { |c| " - #{c[:source]}: \"#{c[:excerpt]}\"" }.join("\n")
|
|
217
|
-
end
|
|
218
|
-
[
|
|
219
|
-
"You are a rigorous quality reviewer. Evaluate the draft answer below.",
|
|
220
|
-
"",
|
|
221
|
-
"Question: #{wrap_input(input)}",
|
|
222
|
-
"",
|
|
223
|
-
"Draft answer:",
|
|
224
|
-
draft.to_s,
|
|
225
|
-
"",
|
|
226
|
-
"Citations provided:",
|
|
227
|
-
citation_text,
|
|
228
|
-
"",
|
|
229
|
-
"Evaluation criteria:",
|
|
230
|
-
" 1. Is the answer factually accurate and complete?",
|
|
231
|
-
" 2. Is every significant claim backed by a citation?",
|
|
232
|
-
" 3. Is the self-reported confidence realistic?",
|
|
233
|
-
"",
|
|
234
|
-
"RESPOND ONLY WITH VALID JSON (no text outside the JSON block):",
|
|
235
|
-
'{"approved":<true|false>,"score":<0.0-1.0>,' \
|
|
236
|
-
'"feedback":"<specific actionable feedback, or empty string if approved>"}'
|
|
237
|
-
].join("\n")
|
|
238
|
-
end
|
|
239
|
-
|
|
240
|
-
def safe_parse_draft(text)
|
|
241
|
-
json_parser.parse(text)
|
|
242
|
-
rescue Phronomy::ParseError
|
|
243
|
-
{answer: text.to_s, confidence: 0.0, citations: []}
|
|
244
|
-
end
|
|
245
|
-
|
|
246
|
-
def safe_parse_review(text)
|
|
247
|
-
json_parser.parse(text)
|
|
248
|
-
rescue Phronomy::ParseError
|
|
249
|
-
{approved: false, score: 0.0, feedback: "Review output could not be parsed: #{text}"}
|
|
250
|
-
end
|
|
251
|
-
|
|
252
|
-
def json_parser
|
|
253
|
-
@json_parser ||= Phronomy::OutputParser::JsonParser.new
|
|
254
|
-
end
|
|
255
|
-
|
|
256
|
-
def clamp(val)
|
|
257
|
-
val.to_f.clamp(0.0, 1.0)
|
|
258
|
-
end
|
|
259
|
-
|
|
260
|
-
def normalize_citations(raw)
|
|
261
|
-
Array(raw).filter_map { |c| c.is_a?(Hash) ? c.transform_keys(&:to_sym) : nil }
|
|
262
|
-
end
|
|
263
|
-
end
|
|
264
|
-
end
|