phronomy 0.2.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -30
- data/README.md +106 -122
- data/lib/phronomy/agent/base.rb +135 -57
- data/lib/phronomy/agent/checkpoint.rb +53 -0
- data/lib/phronomy/agent/orchestrator.rb +119 -0
- data/lib/phronomy/agent/react_agent.rb +18 -28
- data/lib/phronomy/agent/shared_state.rb +303 -0
- data/lib/phronomy/agent/suspend_signal.rb +35 -0
- data/lib/phronomy/agent/team_coordinator.rb +285 -0
- data/lib/phronomy/agent.rb +2 -1
- data/lib/phronomy/configuration.rb +0 -24
- data/lib/phronomy/generator_verifier.rb +250 -0
- data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +10 -27
- data/lib/phronomy/railtie.rb +0 -6
- data/lib/phronomy/ruby_llm_patches.rb +20 -0
- data/lib/phronomy/tool/mcp_tool.rb +23 -26
- data/lib/phronomy/tracing/langfuse_tracer.rb +3 -6
- data/lib/phronomy/vector_store/redis_search.rb +4 -4
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow.rb +4 -7
- data/lib/phronomy/workflow_runner.rb +42 -30
- data/lib/phronomy.rb +18 -0
- data/scripts/check_readme_ruby.rb +38 -0
- metadata +12 -38
- data/docs/trustworthy_ai_enhancements.md +0 -332
- data/lib/phronomy/active_record/acts_as.rb +0 -48
- data/lib/phronomy/active_record/checkpoint.rb +0 -20
- data/lib/phronomy/active_record/extensions.rb +0 -14
- data/lib/phronomy/active_record/message.rb +0 -20
- data/lib/phronomy/actor.rb +0 -68
- data/lib/phronomy/memory/compression/base.rb +0 -37
- data/lib/phronomy/memory/compression/summary.rb +0 -107
- data/lib/phronomy/memory/compression/tool_output_pruner.rb +0 -67
- data/lib/phronomy/memory/compression.rb +0 -11
- data/lib/phronomy/memory/conversation_manager.rb +0 -213
- data/lib/phronomy/memory/retrieval/base.rb +0 -22
- data/lib/phronomy/memory/retrieval/composite.rb +0 -76
- data/lib/phronomy/memory/retrieval/recent.rb +0 -35
- data/lib/phronomy/memory/retrieval/semantic.rb +0 -114
- data/lib/phronomy/memory/retrieval.rb +0 -12
- data/lib/phronomy/memory/storage/active_record.rb +0 -248
- data/lib/phronomy/memory/storage/base.rb +0 -155
- data/lib/phronomy/memory/storage/in_memory.rb +0 -152
- data/lib/phronomy/memory/storage.rb +0 -11
- data/lib/phronomy/memory.rb +0 -21
- data/lib/phronomy/rails/agent_job.rb +0 -75
- data/lib/phronomy/state_store/active_record.rb +0 -76
- data/lib/phronomy/state_store/base.rb +0 -112
- data/lib/phronomy/state_store/encryptor/active_support.rb +0 -49
- data/lib/phronomy/state_store/encryptor/base.rb +0 -34
- data/lib/phronomy/state_store/encryptor.rb +0 -16
- data/lib/phronomy/state_store/file.rb +0 -85
- data/lib/phronomy/state_store/in_memory.rb +0 -53
- data/lib/phronomy/state_store/redis.rb +0 -70
- data/lib/phronomy/state_store.rb +0 -9
- data/lib/phronomy/thread_actor_registry.rb +0 -85
- data/lib/phronomy/trust_pipeline.rb +0 -264
|
@@ -38,7 +38,7 @@ module Phronomy
|
|
|
38
38
|
|
|
39
39
|
def initialize(state_class:, nodes:, after_transitions:, route_transitions:,
|
|
40
40
|
external_events:, entry_point:, wait_state_names: [],
|
|
41
|
-
before_callbacks: {}, after_callbacks: {}, state_store: nil)
|
|
41
|
+
before_callbacks: {}, after_callbacks: {})
|
|
42
42
|
@state_class = state_class
|
|
43
43
|
@nodes = nodes
|
|
44
44
|
@after_transitions = after_transitions # { from => to }
|
|
@@ -48,7 +48,6 @@ module Phronomy
|
|
|
48
48
|
@wait_state_names = wait_state_names
|
|
49
49
|
@before_callbacks = before_callbacks.dup
|
|
50
50
|
@after_callbacks = after_callbacks.dup
|
|
51
|
-
@state_store_override = state_store
|
|
52
51
|
@phase_machine_class = build_phase_machine_class
|
|
53
52
|
end
|
|
54
53
|
|
|
@@ -134,29 +133,46 @@ module Phronomy
|
|
|
134
133
|
|
|
135
134
|
private
|
|
136
135
|
|
|
137
|
-
def state_store
|
|
138
|
-
@state_store_override || Phronomy.configuration.default_state_store
|
|
139
|
-
end
|
|
140
|
-
|
|
141
136
|
def run_graph(state, from_node: nil, recursion_limit: 25, &event_block)
|
|
142
137
|
current_node = from_node || @entry_point
|
|
143
138
|
tracker = new_phase_machine(current_node)
|
|
144
139
|
tracker.context = state
|
|
140
|
+
# Event queue: decouple node execution from transition firing.
|
|
141
|
+
# Events are enqueued after a node completes and processed at the top
|
|
142
|
+
# of the next iteration so that guards always see the freshest context.
|
|
143
|
+
event_queue = []
|
|
145
144
|
step = 0
|
|
146
145
|
|
|
147
|
-
|
|
148
|
-
if
|
|
149
|
-
|
|
150
|
-
|
|
146
|
+
loop do
|
|
147
|
+
break if current_node == FINISH
|
|
148
|
+
|
|
149
|
+
# -- Process next pending event -----------------------------------------
|
|
150
|
+
# Dequeue one event and fire it against the state machine. Guards are
|
|
151
|
+
# evaluated here (at fire time) so they see the context written by the
|
|
152
|
+
# node that enqueued the event.
|
|
153
|
+
if (event = event_queue.shift)
|
|
154
|
+
if step >= recursion_limit
|
|
155
|
+
raise Phronomy::RecursionLimitError,
|
|
156
|
+
"Recursion limit (#{recursion_limit}) exceeded"
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
fire_event!(tracker, event, current_node)
|
|
160
|
+
next_phase = tracker.phase.to_sym
|
|
161
|
+
# When next_phase == current_node no transition matched → terminal node.
|
|
162
|
+
current_node = (next_phase == current_node) ? FINISH : next_phase
|
|
163
|
+
step += 1
|
|
164
|
+
next
|
|
151
165
|
end
|
|
152
166
|
|
|
153
|
-
#
|
|
167
|
+
# -- Queue empty: check for halt -----------------------------------------
|
|
168
|
+
# Auto-halt at wait states: persist phase in context and return to caller.
|
|
169
|
+
# The caller resumes via send_event, which starts a fresh run_graph call.
|
|
154
170
|
if @wait_state_names.include?(current_node)
|
|
155
171
|
state.set_graph_metadata(thread_id: state.thread_id, phase: current_node)
|
|
156
|
-
state_store&.save(state)
|
|
157
172
|
return state
|
|
158
173
|
end
|
|
159
174
|
|
|
175
|
+
# -- Execute node action ------------------------------------------------
|
|
160
176
|
node_fn = @nodes[current_node]
|
|
161
177
|
raise ArgumentError, "Node #{current_node.inspect} is not defined" unless node_fn
|
|
162
178
|
|
|
@@ -171,31 +187,25 @@ module Phronomy
|
|
|
171
187
|
"expected Hash, #{@state_class}, or nil"
|
|
172
188
|
end
|
|
173
189
|
|
|
174
|
-
# Update tracker so guards see the freshest context.
|
|
190
|
+
# Update tracker so guards see the freshest context when the event fires.
|
|
175
191
|
tracker.context = state
|
|
176
192
|
|
|
177
193
|
event_block&.call({node: current_node, state: state})
|
|
178
194
|
|
|
179
|
-
#
|
|
195
|
+
# -- Enqueue transition event -------------------------------------------
|
|
196
|
+
# node_completed: generic event for all after-transitions (unconditional).
|
|
197
|
+
# route event: user-named event carrying guarded conditional branches.
|
|
198
|
+
# No enqueue: terminal node — next iteration exits via FINISH check.
|
|
180
199
|
if @after_transitions.key?(current_node)
|
|
181
|
-
|
|
200
|
+
event_queue << :node_completed
|
|
182
201
|
elsif @route_transitions.key?(current_node)
|
|
183
|
-
|
|
184
|
-
|
|
202
|
+
event_queue << @route_transitions[current_node][:event_name]
|
|
203
|
+
else
|
|
204
|
+
current_node = FINISH
|
|
185
205
|
end
|
|
186
|
-
# Nodes with no declared outgoing transition are treated as terminal:
|
|
187
|
-
# next_phase == current_node triggers the FINISH assignment below.
|
|
188
|
-
|
|
189
|
-
next_phase = tracker.phase.to_sym
|
|
190
|
-
# When next_phase == current_node: no transition fired (terminal node) → end.
|
|
191
|
-
# When next_phase == :__end__ (== FINISH): route led to finish → exit loop.
|
|
192
|
-
current_node = (next_phase == current_node) ? FINISH : next_phase
|
|
193
|
-
|
|
194
|
-
step += 1
|
|
195
206
|
end
|
|
196
207
|
|
|
197
208
|
state.set_graph_metadata(thread_id: state.thread_id, phase: :__end__)
|
|
198
|
-
state_store&.save(state)
|
|
199
209
|
state
|
|
200
210
|
end
|
|
201
211
|
|
|
@@ -232,9 +242,11 @@ module Phronomy
|
|
|
232
242
|
state_machine :phase, initial: entry do
|
|
233
243
|
all_states.each { |s| state s }
|
|
234
244
|
|
|
235
|
-
# 1. After-transitions:
|
|
236
|
-
|
|
237
|
-
|
|
245
|
+
# 1. After-transitions: one generic :node_completed event covers all
|
|
246
|
+
# unconditional transitions. This keeps event names independent of
|
|
247
|
+
# source state names and matches standard state machine semantics.
|
|
248
|
+
event :node_completed do
|
|
249
|
+
after_trans.each do |from, to|
|
|
238
250
|
transition from => to
|
|
239
251
|
end
|
|
240
252
|
end
|
data/lib/phronomy.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require "zeitwerk"
|
|
4
4
|
require "ruby_llm"
|
|
5
|
+
require_relative "phronomy/ruby_llm_patches"
|
|
5
6
|
|
|
6
7
|
loader = Zeitwerk::Loader.for_gem
|
|
7
8
|
loader.ignore(File.expand_path("generators", __dir__))
|
|
@@ -26,6 +27,23 @@ module Phronomy
|
|
|
26
27
|
|
|
27
28
|
class HandoffError < Error; end
|
|
28
29
|
|
|
30
|
+
# Raised by {Phronomy::GeneratorVerifier#invoke} when +raise_if_untrusted: true+
|
|
31
|
+
# and the pipeline's combined confidence score falls below the configured threshold.
|
|
32
|
+
#
|
|
33
|
+
# @example
|
|
34
|
+
# rescue Phronomy::LowConfidenceError => e
|
|
35
|
+
# puts e.result.confidence # => e.g. 0.45
|
|
36
|
+
# puts e.result.output # best-effort answer despite low confidence
|
|
37
|
+
class LowConfidenceError < Error
|
|
38
|
+
# @return [Phronomy::GeneratorVerifier::Result] the untrusted result
|
|
39
|
+
attr_reader :result
|
|
40
|
+
|
|
41
|
+
def initialize(result)
|
|
42
|
+
@result = result
|
|
43
|
+
super("Answer confidence #{result.confidence} is below the required threshold")
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
29
47
|
class GuardrailError < Error
|
|
30
48
|
attr_reader :guardrail
|
|
31
49
|
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Extracts every ```ruby ... ``` block from README.md and runs `ruby -c` on each.
|
|
4
|
+
# Exits non-zero if any block has a syntax error.
|
|
5
|
+
|
|
6
|
+
require "tempfile"
|
|
7
|
+
require "open3"
|
|
8
|
+
|
|
9
|
+
readme_path = File.expand_path("../README.md", __dir__)
|
|
10
|
+
readme = File.read(readme_path)
|
|
11
|
+
blocks = readme.scan(/^```ruby\n(.*?)^```/m).map.with_index(1) { |(code), i| [i, code] }
|
|
12
|
+
|
|
13
|
+
puts "Checking #{blocks.size} Ruby code blocks in README.md..."
|
|
14
|
+
|
|
15
|
+
failures = []
|
|
16
|
+
|
|
17
|
+
blocks.each do |index, code|
|
|
18
|
+
Tempfile.create(["readme_block_#{index}", ".rb"]) do |f|
|
|
19
|
+
f.write(code)
|
|
20
|
+
f.flush
|
|
21
|
+
stdout, status = Open3.capture2e("ruby", "-c", f.path)
|
|
22
|
+
if status.success?
|
|
23
|
+
puts " OK block ##{index}"
|
|
24
|
+
else
|
|
25
|
+
failures << index
|
|
26
|
+
puts " FAIL block ##{index}"
|
|
27
|
+
puts stdout.gsub(f.path, "block ##{index}")
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
if failures.empty?
|
|
33
|
+
puts "All #{blocks.size} Ruby code blocks passed syntax check."
|
|
34
|
+
exit 0
|
|
35
|
+
else
|
|
36
|
+
puts "\n#{failures.size} block(s) failed syntax check: #{failures.join(", ")}"
|
|
37
|
+
exit 1
|
|
38
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: phronomy
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.2
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Raizo T.C.S
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: ruby_llm
|
|
@@ -52,9 +52,8 @@ dependencies:
|
|
|
52
52
|
- - "~>"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '0.6'
|
|
55
|
-
description: Phronomy provides Agent, Workflow,
|
|
56
|
-
|
|
57
|
-
for LLM abstraction.
|
|
55
|
+
description: Phronomy provides Agent, Workflow, Tool, Guardrail, RAG, and Multi-agent
|
|
56
|
+
capabilities for building AI agents in Ruby. Powered by RubyLLM for LLM abstraction.
|
|
58
57
|
email:
|
|
59
58
|
- raizo.tcs@gmail.com
|
|
60
59
|
executables: []
|
|
@@ -65,23 +64,22 @@ files:
|
|
|
65
64
|
- CHANGELOG.md
|
|
66
65
|
- README.md
|
|
67
66
|
- Rakefile
|
|
68
|
-
- docs/trustworthy_ai_enhancements.md
|
|
69
67
|
- lib/generators/phronomy/install/install_generator.rb
|
|
70
68
|
- lib/generators/phronomy/install/templates/create_phronomy_messages.rb.tt
|
|
71
69
|
- lib/generators/phronomy/install/templates/initializer.rb.tt
|
|
72
70
|
- lib/generators/phronomy/install/templates/message_model.rb.tt
|
|
73
71
|
- lib/phronomy.rb
|
|
74
|
-
- lib/phronomy/active_record/acts_as.rb
|
|
75
|
-
- lib/phronomy/active_record/checkpoint.rb
|
|
76
|
-
- lib/phronomy/active_record/extensions.rb
|
|
77
|
-
- lib/phronomy/active_record/message.rb
|
|
78
|
-
- lib/phronomy/actor.rb
|
|
79
72
|
- lib/phronomy/agent.rb
|
|
80
73
|
- lib/phronomy/agent/base.rb
|
|
81
74
|
- lib/phronomy/agent/before_completion_context.rb
|
|
75
|
+
- lib/phronomy/agent/checkpoint.rb
|
|
82
76
|
- lib/phronomy/agent/handoff.rb
|
|
77
|
+
- lib/phronomy/agent/orchestrator.rb
|
|
83
78
|
- lib/phronomy/agent/react_agent.rb
|
|
84
79
|
- lib/phronomy/agent/runner.rb
|
|
80
|
+
- lib/phronomy/agent/shared_state.rb
|
|
81
|
+
- lib/phronomy/agent/suspend_signal.rb
|
|
82
|
+
- lib/phronomy/agent/team_coordinator.rb
|
|
85
83
|
- lib/phronomy/configuration.rb
|
|
86
84
|
- lib/phronomy/context.rb
|
|
87
85
|
- lib/phronomy/context/assembler.rb
|
|
@@ -107,6 +105,7 @@ files:
|
|
|
107
105
|
- lib/phronomy/eval/scorer/exact_match.rb
|
|
108
106
|
- lib/phronomy/eval/scorer/includes_scorer.rb
|
|
109
107
|
- lib/phronomy/eval/scorer/llm_judge.rb
|
|
108
|
+
- lib/phronomy/generator_verifier.rb
|
|
110
109
|
- lib/phronomy/guardrail.rb
|
|
111
110
|
- lib/phronomy/guardrail/base.rb
|
|
112
111
|
- lib/phronomy/guardrail/builtin.rb
|
|
@@ -124,43 +123,18 @@ files:
|
|
|
124
123
|
- lib/phronomy/loader/csv_loader.rb
|
|
125
124
|
- lib/phronomy/loader/markdown_loader.rb
|
|
126
125
|
- lib/phronomy/loader/plain_text_loader.rb
|
|
127
|
-
- lib/phronomy/memory.rb
|
|
128
|
-
- lib/phronomy/memory/compression.rb
|
|
129
|
-
- lib/phronomy/memory/compression/base.rb
|
|
130
|
-
- lib/phronomy/memory/compression/summary.rb
|
|
131
|
-
- lib/phronomy/memory/compression/tool_output_pruner.rb
|
|
132
|
-
- lib/phronomy/memory/conversation_manager.rb
|
|
133
|
-
- lib/phronomy/memory/retrieval.rb
|
|
134
|
-
- lib/phronomy/memory/retrieval/base.rb
|
|
135
|
-
- lib/phronomy/memory/retrieval/composite.rb
|
|
136
|
-
- lib/phronomy/memory/retrieval/recent.rb
|
|
137
|
-
- lib/phronomy/memory/retrieval/semantic.rb
|
|
138
|
-
- lib/phronomy/memory/storage.rb
|
|
139
|
-
- lib/phronomy/memory/storage/active_record.rb
|
|
140
|
-
- lib/phronomy/memory/storage/base.rb
|
|
141
|
-
- lib/phronomy/memory/storage/in_memory.rb
|
|
142
126
|
- lib/phronomy/output_parser.rb
|
|
143
127
|
- lib/phronomy/output_parser/base.rb
|
|
144
128
|
- lib/phronomy/output_parser/json_parser.rb
|
|
145
129
|
- lib/phronomy/output_parser/structured_parser.rb
|
|
146
130
|
- lib/phronomy/prompt_template.rb
|
|
147
|
-
- lib/phronomy/rails/agent_job.rb
|
|
148
131
|
- lib/phronomy/railtie.rb
|
|
132
|
+
- lib/phronomy/ruby_llm_patches.rb
|
|
149
133
|
- lib/phronomy/runnable.rb
|
|
150
134
|
- lib/phronomy/splitter.rb
|
|
151
135
|
- lib/phronomy/splitter/base.rb
|
|
152
136
|
- lib/phronomy/splitter/fixed_size_splitter.rb
|
|
153
137
|
- lib/phronomy/splitter/recursive_splitter.rb
|
|
154
|
-
- lib/phronomy/state_store.rb
|
|
155
|
-
- lib/phronomy/state_store/active_record.rb
|
|
156
|
-
- lib/phronomy/state_store/base.rb
|
|
157
|
-
- lib/phronomy/state_store/encryptor.rb
|
|
158
|
-
- lib/phronomy/state_store/encryptor/active_support.rb
|
|
159
|
-
- lib/phronomy/state_store/encryptor/base.rb
|
|
160
|
-
- lib/phronomy/state_store/file.rb
|
|
161
|
-
- lib/phronomy/state_store/in_memory.rb
|
|
162
|
-
- lib/phronomy/state_store/redis.rb
|
|
163
|
-
- lib/phronomy/thread_actor_registry.rb
|
|
164
138
|
- lib/phronomy/token_usage.rb
|
|
165
139
|
- lib/phronomy/tool.rb
|
|
166
140
|
- lib/phronomy/tool/agent_tool.rb
|
|
@@ -171,7 +145,6 @@ files:
|
|
|
171
145
|
- lib/phronomy/tracing/langfuse_tracer.rb
|
|
172
146
|
- lib/phronomy/tracing/null_tracer.rb
|
|
173
147
|
- lib/phronomy/tracing/open_telemetry_tracer.rb
|
|
174
|
-
- lib/phronomy/trust_pipeline.rb
|
|
175
148
|
- lib/phronomy/vector_store.rb
|
|
176
149
|
- lib/phronomy/vector_store/base.rb
|
|
177
150
|
- lib/phronomy/vector_store/in_memory.rb
|
|
@@ -181,6 +154,7 @@ files:
|
|
|
181
154
|
- lib/phronomy/workflow.rb
|
|
182
155
|
- lib/phronomy/workflow_context.rb
|
|
183
156
|
- lib/phronomy/workflow_runner.rb
|
|
157
|
+
- scripts/check_readme_ruby.rb
|
|
184
158
|
- sig/phronomy.rbs
|
|
185
159
|
homepage: https://github.com/Raizo-TCS/phronomy
|
|
186
160
|
licenses:
|
|
@@ -1,332 +0,0 @@
|
|
|
1
|
-
# Trustworthy AI Enhancements
|
|
2
|
-
|
|
3
|
-
Specification for features that address the NIST AI Risk Management Framework (AI RMF 1.0)
|
|
4
|
-
trustworthiness characteristics, as applied to the phronomy gem.
|
|
5
|
-
|
|
6
|
-
Reference: NIST AI 100-1 — https://doi.org/10.6028/NIST.AI.100-1
|
|
7
|
-
Japanese translation: https://aisi.go.jp/assets/pdf/NIST_AI_RMF_jp_20240806.pdf
|
|
8
|
-
|
|
9
|
-
---
|
|
10
|
-
|
|
11
|
-
## Responsibility Model
|
|
12
|
-
|
|
13
|
-
Three layers share responsibility for trustworthy AI:
|
|
14
|
-
|
|
15
|
-
```
|
|
16
|
-
┌─────────────────────────────────────────┐
|
|
17
|
-
│ Application Domain logic / UX │
|
|
18
|
-
├─────────────────────────────────────────┤
|
|
19
|
-
│ phronomy Control flow / observation / boundary enforcement │
|
|
20
|
-
├─────────────────────────────────────────┤
|
|
21
|
-
│ LLM Probabilistic reasoning / generation │
|
|
22
|
-
└─────────────────────────────────────────┘
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
Key principle: **the LLM is untrusted**. phronomy acts as the deterministic control
|
|
26
|
-
layer that validates, constrains, and observes LLM behaviour. Characteristics that
|
|
27
|
-
cannot be delegated to the LLM must be enforced by phronomy or the application layer.
|
|
28
|
-
|
|
29
|
-
---
|
|
30
|
-
|
|
31
|
-
## Trustworthiness Characteristics — Status and Plan
|
|
32
|
-
|
|
33
|
-
### 3.1 Valid and Reliable
|
|
34
|
-
|
|
35
|
-
| Layer | Responsibility | Status |
|
|
36
|
-
|---|---|---|
|
|
37
|
-
| LLM | Base reasoning capability | Model-dependent |
|
|
38
|
-
| **phronomy** | Eval infrastructure, output type validation | ✅ `Eval::Runner`, `Eval::Dataset`, `Eval::Metrics` — see `lib/phronomy/eval/` |
|
|
39
|
-
| **phronomy** | Drift / accuracy monitoring hooks | ❌ Not implemented — **planned** |
|
|
40
|
-
| Application | Test-case design, accuracy thresholds | Application responsibility |
|
|
41
|
-
|
|
42
|
-
**Planned work:**
|
|
43
|
-
- None in this iteration. `Eval` infrastructure is sufficient for current needs.
|
|
44
|
-
|
|
45
|
-
---
|
|
46
|
-
|
|
47
|
-
### 3.2 Safe
|
|
48
|
-
|
|
49
|
-
| Layer | Responsibility | Status |
|
|
50
|
-
|---|---|---|
|
|
51
|
-
| LLM | Basic harmful-content avoidance (RLHF) | Model-dependent, not guaranteed |
|
|
52
|
-
| **phronomy** | Intervention points, iteration limits, approval gates | ✅ `wait_state`/`send_event`, `requires_approval`, `max_iterations` — see `lib/phronomy/workflow.rb`, `lib/phronomy/agent/base.rb` |
|
|
53
|
-
| **phronomy** | Built-in guardrails (PII, prompt injection) | ❌ Not implemented — **planned (Feature A)** |
|
|
54
|
-
| Application | Concrete guardrail logic, approval workflows | Application responsibility |
|
|
55
|
-
|
|
56
|
-
---
|
|
57
|
-
|
|
58
|
-
### 3.3 Secure and Resilient
|
|
59
|
-
|
|
60
|
-
| Layer | Responsibility | Status |
|
|
61
|
-
|---|---|---|
|
|
62
|
-
| LLM | Partial prompt-injection resistance | Model-dependent, partial |
|
|
63
|
-
| **phronomy** | State persistence across process restarts | ✅ `StateStore::ActiveRecord` — see `lib/phronomy/state_store/` |
|
|
64
|
-
| **phronomy** | Encrypted state store adapter interface | ❌ Not implemented — **planned (Feature C)** |
|
|
65
|
-
| Application | Authentication / authorisation / infrastructure encryption | Application / infrastructure responsibility |
|
|
66
|
-
|
|
67
|
-
---
|
|
68
|
-
|
|
69
|
-
### 3.4 Accountable and Transparent
|
|
70
|
-
|
|
71
|
-
| Layer | Responsibility | Status |
|
|
72
|
-
|---|---|---|
|
|
73
|
-
| LLM | Token usage reporting | ✅ `TokenUsage` — see `lib/phronomy/token_usage.rb` |
|
|
74
|
-
| **phronomy** | Tracing / span recording | ✅ `Tracing::LangfuseTracer`, `OpenTelemetryTracer` — see `lib/phronomy/tracing/` |
|
|
75
|
-
| **phronomy** | Caller identity propagation to tracers | ❌ Not implemented — **planned (Feature B)** |
|
|
76
|
-
| Application | User-facing AI disclosure, business audit requirements | Application responsibility |
|
|
77
|
-
|
|
78
|
-
---
|
|
79
|
-
|
|
80
|
-
### 3.5 Explainable and Interpretable
|
|
81
|
-
|
|
82
|
-
| Layer | Responsibility | Status |
|
|
83
|
-
|---|---|---|
|
|
84
|
-
| LLM | Chain-of-thought generation | Prompt-dependent |
|
|
85
|
-
| **phronomy** | Processing step recording via Graph and Tracing | ✅ Partial — `Workflow`/`WorkflowRunner`, `Tracing` |
|
|
86
|
-
| Application | Explanation UI, CoT prompt design | Application responsibility |
|
|
87
|
-
|
|
88
|
-
**Planned work:** None in this iteration.
|
|
89
|
-
|
|
90
|
-
---
|
|
91
|
-
|
|
92
|
-
### 3.6 Privacy-Enhanced
|
|
93
|
-
|
|
94
|
-
| Layer | Responsibility | Status |
|
|
95
|
-
|---|---|---|
|
|
96
|
-
| LLM | Training data handling | Provider responsibility |
|
|
97
|
-
| **phronomy** | Memory compression (data minimisation) | ✅ `Memory::Compression` — see `lib/phronomy/memory/compression/` |
|
|
98
|
-
| **phronomy** | Built-in PII detection guardrail | ❌ Not implemented — **planned (Feature A)** |
|
|
99
|
-
| **phronomy** | TTL and explicit purge API on ConversationManager | ❌ Not implemented — **planned (Feature D)** |
|
|
100
|
-
| Application | Privacy policy, user consent management | Application responsibility |
|
|
101
|
-
|
|
102
|
-
---
|
|
103
|
-
|
|
104
|
-
### 3.7 Fair — with Harmful Bias Managed
|
|
105
|
-
|
|
106
|
-
| Layer | Responsibility | Status |
|
|
107
|
-
|---|---|---|
|
|
108
|
-
| LLM | Bias reduction via RLHF | Provider responsibility |
|
|
109
|
-
| **phronomy** | Eval infrastructure for custom metrics | ✅ `Eval::Metrics` — extensible |
|
|
110
|
-
| Application | Fairness test-set design, threshold definition | Application responsibility |
|
|
111
|
-
|
|
112
|
-
**Planned work:** None in this iteration. The existing `Eval::Metrics` extension
|
|
113
|
-
point is sufficient; fairness metrics are domain-specific and belong to the
|
|
114
|
-
application layer.
|
|
115
|
-
|
|
116
|
-
---
|
|
117
|
-
|
|
118
|
-
## Planned Features (This Branch)
|
|
119
|
-
|
|
120
|
-
### Feature A — `Phronomy::Guardrail::Builtin` module
|
|
121
|
-
|
|
122
|
-
**Addresses:** 3.2 Safe, 3.6 Privacy-Enhanced
|
|
123
|
-
|
|
124
|
-
**Motivation:**
|
|
125
|
-
Prompt injection and PII leakage are the two most common, high-severity risks for
|
|
126
|
-
any LLM application. They require deterministic, regex/heuristic-based detection
|
|
127
|
-
that the LLM cannot reliably provide. phronomy should ship sensible defaults so
|
|
128
|
-
that applications do not have to re-implement these from scratch.
|
|
129
|
-
|
|
130
|
-
**Design:**
|
|
131
|
-
- New module: `Phronomy::Guardrail::Builtin`
|
|
132
|
-
- Two concrete classes, both under `lib/phronomy/guardrail/builtin/`:
|
|
133
|
-
- `PromptInjectionDetector < InputGuardrail`
|
|
134
|
-
- `PIIPatternDetector < InputGuardrail`
|
|
135
|
-
- Existing base classes (`InputGuardrail`, `OutputGuardrail`) are unchanged — see
|
|
136
|
-
`lib/phronomy/guardrail/input_guardrail.rb` and `output_guardrail.rb`.
|
|
137
|
-
|
|
138
|
-
**`PromptInjectionDetector`:**
|
|
139
|
-
- Detects common prompt-injection patterns in input strings:
|
|
140
|
-
- "ignore previous instructions", "disregard all prior", "system prompt:" prefixes,
|
|
141
|
-
jailbreak keywords, role-switch attempts.
|
|
142
|
-
- Pattern list is configurable via constructor argument `additional_patterns: []`.
|
|
143
|
-
- Raises `GuardrailError` with message `"Potential prompt injection detected"`.
|
|
144
|
-
|
|
145
|
-
**`PIIPatternDetector`:**
|
|
146
|
-
- Detects common Japanese and international PII patterns:
|
|
147
|
-
- Japanese My Number (12-digit number): `/\b\d{4}[- ]?\d{4}[- ]?\d{4}\b/`
|
|
148
|
-
- Credit card numbers: `/\b(?:\d{4}[- ]?){3}\d{4}\b/`
|
|
149
|
-
- Email addresses: standard RFC 5322 simplified pattern
|
|
150
|
-
- Phone numbers (JP): `/\b0\d{1,4}[- ]?\d{1,4}[- ]?\d{4}\b/`
|
|
151
|
-
- Each pattern category is independently togglable via constructor:
|
|
152
|
-
`PIIPatternDetector.new(detect: [:my_number, :credit_card, :email, :phone])`
|
|
153
|
-
- Default: all four categories active.
|
|
154
|
-
- Raises `GuardrailError` with message `"PII detected in input: <category>"`.
|
|
155
|
-
|
|
156
|
-
**Usage example:**
|
|
157
|
-
```ruby
|
|
158
|
-
agent = MyAgent.new
|
|
159
|
-
agent.add_input_guardrail(Phronomy::Guardrail::Builtin::PromptInjectionDetector.new)
|
|
160
|
-
agent.add_input_guardrail(Phronomy::Guardrail::Builtin::PIIPatternDetector.new(detect: [:my_number, :credit_card]))
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
**Files to create:**
|
|
164
|
-
- `lib/phronomy/guardrail/builtin/prompt_injection_detector.rb`
|
|
165
|
-
- `lib/phronomy/guardrail/builtin/pii_pattern_detector.rb`
|
|
166
|
-
- `lib/phronomy/guardrail/builtin.rb` (requires both, defines module)
|
|
167
|
-
- Update `lib/phronomy/guardrail.rb` to require `builtin`
|
|
168
|
-
|
|
169
|
-
**Tests:**
|
|
170
|
-
- Unit: `spec/phronomy/guardrail/builtin/prompt_injection_detector_spec.rb`
|
|
171
|
-
- Unit: `spec/phronomy/guardrail/builtin/pii_pattern_detector_spec.rb`
|
|
172
|
-
- Integration: extend `spec/integration/tool_guardrail_spec.rb` with builtin guardrail factors
|
|
173
|
-
|
|
174
|
-
---
|
|
175
|
-
|
|
176
|
-
### Feature B — Caller identity propagation in `config:`
|
|
177
|
-
|
|
178
|
-
**Addresses:** 3.4 Accountable and Transparent
|
|
179
|
-
|
|
180
|
-
**Motivation:**
|
|
181
|
-
`Tracing` already records what happened (spans, token usage). What is missing is
|
|
182
|
-
**who** triggered the action. Without a caller identity, audit logs cannot be
|
|
183
|
-
attributed to users or sessions, which is a requirement for accountability under
|
|
184
|
-
NIST AI RMF 3.4.
|
|
185
|
-
|
|
186
|
-
**Design:**
|
|
187
|
-
- `Agent::Base#invoke` and `WorkflowRunner#invoke` already accept `config: {}` — see
|
|
188
|
-
`lib/phronomy/agent/base.rb` and `lib/phronomy/graph/workflow_runner.rb`.
|
|
189
|
-
- Add two new optional keys to `config:`:
|
|
190
|
-
- `user_id:` (String | nil) — caller identity
|
|
191
|
-
- `session_id:` (String | nil) — session / request identity
|
|
192
|
-
- Both are extracted in `invoke_once` / `call` and forwarded to
|
|
193
|
-
`Tracing::Base#start_span` as span attributes.
|
|
194
|
-
- `Tracing::Base#start_span` already accepts `**attributes` — no signature change needed.
|
|
195
|
-
- `LangfuseTracer` and `OpenTelemetryTracer` will automatically forward them as
|
|
196
|
-
metadata/attributes respectively.
|
|
197
|
-
|
|
198
|
-
**Usage example:**
|
|
199
|
-
```ruby
|
|
200
|
-
agent.invoke("What is the weather?", config: {
|
|
201
|
-
thread_id: "conv-123",
|
|
202
|
-
user_id: "user-42",
|
|
203
|
-
session_id: "sess-abc"
|
|
204
|
-
})
|
|
205
|
-
```
|
|
206
|
-
|
|
207
|
-
**Files to modify:**
|
|
208
|
-
- `lib/phronomy/agent/base.rb` — extract `user_id` and `session_id` from config, pass to tracer
|
|
209
|
-
- `lib/phronomy/graph/compiled_graph.rb` — same for graph invocations
|
|
210
|
-
|
|
211
|
-
**Tests:**
|
|
212
|
-
- Unit: extend `spec/phronomy/agent_spec.rb` with `user_id`/`session_id` forwarding assertions
|
|
213
|
-
- Unit: extend `spec/phronomy/tracing/langfuse_tracer_spec.rb` with attribute forwarding
|
|
214
|
-
|
|
215
|
-
---
|
|
216
|
-
|
|
217
|
-
### Feature C — `StateStore` encryption adapter interface
|
|
218
|
-
|
|
219
|
-
**Addresses:** 3.3 Secure and Resilient
|
|
220
|
-
|
|
221
|
-
**Motivation:**
|
|
222
|
-
`StateStore::ActiveRecord` persists conversation state as plain-text JSON. In
|
|
223
|
-
regulated environments (healthcare, finance, government) this violates data-at-rest
|
|
224
|
-
requirements. phronomy should define a standard interface so that an encryption
|
|
225
|
-
adapter can be layered transparently without modifying `StateStore::ActiveRecord`.
|
|
226
|
-
|
|
227
|
-
**Design:**
|
|
228
|
-
- New abstract class: `Phronomy::StateStore::Encryptor::Base`
|
|
229
|
-
- `encrypt(plaintext) → ciphertext` (abstract)
|
|
230
|
-
- `decrypt(ciphertext) → plaintext` (abstract)
|
|
231
|
-
- New concrete class: `Phronomy::StateStore::Encryptor::ActiveSupport`
|
|
232
|
-
- Delegates to `ActiveSupport::MessageEncryptor` when available.
|
|
233
|
-
- Constructor: `ActiveSupport.new(secret_key_base:, cipher: "aes-256-gcm")`
|
|
234
|
-
- `StateStore::ActiveRecord` accepts an optional `encryptor:` constructor argument:
|
|
235
|
-
- When present, `serialize_state` output is passed through `encryptor.encrypt`
|
|
236
|
-
before writing to the DB, and `encryptor.decrypt` before `deserialize_state`.
|
|
237
|
-
- When absent, behaviour is unchanged (backwards compatible).
|
|
238
|
-
|
|
239
|
-
**Usage example:**
|
|
240
|
-
```ruby
|
|
241
|
-
encryptor = Phronomy::StateStore::Encryptor::ActiveSupport.new(
|
|
242
|
-
secret_key_base: ENV.fetch("SECRET_KEY_BASE")
|
|
243
|
-
)
|
|
244
|
-
store = Phronomy::StateStore::ActiveRecord.new(
|
|
245
|
-
model_class: PhronomyStateRecord,
|
|
246
|
-
encryptor: encryptor
|
|
247
|
-
)
|
|
248
|
-
```
|
|
249
|
-
|
|
250
|
-
**Files to create:**
|
|
251
|
-
- `lib/phronomy/state_store/encryptor/base.rb`
|
|
252
|
-
- `lib/phronomy/state_store/encryptor/active_support.rb`
|
|
253
|
-
- `lib/phronomy/state_store/encryptor.rb`
|
|
254
|
-
|
|
255
|
-
**Files to modify:**
|
|
256
|
-
- `lib/phronomy/state_store/active_record.rb` — accept `encryptor:`, apply in save/load
|
|
257
|
-
- `lib/phronomy/state_store.rb` — require `encryptor`
|
|
258
|
-
|
|
259
|
-
**Tests:**
|
|
260
|
-
- Unit: `spec/phronomy/state_store/encryptor/base_spec.rb`
|
|
261
|
-
- Unit: `spec/phronomy/state_store/encryptor/active_support_spec.rb`
|
|
262
|
-
- Unit: extend `spec/phronomy/state_store_spec.rb` with encrypted save/load round-trip
|
|
263
|
-
|
|
264
|
-
---
|
|
265
|
-
|
|
266
|
-
### Feature D — TTL and `purge` API on `ConversationManager`
|
|
267
|
-
|
|
268
|
-
**Addresses:** 3.6 Privacy-Enhanced
|
|
269
|
-
|
|
270
|
-
**Motivation:**
|
|
271
|
-
Users have a right to be forgotten. `ConversationManager` currently has no way to
|
|
272
|
-
delete stored messages for a given thread, nor does it enforce data retention limits.
|
|
273
|
-
|
|
274
|
-
**Design:**
|
|
275
|
-
- `ConversationManager#purge(thread_id:)` — deletes all stored messages for the
|
|
276
|
-
thread from both the storage backend and the retrieval index.
|
|
277
|
-
- Optional `ttl:` constructor argument (Integer seconds | nil):
|
|
278
|
-
- When set, messages older than `ttl` seconds are filtered out on `load_messages`.
|
|
279
|
-
- Storage backends that support native TTL (e.g. Redis) should be informed via
|
|
280
|
-
a `Storage::Base#purge_older_than(thread_id:, older_than:)` hook.
|
|
281
|
-
- Default: `nil` (no expiry — current behaviour unchanged).
|
|
282
|
-
- `Storage::ActiveRecord` gains a `purge_older_than` implementation using
|
|
283
|
-
`where("created_at < ?", Time.now - ttl).destroy_all`.
|
|
284
|
-
|
|
285
|
-
**Usage example:**
|
|
286
|
-
```ruby
|
|
287
|
-
memory = Phronomy::Memory::ConversationManager.new(
|
|
288
|
-
storage: Phronomy::Memory::Storage::ActiveRecord.new(model_class: PhronomyMessageRecord),
|
|
289
|
-
ttl: 60 * 60 * 24 * 30 # 30 days
|
|
290
|
-
)
|
|
291
|
-
# Later:
|
|
292
|
-
memory.purge(thread_id: "conv-123")
|
|
293
|
-
```
|
|
294
|
-
|
|
295
|
-
**Files to modify:**
|
|
296
|
-
- `lib/phronomy/memory/conversation_manager.rb` — add `purge`, accept `ttl:`
|
|
297
|
-
- `lib/phronomy/memory/storage/base.rb` — add `purge(thread_id:)` abstract method, `purge_older_than` hook
|
|
298
|
-
- `lib/phronomy/memory/storage/active_record.rb` — implement both
|
|
299
|
-
- `lib/phronomy/memory/storage/in_memory.rb` — implement both
|
|
300
|
-
|
|
301
|
-
**Tests:**
|
|
302
|
-
- Unit: extend `spec/phronomy/memory_spec.rb` with `purge` and TTL filtering tests
|
|
303
|
-
- Unit: extend `spec/phronomy/active_record/message_spec.rb` with `purge_older_than`
|
|
304
|
-
|
|
305
|
-
---
|
|
306
|
-
|
|
307
|
-
## Implementation Order
|
|
308
|
-
|
|
309
|
-
| Step | Feature | Rationale |
|
|
310
|
-
|---|---|---|
|
|
311
|
-
| 1 | Feature A — BuiltinGuardrails | Self-contained, no dependencies, highest safety impact |
|
|
312
|
-
| 2 | Feature B — Caller identity | Small change, high accountability value |
|
|
313
|
-
| 3 | Feature C — Encryptor I/F | More complex, depends on no other feature |
|
|
314
|
-
| 4 | Feature D — TTL / purge | Touches storage layer, do last to avoid churn |
|
|
315
|
-
|
|
316
|
-
Each feature follows the same workflow:
|
|
317
|
-
1. Implement source files
|
|
318
|
-
2. Run StandardRB on new files
|
|
319
|
-
3. Run unit tests: `bundle exec rspec <spec_file>`
|
|
320
|
-
4. Run full unit suite: `bundle exec rspec --format progress`
|
|
321
|
-
5. Run integration suite: `bundle exec rspec --tag integration --format progress`
|
|
322
|
-
6. Present diff for commit approval
|
|
323
|
-
|
|
324
|
-
---
|
|
325
|
-
|
|
326
|
-
## Out of Scope (This Branch)
|
|
327
|
-
|
|
328
|
-
- Fairness metrics / demographic parity (`Eval::Metrics` extension) — domain-specific,
|
|
329
|
-
belongs to application layer
|
|
330
|
-
- Kill switch / forced shutdown — infrastructure concern
|
|
331
|
-
- Differential privacy — academic/research topic, not yet practical for gem scope
|
|
332
|
-
- Authentication / authorisation — application / infrastructure concern
|