phronomy 0.2.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -30
  3. data/README.md +106 -122
  4. data/lib/phronomy/agent/base.rb +135 -57
  5. data/lib/phronomy/agent/checkpoint.rb +53 -0
  6. data/lib/phronomy/agent/orchestrator.rb +119 -0
  7. data/lib/phronomy/agent/react_agent.rb +18 -28
  8. data/lib/phronomy/agent/shared_state.rb +303 -0
  9. data/lib/phronomy/agent/suspend_signal.rb +35 -0
  10. data/lib/phronomy/agent/team_coordinator.rb +285 -0
  11. data/lib/phronomy/agent.rb +2 -1
  12. data/lib/phronomy/configuration.rb +0 -24
  13. data/lib/phronomy/generator_verifier.rb +250 -0
  14. data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +10 -27
  15. data/lib/phronomy/railtie.rb +0 -6
  16. data/lib/phronomy/ruby_llm_patches.rb +20 -0
  17. data/lib/phronomy/tool/mcp_tool.rb +23 -26
  18. data/lib/phronomy/tracing/langfuse_tracer.rb +3 -6
  19. data/lib/phronomy/vector_store/redis_search.rb +4 -4
  20. data/lib/phronomy/version.rb +1 -1
  21. data/lib/phronomy/workflow.rb +4 -7
  22. data/lib/phronomy/workflow_runner.rb +42 -30
  23. data/lib/phronomy.rb +18 -0
  24. data/scripts/check_readme_ruby.rb +38 -0
  25. metadata +12 -38
  26. data/docs/trustworthy_ai_enhancements.md +0 -332
  27. data/lib/phronomy/active_record/acts_as.rb +0 -48
  28. data/lib/phronomy/active_record/checkpoint.rb +0 -20
  29. data/lib/phronomy/active_record/extensions.rb +0 -14
  30. data/lib/phronomy/active_record/message.rb +0 -20
  31. data/lib/phronomy/actor.rb +0 -68
  32. data/lib/phronomy/memory/compression/base.rb +0 -37
  33. data/lib/phronomy/memory/compression/summary.rb +0 -107
  34. data/lib/phronomy/memory/compression/tool_output_pruner.rb +0 -67
  35. data/lib/phronomy/memory/compression.rb +0 -11
  36. data/lib/phronomy/memory/conversation_manager.rb +0 -213
  37. data/lib/phronomy/memory/retrieval/base.rb +0 -22
  38. data/lib/phronomy/memory/retrieval/composite.rb +0 -76
  39. data/lib/phronomy/memory/retrieval/recent.rb +0 -35
  40. data/lib/phronomy/memory/retrieval/semantic.rb +0 -114
  41. data/lib/phronomy/memory/retrieval.rb +0 -12
  42. data/lib/phronomy/memory/storage/active_record.rb +0 -248
  43. data/lib/phronomy/memory/storage/base.rb +0 -155
  44. data/lib/phronomy/memory/storage/in_memory.rb +0 -152
  45. data/lib/phronomy/memory/storage.rb +0 -11
  46. data/lib/phronomy/memory.rb +0 -21
  47. data/lib/phronomy/rails/agent_job.rb +0 -75
  48. data/lib/phronomy/state_store/active_record.rb +0 -76
  49. data/lib/phronomy/state_store/base.rb +0 -112
  50. data/lib/phronomy/state_store/encryptor/active_support.rb +0 -49
  51. data/lib/phronomy/state_store/encryptor/base.rb +0 -34
  52. data/lib/phronomy/state_store/encryptor.rb +0 -16
  53. data/lib/phronomy/state_store/file.rb +0 -85
  54. data/lib/phronomy/state_store/in_memory.rb +0 -53
  55. data/lib/phronomy/state_store/redis.rb +0 -70
  56. data/lib/phronomy/state_store.rb +0 -9
  57. data/lib/phronomy/thread_actor_registry.rb +0 -85
  58. data/lib/phronomy/trust_pipeline.rb +0 -264
@@ -38,7 +38,7 @@ module Phronomy
38
38
 
39
39
  def initialize(state_class:, nodes:, after_transitions:, route_transitions:,
40
40
  external_events:, entry_point:, wait_state_names: [],
41
- before_callbacks: {}, after_callbacks: {}, state_store: nil)
41
+ before_callbacks: {}, after_callbacks: {})
42
42
  @state_class = state_class
43
43
  @nodes = nodes
44
44
  @after_transitions = after_transitions # { from => to }
@@ -48,7 +48,6 @@ module Phronomy
48
48
  @wait_state_names = wait_state_names
49
49
  @before_callbacks = before_callbacks.dup
50
50
  @after_callbacks = after_callbacks.dup
51
- @state_store_override = state_store
52
51
  @phase_machine_class = build_phase_machine_class
53
52
  end
54
53
 
@@ -134,29 +133,46 @@ module Phronomy
134
133
 
135
134
  private
136
135
 
137
- def state_store
138
- @state_store_override || Phronomy.configuration.default_state_store
139
- end
140
-
141
136
  def run_graph(state, from_node: nil, recursion_limit: 25, &event_block)
142
137
  current_node = from_node || @entry_point
143
138
  tracker = new_phase_machine(current_node)
144
139
  tracker.context = state
140
+ # Event queue: decouple node execution from transition firing.
141
+ # Events are enqueued after a node completes and processed at the top
142
+ # of the next iteration so that guards always see the freshest context.
143
+ event_queue = []
145
144
  step = 0
146
145
 
147
- while current_node && current_node != FINISH
148
- if step >= recursion_limit
149
- raise Phronomy::RecursionLimitError,
150
- "Recursion limit (#{recursion_limit}) exceeded"
146
+ loop do
147
+ break if current_node == FINISH
148
+
149
+ # -- Process next pending event -----------------------------------------
150
+ # Dequeue one event and fire it against the state machine. Guards are
151
+ # evaluated here (at fire time) so they see the context written by the
152
+ # node that enqueued the event.
153
+ if (event = event_queue.shift)
154
+ if step >= recursion_limit
155
+ raise Phronomy::RecursionLimitError,
156
+ "Recursion limit (#{recursion_limit}) exceeded"
157
+ end
158
+
159
+ fire_event!(tracker, event, current_node)
160
+ next_phase = tracker.phase.to_sym
161
+ # When next_phase == current_node no transition matched → terminal node.
162
+ current_node = (next_phase == current_node) ? FINISH : next_phase
163
+ step += 1
164
+ next
151
165
  end
152
166
 
153
- # Auto-halt at wait states: save context and return to caller.
167
+ # -- Queue empty: check for halt -----------------------------------------
168
+ # Auto-halt at wait states: persist phase in context and return to caller.
169
+ # The caller resumes via send_event, which starts a fresh run_graph call.
154
170
  if @wait_state_names.include?(current_node)
155
171
  state.set_graph_metadata(thread_id: state.thread_id, phase: current_node)
156
- state_store&.save(state)
157
172
  return state
158
173
  end
159
174
 
175
+ # -- Execute node action ------------------------------------------------
160
176
  node_fn = @nodes[current_node]
161
177
  raise ArgumentError, "Node #{current_node.inspect} is not defined" unless node_fn
162
178
 
@@ -171,31 +187,25 @@ module Phronomy
171
187
  "expected Hash, #{@state_class}, or nil"
172
188
  end
173
189
 
174
- # Update tracker so guards see the freshest context.
190
+ # Update tracker so guards see the freshest context when the event fires.
175
191
  tracker.context = state
176
192
 
177
193
  event_block&.call({node: current_node, state: state})
178
194
 
179
- # Delegate transition decision to state_machines.
195
+ # -- Enqueue transition event -------------------------------------------
196
+ # node_completed: generic event for all after-transitions (unconditional).
197
+ # route event: user-named event carrying guarded conditional branches.
198
+ # No enqueue: terminal node — next iteration exits via FINISH check.
180
199
  if @after_transitions.key?(current_node)
181
- fire_event!(tracker, :"advance_#{current_node}", current_node)
200
+ event_queue << :node_completed
182
201
  elsif @route_transitions.key?(current_node)
183
- ev_name = @route_transitions[current_node][:event_name]
184
- fire_event!(tracker, ev_name, current_node)
202
+ event_queue << @route_transitions[current_node][:event_name]
203
+ else
204
+ current_node = FINISH
185
205
  end
186
- # Nodes with no declared outgoing transition are treated as terminal:
187
- # next_phase == current_node triggers the FINISH assignment below.
188
-
189
- next_phase = tracker.phase.to_sym
190
- # When next_phase == current_node: no transition fired (terminal node) → end.
191
- # When next_phase == :__end__ (== FINISH): route led to finish → exit loop.
192
- current_node = (next_phase == current_node) ? FINISH : next_phase
193
-
194
- step += 1
195
206
  end
196
207
 
197
208
  state.set_graph_metadata(thread_id: state.thread_id, phase: :__end__)
198
- state_store&.save(state)
199
209
  state
200
210
  end
201
211
 
@@ -232,9 +242,11 @@ module Phronomy
232
242
  state_machine :phase, initial: entry do
233
243
  all_states.each { |s| state s }
234
244
 
235
- # 1. After-transitions: unconditional, fire on action completion.
236
- after_trans.each do |from, to|
237
- event :"advance_#{from}" do
245
+ # 1. After-transitions: one generic :node_completed event covers all
246
+ # unconditional transitions. This keeps event names independent of
247
+ # source state names and matches standard state machine semantics.
248
+ event :node_completed do
249
+ after_trans.each do |from, to|
238
250
  transition from => to
239
251
  end
240
252
  end
data/lib/phronomy.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "zeitwerk"
4
4
  require "ruby_llm"
5
+ require_relative "phronomy/ruby_llm_patches"
5
6
 
6
7
  loader = Zeitwerk::Loader.for_gem
7
8
  loader.ignore(File.expand_path("generators", __dir__))
@@ -26,6 +27,23 @@ module Phronomy
26
27
 
27
28
  class HandoffError < Error; end
28
29
 
30
+ # Raised by {Phronomy::GeneratorVerifier#invoke} when +raise_if_untrusted: true+
31
+ # and the pipeline's combined confidence score falls below the configured threshold.
32
+ #
33
+ # @example
34
+ # rescue Phronomy::LowConfidenceError => e
35
+ # puts e.result.confidence # => e.g. 0.45
36
+ # puts e.result.output # best-effort answer despite low confidence
37
+ class LowConfidenceError < Error
38
+ # @return [Phronomy::GeneratorVerifier::Result] the untrusted result
39
+ attr_reader :result
40
+
41
+ def initialize(result)
42
+ @result = result
43
+ super("Answer confidence #{result.confidence} is below the required threshold")
44
+ end
45
+ end
46
+
29
47
  class GuardrailError < Error
30
48
  attr_reader :guardrail
31
49
 
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Extracts every ```ruby ... ``` block from README.md and runs `ruby -c` on each.
4
+ # Exits non-zero if any block has a syntax error.
5
+
6
+ require "tempfile"
7
+ require "open3"
8
+
9
+ readme_path = File.expand_path("../README.md", __dir__)
10
+ readme = File.read(readme_path)
11
+ blocks = readme.scan(/^```ruby\n(.*?)^```/m).map.with_index(1) { |(code), i| [i, code] }
12
+
13
+ puts "Checking #{blocks.size} Ruby code blocks in README.md..."
14
+
15
+ failures = []
16
+
17
+ blocks.each do |index, code|
18
+ Tempfile.create(["readme_block_#{index}", ".rb"]) do |f|
19
+ f.write(code)
20
+ f.flush
21
+ stdout, status = Open3.capture2e("ruby", "-c", f.path)
22
+ if status.success?
23
+ puts " OK block ##{index}"
24
+ else
25
+ failures << index
26
+ puts " FAIL block ##{index}"
27
+ puts stdout.gsub(f.path, "block ##{index}")
28
+ end
29
+ end
30
+ end
31
+
32
+ if failures.empty?
33
+ puts "All #{blocks.size} Ruby code blocks passed syntax check."
34
+ exit 0
35
+ else
36
+ puts "\n#{failures.size} block(s) failed syntax check: #{failures.join(", ")}"
37
+ exit 1
38
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phronomy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Raizo T.C.S
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-05-17 00:00:00.000000000 Z
11
+ date: 2026-05-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby_llm
@@ -52,9 +52,8 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.6'
55
- description: Phronomy provides Agent, Workflow, Memory, Tool, Guardrail, RAG, and
56
- Multi-agent capabilities for building AI agents in Ruby and Rails. Powered by RubyLLM
57
- for LLM abstraction.
55
+ description: Phronomy provides Agent, Workflow, Tool, Guardrail, RAG, and Multi-agent
56
+ capabilities for building AI agents in Ruby. Powered by RubyLLM for LLM abstraction.
58
57
  email:
59
58
  - raizo.tcs@gmail.com
60
59
  executables: []
@@ -65,23 +64,22 @@ files:
65
64
  - CHANGELOG.md
66
65
  - README.md
67
66
  - Rakefile
68
- - docs/trustworthy_ai_enhancements.md
69
67
  - lib/generators/phronomy/install/install_generator.rb
70
68
  - lib/generators/phronomy/install/templates/create_phronomy_messages.rb.tt
71
69
  - lib/generators/phronomy/install/templates/initializer.rb.tt
72
70
  - lib/generators/phronomy/install/templates/message_model.rb.tt
73
71
  - lib/phronomy.rb
74
- - lib/phronomy/active_record/acts_as.rb
75
- - lib/phronomy/active_record/checkpoint.rb
76
- - lib/phronomy/active_record/extensions.rb
77
- - lib/phronomy/active_record/message.rb
78
- - lib/phronomy/actor.rb
79
72
  - lib/phronomy/agent.rb
80
73
  - lib/phronomy/agent/base.rb
81
74
  - lib/phronomy/agent/before_completion_context.rb
75
+ - lib/phronomy/agent/checkpoint.rb
82
76
  - lib/phronomy/agent/handoff.rb
77
+ - lib/phronomy/agent/orchestrator.rb
83
78
  - lib/phronomy/agent/react_agent.rb
84
79
  - lib/phronomy/agent/runner.rb
80
+ - lib/phronomy/agent/shared_state.rb
81
+ - lib/phronomy/agent/suspend_signal.rb
82
+ - lib/phronomy/agent/team_coordinator.rb
85
83
  - lib/phronomy/configuration.rb
86
84
  - lib/phronomy/context.rb
87
85
  - lib/phronomy/context/assembler.rb
@@ -107,6 +105,7 @@ files:
107
105
  - lib/phronomy/eval/scorer/exact_match.rb
108
106
  - lib/phronomy/eval/scorer/includes_scorer.rb
109
107
  - lib/phronomy/eval/scorer/llm_judge.rb
108
+ - lib/phronomy/generator_verifier.rb
110
109
  - lib/phronomy/guardrail.rb
111
110
  - lib/phronomy/guardrail/base.rb
112
111
  - lib/phronomy/guardrail/builtin.rb
@@ -124,43 +123,18 @@ files:
124
123
  - lib/phronomy/loader/csv_loader.rb
125
124
  - lib/phronomy/loader/markdown_loader.rb
126
125
  - lib/phronomy/loader/plain_text_loader.rb
127
- - lib/phronomy/memory.rb
128
- - lib/phronomy/memory/compression.rb
129
- - lib/phronomy/memory/compression/base.rb
130
- - lib/phronomy/memory/compression/summary.rb
131
- - lib/phronomy/memory/compression/tool_output_pruner.rb
132
- - lib/phronomy/memory/conversation_manager.rb
133
- - lib/phronomy/memory/retrieval.rb
134
- - lib/phronomy/memory/retrieval/base.rb
135
- - lib/phronomy/memory/retrieval/composite.rb
136
- - lib/phronomy/memory/retrieval/recent.rb
137
- - lib/phronomy/memory/retrieval/semantic.rb
138
- - lib/phronomy/memory/storage.rb
139
- - lib/phronomy/memory/storage/active_record.rb
140
- - lib/phronomy/memory/storage/base.rb
141
- - lib/phronomy/memory/storage/in_memory.rb
142
126
  - lib/phronomy/output_parser.rb
143
127
  - lib/phronomy/output_parser/base.rb
144
128
  - lib/phronomy/output_parser/json_parser.rb
145
129
  - lib/phronomy/output_parser/structured_parser.rb
146
130
  - lib/phronomy/prompt_template.rb
147
- - lib/phronomy/rails/agent_job.rb
148
131
  - lib/phronomy/railtie.rb
132
+ - lib/phronomy/ruby_llm_patches.rb
149
133
  - lib/phronomy/runnable.rb
150
134
  - lib/phronomy/splitter.rb
151
135
  - lib/phronomy/splitter/base.rb
152
136
  - lib/phronomy/splitter/fixed_size_splitter.rb
153
137
  - lib/phronomy/splitter/recursive_splitter.rb
154
- - lib/phronomy/state_store.rb
155
- - lib/phronomy/state_store/active_record.rb
156
- - lib/phronomy/state_store/base.rb
157
- - lib/phronomy/state_store/encryptor.rb
158
- - lib/phronomy/state_store/encryptor/active_support.rb
159
- - lib/phronomy/state_store/encryptor/base.rb
160
- - lib/phronomy/state_store/file.rb
161
- - lib/phronomy/state_store/in_memory.rb
162
- - lib/phronomy/state_store/redis.rb
163
- - lib/phronomy/thread_actor_registry.rb
164
138
  - lib/phronomy/token_usage.rb
165
139
  - lib/phronomy/tool.rb
166
140
  - lib/phronomy/tool/agent_tool.rb
@@ -171,7 +145,6 @@ files:
171
145
  - lib/phronomy/tracing/langfuse_tracer.rb
172
146
  - lib/phronomy/tracing/null_tracer.rb
173
147
  - lib/phronomy/tracing/open_telemetry_tracer.rb
174
- - lib/phronomy/trust_pipeline.rb
175
148
  - lib/phronomy/vector_store.rb
176
149
  - lib/phronomy/vector_store/base.rb
177
150
  - lib/phronomy/vector_store/in_memory.rb
@@ -181,6 +154,7 @@ files:
181
154
  - lib/phronomy/workflow.rb
182
155
  - lib/phronomy/workflow_context.rb
183
156
  - lib/phronomy/workflow_runner.rb
157
+ - scripts/check_readme_ruby.rb
184
158
  - sig/phronomy.rbs
185
159
  homepage: https://github.com/Raizo-TCS/phronomy
186
160
  licenses:
@@ -1,332 +0,0 @@
1
- # Trustworthy AI Enhancements
2
-
3
- Specification for features that address the NIST AI Risk Management Framework (AI RMF 1.0)
4
- trustworthiness characteristics, as applied to the phronomy gem.
5
-
6
- Reference: NIST AI 100-1 — https://doi.org/10.6028/NIST.AI.100-1
7
- Japanese translation: https://aisi.go.jp/assets/pdf/NIST_AI_RMF_jp_20240806.pdf
8
-
9
- ---
10
-
11
- ## Responsibility Model
12
-
13
- Three layers share responsibility for trustworthy AI:
14
-
15
- ```
16
- ┌─────────────────────────────────────────┐
17
- │ Application Domain logic / UX │
18
- ├─────────────────────────────────────────┤
19
- │ phronomy Control flow / observation / boundary enforcement │
20
- ├─────────────────────────────────────────┤
21
- │ LLM Probabilistic reasoning / generation │
22
- └─────────────────────────────────────────┘
23
- ```
24
-
25
- Key principle: **the LLM is untrusted**. phronomy acts as the deterministic control
26
- layer that validates, constrains, and observes LLM behaviour. Characteristics that
27
- cannot be delegated to the LLM must be enforced by phronomy or the application layer.
28
-
29
- ---
30
-
31
- ## Trustworthiness Characteristics — Status and Plan
32
-
33
- ### 3.1 Valid and Reliable
34
-
35
- | Layer | Responsibility | Status |
36
- |---|---|---|
37
- | LLM | Base reasoning capability | Model-dependent |
38
- | **phronomy** | Eval infrastructure, output type validation | ✅ `Eval::Runner`, `Eval::Dataset`, `Eval::Metrics` — see `lib/phronomy/eval/` |
39
- | **phronomy** | Drift / accuracy monitoring hooks | ❌ Not implemented — **planned** |
40
- | Application | Test-case design, accuracy thresholds | Application responsibility |
41
-
42
- **Planned work:**
43
- - None in this iteration. `Eval` infrastructure is sufficient for current needs.
44
-
45
- ---
46
-
47
- ### 3.2 Safe
48
-
49
- | Layer | Responsibility | Status |
50
- |---|---|---|
51
- | LLM | Basic harmful-content avoidance (RLHF) | Model-dependent, not guaranteed |
52
- | **phronomy** | Intervention points, iteration limits, approval gates | ✅ `wait_state`/`send_event`, `requires_approval`, `max_iterations` — see `lib/phronomy/workflow.rb`, `lib/phronomy/agent/base.rb` |
53
- | **phronomy** | Built-in guardrails (PII, prompt injection) | ❌ Not implemented — **planned (Feature A)** |
54
- | Application | Concrete guardrail logic, approval workflows | Application responsibility |
55
-
56
- ---
57
-
58
- ### 3.3 Secure and Resilient
59
-
60
- | Layer | Responsibility | Status |
61
- |---|---|---|
62
- | LLM | Partial prompt-injection resistance | Model-dependent, partial |
63
- | **phronomy** | State persistence across process restarts | ✅ `StateStore::ActiveRecord` — see `lib/phronomy/state_store/` |
64
- | **phronomy** | Encrypted state store adapter interface | ❌ Not implemented — **planned (Feature C)** |
65
- | Application | Authentication / authorisation / infrastructure encryption | Application / infrastructure responsibility |
66
-
67
- ---
68
-
69
- ### 3.4 Accountable and Transparent
70
-
71
- | Layer | Responsibility | Status |
72
- |---|---|---|
73
- | LLM | Token usage reporting | ✅ `TokenUsage` — see `lib/phronomy/token_usage.rb` |
74
- | **phronomy** | Tracing / span recording | ✅ `Tracing::LangfuseTracer`, `OpenTelemetryTracer` — see `lib/phronomy/tracing/` |
75
- | **phronomy** | Caller identity propagation to tracers | ❌ Not implemented — **planned (Feature B)** |
76
- | Application | User-facing AI disclosure, business audit requirements | Application responsibility |
77
-
78
- ---
79
-
80
- ### 3.5 Explainable and Interpretable
81
-
82
- | Layer | Responsibility | Status |
83
- |---|---|---|
84
- | LLM | Chain-of-thought generation | Prompt-dependent |
85
- | **phronomy** | Processing step recording via Graph and Tracing | ✅ Partial — `Workflow`/`WorkflowRunner`, `Tracing` |
86
- | Application | Explanation UI, CoT prompt design | Application responsibility |
87
-
88
- **Planned work:** None in this iteration.
89
-
90
- ---
91
-
92
- ### 3.6 Privacy-Enhanced
93
-
94
- | Layer | Responsibility | Status |
95
- |---|---|---|
96
- | LLM | Training data handling | Provider responsibility |
97
- | **phronomy** | Memory compression (data minimisation) | ✅ `Memory::Compression` — see `lib/phronomy/memory/compression/` |
98
- | **phronomy** | Built-in PII detection guardrail | ❌ Not implemented — **planned (Feature A)** |
99
- | **phronomy** | TTL and explicit purge API on ConversationManager | ❌ Not implemented — **planned (Feature D)** |
100
- | Application | Privacy policy, user consent management | Application responsibility |
101
-
102
- ---
103
-
104
- ### 3.7 Fair — with Harmful Bias Managed
105
-
106
- | Layer | Responsibility | Status |
107
- |---|---|---|
108
- | LLM | Bias reduction via RLHF | Provider responsibility |
109
- | **phronomy** | Eval infrastructure for custom metrics | ✅ `Eval::Metrics` — extensible |
110
- | Application | Fairness test-set design, threshold definition | Application responsibility |
111
-
112
- **Planned work:** None in this iteration. The existing `Eval::Metrics` extension
113
- point is sufficient; fairness metrics are domain-specific and belong to the
114
- application layer.
115
-
116
- ---
117
-
118
- ## Planned Features (This Branch)
119
-
120
- ### Feature A — `Phronomy::Guardrail::Builtin` module
121
-
122
- **Addresses:** 3.2 Safe, 3.6 Privacy-Enhanced
123
-
124
- **Motivation:**
125
- Prompt injection and PII leakage are the two most common, high-severity risks for
126
- any LLM application. They require deterministic, regex/heuristic-based detection
127
- that the LLM cannot reliably provide. phronomy should ship sensible defaults so
128
- that applications do not have to re-implement these from scratch.
129
-
130
- **Design:**
131
- - New module: `Phronomy::Guardrail::Builtin`
132
- - Two concrete classes, both under `lib/phronomy/guardrail/builtin/`:
133
- - `PromptInjectionDetector < InputGuardrail`
134
- - `PIIPatternDetector < InputGuardrail`
135
- - Existing base classes (`InputGuardrail`, `OutputGuardrail`) are unchanged — see
136
- `lib/phronomy/guardrail/input_guardrail.rb` and `output_guardrail.rb`.
137
-
138
- **`PromptInjectionDetector`:**
139
- - Detects common prompt-injection patterns in input strings:
140
- - "ignore previous instructions", "disregard all prior", "system prompt:" prefixes,
141
- jailbreak keywords, role-switch attempts.
142
- - Pattern list is configurable via constructor argument `additional_patterns: []`.
143
- - Raises `GuardrailError` with message `"Potential prompt injection detected"`.
144
-
145
- **`PIIPatternDetector`:**
146
- - Detects common Japanese and international PII patterns:
147
- - Japanese My Number (12-digit number): `/\b\d{4}[- ]?\d{4}[- ]?\d{4}\b/`
148
- - Credit card numbers: `/\b(?:\d{4}[- ]?){3}\d{4}\b/`
149
- - Email addresses: standard RFC 5322 simplified pattern
150
- - Phone numbers (JP): `/\b0\d{1,4}[- ]?\d{1,4}[- ]?\d{4}\b/`
151
- - Each pattern category is independently togglable via constructor:
152
- `PIIPatternDetector.new(detect: [:my_number, :credit_card, :email, :phone])`
153
- - Default: all four categories active.
154
- - Raises `GuardrailError` with message `"PII detected in input: <category>"`.
155
-
156
- **Usage example:**
157
- ```ruby
158
- agent = MyAgent.new
159
- agent.add_input_guardrail(Phronomy::Guardrail::Builtin::PromptInjectionDetector.new)
160
- agent.add_input_guardrail(Phronomy::Guardrail::Builtin::PIIPatternDetector.new(detect: [:my_number, :credit_card]))
161
- ```
162
-
163
- **Files to create:**
164
- - `lib/phronomy/guardrail/builtin/prompt_injection_detector.rb`
165
- - `lib/phronomy/guardrail/builtin/pii_pattern_detector.rb`
166
- - `lib/phronomy/guardrail/builtin.rb` (requires both, defines module)
167
- - Update `lib/phronomy/guardrail.rb` to require `builtin`
168
-
169
- **Tests:**
170
- - Unit: `spec/phronomy/guardrail/builtin/prompt_injection_detector_spec.rb`
171
- - Unit: `spec/phronomy/guardrail/builtin/pii_pattern_detector_spec.rb`
172
- - Integration: extend `spec/integration/tool_guardrail_spec.rb` with builtin guardrail factors
173
-
174
- ---
175
-
176
- ### Feature B — Caller identity propagation in `config:`
177
-
178
- **Addresses:** 3.4 Accountable and Transparent
179
-
180
- **Motivation:**
181
- `Tracing` already records what happened (spans, token usage). What is missing is
182
- **who** triggered the action. Without a caller identity, audit logs cannot be
183
- attributed to users or sessions, which is a requirement for accountability under
184
- NIST AI RMF 3.4.
185
-
186
- **Design:**
187
- - `Agent::Base#invoke` and `WorkflowRunner#invoke` already accept `config: {}` — see
188
- `lib/phronomy/agent/base.rb` and `lib/phronomy/graph/workflow_runner.rb`.
189
- - Add two new optional keys to `config:`:
190
- - `user_id:` (String | nil) — caller identity
191
- - `session_id:` (String | nil) — session / request identity
192
- - Both are extracted in `invoke_once` / `call` and forwarded to
193
- `Tracing::Base#start_span` as span attributes.
194
- - `Tracing::Base#start_span` already accepts `**attributes` — no signature change needed.
195
- - `LangfuseTracer` and `OpenTelemetryTracer` will automatically forward them as
196
- metadata/attributes respectively.
197
-
198
- **Usage example:**
199
- ```ruby
200
- agent.invoke("What is the weather?", config: {
201
- thread_id: "conv-123",
202
- user_id: "user-42",
203
- session_id: "sess-abc"
204
- })
205
- ```
206
-
207
- **Files to modify:**
208
- - `lib/phronomy/agent/base.rb` — extract `user_id` and `session_id` from config, pass to tracer
209
- - `lib/phronomy/graph/compiled_graph.rb` — same for graph invocations
210
-
211
- **Tests:**
212
- - Unit: extend `spec/phronomy/agent_spec.rb` with `user_id`/`session_id` forwarding assertions
213
- - Unit: extend `spec/phronomy/tracing/langfuse_tracer_spec.rb` with attribute forwarding
214
-
215
- ---
216
-
217
- ### Feature C — `StateStore` encryption adapter interface
218
-
219
- **Addresses:** 3.3 Secure and Resilient
220
-
221
- **Motivation:**
222
- `StateStore::ActiveRecord` persists conversation state as plain-text JSON. In
223
- regulated environments (healthcare, finance, government) this violates data-at-rest
224
- requirements. phronomy should define a standard interface so that an encryption
225
- adapter can be layered transparently without modifying `StateStore::ActiveRecord`.
226
-
227
- **Design:**
228
- - New abstract class: `Phronomy::StateStore::Encryptor::Base`
229
- - `encrypt(plaintext) → ciphertext` (abstract)
230
- - `decrypt(ciphertext) → plaintext` (abstract)
231
- - New concrete class: `Phronomy::StateStore::Encryptor::ActiveSupport`
232
- - Delegates to `ActiveSupport::MessageEncryptor` when available.
233
- - Constructor: `ActiveSupport.new(secret_key_base:, cipher: "aes-256-gcm")`
234
- - `StateStore::ActiveRecord` accepts an optional `encryptor:` constructor argument:
235
- - When present, `serialize_state` output is passed through `encryptor.encrypt`
236
- before writing to the DB, and `encryptor.decrypt` before `deserialize_state`.
237
- - When absent, behaviour is unchanged (backwards compatible).
238
-
239
- **Usage example:**
240
- ```ruby
241
- encryptor = Phronomy::StateStore::Encryptor::ActiveSupport.new(
242
- secret_key_base: ENV.fetch("SECRET_KEY_BASE")
243
- )
244
- store = Phronomy::StateStore::ActiveRecord.new(
245
- model_class: PhronomyStateRecord,
246
- encryptor: encryptor
247
- )
248
- ```
249
-
250
- **Files to create:**
251
- - `lib/phronomy/state_store/encryptor/base.rb`
252
- - `lib/phronomy/state_store/encryptor/active_support.rb`
253
- - `lib/phronomy/state_store/encryptor.rb`
254
-
255
- **Files to modify:**
256
- - `lib/phronomy/state_store/active_record.rb` — accept `encryptor:`, apply in save/load
257
- - `lib/phronomy/state_store.rb` — require `encryptor`
258
-
259
- **Tests:**
260
- - Unit: `spec/phronomy/state_store/encryptor/base_spec.rb`
261
- - Unit: `spec/phronomy/state_store/encryptor/active_support_spec.rb`
262
- - Unit: extend `spec/phronomy/state_store_spec.rb` with encrypted save/load round-trip
263
-
264
- ---
265
-
266
- ### Feature D — TTL and `purge` API on `ConversationManager`
267
-
268
- **Addresses:** 3.6 Privacy-Enhanced
269
-
270
- **Motivation:**
271
- Users have a right to be forgotten. `ConversationManager` currently has no way to
272
- delete stored messages for a given thread, nor does it enforce data retention limits.
273
-
274
- **Design:**
275
- - `ConversationManager#purge(thread_id:)` — deletes all stored messages for the
276
- thread from both the storage backend and the retrieval index.
277
- - Optional `ttl:` constructor argument (Integer seconds | nil):
278
- - When set, messages older than `ttl` seconds are filtered out on `load_messages`.
279
- - Storage backends that support native TTL (e.g. Redis) should be informed via
280
- a `Storage::Base#purge_older_than(thread_id:, older_than:)` hook.
281
- - Default: `nil` (no expiry — current behaviour unchanged).
282
- - `Storage::ActiveRecord` gains a `purge_older_than` implementation using
283
- `where("created_at < ?", Time.now - ttl).destroy_all`.
284
-
285
- **Usage example:**
286
- ```ruby
287
- memory = Phronomy::Memory::ConversationManager.new(
288
- storage: Phronomy::Memory::Storage::ActiveRecord.new(model_class: PhronomyMessageRecord),
289
- ttl: 60 * 60 * 24 * 30 # 30 days
290
- )
291
- # Later:
292
- memory.purge(thread_id: "conv-123")
293
- ```
294
-
295
- **Files to modify:**
296
- - `lib/phronomy/memory/conversation_manager.rb` — add `purge`, accept `ttl:`
297
- - `lib/phronomy/memory/storage/base.rb` — add `purge(thread_id:)` abstract method, `purge_older_than` hook
298
- - `lib/phronomy/memory/storage/active_record.rb` — implement both
299
- - `lib/phronomy/memory/storage/in_memory.rb` — implement both
300
-
301
- **Tests:**
302
- - Unit: extend `spec/phronomy/memory_spec.rb` with `purge` and TTL filtering tests
303
- - Unit: extend `spec/phronomy/active_record/message_spec.rb` with `purge_older_than`
304
-
305
- ---
306
-
307
- ## Implementation Order
308
-
309
- | Step | Feature | Rationale |
310
- |---|---|---|
311
- | 1 | Feature A — BuiltinGuardrails | Self-contained, no dependencies, highest safety impact |
312
- | 2 | Feature B — Caller identity | Small change, high accountability value |
313
- | 3 | Feature C — Encryptor I/F | More complex, depends on no other feature |
314
- | 4 | Feature D — TTL / purge | Touches storage layer, do last to avoid churn |
315
-
316
- Each feature follows the same workflow:
317
- 1. Implement source files
318
- 2. Run StandardRB on new files
319
- 3. Run unit tests: `bundle exec rspec <spec_file>`
320
- 4. Run full unit suite: `bundle exec rspec --format progress`
321
- 5. Run integration suite: `bundle exec rspec --tag integration --format progress`
322
- 6. Present diff for commit approval
323
-
324
- ---
325
-
326
- ## Out of Scope (This Branch)
327
-
328
- - Fairness metrics / demographic parity (`Eval::Metrics` extension) — domain-specific,
329
- belongs to application layer
330
- - Kill switch / forced shutdown — infrastructure concern
331
- - Differential privacy — academic/research topic, not yet practical for gem scope
332
- - Authentication / authorisation — application / infrastructure concern