phronomy 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +56 -0
- data/README.md +49 -38
- data/docs/trustworthy_ai_enhancements.md +4 -4
- data/lib/generators/phronomy/install/templates/create_phronomy_messages.rb.tt +1 -1
- data/lib/phronomy/actor.rb +68 -0
- data/lib/phronomy/agent/base.rb +125 -91
- data/lib/phronomy/agent/handoff.rb +2 -2
- data/lib/phronomy/agent/react_agent.rb +51 -33
- data/lib/phronomy/context/assembler.rb +11 -3
- data/lib/phronomy/context/compaction_context.rb +1 -3
- data/lib/phronomy/context/context_version_cache.rb +7 -16
- data/lib/phronomy/eval/runner.rb +39 -11
- data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +47 -3
- data/lib/phronomy/memory/compression/summary.rb +4 -3
- data/lib/phronomy/memory/compression/tool_output_pruner.rb +11 -6
- data/lib/phronomy/memory/conversation_manager.rb +25 -16
- data/lib/phronomy/memory/retrieval/semantic.rb +21 -5
- data/lib/phronomy/memory/storage/active_record.rb +32 -10
- data/lib/phronomy/memory/storage/base.rb +22 -0
- data/lib/phronomy/memory/storage/in_memory.rb +65 -26
- data/lib/phronomy/state_store/active_record.rb +1 -1
- data/lib/phronomy/state_store/base.rb +14 -16
- data/lib/phronomy/state_store/in_memory.rb +23 -9
- data/lib/phronomy/state_store/redis.rb +1 -1
- data/lib/phronomy/thread_actor_registry.rb +52 -0
- data/lib/phronomy/tool/base.rb +9 -2
- data/lib/phronomy/tool/mcp_tool.rb +28 -4
- data/lib/phronomy/tracing/base.rb +0 -2
- data/lib/phronomy/tracing/langfuse_tracer.rb +24 -6
- data/lib/phronomy/tracing/null_tracer.rb +6 -3
- data/lib/phronomy/trust_pipeline.rb +60 -52
- data/lib/phronomy/vector_store/redis_search.rb +28 -23
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow.rb +281 -0
- data/lib/phronomy/workflow_context.rb +119 -0
- data/lib/phronomy/workflow_runner.rb +262 -0
- data/lib/phronomy.rb +30 -34
- metadata +25 -10
- data/lib/phronomy/graph/compiled_graph.rb +0 -183
- data/lib/phronomy/graph/parallel_node.rb +0 -193
- data/lib/phronomy/graph/state.rb +0 -105
- data/lib/phronomy/graph/state_graph.rb +0 -148
- data/lib/phronomy/graph.rb +0 -13
|
@@ -5,7 +5,11 @@ module Phronomy
|
|
|
5
5
|
# ReAct pattern (Reasoning + Acting) agent.
|
|
6
6
|
# Repeats the LLM <-> Tool loop until no more tool calls are made.
|
|
7
7
|
class ReactAgent < Base
|
|
8
|
-
|
|
8
|
+
private
|
|
9
|
+
|
|
10
|
+
# Performs a single (non-retried) ReAct invocation.
|
|
11
|
+
# Overrides Base#invoke_once so that Base#invoke's retry loop is inherited.
|
|
12
|
+
def invoke_once(input, config: {})
|
|
9
13
|
caller_meta = {}
|
|
10
14
|
caller_meta[:user_id] = config[:user_id] if config[:user_id]
|
|
11
15
|
caller_meta[:session_id] = config[:session_id] if config[:session_id]
|
|
@@ -43,7 +47,11 @@ module Phronomy
|
|
|
43
47
|
|
|
44
48
|
save_to_memory(memory, thread_id: thread_id, messages: messages) if memory && thread_id
|
|
45
49
|
|
|
46
|
-
|
|
50
|
+
# Fall back to the last message that carries non-nil content. This
|
|
51
|
+
# guards against the case where the final message is a tool-call or
|
|
52
|
+
# tool-result message (content == nil) when max_iterations is
|
|
53
|
+
# exhausted before the model produces a text reply.
|
|
54
|
+
output = messages.reverse.find { |m| m.content && !m.content.empty? }&.content
|
|
47
55
|
|
|
48
56
|
# Run output guardrails before returning to the caller.
|
|
49
57
|
run_output_guardrails!(output)
|
|
@@ -53,6 +61,8 @@ module Phronomy
|
|
|
53
61
|
end
|
|
54
62
|
end
|
|
55
63
|
|
|
64
|
+
public
|
|
65
|
+
|
|
56
66
|
# Streaming version of #invoke for the ReAct loop.
|
|
57
67
|
# Yields {Phronomy::Agent::StreamEvent} events while the LLM-tool loop runs.
|
|
58
68
|
#
|
|
@@ -63,42 +73,50 @@ module Phronomy
|
|
|
63
73
|
def stream(input, config: {}, &block)
|
|
64
74
|
return invoke(input, config: config) unless block
|
|
65
75
|
|
|
66
|
-
|
|
76
|
+
caller_meta = {}
|
|
77
|
+
caller_meta[:user_id] = config[:user_id] if config[:user_id]
|
|
78
|
+
caller_meta[:session_id] = config[:session_id] if config[:session_id]
|
|
67
79
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
max_iter = self.class.max_iterations
|
|
80
|
+
trace("agent.invoke", input: input, **caller_meta) do |_span|
|
|
81
|
+
run_input_guardrails!(input)
|
|
71
82
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
[]
|
|
76
|
-
end
|
|
83
|
+
memory = config[:memory]
|
|
84
|
+
thread_id = config[:thread_id]
|
|
85
|
+
max_iter = self.class.max_iterations
|
|
77
86
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
max_iter.times do
|
|
84
|
-
response = stream_step(messages, input, user_asked: user_asked, config: config, &block)
|
|
85
|
-
user_asked = true
|
|
86
|
-
messages = response[:messages]
|
|
87
|
-
total_usage += response[:usage]
|
|
88
|
-
if response[:done]
|
|
89
|
-
iterations_exhausted = false
|
|
90
|
-
break
|
|
87
|
+
initial_messages = if memory && thread_id
|
|
88
|
+
load_from_memory(memory, thread_id: thread_id, query: extract_message(input))
|
|
89
|
+
else
|
|
90
|
+
[]
|
|
91
91
|
end
|
|
92
|
-
end
|
|
93
92
|
|
|
94
|
-
|
|
93
|
+
messages = initial_messages.dup
|
|
94
|
+
user_asked = false
|
|
95
|
+
total_usage = Phronomy::TokenUsage.zero
|
|
96
|
+
iterations_exhausted = true
|
|
97
|
+
|
|
98
|
+
max_iter.times do
|
|
99
|
+
response = stream_step(messages, input, user_asked: user_asked, config: config, &block)
|
|
100
|
+
user_asked = true
|
|
101
|
+
messages = response[:messages]
|
|
102
|
+
total_usage += response[:usage]
|
|
103
|
+
if response[:done]
|
|
104
|
+
iterations_exhausted = false
|
|
105
|
+
break
|
|
106
|
+
end
|
|
107
|
+
end
|
|
95
108
|
|
|
96
|
-
|
|
97
|
-
run_output_guardrails!(output)
|
|
109
|
+
save_to_memory(memory, thread_id: thread_id, messages: messages) if memory && thread_id
|
|
98
110
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
111
|
+
# Fall back to the last message that carries non-nil content (same as
|
|
112
|
+
# the non-streaming path above).
|
|
113
|
+
output = messages.reverse.find { |m| m.content && !m.content.empty? }&.content
|
|
114
|
+
run_output_guardrails!(output)
|
|
115
|
+
|
|
116
|
+
result = {output: output, messages: messages, usage: total_usage, iterations_exhausted: iterations_exhausted}
|
|
117
|
+
block.call(StreamEvent.new(type: :done, payload: result))
|
|
118
|
+
[result, total_usage]
|
|
119
|
+
end
|
|
102
120
|
rescue => e
|
|
103
121
|
block&.call(StreamEvent.new(type: :error, payload: {error: e}))
|
|
104
122
|
raise
|
|
@@ -136,8 +154,8 @@ module Phronomy
|
|
|
136
154
|
chat = build_chat
|
|
137
155
|
messages.each { |m| chat.add_message(m) }
|
|
138
156
|
|
|
139
|
-
chat.
|
|
140
|
-
chat.
|
|
157
|
+
chat.before_tool_call { |tc| block.call(StreamEvent.new(type: :tool_call, payload: {tool_call: tc})) }
|
|
158
|
+
chat.after_tool_result { |tr| block.call(StreamEvent.new(type: :tool_result, payload: {tool_result: tr})) }
|
|
141
159
|
|
|
142
160
|
# Run before_completion hooks before each LLM call in the streaming loop.
|
|
143
161
|
run_before_completion_hooks!(chat, config)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "cgi"
|
|
4
|
+
|
|
3
5
|
module Phronomy
|
|
4
6
|
module Context
|
|
5
7
|
# Assembler collects all four context regions and produces the final
|
|
@@ -34,7 +36,7 @@ module Phronomy
|
|
|
34
36
|
# @param trusted [Boolean]
|
|
35
37
|
# @return [String]
|
|
36
38
|
def self.xml_tag(text, type:, trusted: false)
|
|
37
|
-
"<context type=\"#{type}\" trusted=\"#{trusted}\">\n#{text}\n</context>"
|
|
39
|
+
"<context type=\"#{CGI.escapeHTML(type.to_s)}\" trusted=\"#{trusted}\">\n#{CGI.escapeHTML(text.to_s)}\n</context>"
|
|
38
40
|
end
|
|
39
41
|
|
|
40
42
|
# @param budget [Phronomy::Context::TokenBudget, nil]
|
|
@@ -104,8 +106,8 @@ module Phronomy
|
|
|
104
106
|
private
|
|
105
107
|
|
|
106
108
|
def xml_context_tag(chunk)
|
|
107
|
-
src_attr = chunk[:source] ? " source=\"#{chunk[:source]}\"" : ""
|
|
108
|
-
"<context type=\"#{chunk[:type]}\"#{src_attr} trusted=\"#{chunk[:trusted]}\">\n#{chunk[:text]}\n</context>"
|
|
109
|
+
src_attr = chunk[:source] ? " source=\"#{CGI.escapeHTML(chunk[:source].to_s)}\"" : ""
|
|
110
|
+
"<context type=\"#{CGI.escapeHTML(chunk[:type].to_s)}\"#{src_attr} trusted=\"#{chunk[:trusted]}\">\n#{CGI.escapeHTML(chunk[:text].to_s)}\n</context>"
|
|
109
111
|
end
|
|
110
112
|
|
|
111
113
|
def trim_messages_to_budget(messages, system_text)
|
|
@@ -122,6 +124,12 @@ module Phronomy
|
|
|
122
124
|
accumulated += tokens
|
|
123
125
|
result.push(msg)
|
|
124
126
|
end
|
|
127
|
+
|
|
128
|
+
if result.empty? && messages.any?
|
|
129
|
+
warn "[Phronomy::Assembler] All #{messages.length} conversation message(s) dropped: " \
|
|
130
|
+
"token budget exhausted by system context (budget=#{@budget.context_window}, used_by_system=#{used})"
|
|
131
|
+
end
|
|
132
|
+
|
|
125
133
|
result.reverse
|
|
126
134
|
end
|
|
127
135
|
end
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "ostruct"
|
|
4
|
-
|
|
5
3
|
module Phronomy
|
|
6
4
|
module Context
|
|
7
5
|
# Context object passed to the +on_compact+ callback registered on an agent.
|
|
@@ -103,7 +101,7 @@ module Phronomy
|
|
|
103
101
|
end
|
|
104
102
|
|
|
105
103
|
remaining = (@message_elements[(last_idx + 1)..] || []).map { |e| e[:message] }
|
|
106
|
-
summary_msg =
|
|
104
|
+
summary_msg = RubyLLM::Message.new(role: :system, content: summary_text)
|
|
107
105
|
@result_messages = [summary_msg] + remaining
|
|
108
106
|
end
|
|
109
107
|
end
|
|
@@ -2,20 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
module Phronomy
|
|
4
4
|
module Context
|
|
5
|
-
# Caches the assembled static system prompt text
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
# instruction text and the content of all registered static knowledge
|
|
9
|
-
# sources. When the fingerprint matches the stored value the previously
|
|
10
|
-
# assembled system_text is reused without re-fetching any sources.
|
|
11
|
-
#
|
|
12
|
-
# A cache miss (fingerprint changed or first call) triggers a full
|
|
13
|
-
# rebuild: instruction + static-knowledge XML tags are concatenated and
|
|
14
|
-
# the result is stored alongside the new fingerprint.
|
|
15
|
-
#
|
|
16
|
-
# Each agent *instance* holds one cache object. The cache persists across
|
|
17
|
-
# #invoke calls on the same instance, which is the typical usage pattern
|
|
18
|
-
# for long-running agents.
|
|
5
|
+
# Caches the assembled static system prompt text keyed by a SHA-256
|
|
6
|
+
# fingerprint of the agent's instructions + static knowledge content.
|
|
7
|
+
# Each instance is owned by one thread (stored in +Thread.current+).
|
|
19
8
|
class ContextVersionCache
|
|
20
9
|
# @return [String, nil] last stored fingerprint
|
|
21
10
|
attr_reader :fingerprint
|
|
@@ -27,7 +16,9 @@ module Phronomy
|
|
|
27
16
|
attr_reader :system_tokens
|
|
28
17
|
|
|
29
18
|
def initialize
|
|
30
|
-
|
|
19
|
+
@fingerprint = nil
|
|
20
|
+
@system_text = nil
|
|
21
|
+
@system_tokens = 0
|
|
31
22
|
end
|
|
32
23
|
|
|
33
24
|
# Returns true when the given fingerprint matches the stored one.
|
|
@@ -35,7 +26,7 @@ module Phronomy
|
|
|
35
26
|
# @param fingerprint [String] SHA-256 hex digest to compare
|
|
36
27
|
# @return [Boolean]
|
|
37
28
|
def valid?(fingerprint)
|
|
38
|
-
!@fingerprint.nil? && @fingerprint == fingerprint
|
|
29
|
+
!@fingerprint.nil? && !@system_text.nil? && @fingerprint == fingerprint
|
|
39
30
|
end
|
|
40
31
|
|
|
41
32
|
# Update the cache with a new fingerprint and system text.
|
data/lib/phronomy/eval/runner.rb
CHANGED
|
@@ -22,24 +22,52 @@ module Phronomy
|
|
|
22
22
|
@scorer = scorer
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
-
# @param dataset
|
|
26
|
-
# @param callable
|
|
25
|
+
# @param dataset [Dataset] collection of EvalCase objects
|
|
26
|
+
# @param callable [#call] accepts a single String argument
|
|
27
|
+
# @param concurrency [Integer] number of parallel threads (default: 1, sequential)
|
|
27
28
|
# @return [Array<EvalResult>]
|
|
28
|
-
def run(dataset, callable)
|
|
29
|
-
dataset.
|
|
30
|
-
|
|
31
|
-
result = callable.call(eval_case.input)
|
|
32
|
-
latency_ms = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond) - t0
|
|
29
|
+
def run(dataset, callable, concurrency: 1)
|
|
30
|
+
cases = dataset.to_a
|
|
31
|
+
return cases.map { |eval_case| run_one(eval_case, callable) } if concurrency <= 1
|
|
33
32
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
33
|
+
# Run cases in slices of +concurrency+ threads. Each slice is joined
|
|
34
|
+
# before the next starts, bounding peak thread count to +concurrency+.
|
|
35
|
+
# Writing to pre-allocated slots (one per thread) is safe because each
|
|
36
|
+
# thread writes to a unique index and all threads in a slice are joined
|
|
37
|
+
# before the next slice begins.
|
|
38
|
+
# Exceptions in worker threads are collected and re-raised after all
|
|
39
|
+
# threads in the slice are joined, preventing orphaned threads.
|
|
40
|
+
results = Array.new(cases.length)
|
|
41
|
+
cases.each_with_index.each_slice(concurrency) do |batch|
|
|
42
|
+
errors = []
|
|
43
|
+
errors_mu = Mutex.new
|
|
44
|
+
threads = batch.map do |eval_case, i|
|
|
45
|
+
Thread.new do
|
|
46
|
+
results[i] = run_one(eval_case, callable)
|
|
47
|
+
rescue => e
|
|
48
|
+
errors_mu.synchronize { errors << e }
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
threads.each(&:join)
|
|
52
|
+
raise errors.first if errors.any?
|
|
38
53
|
end
|
|
54
|
+
results
|
|
39
55
|
end
|
|
40
56
|
|
|
41
57
|
private
|
|
42
58
|
|
|
59
|
+
# Evaluate a single EvalCase with the given callable and return an EvalResult.
|
|
60
|
+
def run_one(eval_case, callable)
|
|
61
|
+
t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
|
|
62
|
+
result = callable.call(eval_case.input)
|
|
63
|
+
latency_ms = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond) - t0
|
|
64
|
+
|
|
65
|
+
actual, usage = extract(result)
|
|
66
|
+
score, score_error = score_safely(@scorer, actual: actual, expected: eval_case.expected, input: eval_case.input)
|
|
67
|
+
|
|
68
|
+
EvalResult.new(eval_case: eval_case, actual: actual, score: score, usage: usage, latency_ms: latency_ms, error: score_error)
|
|
69
|
+
end
|
|
70
|
+
|
|
43
71
|
# Normalises the callable's return value into [actual_string, usage_or_nil].
|
|
44
72
|
def extract(result)
|
|
45
73
|
if result.is_a?(Hash)
|
|
@@ -25,14 +25,20 @@ module Phronomy
|
|
|
25
25
|
# Recognised PII categories and their detection patterns.
|
|
26
26
|
PATTERNS = {
|
|
27
27
|
# Japanese My Number: 12 consecutive or grouped digits (4-4-4).
|
|
28
|
+
# Matched candidates are additionally validated with the official check-digit
|
|
29
|
+
# algorithm (JIS X 0076) to eliminate false positives from arbitrary 12-digit strings.
|
|
28
30
|
my_number: {
|
|
29
31
|
pattern: /(?<!\d)(?<!\d[- ])\d{4}[- ]?\d{4}[- ]?\d{4}(?![- ]?\d)/,
|
|
30
|
-
label: "My Number"
|
|
32
|
+
label: "My Number",
|
|
33
|
+
validate_my_number: true
|
|
31
34
|
},
|
|
32
35
|
# Credit / debit card: 16 digits, optionally separated by spaces or hyphens.
|
|
36
|
+
# Matched candidates are additionally validated with the Luhn algorithm
|
|
37
|
+
# to eliminate false positives from arbitrary 16-digit sequences.
|
|
33
38
|
credit_card: {
|
|
34
39
|
pattern: /\b(?:\d{4}[- ]?){3}\d{4}\b/,
|
|
35
|
-
label: "credit card number"
|
|
40
|
+
label: "credit card number",
|
|
41
|
+
validate_luhn: true
|
|
36
42
|
},
|
|
37
43
|
# Email address (simplified RFC 5322).
|
|
38
44
|
email: {
|
|
@@ -64,9 +70,47 @@ module Phronomy
|
|
|
64
70
|
def check(value)
|
|
65
71
|
text = value.to_s
|
|
66
72
|
@active_patterns.each do |entry|
|
|
67
|
-
|
|
73
|
+
detected = if entry[:validate_luhn]
|
|
74
|
+
# Scan for all candidates then filter by Luhn check-digit validation.
|
|
75
|
+
# This avoids false positives on arbitrary 16-digit strings (e.g. internal IDs).
|
|
76
|
+
text.scan(entry[:pattern]).any? { |m| luhn_valid?(m.gsub(/[- ]/, "")) }
|
|
77
|
+
elsif entry[:validate_my_number]
|
|
78
|
+
# Scan for all candidates then apply the JIS X 0076 check-digit algorithm.
|
|
79
|
+
# This avoids false positives on arbitrary 12-digit strings.
|
|
80
|
+
text.scan(entry[:pattern]).any? { |m| my_number_valid?(m.gsub(/[- ]/, "")) }
|
|
81
|
+
else
|
|
82
|
+
text.match?(entry[:pattern])
|
|
83
|
+
end
|
|
84
|
+
fail!("PII detected in input: #{entry[:label]}") if detected
|
|
68
85
|
end
|
|
69
86
|
end
|
|
87
|
+
|
|
88
|
+
private
|
|
89
|
+
|
|
90
|
+
# Returns true when +digits+ (a 12-character string of decimal digits) satisfies
|
|
91
|
+
# the Japanese My Number check-digit algorithm defined in JIS X 0076.
|
|
92
|
+
# The check digit is the 12th digit.
|
|
93
|
+
def my_number_valid?(digits)
|
|
94
|
+
weights = [6, 5, 4, 3, 2, 7, 6, 5, 4, 3, 2]
|
|
95
|
+
total = weights.each_with_index.sum { |w, i| w * digits[i].to_i }
|
|
96
|
+
remainder = total % 11
|
|
97
|
+
check = (remainder <= 1) ? 0 : 11 - remainder
|
|
98
|
+
check == digits[11].to_i
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Returns true when +digits+ (a string of decimal digits) satisfies the
|
|
102
|
+
# Luhn check-digit algorithm used by payment card networks.
|
|
103
|
+
def luhn_valid?(digits)
|
|
104
|
+
digits.chars.reverse.each_with_index.sum do |d, i|
|
|
105
|
+
n = d.to_i
|
|
106
|
+
if i.odd?
|
|
107
|
+
doubled = n * 2
|
|
108
|
+
(doubled > 9) ? (doubled - 9) : doubled
|
|
109
|
+
else
|
|
110
|
+
n
|
|
111
|
+
end
|
|
112
|
+
end % 10 == 0
|
|
113
|
+
end
|
|
70
114
|
end
|
|
71
115
|
end
|
|
72
116
|
end
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "ostruct"
|
|
4
|
-
|
|
5
3
|
module Phronomy
|
|
6
4
|
module Memory
|
|
7
5
|
module Compression
|
|
@@ -64,6 +62,9 @@ module Phronomy
|
|
|
64
62
|
else
|
|
65
63
|
{messages: messages, compaction: nil}
|
|
66
64
|
end
|
|
65
|
+
rescue => e
|
|
66
|
+
warn "[Phronomy] Compression failed (#{e.class}: #{e.message}); saving without compaction."
|
|
67
|
+
{messages: messages, compaction: nil}
|
|
67
68
|
end
|
|
68
69
|
|
|
69
70
|
private
|
|
@@ -98,7 +99,7 @@ module Phronomy
|
|
|
98
99
|
#{text}
|
|
99
100
|
</context>
|
|
100
101
|
CONTEXT
|
|
101
|
-
|
|
102
|
+
RubyLLM::Message.new(role: :system, content: content)
|
|
102
103
|
end
|
|
103
104
|
end
|
|
104
105
|
end
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "ostruct"
|
|
4
|
-
|
|
5
3
|
module Phronomy
|
|
6
4
|
module Memory
|
|
7
5
|
module Compression
|
|
@@ -25,6 +23,11 @@ module Phronomy
|
|
|
25
23
|
class ToolOutputPruner < Base
|
|
26
24
|
TRUNCATION_NOTE = "\n[... output truncated ...]"
|
|
27
25
|
|
|
26
|
+
# Internal value object for cloned messages.
|
|
27
|
+
# Uses Struct (not OpenStruct) so that unknown attribute access raises NoMethodError.
|
|
28
|
+
ClonedMessage = Struct.new(:role, :content, :tool_calls, :model_id, keyword_init: true)
|
|
29
|
+
private_constant :ClonedMessage
|
|
30
|
+
|
|
28
31
|
# @param max_chars [Integer] maximum character length for tool-result content
|
|
29
32
|
def initialize(max_chars: 4000)
|
|
30
33
|
@max_chars = max_chars
|
|
@@ -51,10 +54,12 @@ module Phronomy
|
|
|
51
54
|
private
|
|
52
55
|
|
|
53
56
|
def clone_message(original, new_content)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
57
|
+
ClonedMessage.new(
|
|
58
|
+
role: original.role,
|
|
59
|
+
content: new_content,
|
|
60
|
+
tool_calls: (original.tool_calls if original.respond_to?(:tool_calls)),
|
|
61
|
+
model_id: (original.model_id if original.respond_to?(:model_id))
|
|
62
|
+
)
|
|
58
63
|
end
|
|
59
64
|
end
|
|
60
65
|
end
|
|
@@ -48,7 +48,6 @@ module Phronomy
|
|
|
48
48
|
@retrieval = retrieval
|
|
49
49
|
@compression = compression
|
|
50
50
|
@ttl = ttl
|
|
51
|
-
@append_mutex = Mutex.new
|
|
52
51
|
end
|
|
53
52
|
|
|
54
53
|
# Load conversation messages for a thread, applying retrieval selection.
|
|
@@ -83,8 +82,10 @@ module Phronomy
|
|
|
83
82
|
# @param thread_id [String]
|
|
84
83
|
# @param messages [Array] full conversation history up to this point
|
|
85
84
|
def save(thread_id:, messages:)
|
|
86
|
-
|
|
87
|
-
|
|
85
|
+
@storage.with_thread_lock(thread_id: thread_id) do
|
|
86
|
+
append_new_messages(thread_id: thread_id, messages: messages)
|
|
87
|
+
compress_and_save(thread_id: thread_id, messages: messages)
|
|
88
|
+
end
|
|
88
89
|
@retrieval.index(thread_id: thread_id, messages: messages) if @retrieval.respond_to?(:index)
|
|
89
90
|
end
|
|
90
91
|
|
|
@@ -125,21 +126,24 @@ module Phronomy
|
|
|
125
126
|
private
|
|
126
127
|
|
|
127
128
|
# Append messages that are new since the last save to the raw history.
|
|
129
|
+
# Must be called while holding the per-thread lock (via Storage#with_thread_lock).
|
|
128
130
|
# Messages are append-only; existing raw entries are never modified.
|
|
131
|
+
#
|
|
132
|
+
# The next seq number is derived from Storage#next_seq, which owns the
|
|
133
|
+
# high-water-mark counter. This survives TTL purges because Storage tracks
|
|
134
|
+
# the HWM independently of the stored raw entries.
|
|
129
135
|
def append_new_messages(thread_id:, messages:)
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
@
|
|
133
|
-
raw = @storage.load_raw(thread_id: thread_id)
|
|
134
|
-
starting_seq = raw.length
|
|
135
|
-
new_messages = messages[starting_seq..]
|
|
136
|
-
@storage.append_raw(thread_id: thread_id, messages: new_messages, starting_seq: starting_seq) if new_messages&.any?
|
|
137
|
-
end
|
|
136
|
+
next_seq = @storage.next_seq(thread_id: thread_id)
|
|
137
|
+
new_messages = messages[next_seq..]
|
|
138
|
+
@storage.append_raw(thread_id: thread_id, messages: new_messages, starting_seq: next_seq) if new_messages&.any?
|
|
138
139
|
end
|
|
139
140
|
|
|
140
141
|
# Apply the configured compression strategy and persist the result.
|
|
141
142
|
# When no strategy is configured, saves messages directly to the legacy store.
|
|
142
143
|
# When compression fires, also persists the compaction record.
|
|
144
|
+
# If the compression strategy raises (e.g. LLM timeout), we fall back to
|
|
145
|
+
# saving the messages without compaction so the conversation is never lost
|
|
146
|
+
# due to a transient summarization failure (Issue #58).
|
|
143
147
|
def compress_and_save(thread_id:, messages:)
|
|
144
148
|
unless @compression
|
|
145
149
|
@storage.save(thread_id: thread_id, messages: messages)
|
|
@@ -151,11 +155,16 @@ module Phronomy
|
|
|
151
155
|
all_raw = @storage.load_raw(thread_id: thread_id)
|
|
152
156
|
uncompacted = all_raw.select { |r| r[:seq] >= uncompacted_start_seq }.map { |r| r[:message] }
|
|
153
157
|
|
|
154
|
-
result =
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
158
|
+
result = begin
|
|
159
|
+
@compression.compress(
|
|
160
|
+
thread_id: thread_id,
|
|
161
|
+
messages: uncompacted,
|
|
162
|
+
seq_offset: uncompacted_start_seq
|
|
163
|
+
)
|
|
164
|
+
rescue => e
|
|
165
|
+
warn "[Phronomy] Compression failed (#{e.class}: #{e.message}); saving without compaction."
|
|
166
|
+
{messages: messages, compaction: nil}
|
|
167
|
+
end
|
|
159
168
|
|
|
160
169
|
if result[:compaction]
|
|
161
170
|
@storage.save_compaction(
|
|
@@ -28,22 +28,37 @@ module Phronomy
|
|
|
28
28
|
@index = {} # id => message (insertion-ordered via Ruby Hash)
|
|
29
29
|
@counter = 0
|
|
30
30
|
@max_index_size = max_index_size
|
|
31
|
-
@
|
|
31
|
+
@actor = Phronomy::Actor.new
|
|
32
|
+
@indexed_object_ids = {} # thread_id => { object_id => true }
|
|
32
33
|
end
|
|
33
34
|
|
|
34
35
|
# Index a new batch of messages so they are searchable on future #select calls.
|
|
35
36
|
# Called by ConversationManager#save.
|
|
36
37
|
#
|
|
38
|
+
# Messages are deduplicated by object identity: if a message object has already
|
|
39
|
+
# been indexed for the given thread_id, it is skipped (no duplicate embed call).
|
|
40
|
+
#
|
|
37
41
|
# @param thread_id [String]
|
|
38
42
|
# @param messages [Array]
|
|
39
43
|
def index(thread_id:, messages:)
|
|
40
44
|
messages.each do |msg|
|
|
45
|
+
# Fast path: skip already-indexed messages without calling embed.
|
|
46
|
+
already_indexed = @actor.call do
|
|
47
|
+
(@indexed_object_ids[thread_id] ||= {})[msg.object_id]
|
|
48
|
+
end
|
|
49
|
+
next if already_indexed
|
|
50
|
+
|
|
41
51
|
embedding = @embeddings.embed(msg.content.to_s)
|
|
42
|
-
@
|
|
52
|
+
@actor.call do
|
|
53
|
+
# Re-check inside Actor to handle concurrent callers for the same thread.
|
|
54
|
+
indexed = (@indexed_object_ids[thread_id] ||= {})
|
|
55
|
+
next if indexed[msg.object_id]
|
|
56
|
+
|
|
43
57
|
id = "#{thread_id}:#{@counter}"
|
|
44
58
|
@counter += 1
|
|
45
59
|
@store.add(id: id, embedding: embedding, metadata: {thread_id: thread_id, message: msg})
|
|
46
60
|
@index[id] = msg
|
|
61
|
+
indexed[msg.object_id] = true
|
|
47
62
|
evict_oldest! if @max_index_size && @index.size > @max_index_size
|
|
48
63
|
end
|
|
49
64
|
end
|
|
@@ -53,12 +68,13 @@ module Phronomy
|
|
|
53
68
|
#
|
|
54
69
|
# @param thread_id [String]
|
|
55
70
|
def clear_index(thread_id:)
|
|
56
|
-
@
|
|
71
|
+
@actor.call do
|
|
57
72
|
ids = @index.keys.select { |id| id.start_with?("#{thread_id}:") }
|
|
58
73
|
ids.each do |id|
|
|
59
74
|
@index.delete(id)
|
|
60
75
|
@store.remove(id: id)
|
|
61
76
|
end
|
|
77
|
+
@indexed_object_ids.delete(thread_id)
|
|
62
78
|
end
|
|
63
79
|
end
|
|
64
80
|
|
|
@@ -71,7 +87,7 @@ module Phronomy
|
|
|
71
87
|
def select(messages, query: nil, thread_id: nil)
|
|
72
88
|
if query && !query.strip.empty?
|
|
73
89
|
query_embedding = @embeddings.embed(query)
|
|
74
|
-
results = @store.search(query_embedding: query_embedding, k: @k * 3)
|
|
90
|
+
results = @actor.call { @store.search(query_embedding: query_embedding, k: @k * 3) }
|
|
75
91
|
results
|
|
76
92
|
.select { |r| thread_id.nil? || r[:metadata][:thread_id] == thread_id }
|
|
77
93
|
.first(@k)
|
|
@@ -84,7 +100,7 @@ module Phronomy
|
|
|
84
100
|
private
|
|
85
101
|
|
|
86
102
|
# Evicts the oldest index entry to enforce max_index_size.
|
|
87
|
-
# Must be called inside
|
|
103
|
+
# Must be called inside the Actor.
|
|
88
104
|
def evict_oldest!
|
|
89
105
|
oldest_id = @index.keys.first
|
|
90
106
|
return unless oldest_id
|
|
@@ -75,7 +75,7 @@ module Phronomy
|
|
|
75
75
|
@model_class.create!(
|
|
76
76
|
thread_id: thread_id,
|
|
77
77
|
role: msg.role.to_s,
|
|
78
|
-
content: msg.content
|
|
78
|
+
content: msg.content,
|
|
79
79
|
tool_calls_json: serialize_tool_calls(msg),
|
|
80
80
|
model_id: (msg.model_id if msg.respond_to?(:model_id))
|
|
81
81
|
)
|
|
@@ -100,15 +100,17 @@ module Phronomy
|
|
|
100
100
|
def append_raw(thread_id:, messages:, starting_seq:)
|
|
101
101
|
return unless @raw_model_class
|
|
102
102
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
103
|
+
@raw_model_class.transaction do
|
|
104
|
+
messages.each_with_index do |msg, i|
|
|
105
|
+
@raw_model_class.create!(
|
|
106
|
+
thread_id: thread_id,
|
|
107
|
+
seq: starting_seq + i,
|
|
108
|
+
role: msg.role.to_s,
|
|
109
|
+
content: msg.content,
|
|
110
|
+
tool_calls_json: serialize_tool_calls(msg),
|
|
111
|
+
model_id: (msg.model_id if msg.respond_to?(:model_id))
|
|
112
|
+
)
|
|
113
|
+
end
|
|
112
114
|
end
|
|
113
115
|
end
|
|
114
116
|
|
|
@@ -168,6 +170,26 @@ module Phronomy
|
|
|
168
170
|
@model_class.where(thread_id: thread_id).where("created_at < ?", older_than).delete_all
|
|
169
171
|
end
|
|
170
172
|
|
|
173
|
+
# Returns the next seq number to use for new raw messages for +thread_id+.
|
|
174
|
+
# Derived from MAX(seq) in the database; since purge_older_than does not
|
|
175
|
+
# touch raw records, this value is always correct.
|
|
176
|
+
#
|
|
177
|
+
# @param thread_id [String]
|
|
178
|
+
# @return [Integer]
|
|
179
|
+
def next_seq(thread_id:)
|
|
180
|
+
return 0 unless @raw_model_class
|
|
181
|
+
|
|
182
|
+
((@raw_model_class.where(thread_id: thread_id).maximum(:seq) || -1) + 1)
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
# Delegates to the block directly; serialisation of concurrent saves
|
|
186
|
+
# for the same thread_id is the caller's responsibility (e.g. DB-level
|
|
187
|
+
# transaction isolation or application-layer queuing).
|
|
188
|
+
# @param thread_id [String]
|
|
189
|
+
def with_thread_lock(thread_id:)
|
|
190
|
+
yield
|
|
191
|
+
end
|
|
192
|
+
|
|
171
193
|
private
|
|
172
194
|
|
|
173
195
|
def ensure_raw_model!
|
|
@@ -127,6 +127,28 @@ module Phronomy
|
|
|
127
127
|
def purge_older_than(thread_id:, older_than:)
|
|
128
128
|
# no-op by default
|
|
129
129
|
end
|
|
130
|
+
|
|
131
|
+
# Returns the next seq number to assign when appending new raw messages
|
|
132
|
+
# for +thread_id+. Must be monotonically increasing and must survive
|
|
133
|
+
# purge_older_than (i.e. the counter must not reset when old raw records
|
|
134
|
+
# are deleted by a TTL purge).
|
|
135
|
+
#
|
|
136
|
+
# @param thread_id [String]
|
|
137
|
+
# @return [Integer]
|
|
138
|
+
def next_seq(thread_id:)
|
|
139
|
+
raise NotImplementedError, "#{self.class}#next_seq is not implemented"
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
# Executes the block while holding a per-thread-id lock for +thread_id+.
|
|
143
|
+
# Used by ConversationManager to prevent concurrent compaction for the
|
|
144
|
+
# same thread. The default implementation yields without locking; backends
|
|
145
|
+
# that require serialisation should override this method.
|
|
146
|
+
#
|
|
147
|
+
# @param thread_id [String]
|
|
148
|
+
# @yield
|
|
149
|
+
def with_thread_lock(thread_id:)
|
|
150
|
+
yield
|
|
151
|
+
end
|
|
130
152
|
end
|
|
131
153
|
end
|
|
132
154
|
end
|