phronomy 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +56 -0
  3. data/README.md +49 -38
  4. data/docs/trustworthy_ai_enhancements.md +4 -4
  5. data/lib/generators/phronomy/install/templates/create_phronomy_messages.rb.tt +1 -1
  6. data/lib/phronomy/actor.rb +68 -0
  7. data/lib/phronomy/agent/base.rb +125 -91
  8. data/lib/phronomy/agent/handoff.rb +2 -2
  9. data/lib/phronomy/agent/react_agent.rb +51 -33
  10. data/lib/phronomy/context/assembler.rb +11 -3
  11. data/lib/phronomy/context/compaction_context.rb +1 -3
  12. data/lib/phronomy/context/context_version_cache.rb +7 -16
  13. data/lib/phronomy/eval/runner.rb +39 -11
  14. data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +47 -3
  15. data/lib/phronomy/memory/compression/summary.rb +4 -3
  16. data/lib/phronomy/memory/compression/tool_output_pruner.rb +11 -6
  17. data/lib/phronomy/memory/conversation_manager.rb +25 -16
  18. data/lib/phronomy/memory/retrieval/semantic.rb +21 -5
  19. data/lib/phronomy/memory/storage/active_record.rb +32 -10
  20. data/lib/phronomy/memory/storage/base.rb +22 -0
  21. data/lib/phronomy/memory/storage/in_memory.rb +65 -26
  22. data/lib/phronomy/state_store/active_record.rb +1 -1
  23. data/lib/phronomy/state_store/base.rb +14 -16
  24. data/lib/phronomy/state_store/in_memory.rb +23 -9
  25. data/lib/phronomy/state_store/redis.rb +1 -1
  26. data/lib/phronomy/thread_actor_registry.rb +52 -0
  27. data/lib/phronomy/tool/base.rb +9 -2
  28. data/lib/phronomy/tool/mcp_tool.rb +28 -4
  29. data/lib/phronomy/tracing/base.rb +0 -2
  30. data/lib/phronomy/tracing/langfuse_tracer.rb +24 -6
  31. data/lib/phronomy/tracing/null_tracer.rb +6 -3
  32. data/lib/phronomy/trust_pipeline.rb +60 -52
  33. data/lib/phronomy/vector_store/redis_search.rb +28 -23
  34. data/lib/phronomy/version.rb +1 -1
  35. data/lib/phronomy/workflow.rb +281 -0
  36. data/lib/phronomy/workflow_context.rb +119 -0
  37. data/lib/phronomy/workflow_runner.rb +262 -0
  38. data/lib/phronomy.rb +30 -34
  39. metadata +25 -10
  40. data/lib/phronomy/graph/compiled_graph.rb +0 -183
  41. data/lib/phronomy/graph/parallel_node.rb +0 -193
  42. data/lib/phronomy/graph/state.rb +0 -105
  43. data/lib/phronomy/graph/state_graph.rb +0 -148
  44. data/lib/phronomy/graph.rb +0 -13
@@ -5,7 +5,11 @@ module Phronomy
5
5
  # ReAct pattern (Reasoning + Acting) agent.
6
6
  # Repeats the LLM <-> Tool loop until no more tool calls are made.
7
7
  class ReactAgent < Base
8
- def invoke(input, config: {})
8
+ private
9
+
10
+ # Performs a single (non-retried) ReAct invocation.
11
+ # Overrides Base#invoke_once so that Base#invoke's retry loop is inherited.
12
+ def invoke_once(input, config: {})
9
13
  caller_meta = {}
10
14
  caller_meta[:user_id] = config[:user_id] if config[:user_id]
11
15
  caller_meta[:session_id] = config[:session_id] if config[:session_id]
@@ -43,7 +47,11 @@ module Phronomy
43
47
 
44
48
  save_to_memory(memory, thread_id: thread_id, messages: messages) if memory && thread_id
45
49
 
46
- output = messages.last&.content
50
+ # Fall back to the last message that carries non-empty content. This
51
+ # guards against the case where the final message is a tool-call or
52
+ # tool-result message (content == nil) when max_iterations is
53
+ # exhausted before the model produces a text reply.
54
+ output = messages.reverse.find { |m| m.content && !m.content.empty? }&.content
47
55
 
48
56
  # Run output guardrails before returning to the caller.
49
57
  run_output_guardrails!(output)
@@ -53,6 +61,8 @@ module Phronomy
53
61
  end
54
62
  end
55
63
 
64
+ public
65
+
56
66
  # Streaming version of #invoke for the ReAct loop.
57
67
  # Yields {Phronomy::Agent::StreamEvent} events while the LLM-tool loop runs.
58
68
  #
@@ -63,42 +73,50 @@ module Phronomy
63
73
  def stream(input, config: {}, &block)
64
74
  return invoke(input, config: config) unless block
65
75
 
66
- run_input_guardrails!(input)
76
+ caller_meta = {}
77
+ caller_meta[:user_id] = config[:user_id] if config[:user_id]
78
+ caller_meta[:session_id] = config[:session_id] if config[:session_id]
67
79
 
68
- memory = config[:memory]
69
- thread_id = config[:thread_id]
70
- max_iter = self.class.max_iterations
80
+ trace("agent.invoke", input: input, **caller_meta) do |_span|
81
+ run_input_guardrails!(input)
71
82
 
72
- initial_messages = if memory && thread_id
73
- load_from_memory(memory, thread_id: thread_id, query: extract_message(input))
74
- else
75
- []
76
- end
83
+ memory = config[:memory]
84
+ thread_id = config[:thread_id]
85
+ max_iter = self.class.max_iterations
77
86
 
78
- messages = initial_messages.dup
79
- user_asked = false
80
- total_usage = Phronomy::TokenUsage.zero
81
- iterations_exhausted = true
82
-
83
- max_iter.times do
84
- response = stream_step(messages, input, user_asked: user_asked, config: config, &block)
85
- user_asked = true
86
- messages = response[:messages]
87
- total_usage += response[:usage]
88
- if response[:done]
89
- iterations_exhausted = false
90
- break
87
+ initial_messages = if memory && thread_id
88
+ load_from_memory(memory, thread_id: thread_id, query: extract_message(input))
89
+ else
90
+ []
91
91
  end
92
- end
93
92
 
94
- save_to_memory(memory, thread_id: thread_id, messages: messages) if memory && thread_id
93
+ messages = initial_messages.dup
94
+ user_asked = false
95
+ total_usage = Phronomy::TokenUsage.zero
96
+ iterations_exhausted = true
97
+
98
+ max_iter.times do
99
+ response = stream_step(messages, input, user_asked: user_asked, config: config, &block)
100
+ user_asked = true
101
+ messages = response[:messages]
102
+ total_usage += response[:usage]
103
+ if response[:done]
104
+ iterations_exhausted = false
105
+ break
106
+ end
107
+ end
95
108
 
96
- output = messages.last&.content
97
- run_output_guardrails!(output)
109
+ save_to_memory(memory, thread_id: thread_id, messages: messages) if memory && thread_id
98
110
 
99
- result = {output: output, messages: messages, usage: total_usage, iterations_exhausted: iterations_exhausted}
100
- block.call(StreamEvent.new(type: :done, payload: result))
101
- result
111
+ # Fall back to the last message that carries non-empty content (same as
112
+ # the non-streaming path above).
113
+ output = messages.reverse.find { |m| m.content && !m.content.empty? }&.content
114
+ run_output_guardrails!(output)
115
+
116
+ result = {output: output, messages: messages, usage: total_usage, iterations_exhausted: iterations_exhausted}
117
+ block.call(StreamEvent.new(type: :done, payload: result))
118
+ [result, total_usage]
119
+ end
102
120
  rescue => e
103
121
  block&.call(StreamEvent.new(type: :error, payload: {error: e}))
104
122
  raise
@@ -136,8 +154,8 @@ module Phronomy
136
154
  chat = build_chat
137
155
  messages.each { |m| chat.add_message(m) }
138
156
 
139
- chat.on_tool_call { |tc| block.call(StreamEvent.new(type: :tool_call, payload: {tool_call: tc})) }
140
- chat.on_tool_result { |tr| block.call(StreamEvent.new(type: :tool_result, payload: {tool_result: tr})) }
157
+ chat.before_tool_call { |tc| block.call(StreamEvent.new(type: :tool_call, payload: {tool_call: tc})) }
158
+ chat.after_tool_result { |tr| block.call(StreamEvent.new(type: :tool_result, payload: {tool_result: tr})) }
141
159
 
142
160
  # Run before_completion hooks before each LLM call in the streaming loop.
143
161
  run_before_completion_hooks!(chat, config)
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "cgi"
4
+
3
5
  module Phronomy
4
6
  module Context
5
7
  # Assembler collects all four context regions and produces the final
@@ -34,7 +36,7 @@ module Phronomy
34
36
  # @param trusted [Boolean]
35
37
  # @return [String]
36
38
  def self.xml_tag(text, type:, trusted: false)
37
- "<context type=\"#{type}\" trusted=\"#{trusted}\">\n#{text}\n</context>"
39
+ "<context type=\"#{CGI.escapeHTML(type.to_s)}\" trusted=\"#{trusted}\">\n#{CGI.escapeHTML(text.to_s)}\n</context>"
38
40
  end
39
41
 
40
42
  # @param budget [Phronomy::Context::TokenBudget, nil]
@@ -104,8 +106,8 @@ module Phronomy
104
106
  private
105
107
 
106
108
  def xml_context_tag(chunk)
107
- src_attr = chunk[:source] ? " source=\"#{chunk[:source]}\"" : ""
108
- "<context type=\"#{chunk[:type]}\"#{src_attr} trusted=\"#{chunk[:trusted]}\">\n#{chunk[:text]}\n</context>"
109
+ src_attr = chunk[:source] ? " source=\"#{CGI.escapeHTML(chunk[:source].to_s)}\"" : ""
110
+ "<context type=\"#{CGI.escapeHTML(chunk[:type].to_s)}\"#{src_attr} trusted=\"#{chunk[:trusted]}\">\n#{CGI.escapeHTML(chunk[:text].to_s)}\n</context>"
109
111
  end
110
112
 
111
113
  def trim_messages_to_budget(messages, system_text)
@@ -122,6 +124,12 @@ module Phronomy
122
124
  accumulated += tokens
123
125
  result.push(msg)
124
126
  end
127
+
128
+ if result.empty? && messages.any?
129
+ warn "[Phronomy::Assembler] All #{messages.length} conversation message(s) dropped: " \
130
+ "token budget exhausted by system context (budget=#{@budget.context_window}, used_by_system=#{used})"
131
+ end
132
+
125
133
  result.reverse
126
134
  end
127
135
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "ostruct"
4
-
5
3
  module Phronomy
6
4
  module Context
7
5
  # Context object passed to the +on_compact+ callback registered on an agent.
@@ -103,7 +101,7 @@ module Phronomy
103
101
  end
104
102
 
105
103
  remaining = (@message_elements[(last_idx + 1)..] || []).map { |e| e[:message] }
106
- summary_msg = OpenStruct.new(role: :system, content: summary_text)
104
+ summary_msg = RubyLLM::Message.new(role: :system, content: summary_text)
107
105
  @result_messages = [summary_msg] + remaining
108
106
  end
109
107
  end
@@ -2,20 +2,9 @@
2
2
 
3
3
  module Phronomy
4
4
  module Context
5
- # Caches the assembled static system prompt text per agent instance.
6
- #
7
- # The cache is keyed by a SHA-256 fingerprint computed from the agent's
8
- # instruction text and the content of all registered static knowledge
9
- # sources. When the fingerprint matches the stored value the previously
10
- # assembled system_text is reused without re-fetching any sources.
11
- #
12
- # A cache miss (fingerprint changed or first call) triggers a full
13
- # rebuild: instruction + static-knowledge XML tags are concatenated and
14
- # the result is stored alongside the new fingerprint.
15
- #
16
- # Each agent *instance* holds one cache object. The cache persists across
17
- # #invoke calls on the same instance, which is the typical usage pattern
18
- # for long-running agents.
5
+ # Caches the assembled static system prompt text keyed by a SHA-256
6
+ # fingerprint of the agent's instructions + static knowledge content.
7
+ # Each instance is owned by one thread (stored in +Thread.current+).
19
8
  class ContextVersionCache
20
9
  # @return [String, nil] last stored fingerprint
21
10
  attr_reader :fingerprint
@@ -27,7 +16,9 @@ module Phronomy
27
16
  attr_reader :system_tokens
28
17
 
29
18
  def initialize
30
- reset
19
+ @fingerprint = nil
20
+ @system_text = nil
21
+ @system_tokens = 0
31
22
  end
32
23
 
33
24
  # Returns true when the given fingerprint matches the stored one.
@@ -35,7 +26,7 @@ module Phronomy
35
26
  # @param fingerprint [String] SHA-256 hex digest to compare
36
27
  # @return [Boolean]
37
28
  def valid?(fingerprint)
38
- !@fingerprint.nil? && @fingerprint == fingerprint
29
+ !@fingerprint.nil? && !@system_text.nil? && @fingerprint == fingerprint
39
30
  end
40
31
 
41
32
  # Update the cache with a new fingerprint and system text.
@@ -22,24 +22,52 @@ module Phronomy
22
22
  @scorer = scorer
23
23
  end
24
24
 
25
- # @param dataset [Dataset] collection of EvalCase objects
26
- # @param callable [#call] accepts a single String argument
25
+ # @param dataset [Dataset] collection of EvalCase objects
26
+ # @param callable [#call] accepts a single String argument
27
+ # @param concurrency [Integer] number of parallel threads (default: 1, sequential)
27
28
  # @return [Array<EvalResult>]
28
- def run(dataset, callable)
29
- dataset.map do |eval_case|
30
- t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
31
- result = callable.call(eval_case.input)
32
- latency_ms = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond) - t0
29
+ def run(dataset, callable, concurrency: 1)
30
+ cases = dataset.to_a
31
+ return cases.map { |eval_case| run_one(eval_case, callable) } if concurrency <= 1
33
32
 
34
- actual, usage = extract(result)
35
- score, score_error = score_safely(@scorer, actual: actual, expected: eval_case.expected, input: eval_case.input)
36
-
37
- EvalResult.new(eval_case: eval_case, actual: actual, score: score, usage: usage, latency_ms: latency_ms, error: score_error)
33
+ # Run cases in slices of +concurrency+ threads. Each slice is joined
34
+ # before the next starts, bounding peak thread count to +concurrency+.
35
+ # Writing to pre-allocated slots (one per thread) is safe because each
36
+ # thread writes to a unique index and all threads in a slice are joined
37
+ # before the next slice begins.
38
+ # Exceptions in worker threads are collected and re-raised after all
39
+ # threads in the slice are joined, preventing orphaned threads.
40
+ results = Array.new(cases.length)
41
+ cases.each_with_index.each_slice(concurrency) do |batch|
42
+ errors = []
43
+ errors_mu = Mutex.new
44
+ threads = batch.map do |eval_case, i|
45
+ Thread.new do
46
+ results[i] = run_one(eval_case, callable)
47
+ rescue => e
48
+ errors_mu.synchronize { errors << e }
49
+ end
50
+ end
51
+ threads.each(&:join)
52
+ raise errors.first if errors.any?
38
53
  end
54
+ results
39
55
  end
40
56
 
41
57
  private
42
58
 
59
+ # Evaluate a single EvalCase with the given callable and return an EvalResult.
60
+ def run_one(eval_case, callable)
61
+ t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
62
+ result = callable.call(eval_case.input)
63
+ latency_ms = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond) - t0
64
+
65
+ actual, usage = extract(result)
66
+ score, score_error = score_safely(@scorer, actual: actual, expected: eval_case.expected, input: eval_case.input)
67
+
68
+ EvalResult.new(eval_case: eval_case, actual: actual, score: score, usage: usage, latency_ms: latency_ms, error: score_error)
69
+ end
70
+
43
71
  # Normalises the callable's return value into [actual_string, usage_or_nil].
44
72
  def extract(result)
45
73
  if result.is_a?(Hash)
@@ -25,14 +25,20 @@ module Phronomy
25
25
  # Recognised PII categories and their detection patterns.
26
26
  PATTERNS = {
27
27
  # Japanese My Number: 12 consecutive or grouped digits (4-4-4).
28
+ # Matched candidates are additionally validated with the official check-digit
29
+ # algorithm (JIS X 0076) to eliminate false positives from arbitrary 12-digit strings.
28
30
  my_number: {
29
31
  pattern: /(?<!\d)(?<!\d[- ])\d{4}[- ]?\d{4}[- ]?\d{4}(?![- ]?\d)/,
30
- label: "My Number"
32
+ label: "My Number",
33
+ validate_my_number: true
31
34
  },
32
35
  # Credit / debit card: 16 digits, optionally separated by spaces or hyphens.
36
+ # Matched candidates are additionally validated with the Luhn algorithm
37
+ # to eliminate false positives from arbitrary 16-digit sequences.
33
38
  credit_card: {
34
39
  pattern: /\b(?:\d{4}[- ]?){3}\d{4}\b/,
35
- label: "credit card number"
40
+ label: "credit card number",
41
+ validate_luhn: true
36
42
  },
37
43
  # Email address (simplified RFC 5322).
38
44
  email: {
@@ -64,9 +70,47 @@ module Phronomy
64
70
  def check(value)
65
71
  text = value.to_s
66
72
  @active_patterns.each do |entry|
67
- fail!("PII detected in input: #{entry[:label]}") if text.match?(entry[:pattern])
73
+ detected = if entry[:validate_luhn]
74
+ # Scan for all candidates then filter by Luhn check-digit validation.
75
+ # This avoids false positives on arbitrary 16-digit strings (e.g. internal IDs).
76
+ text.scan(entry[:pattern]).any? { |m| luhn_valid?(m.gsub(/[- ]/, "")) }
77
+ elsif entry[:validate_my_number]
78
+ # Scan for all candidates then apply the JIS X 0076 check-digit algorithm.
79
+ # This avoids false positives on arbitrary 12-digit strings.
80
+ text.scan(entry[:pattern]).any? { |m| my_number_valid?(m.gsub(/[- ]/, "")) }
81
+ else
82
+ text.match?(entry[:pattern])
83
+ end
84
+ fail!("PII detected in input: #{entry[:label]}") if detected
68
85
  end
69
86
  end
87
+
88
+ private
89
+
90
+ # Returns true when +digits+ (a 12-character string of decimal digits) satisfies
91
+ # the Japanese My Number check-digit algorithm defined in JIS X 0076.
92
+ # The check digit is the 12th digit.
93
+ def my_number_valid?(digits)
94
+ weights = [6, 5, 4, 3, 2, 7, 6, 5, 4, 3, 2]
95
+ total = weights.each_with_index.sum { |w, i| w * digits[i].to_i }
96
+ remainder = total % 11
97
+ check = (remainder <= 1) ? 0 : 11 - remainder
98
+ check == digits[11].to_i
99
+ end
100
+
101
+ # Returns true when +digits+ (a string of decimal digits) satisfies the
102
+ # Luhn check-digit algorithm used by payment card networks.
103
+ def luhn_valid?(digits)
104
+ digits.chars.reverse.each_with_index.sum do |d, i|
105
+ n = d.to_i
106
+ if i.odd?
107
+ doubled = n * 2
108
+ (doubled > 9) ? (doubled - 9) : doubled
109
+ else
110
+ n
111
+ end
112
+ end % 10 == 0
113
+ end
70
114
  end
71
115
  end
72
116
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "ostruct"
4
-
5
3
  module Phronomy
6
4
  module Memory
7
5
  module Compression
@@ -64,6 +62,9 @@ module Phronomy
64
62
  else
65
63
  {messages: messages, compaction: nil}
66
64
  end
65
+ rescue => e
66
+ warn "[Phronomy] Compression failed (#{e.class}: #{e.message}); saving without compaction."
67
+ {messages: messages, compaction: nil}
67
68
  end
68
69
 
69
70
  private
@@ -98,7 +99,7 @@ module Phronomy
98
99
  #{text}
99
100
  </context>
100
101
  CONTEXT
101
- OpenStruct.new(role: :system, content: content)
102
+ RubyLLM::Message.new(role: :system, content: content)
102
103
  end
103
104
  end
104
105
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "ostruct"
4
-
5
3
  module Phronomy
6
4
  module Memory
7
5
  module Compression
@@ -25,6 +23,11 @@ module Phronomy
25
23
  class ToolOutputPruner < Base
26
24
  TRUNCATION_NOTE = "\n[... output truncated ...]"
27
25
 
26
+ # Internal value object for cloned messages.
27
+ # Uses Struct (not OpenStruct) so that unknown attribute access raises NoMethodError.
28
+ ClonedMessage = Struct.new(:role, :content, :tool_calls, :model_id, keyword_init: true)
29
+ private_constant :ClonedMessage
30
+
28
31
  # @param max_chars [Integer] maximum character length for tool-result content
29
32
  def initialize(max_chars: 4000)
30
33
  @max_chars = max_chars
@@ -51,10 +54,12 @@ module Phronomy
51
54
  private
52
55
 
53
56
  def clone_message(original, new_content)
54
- attrs = {role: original.role, content: new_content}
55
- attrs[:tool_calls] = original.tool_calls if original.respond_to?(:tool_calls)
56
- attrs[:model_id] = original.model_id if original.respond_to?(:model_id)
57
- OpenStruct.new(attrs)
57
+ ClonedMessage.new(
58
+ role: original.role,
59
+ content: new_content,
60
+ tool_calls: (original.tool_calls if original.respond_to?(:tool_calls)),
61
+ model_id: (original.model_id if original.respond_to?(:model_id))
62
+ )
58
63
  end
59
64
  end
60
65
  end
@@ -48,7 +48,6 @@ module Phronomy
48
48
  @retrieval = retrieval
49
49
  @compression = compression
50
50
  @ttl = ttl
51
- @append_mutex = Mutex.new
52
51
  end
53
52
 
54
53
  # Load conversation messages for a thread, applying retrieval selection.
@@ -83,8 +82,10 @@ module Phronomy
83
82
  # @param thread_id [String]
84
83
  # @param messages [Array] full conversation history up to this point
85
84
  def save(thread_id:, messages:)
86
- append_new_messages(thread_id: thread_id, messages: messages)
87
- compress_and_save(thread_id: thread_id, messages: messages)
85
+ @storage.with_thread_lock(thread_id: thread_id) do
86
+ append_new_messages(thread_id: thread_id, messages: messages)
87
+ compress_and_save(thread_id: thread_id, messages: messages)
88
+ end
88
89
  @retrieval.index(thread_id: thread_id, messages: messages) if @retrieval.respond_to?(:index)
89
90
  end
90
91
 
@@ -125,21 +126,24 @@ module Phronomy
125
126
  private
126
127
 
127
128
  # Append messages that are new since the last save to the raw history.
129
+ # Must be called while holding the per-thread lock (via Storage#with_thread_lock).
128
130
  # Messages are append-only; existing raw entries are never modified.
131
+ #
132
+ # The next seq number is derived from Storage#next_seq, which owns the
133
+ # high-water-mark counter. This survives TTL purges because Storage tracks
134
+ # the HWM independently of the stored raw entries.
129
135
  def append_new_messages(thread_id:, messages:)
130
- # Synchronize load + append to prevent seq number collisions when two
131
- # threads save the same thread_id concurrently.
132
- @append_mutex.synchronize do
133
- raw = @storage.load_raw(thread_id: thread_id)
134
- starting_seq = raw.length
135
- new_messages = messages[starting_seq..]
136
- @storage.append_raw(thread_id: thread_id, messages: new_messages, starting_seq: starting_seq) if new_messages&.any?
137
- end
136
+ next_seq = @storage.next_seq(thread_id: thread_id)
137
+ new_messages = messages[next_seq..]
138
+ @storage.append_raw(thread_id: thread_id, messages: new_messages, starting_seq: next_seq) if new_messages&.any?
138
139
  end
139
140
 
140
141
  # Apply the configured compression strategy and persist the result.
141
142
  # When no strategy is configured, saves messages directly to the legacy store.
142
143
  # When compression fires, also persists the compaction record.
144
+ # If the compression strategy raises (e.g. LLM timeout), we fall back to
145
+ # saving the messages without compaction so the conversation is never lost
146
+ # due to a transient summarization failure (Issue #58).
143
147
  def compress_and_save(thread_id:, messages:)
144
148
  unless @compression
145
149
  @storage.save(thread_id: thread_id, messages: messages)
@@ -151,11 +155,16 @@ module Phronomy
151
155
  all_raw = @storage.load_raw(thread_id: thread_id)
152
156
  uncompacted = all_raw.select { |r| r[:seq] >= uncompacted_start_seq }.map { |r| r[:message] }
153
157
 
154
- result = @compression.compress(
155
- thread_id: thread_id,
156
- messages: uncompacted,
157
- seq_offset: uncompacted_start_seq
158
- )
158
+ result = begin
159
+ @compression.compress(
160
+ thread_id: thread_id,
161
+ messages: uncompacted,
162
+ seq_offset: uncompacted_start_seq
163
+ )
164
+ rescue => e
165
+ warn "[Phronomy] Compression failed (#{e.class}: #{e.message}); saving without compaction."
166
+ {messages: messages, compaction: nil}
167
+ end
159
168
 
160
169
  if result[:compaction]
161
170
  @storage.save_compaction(
@@ -28,22 +28,37 @@ module Phronomy
28
28
  @index = {} # id => message (insertion-ordered via Ruby Hash)
29
29
  @counter = 0
30
30
  @max_index_size = max_index_size
31
- @mutex = Mutex.new
31
+ @actor = Phronomy::Actor.new
32
+ @indexed_object_ids = {} # thread_id => { object_id => true }
32
33
  end
33
34
 
34
35
  # Index a new batch of messages so they are searchable on future #select calls.
35
36
  # Called by ConversationManager#save.
36
37
  #
38
+ # Messages are deduplicated by object identity: if a message object has already
39
+ # been indexed for the given thread_id, it is skipped (no duplicate embed call).
40
+ #
37
41
  # @param thread_id [String]
38
42
  # @param messages [Array]
39
43
  def index(thread_id:, messages:)
40
44
  messages.each do |msg|
45
+ # Fast path: skip already-indexed messages without calling embed.
46
+ already_indexed = @actor.call do
47
+ (@indexed_object_ids[thread_id] ||= {})[msg.object_id]
48
+ end
49
+ next if already_indexed
50
+
41
51
  embedding = @embeddings.embed(msg.content.to_s)
42
- @mutex.synchronize do
52
+ @actor.call do
53
+ # Re-check inside Actor to handle concurrent callers for the same thread.
54
+ indexed = (@indexed_object_ids[thread_id] ||= {})
55
+ next if indexed[msg.object_id]
56
+
43
57
  id = "#{thread_id}:#{@counter}"
44
58
  @counter += 1
45
59
  @store.add(id: id, embedding: embedding, metadata: {thread_id: thread_id, message: msg})
46
60
  @index[id] = msg
61
+ indexed[msg.object_id] = true
47
62
  evict_oldest! if @max_index_size && @index.size > @max_index_size
48
63
  end
49
64
  end
@@ -53,12 +68,13 @@ module Phronomy
53
68
  #
54
69
  # @param thread_id [String]
55
70
  def clear_index(thread_id:)
56
- @mutex.synchronize do
71
+ @actor.call do
57
72
  ids = @index.keys.select { |id| id.start_with?("#{thread_id}:") }
58
73
  ids.each do |id|
59
74
  @index.delete(id)
60
75
  @store.remove(id: id)
61
76
  end
77
+ @indexed_object_ids.delete(thread_id)
62
78
  end
63
79
  end
64
80
 
@@ -71,7 +87,7 @@ module Phronomy
71
87
  def select(messages, query: nil, thread_id: nil)
72
88
  if query && !query.strip.empty?
73
89
  query_embedding = @embeddings.embed(query)
74
- results = @store.search(query_embedding: query_embedding, k: @k * 3)
90
+ results = @actor.call { @store.search(query_embedding: query_embedding, k: @k * 3) }
75
91
  results
76
92
  .select { |r| thread_id.nil? || r[:metadata][:thread_id] == thread_id }
77
93
  .first(@k)
@@ -84,7 +100,7 @@ module Phronomy
84
100
  private
85
101
 
86
102
  # Evicts the oldest index entry to enforce max_index_size.
87
- # Must be called inside @mutex.synchronize.
103
+ # Must be called inside the Actor.
88
104
  def evict_oldest!
89
105
  oldest_id = @index.keys.first
90
106
  return unless oldest_id
@@ -75,7 +75,7 @@ module Phronomy
75
75
  @model_class.create!(
76
76
  thread_id: thread_id,
77
77
  role: msg.role.to_s,
78
- content: msg.content.to_s,
78
+ content: msg.content,
79
79
  tool_calls_json: serialize_tool_calls(msg),
80
80
  model_id: (msg.model_id if msg.respond_to?(:model_id))
81
81
  )
@@ -100,15 +100,17 @@ module Phronomy
100
100
  def append_raw(thread_id:, messages:, starting_seq:)
101
101
  return unless @raw_model_class
102
102
 
103
- messages.each_with_index do |msg, i|
104
- @raw_model_class.create!(
105
- thread_id: thread_id,
106
- seq: starting_seq + i,
107
- role: msg.role.to_s,
108
- content: msg.content.to_s,
109
- tool_calls_json: serialize_tool_calls(msg),
110
- model_id: (msg.model_id if msg.respond_to?(:model_id))
111
- )
103
+ @raw_model_class.transaction do
104
+ messages.each_with_index do |msg, i|
105
+ @raw_model_class.create!(
106
+ thread_id: thread_id,
107
+ seq: starting_seq + i,
108
+ role: msg.role.to_s,
109
+ content: msg.content,
110
+ tool_calls_json: serialize_tool_calls(msg),
111
+ model_id: (msg.model_id if msg.respond_to?(:model_id))
112
+ )
113
+ end
112
114
  end
113
115
  end
114
116
 
@@ -168,6 +170,26 @@ module Phronomy
168
170
  @model_class.where(thread_id: thread_id).where("created_at < ?", older_than).delete_all
169
171
  end
170
172
 
173
+ # Returns the next seq number to use for new raw messages for +thread_id+.
174
+ # Derived from MAX(seq) in the database; since purge_older_than does not
175
+ # touch raw records, this value is always correct.
176
+ #
177
+ # @param thread_id [String]
178
+ # @return [Integer]
179
+ def next_seq(thread_id:)
180
+ return 0 unless @raw_model_class
181
+
182
+ ((@raw_model_class.where(thread_id: thread_id).maximum(:seq) || -1) + 1)
183
+ end
184
+
185
+ # Delegates to the block directly; serialisation of concurrent saves
186
+ # for the same thread_id is the caller's responsibility (e.g. DB-level
187
+ # transaction isolation or application-layer queuing).
188
+ # @param thread_id [String]
189
+ def with_thread_lock(thread_id:)
190
+ yield
191
+ end
192
+
171
193
  private
172
194
 
173
195
  def ensure_raw_model!
@@ -127,6 +127,28 @@ module Phronomy
127
127
  def purge_older_than(thread_id:, older_than:)
128
128
  # no-op by default
129
129
  end
130
+
131
+ # Returns the next seq number to assign when appending new raw messages
132
+ # for +thread_id+. Must be monotonically increasing and must survive
133
+ # purge_older_than (i.e. the counter must not reset when old raw records
134
+ # are deleted by a TTL purge).
135
+ #
136
+ # @param thread_id [String]
137
+ # @return [Integer]
138
+ def next_seq(thread_id:)
139
+ raise NotImplementedError, "#{self.class}#next_seq is not implemented"
140
+ end
141
+
142
+ # Executes the block while holding a per-thread-id lock for +thread_id+.
143
+ # Used by ConversationManager to prevent concurrent compaction for the
144
+ # same thread. The default implementation yields without locking; backends
145
+ # that require serialisation should override this method.
146
+ #
147
+ # @param thread_id [String]
148
+ # @yield
149
+ def with_thread_lock(thread_id:)
150
+ yield
151
+ end
130
152
  end
131
153
  end
132
154
  end