phronomy 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +88 -30
- data/README.md +26 -110
- data/lib/phronomy/agent/base.rb +127 -54
- data/lib/phronomy/agent/checkpoint.rb +53 -0
- data/lib/phronomy/agent/react_agent.rb +18 -28
- data/lib/phronomy/agent/suspend_signal.rb +35 -0
- data/lib/phronomy/agent.rb +2 -1
- data/lib/phronomy/configuration.rb +0 -24
- data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +10 -27
- data/lib/phronomy/railtie.rb +0 -6
- data/lib/phronomy/ruby_llm_patches.rb +20 -0
- data/lib/phronomy/tool/mcp_tool.rb +23 -26
- data/lib/phronomy/tracing/langfuse_tracer.rb +3 -6
- data/lib/phronomy/trust_pipeline.rb +1 -2
- data/lib/phronomy/vector_store/redis_search.rb +4 -4
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow.rb +4 -7
- data/lib/phronomy/workflow_runner.rb +1 -8
- data/lib/phronomy.rb +1 -0
- data/scripts/check_readme_ruby.rb +38 -0
- metadata +5 -33
- data/docs/trustworthy_ai_enhancements.md +0 -332
- data/lib/phronomy/active_record/acts_as.rb +0 -48
- data/lib/phronomy/active_record/checkpoint.rb +0 -20
- data/lib/phronomy/active_record/extensions.rb +0 -14
- data/lib/phronomy/active_record/message.rb +0 -20
- data/lib/phronomy/actor.rb +0 -68
- data/lib/phronomy/memory/compression/base.rb +0 -37
- data/lib/phronomy/memory/compression/summary.rb +0 -107
- data/lib/phronomy/memory/compression/tool_output_pruner.rb +0 -67
- data/lib/phronomy/memory/compression.rb +0 -11
- data/lib/phronomy/memory/conversation_manager.rb +0 -213
- data/lib/phronomy/memory/retrieval/base.rb +0 -22
- data/lib/phronomy/memory/retrieval/composite.rb +0 -76
- data/lib/phronomy/memory/retrieval/recent.rb +0 -35
- data/lib/phronomy/memory/retrieval/semantic.rb +0 -114
- data/lib/phronomy/memory/retrieval.rb +0 -12
- data/lib/phronomy/memory/storage/active_record.rb +0 -248
- data/lib/phronomy/memory/storage/base.rb +0 -155
- data/lib/phronomy/memory/storage/in_memory.rb +0 -152
- data/lib/phronomy/memory/storage.rb +0 -11
- data/lib/phronomy/memory.rb +0 -21
- data/lib/phronomy/rails/agent_job.rb +0 -75
- data/lib/phronomy/state_store/active_record.rb +0 -76
- data/lib/phronomy/state_store/base.rb +0 -112
- data/lib/phronomy/state_store/encryptor/active_support.rb +0 -49
- data/lib/phronomy/state_store/encryptor/base.rb +0 -34
- data/lib/phronomy/state_store/encryptor.rb +0 -16
- data/lib/phronomy/state_store/file.rb +0 -85
- data/lib/phronomy/state_store/in_memory.rb +0 -53
- data/lib/phronomy/state_store/redis.rb +0 -70
- data/lib/phronomy/state_store.rb +0 -9
- data/lib/phronomy/thread_actor_registry.rb +0 -85
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Memory
|
|
5
|
-
module Compression
|
|
6
|
-
# Compaction strategy that summarizes old messages with an LLM.
|
|
7
|
-
#
|
|
8
|
-
# When the total estimated token count of the uncompacted message history
|
|
9
|
-
# exceeds +max_tokens+, all messages except the most recent +keep+ are
|
|
10
|
-
# summarized by an LLM. The original messages are preserved in Storage
|
|
11
|
-
# (via ConversationManager); this class only decides whether compaction is
|
|
12
|
-
# needed and produces the summary text.
|
|
13
|
-
#
|
|
14
|
-
# The #compress method returns a Hash (not a plain Array):
|
|
15
|
-
# {
|
|
16
|
-
# messages: Array, # context-ready message list
|
|
17
|
-
# compaction: Hash | nil # { start_seq:, end_seq:, summary_text: }
|
|
18
|
-
# # nil when no compaction was performed
|
|
19
|
-
# }
|
|
20
|
-
#
|
|
21
|
-
# ConversationManager uses the :compaction entry to persist the compaction
|
|
22
|
-
# record in Storage, ensuring originals are never discarded.
|
|
23
|
-
#
|
|
24
|
-
# @example
|
|
25
|
-
# compressor = Phronomy::Memory::Compression::Summary.new(
|
|
26
|
-
# max_tokens: 4000,
|
|
27
|
-
# summarizer_model: "gpt-4o-mini"
|
|
28
|
-
# )
|
|
29
|
-
# manager = Phronomy::Memory::ConversationManager.new(
|
|
30
|
-
# storage: storage,
|
|
31
|
-
# retrieval: retrieval,
|
|
32
|
-
# compression: compressor
|
|
33
|
-
# )
|
|
34
|
-
class Summary < Base
|
|
35
|
-
# @param max_tokens [Integer] token threshold above which old messages are compacted
|
|
36
|
-
# @param keep [Integer] number of recent messages to preserve verbatim
|
|
37
|
-
# @param summarizer_model [String, nil] LLM model for summarization; nil uses global default
|
|
38
|
-
# @param summarizer_provider [Symbol, nil] LLM provider; required for unregistered models
|
|
39
|
-
def initialize(max_tokens: 4000, keep: 5, summarizer_model: nil, summarizer_provider: nil)
  # Compaction thresholds: the token ceiling and the size of the verbatim tail.
  @max_tokens, @keep = max_tokens, keep
  # Optional LLM overrides; each stays nil unless the caller supplies it.
  @summarizer_model, @summarizer_provider = summarizer_model, summarizer_provider
end
|
|
45
|
-
|
|
46
|
-
# Evaluate whether compaction is needed and produce a summary if so.
|
|
47
|
-
#
|
|
48
|
-
# +seq_offset+ is the seq number of messages[0] in the raw history.
|
|
49
|
-
# ConversationManager passes this so the compaction record can reference
|
|
50
|
-
# the correct seq range in Storage.
|
|
51
|
-
#
|
|
52
|
-
# @param thread_id [String]
|
|
53
|
-
# @param messages [Array] uncompacted messages to consider
|
|
54
|
-
# @param seq_offset [Integer] seq number assigned to messages[0]
|
|
55
|
-
# @return [Hash] { messages: Array, compaction: Hash|nil }
|
|
56
|
-
# compaction is { start_seq:, end_seq:, summary_text: } or nil
|
|
57
|
-
def compress(thread_id:, messages:, seq_offset: 0)
  # Rough size of the history: the sum of the per-message token estimates.
  token_total = messages.sum do |message|
    Phronomy::Context::TokenEstimator.estimate(message.content.to_s)
  end

  # Nothing to do unless we are over budget AND there is history older
  # than the tail we always keep verbatim.
  within_budget = !(token_total > @max_tokens)
  nothing_old = !(messages.length > @keep)
  return {messages: messages, compaction: nil} if within_budget || nothing_old

  compact(messages, seq_offset: seq_offset)
rescue => e
  # Best effort: any failure degrades to "no compaction" and hands the
  # messages back unchanged.
  warn "[Phronomy] Compression failed (#{e.class}: #{e.message}); saving without compaction."
  {messages: messages, compaction: nil}
end
|
|
69
|
-
|
|
70
|
-
private
|
|
71
|
-
|
|
72
|
-
def compact(messages, seq_offset:)
  # Everything except the trailing @keep messages is folded into the summary.
  cutoff = messages.length - @keep
  to_summarize = messages.take(cutoff)
  kept_tail = messages[cutoff..]

  # Only pass the options the caller actually configured; a provider
  # override also sets assume_model_exists.
  chat_options = {}
  chat_options[:model] = @summarizer_model if @summarizer_model
  if @summarizer_provider
    chat_options[:provider] = @summarizer_provider
    chat_options[:assume_model_exists] = true
  end
  chat = RubyLLM.chat(**chat_options)

  transcript = to_summarize.map { |m| "#{m.role}: #{m.content}" }.join("\n")
  reply = chat.ask("Please summarize the following conversation concisely:\n#{transcript}")
  summary_text = reply.content

  # The record covers the inclusive seq range of the summarized messages.
  record = {
    start_seq: seq_offset,
    end_seq: seq_offset + cutoff - 1,
    summary_text: summary_text
  }

  {messages: [summary_message(summary_text)].concat(kept_tail), compaction: record}
end
|
|
95
|
-
|
|
96
|
-
def summary_message(text)
  # The summary is wrapped in an envelope that marks it as untrusted,
  # memory-sourced context, then delivered as a system message.
  opening_tag = '<context type="summary" source="memory" trusted="false">'
  envelope = "#{opening_tag}\n#{text}\n</context>"
  RubyLLM::Message.new(role: :system, content: envelope)
end
|
|
104
|
-
end
|
|
105
|
-
end
|
|
106
|
-
end
|
|
107
|
-
end
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Memory
|
|
5
|
-
module Compression
|
|
6
|
-
# Compression strategy that truncates oversized tool-call result messages.
|
|
7
|
-
#
|
|
8
|
-
# Large tool outputs — such as a full web-page dump or a massive JSON
|
|
9
|
-
# response — can consume a significant fraction of the context window.
|
|
10
|
-
# This compressor truncates the content of any :tool message whose character
|
|
11
|
-
# count exceeds max_chars, appending a note that the output was truncated.
|
|
12
|
-
#
|
|
13
|
-
# Unlike Summary, this is a stateless compressor: it does not accumulate
|
|
14
|
-
# state across calls and requires no thread_id bookkeeping.
|
|
15
|
-
#
|
|
16
|
-
# @example
|
|
17
|
-
# compressor = Phronomy::Memory::Compression::ToolOutputPruner.new(max_chars: 4000)
|
|
18
|
-
# manager = Phronomy::Memory::ConversationManager.new(
|
|
19
|
-
# storage: storage,
|
|
20
|
-
# retrieval: retrieval,
|
|
21
|
-
# compression: compressor
|
|
22
|
-
# )
|
|
23
|
-
class ToolOutputPruner < Base
  # Marker appended to any tool output that had to be cut short.
  TRUNCATION_NOTE = "\n[... output truncated ...]"

  # Internal value object for truncated copies of tool messages.
  # Uses Struct (not OpenStruct) so that unknown attribute access raises NoMethodError.
  # +tool_call_id+ is carried over so that a truncated tool result keeps its
  # link to the originating tool call.
  # NOTE(review): assumes source messages expose #tool_call_id the way
  # RubyLLM::Message does; it is left nil when they do not.
  ClonedMessage = Struct.new(:role, :content, :tool_calls, :model_id, :tool_call_id, keyword_init: true)
  private_constant :ClonedMessage

  # @param max_chars [Integer] maximum character length for tool-result content
  def initialize(max_chars: 4000)
    @max_chars = max_chars
  end

  # Truncate oversized :tool messages. Non-destructive: the input array and
  # its message objects are left untouched; a new array is returned in which
  # each oversized tool message is replaced by a truncated copy.
  # Content pruning does not produce a compaction record; :compaction is always nil.
  #
  # @param thread_id [String] unused (stateless pruner)
  # @param messages [Array]
  # @param seq_offset [Integer] unused
  # @return [Hash] { messages: Array, compaction: nil }
  def compress(thread_id:, messages:, seq_offset: 0)
    pruned = messages.map do |msg|
      next msg unless msg.role.to_sym == :tool

      text = msg.content.to_s
      next msg if text.length <= @max_chars

      clone_message(msg, text[0, @max_chars] + TRUNCATION_NOTE)
    end
    {messages: pruned, compaction: nil}
  end

  private

  # Build a truncated stand-in for +original+ carrying +new_content+.
  # Optional attributes are copied only when the source message responds
  # to them; otherwise they are nil.
  def clone_message(original, new_content)
    ClonedMessage.new(
      role: original.role,
      content: new_content,
      tool_calls: (original.tool_calls if original.respond_to?(:tool_calls)),
      model_id: (original.model_id if original.respond_to?(:model_id)),
      tool_call_id: (original.tool_call_id if original.respond_to?(:tool_call_id))
    )
  end
end
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
end
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Memory
|
|
5
|
-
# Compression is the reduction axis of conversation management.
|
|
6
|
-
# Implementations transform a message array into a smaller representation
|
|
7
|
-
# (e.g. LLM summary, tool-output truncation) before storage or retrieval.
|
|
8
|
-
module Compression
|
|
9
|
-
end
|
|
10
|
-
end
|
|
11
|
-
end
|
|
@@ -1,213 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Memory
|
|
5
|
-
# ConversationManager combines the three independent axes of conversation handling:
|
|
6
|
-
# - Storage: where messages are persisted (InMemory, ActiveRecord, ...)
|
|
7
|
-
# - Retrieval: which messages to select (Recent, Semantic, ...)
|
|
8
|
-
# - Compression: how to reduce message size before storage (Summary, ToolOutputPruner, ...)
|
|
9
|
-
#
|
|
10
|
-
# This is the primary entry point for context region 4 (Conversation) in Agent::Base.
|
|
11
|
-
#
|
|
12
|
-
# === Original preservation policy
|
|
13
|
-
#
|
|
14
|
-
# All original messages are appended to Storage's raw history with a
|
|
15
|
-
# monotonically increasing seq number (0-based, per thread). Raw messages
|
|
16
|
-
# are never modified or deleted.
|
|
17
|
-
#
|
|
18
|
-
# When Compression::Summary performs a compaction, a compaction record
|
|
19
|
-
# { start_seq:, end_seq:, summary_text: } is saved in Storage alongside the
|
|
20
|
-
# raw messages. This allows callers to reconstruct the full history or audit
|
|
21
|
-
# which messages were summarised.
|
|
22
|
-
#
|
|
23
|
-
# On #load, the message list is reconstructed from raw history + compaction
|
|
24
|
-
# records: each compacted range is replaced by a single summary system message,
|
|
25
|
-
# and uncompacted messages are returned verbatim.
|
|
26
|
-
#
|
|
27
|
-
# @example Simple recency-based in-memory manager
|
|
28
|
-
# manager = Phronomy::Memory::ConversationManager.new(
|
|
29
|
-
# storage: Phronomy::Memory::Storage::InMemory.new,
|
|
30
|
-
# retrieval: Phronomy::Memory::Retrieval::Recent.new(k: 10)
|
|
31
|
-
# )
|
|
32
|
-
#
|
|
33
|
-
# @example With LLM summary compaction
|
|
34
|
-
# manager = Phronomy::Memory::ConversationManager.new(
|
|
35
|
-
# storage: Phronomy::Memory::Storage::InMemory.new,
|
|
36
|
-
# retrieval: Phronomy::Memory::Retrieval::Recent.new(k: 5),
|
|
37
|
-
# compression: Phronomy::Memory::Compression::Summary.new(max_tokens: 4000)
|
|
38
|
-
# )
|
|
39
|
-
class ConversationManager
|
|
40
|
-
# @param storage [Memory::Storage::Base] persistence backend (required)
|
|
41
|
-
# @param retrieval [Memory::Retrieval::Base] selection strategy (required)
|
|
42
|
-
# @param compression [Memory::Compression::Base, nil] optional compression strategy
|
|
43
|
-
# @param ttl [Integer, nil] message time-to-live in seconds; messages older
|
|
44
|
-
# than this value are removed from storage on each {#load} call.
|
|
45
|
-
# +nil+ disables TTL (default).
|
|
46
|
-
def initialize(storage:, retrieval:, compression: nil, ttl: nil)
  # Required collaborators.
  @storage, @retrieval = storage, retrieval
  # Optional behaviour; each is skipped by the methods that consult it when nil.
  @compression, @ttl = compression, ttl
end
|
|
52
|
-
|
|
53
|
-
# Load conversation messages for a thread, applying retrieval selection.
|
|
54
|
-
#
|
|
55
|
-
# When a TTL is configured, raw messages older than the TTL are permanently
|
|
56
|
-
# removed from storage before reconstruction.
|
|
57
|
-
#
|
|
58
|
-
# Reconstructs the message list from raw history + compaction records:
|
|
59
|
-
# - Each compacted range [start_seq..end_seq] is replaced by a summary
|
|
60
|
-
# system message.
|
|
61
|
-
# - Uncompacted messages are returned in original order.
|
|
62
|
-
#
|
|
63
|
-
# @param thread_id [String]
|
|
64
|
-
# @param query [String, nil] current user input for query-aware retrieval
|
|
65
|
-
# @return [Array]
|
|
66
|
-
def load(thread_id:, query: nil)
  # With a TTL configured, ask storage to drop expired raw messages before
  # the history is rebuilt.
  if @ttl
    expiry_cutoff = Time.now - @ttl
    @storage.purge_older_than(thread_id:, older_than: expiry_cutoff)
  end

  history = reconstruct(thread_id)
  @retrieval.select(history, query:, thread_id:)
end
|
|
71
|
-
|
|
72
|
-
# Persist new messages for a thread and optionally apply compression.
|
|
73
|
-
#
|
|
74
|
-
# New messages are determined by comparing the incoming array length with
|
|
75
|
-
# the existing raw history length (messages are always append-only).
|
|
76
|
-
# Only truly new messages (beyond raw.length) are appended to raw storage.
|
|
77
|
-
#
|
|
78
|
-
# When a compression strategy is configured, it is evaluated against the
|
|
79
|
-
# full set of uncompacted raw messages. If compaction fires, the resulting
|
|
80
|
-
# compaction record is saved in storage (originals are preserved).
|
|
81
|
-
#
|
|
82
|
-
# @param thread_id [String]
|
|
83
|
-
# @param messages [Array] full conversation history up to this point
|
|
84
|
-
def save(thread_id:, messages:)
  # The raw append and the compression pass run together under the
  # per-thread lock provided by storage.
  @storage.with_thread_lock(thread_id:) do
    append_new_messages(thread_id:, messages:)
    compress_and_save(thread_id:, messages:)
  end

  # Index maintenance is optional and happens after the locked section.
  return unless @retrieval.respond_to?(:index)

  @retrieval.index(thread_id:, messages:)
end
|
|
91
|
-
|
|
92
|
-
# Delete all messages (raw, compaction records, and legacy store) for a thread.
|
|
93
|
-
#
|
|
94
|
-
# @param thread_id [String]
|
|
95
|
-
def clear(thread_id:)
  @storage.clear(thread_id:)
  # Only retrieval strategies that keep an index need to be told.
  return unless @retrieval.respond_to?(:clear_index)

  @retrieval.clear_index(thread_id:)
end
|
|
99
|
-
|
|
100
|
-
# Permanently erase all stored data for a thread (right-to-erasure / purge).
|
|
101
|
-
# Delegates to the storage backend's {Storage::Base#purge} and also clears
|
|
102
|
-
# any retrieval index for the thread.
|
|
103
|
-
#
|
|
104
|
-
# @param thread_id [String]
|
|
105
|
-
def purge(thread_id:)
  @storage.purge(thread_id:)
  # Only retrieval strategies that keep an index need to be told.
  return unless @retrieval.respond_to?(:clear_index)

  @retrieval.clear_index(thread_id:)
end
|
|
109
|
-
|
|
110
|
-
# Record an application-driven compaction for a thread.
|
|
111
|
-
# Called by CompactionContext when the on_compact callback invokes ctx.compact.
|
|
112
|
-
#
|
|
113
|
-
# @param thread_id [String]
|
|
114
|
-
# @param start_seq [Integer] first seq number in the compacted range
|
|
115
|
-
# @param end_seq [Integer] last seq number in the compacted range
|
|
116
|
-
# @param summary_text [String] replacement text for the compacted messages
|
|
117
|
-
def save_compaction(thread_id:, start_seq:, end_seq:, summary_text:)
  # Pure pass-through: the record is handed to the storage backend unchanged.
  @storage.save_compaction(thread_id:, start_seq:, end_seq:, summary_text:)
end
|
|
125
|
-
|
|
126
|
-
private
|
|
127
|
-
|
|
128
|
-
# Append messages that are new since the last save to the raw history.
|
|
129
|
-
# Must be called while holding the per-thread lock (via Storage#with_thread_lock).
|
|
130
|
-
# Messages are append-only; existing raw entries are never modified.
|
|
131
|
-
#
|
|
132
|
-
# The next seq number is derived from Storage#next_seq, which owns the
|
|
133
|
-
# high-water-mark counter. This survives TTL purges because Storage tracks
|
|
134
|
-
# the HWM independently of the stored raw entries.
|
|
135
|
-
def append_new_messages(thread_id:, messages:)
  # Storage reports the next free seq; that value is used directly as the
  # array offset of the first not-yet-stored message.
  starting_seq = @storage.next_seq(thread_id:)
  fresh = messages[starting_seq..]
  # The slice is nil when the offset lies past the end, or empty when
  # nothing is new.
  return unless fresh&.any?

  @storage.append_raw(thread_id:, messages: fresh, starting_seq:)
end
|
|
140
|
-
|
|
141
|
-
# Apply the configured compression strategy and persist the result.
|
|
142
|
-
# When no strategy is configured, saves messages directly to the legacy store.
|
|
143
|
-
# When compression fires, also persists the compaction record.
|
|
144
|
-
# If the compression strategy raises (e.g. LLM timeout), we fall back to
|
|
145
|
-
# saving the messages without compaction so the conversation is never lost
|
|
146
|
-
# due to a transient summarization failure (Issue #58).
|
|
147
|
-
def compress_and_save(thread_id:, messages:)
  if @compression
    # Only raw messages after the most recent compaction are candidates.
    prior = @storage.load_compactions(thread_id:)
    first_open_seq = prior.any? ? prior.last[:end_seq] + 1 : 0
    pending = @storage.load_raw(thread_id:)
      .select { |row| row[:seq] >= first_open_seq }
      .map { |row| row[:message] }

    outcome =
      begin
        @compression.compress(thread_id:, messages: pending, seq_offset: first_open_seq)
      rescue => e
        # A failing strategy must not lose the conversation: fall back to
        # storing the incoming messages uncompacted.
        warn "[Phronomy] Compression failed (#{e.class}: #{e.message}); saving without compaction."
        {messages: messages, compaction: nil}
      end

    record = outcome[:compaction]
    if record
      @storage.save_compaction(
        thread_id:,
        start_seq: record[:start_seq],
        end_seq: record[:end_seq],
        summary_text: record[:summary_text]
      )
    end

    # The strategy's output (e.g. pruned tool results) also goes to the
    # legacy store so #load can read it when no compaction record exists.
    @storage.save(thread_id:, messages: outcome[:messages])
  else
    # No strategy configured: store the incoming messages as they are.
    @storage.save(thread_id:, messages:)
    nil
  end
end
|
|
182
|
-
|
|
183
|
-
# Reconstruct context-ready messages from raw history + compaction records.
|
|
184
|
-
# When no compaction records exist (no Summary compaction has fired), we
|
|
185
|
-
# return the legacy store directly — this preserves the effect of content
|
|
186
|
-
# pruners like ToolOutputPruner, whose pruned messages are saved there.
|
|
187
|
-
# When compaction records exist, we rebuild the context from raw history:
|
|
188
|
-
# each compacted seq range is replaced by a single summary system message.
|
|
189
|
-
def reconstruct(thread_id)
  records = @storage.load_compactions(thread_id:)
  # No compaction has been recorded: the legacy store is returned as-is.
  return @storage.load(thread_id:) if records.empty?

  raw_rows = @storage.load_raw(thread_id:)
  # Everything up to and including this seq is represented by a summary.
  boundary = records.last[:end_seq]
  summaries = records.map { |record| summary_message(record[:summary_text]) }
  tail = raw_rows
    .select { |row| row[:seq] > boundary }
    .map { |row| row[:message] }
  summaries.concat(tail)
end
|
|
199
|
-
|
|
200
|
-
# Immutable value object used as a summary placeholder in reconstructed context.
|
|
201
|
-
# Immutable role/content pair standing in for a compacted range when the
# context is rebuilt.
SummaryMessage = Data.define(:role, :content)

def summary_message(text)
  # The summary is wrapped in an envelope that marks it as untrusted,
  # memory-sourced context.
  opening_tag = '<context type="summary" source="memory" trusted="false">'
  SummaryMessage.new(role: :system, content: "#{opening_tag}\n#{text}\n</context>")
end
|
|
211
|
-
end
|
|
212
|
-
end
|
|
213
|
-
end
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Memory
|
|
5
|
-
module Retrieval
|
|
6
|
-
# Abstract base class for conversation retrieval strategies.
|
|
7
|
-
#
|
|
8
|
-
# @abstract Subclass and implement #select.
|
|
9
|
-
class Base
|
|
10
|
-
# Select messages to inject into the context from a full chronological history.
|
|
11
|
-
#
|
|
12
|
-
# @param messages [Array] full history in chronological order
|
|
13
|
-
# @param query [String, nil] current user input for query-aware retrieval
|
|
14
|
-
# @param thread_id [String, nil] active thread identifier for scoped retrieval
|
|
15
|
-
# @return [Array] subset of messages in chronological order
|
|
16
|
-
def select(messages, query: nil, thread_id: nil)
  # Abstract hook: always raises, naming the receiver's class.
  complaint = "#{self.class}#select is not implemented"
  raise NotImplementedError, complaint
end
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
end
|
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Memory
|
|
5
|
-
module Retrieval
|
|
6
|
-
# Retrieval strategy that merges results from multiple child retrieval strategies.
|
|
7
|
-
#
|
|
8
|
-
# Each child is given a weight that controls what fraction of a token budget
|
|
9
|
-
# it should consume. Results are deduplicated (by role + content) and
|
|
10
|
-
# system messages are sorted to the front.
|
|
11
|
-
#
|
|
12
|
-
# @example
|
|
13
|
-
# composite = Phronomy::Memory::Retrieval::Composite.new(
|
|
14
|
-
# sources: [
|
|
15
|
-
# { retrieval: Phronomy::Memory::Retrieval::Recent.new(k: 5), weight: 0.4 },
|
|
16
|
-
# { retrieval: Phronomy::Memory::Retrieval::Semantic.new(...), weight: 0.6 }
|
|
17
|
-
# ]
|
|
18
|
-
# )
|
|
19
|
-
# manager = Phronomy::Memory::ConversationManager.new(
|
|
20
|
-
# storage: Phronomy::Memory::Storage::InMemory.new,
|
|
21
|
-
# retrieval: composite
|
|
22
|
-
# )
|
|
23
|
-
class Composite < Base
|
|
24
|
-
# @param sources [Array<Hash>] each entry: { retrieval:, weight: } (weight default 1.0)
|
|
25
|
-
def initialize(sources:)
  # Normalise every entry to {retrieval:, weight:} with a Float weight
  # (1.0 when none is given).
  # NOTE(review): the weight is stored here, but no method in this class
  # visibly reads it; confirm where it is consumed.
  @sources = sources.map do |entry|
    child = entry[:retrieval]
    weight = (entry[:weight] || 1.0).to_f
    {retrieval: child, weight: weight}
  end
end
|
|
28
|
-
|
|
29
|
-
# Merge results from all child retrievals, deduplicating by role+content.
|
|
30
|
-
# System messages are sorted to the front; others preserve insertion order.
|
|
31
|
-
#
|
|
32
|
-
# @param messages [Array] full chronological history
|
|
33
|
-
# @param query [String, nil] forwarded to each child retrieval
|
|
34
|
-
# @param thread_id [String, nil] forwarded to each child retrieval
|
|
35
|
-
# @return [Array]
|
|
36
|
-
def select(messages, query: nil, thread_id: nil)
  seen_keys = {}

  # Ask each child in order and keep a message only the first time its
  # role+content fingerprint shows up.
  merged = @sources.flat_map do |source|
    picked = source[:retrieval].select(messages, query: query, thread_id: thread_id)
    picked.select do |msg|
      fingerprint = "#{msg.role}:#{msg.content}"
      next false if seen_keys[fingerprint]

      seen_keys[fingerprint] = true
    end
  end

  # System messages go first; each group keeps its relative order.
  systems, others = merged.partition { |msg| msg.role.to_sym == :system }
  systems + others
end
|
|
54
|
-
|
|
55
|
-
# Forward index calls to all child retrievals that support it.
|
|
56
|
-
#
|
|
57
|
-
# @param thread_id [String]
|
|
58
|
-
# @param messages [Array]
|
|
59
|
-
def index(thread_id:, messages:)
  @sources.each do |source|
    child = source[:retrieval]
    # Children without an #index method are skipped.
    next unless child.respond_to?(:index)

    child.index(thread_id: thread_id, messages: messages)
  end
end
|
|
64
|
-
|
|
65
|
-
# Forward clear_index to all child retrievals that support it.
|
|
66
|
-
#
|
|
67
|
-
# @param thread_id [String]
|
|
68
|
-
def clear_index(thread_id:)
  @sources.each do |source|
    child = source[:retrieval]
    # Children without a #clear_index method are skipped.
    next unless child.respond_to?(:clear_index)

    child.clear_index(thread_id: thread_id)
  end
end
|
|
73
|
-
end
|
|
74
|
-
end
|
|
75
|
-
end
|
|
76
|
-
end
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Memory
|
|
5
|
-
module Retrieval
|
|
6
|
-
# Retrieval strategy that returns the most recent k turns (k*2 messages).
|
|
7
|
-
#
|
|
8
|
-
# This is the simplest and most predictable strategy: older messages are
|
|
9
|
-
# discarded without compression.
|
|
10
|
-
#
|
|
11
|
-
# @example
|
|
12
|
-
# retrieval = Phronomy::Memory::Retrieval::Recent.new(k: 10)
|
|
13
|
-
# manager = Phronomy::Memory::ConversationManager.new(
|
|
14
|
-
# storage: storage,
|
|
15
|
-
# retrieval: retrieval
|
|
16
|
-
# )
|
|
17
|
-
class Recent < Base
|
|
18
|
-
# @param k [Integer] number of turns to retain (each turn = 1 user + 1 assistant message)
|
|
19
|
-
def initialize(k: 10)
  # Stored as a turn count; #select doubles it to obtain a message count.
  @k = k
end
|
|
22
|
-
|
|
23
|
-
# Returns the last k*2 messages from the history.
|
|
24
|
-
#
|
|
25
|
-
# @param messages [Array] full chronological history
|
|
26
|
-
# @param query [String, nil] unused for recency-based retrieval
|
|
27
|
-
# @param thread_id [String, nil] unused for recency-based retrieval
|
|
28
|
-
# @return [Array]
|
|
29
|
-
def select(messages, query: nil, thread_id: nil)
  # One turn is two messages (user + assistant), hence the doubling.
  window = @k * 2
  messages.last(window)
end
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
end
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Memory
|
|
5
|
-
module Retrieval
|
|
6
|
-
# Retrieval strategy that returns the k semantically closest messages to the query.
|
|
7
|
-
#
|
|
8
|
-
# Messages are indexed in a VectorStore on save. On retrieval, the query is
|
|
9
|
-
# embedded and the k nearest messages are returned. Falls back to the k most
|
|
10
|
-
# recent messages when no query is provided.
|
|
11
|
-
#
|
|
12
|
-
# @example
|
|
13
|
-
# retrieval = Phronomy::Memory::Retrieval::Semantic.new(
|
|
14
|
-
# embeddings: Phronomy::Embeddings::RubyLLMEmbeddings.new(model: "text-embedding-3-small"),
|
|
15
|
-
# k: 10
|
|
16
|
-
# )
|
|
17
|
-
class Semantic < Base
|
|
18
|
-
# @param store [Phronomy::VectorStore::Base] vector store (default InMemory)
|
|
19
|
-
# @param embeddings [Phronomy::Embeddings::Base] embeddings adapter
|
|
20
|
-
# @param k [Integer] number of messages to retrieve
|
|
21
|
-
# @param max_index_size [Integer, nil] maximum number of entries kept in the
|
|
22
|
-
# local index. When nil, the index grows unboundedly. When exceeded, the
|
|
23
|
-
# oldest entries (by insertion order) are evicted.
|
|
24
|
-
def initialize(embeddings:, store: nil, k: 10, max_index_size: nil)
  # Caller-supplied configuration.
  @embeddings = embeddings
  @k = k
  @max_index_size = max_index_size

  # Collaborators: a caller-supplied store wins, otherwise an in-memory one.
  @store = store || Phronomy::VectorStore::InMemory.new
  @actor = Phronomy::Actor.new

  # Bookkeeping.
  @index = {}               # id => message (Hash keeps insertion order)
  @counter = 0              # numeric suffix for the next entry id
  @indexed_object_ids = {}  # thread_id => { object_id => true }
end
|
|
34
|
-
|
|
35
|
-
# Index a new batch of messages so they are searchable on future #select calls.
|
|
36
|
-
# Called by ConversationManager#save.
|
|
37
|
-
#
|
|
38
|
-
# Messages are deduplicated by object identity: if a message object has already
|
|
39
|
-
# been indexed for the given thread_id, it is skipped (no duplicate embed call).
|
|
40
|
-
#
|
|
41
|
-
# @param thread_id [String]
|
|
42
|
-
# @param messages [Array]
|
|
43
|
-
def index(thread_id:, messages:)
  messages.each do |msg|
    # Cheap pre-check so already-indexed message objects never reach embed.
    known = @actor.call { (@indexed_object_ids[thread_id] ||= {})[msg.object_id] }
    next if known

    # The embed call is made outside any @actor.call block.
    vector = @embeddings.embed(msg.content.to_s)

    @actor.call do
      seen_for_thread = (@indexed_object_ids[thread_id] ||= {})
      # Check again: the message may have been indexed while we were embedding.
      unless seen_for_thread[msg.object_id]
        entry_id = "#{thread_id}:#{@counter}"
        @counter += 1
        @store.add(id: entry_id, embedding: vector, metadata: {thread_id: thread_id, message: msg})
        @index[entry_id] = msg
        seen_for_thread[msg.object_id] = true
        evict_oldest! if @max_index_size && @index.size > @max_index_size
      end
    end
  end
end
|
|
66
|
-
|
|
67
|
-
# Clear indexed messages for a thread.
|
|
68
|
-
#
|
|
69
|
-
# @param thread_id [String]
|
|
70
|
-
def clear_index(thread_id:)
  # Entry ids are built by #index as "<thread_id>:<counter>", so the owning
  # thread is everything before the LAST colon. Matching on that (rather than
  # on a "<thread_id>:" prefix) keeps a thread id that itself contains ":"
  # from being swept up: clearing "a" must not remove entries of "a:b".
  owner = thread_id.to_s

  @actor.call do
    stale_ids = @index.keys.select { |id| id.rpartition(":").first == owner }
    stale_ids.each do |id|
      @index.delete(id)
      @store.remove(id: id)
    end
    # Forget the per-thread "already indexed" marks as well.
    @indexed_object_ids.delete(thread_id)
  end
end
|
|
80
|
-
|
|
81
|
-
# Return semantically relevant messages, or recent messages when query is nil.
|
|
82
|
-
#
|
|
83
|
-
# @param messages [Array] full history (used as fallback when query is nil)
|
|
84
|
-
# @param query [String, nil] current user input for semantic search
|
|
85
|
-
# @param thread_id [String, nil] when provided, results are filtered to this thread
|
|
86
|
-
# @return [Array]
|
|
87
|
-
def select(messages, query: nil, thread_id: nil)
  # Missing or blank query: fall back to the k most recent messages.
  return messages.last(@k) if !query || query.strip.empty?

  vector = @embeddings.embed(query)
  # Fetch three times k so that dropping other threads' hits can still
  # leave up to k results.
  hits = @actor.call { @store.search(query_embedding: vector, k: @k * 3) }
  scoped = hits.select { |hit| thread_id.nil? || hit[:metadata][:thread_id] == thread_id }
  scoped.first(@k).map { |hit| hit[:metadata][:message] }
end
|
|
99
|
-
|
|
100
|
-
private
|
|
101
|
-
|
|
102
|
-
# Evicts the oldest index entry to enforce max_index_size.
|
|
103
|
-
# Must be called inside the Actor.
|
|
104
|
-
def evict_oldest!
  # Hash keeps insertion order, so the first key is the oldest entry.
  victim = @index.each_key.first
  return unless victim

  @index.delete(victim)
  @store.remove(id: victim)
end
|
|
111
|
-
end
|
|
112
|
-
end
|
|
113
|
-
end
|
|
114
|
-
end
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Memory
|
|
5
|
-
# Retrieval is the selection axis of conversation management.
|
|
6
|
-
# Implementations decide which messages from a full history to return
|
|
7
|
-
# given a query and a maximum message count.
|
|
8
|
-
# Token budgeting is NOT their responsibility — that belongs to Context::Assembler.
|
|
9
|
-
module Retrieval
|
|
10
|
-
end
|
|
11
|
-
end
|
|
12
|
-
end
|