swarm_sdk 2.7.13 → 3.0.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/swarm_sdk/ruby_llm_patches/chat_callbacks_patch.rb +43 -22
- data/lib/swarm_sdk/ruby_llm_patches/init.rb +6 -0
- data/lib/swarm_sdk/ruby_llm_patches/mcp_ssl_patch.rb +144 -0
- data/lib/swarm_sdk/ruby_llm_patches/tool_concurrency_patch.rb +3 -4
- data/lib/swarm_sdk/v3/agent.rb +1165 -0
- data/lib/swarm_sdk/v3/agent_builder.rb +533 -0
- data/lib/swarm_sdk/v3/agent_definition.rb +330 -0
- data/lib/swarm_sdk/v3/configuration.rb +490 -0
- data/lib/swarm_sdk/v3/debug_log.rb +86 -0
- data/lib/swarm_sdk/v3/event_stream.rb +130 -0
- data/lib/swarm_sdk/v3/hooks/context.rb +112 -0
- data/lib/swarm_sdk/v3/hooks/result.rb +115 -0
- data/lib/swarm_sdk/v3/hooks/runner.rb +128 -0
- data/lib/swarm_sdk/v3/mcp/connector.rb +183 -0
- data/lib/swarm_sdk/v3/mcp/mcp_error.rb +15 -0
- data/lib/swarm_sdk/v3/mcp/server_definition.rb +125 -0
- data/lib/swarm_sdk/v3/mcp/ssl_http_transport.rb +103 -0
- data/lib/swarm_sdk/v3/mcp/stdio_transport.rb +135 -0
- data/lib/swarm_sdk/v3/mcp/tool_proxy.rb +53 -0
- data/lib/swarm_sdk/v3/memory/adapters/base.rb +297 -0
- data/lib/swarm_sdk/v3/memory/adapters/faiss_support.rb +194 -0
- data/lib/swarm_sdk/v3/memory/adapters/filesystem_adapter.rb +212 -0
- data/lib/swarm_sdk/v3/memory/adapters/sqlite_adapter.rb +507 -0
- data/lib/swarm_sdk/v3/memory/adapters/vector_utils.rb +88 -0
- data/lib/swarm_sdk/v3/memory/card.rb +206 -0
- data/lib/swarm_sdk/v3/memory/cluster.rb +146 -0
- data/lib/swarm_sdk/v3/memory/compressor.rb +496 -0
- data/lib/swarm_sdk/v3/memory/consolidator.rb +427 -0
- data/lib/swarm_sdk/v3/memory/context_builder.rb +339 -0
- data/lib/swarm_sdk/v3/memory/edge.rb +105 -0
- data/lib/swarm_sdk/v3/memory/embedder.rb +185 -0
- data/lib/swarm_sdk/v3/memory/exposure_tracker.rb +104 -0
- data/lib/swarm_sdk/v3/memory/ingestion_pipeline.rb +394 -0
- data/lib/swarm_sdk/v3/memory/retriever.rb +289 -0
- data/lib/swarm_sdk/v3/memory/store.rb +489 -0
- data/lib/swarm_sdk/v3/skills/loader.rb +147 -0
- data/lib/swarm_sdk/v3/skills/manifest.rb +45 -0
- data/lib/swarm_sdk/v3/sub_task_agent.rb +248 -0
- data/lib/swarm_sdk/v3/tools/base.rb +80 -0
- data/lib/swarm_sdk/v3/tools/bash.rb +174 -0
- data/lib/swarm_sdk/v3/tools/clock.rb +32 -0
- data/lib/swarm_sdk/v3/tools/edit.rb +111 -0
- data/lib/swarm_sdk/v3/tools/glob.rb +96 -0
- data/lib/swarm_sdk/v3/tools/grep.rb +200 -0
- data/lib/swarm_sdk/v3/tools/message_teammate.rb +15 -0
- data/lib/swarm_sdk/v3/tools/message_user.rb +15 -0
- data/lib/swarm_sdk/v3/tools/read.rb +181 -0
- data/lib/swarm_sdk/v3/tools/read_tracker.rb +40 -0
- data/lib/swarm_sdk/v3/tools/registry.rb +208 -0
- data/lib/swarm_sdk/v3/tools/sub_task.rb +183 -0
- data/lib/swarm_sdk/v3/tools/think.rb +88 -0
- data/lib/swarm_sdk/v3/tools/write.rb +87 -0
- data/lib/swarm_sdk/v3.rb +145 -0
- metadata +84 -148
- data/lib/swarm_sdk/agent/RETRY_LOGIC.md +0 -175
- data/lib/swarm_sdk/agent/builder.rb +0 -680
- data/lib/swarm_sdk/agent/chat.rb +0 -1432
- data/lib/swarm_sdk/agent/chat_helpers/context_tracker.rb +0 -375
- data/lib/swarm_sdk/agent/chat_helpers/event_emitter.rb +0 -204
- data/lib/swarm_sdk/agent/chat_helpers/hook_integration.rb +0 -480
- data/lib/swarm_sdk/agent/chat_helpers/instrumentation.rb +0 -85
- data/lib/swarm_sdk/agent/chat_helpers/llm_configuration.rb +0 -290
- data/lib/swarm_sdk/agent/chat_helpers/logging_helpers.rb +0 -116
- data/lib/swarm_sdk/agent/chat_helpers/serialization.rb +0 -83
- data/lib/swarm_sdk/agent/chat_helpers/system_reminder_injector.rb +0 -134
- data/lib/swarm_sdk/agent/chat_helpers/system_reminders.rb +0 -79
- data/lib/swarm_sdk/agent/chat_helpers/token_tracking.rb +0 -146
- data/lib/swarm_sdk/agent/context.rb +0 -115
- data/lib/swarm_sdk/agent/context_manager.rb +0 -315
- data/lib/swarm_sdk/agent/definition.rb +0 -581
- data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb +0 -226
- data/lib/swarm_sdk/agent/system_prompt_builder.rb +0 -161
- data/lib/swarm_sdk/agent/tool_registry.rb +0 -189
- data/lib/swarm_sdk/agent_registry.rb +0 -146
- data/lib/swarm_sdk/builders/base_builder.rb +0 -553
- data/lib/swarm_sdk/claude_code_agent_adapter.rb +0 -205
- data/lib/swarm_sdk/concerns/cleanupable.rb +0 -39
- data/lib/swarm_sdk/concerns/snapshotable.rb +0 -67
- data/lib/swarm_sdk/concerns/validatable.rb +0 -55
- data/lib/swarm_sdk/config.rb +0 -367
- data/lib/swarm_sdk/configuration/parser.rb +0 -397
- data/lib/swarm_sdk/configuration/translator.rb +0 -283
- data/lib/swarm_sdk/configuration.rb +0 -165
- data/lib/swarm_sdk/context_compactor/metrics.rb +0 -147
- data/lib/swarm_sdk/context_compactor/token_counter.rb +0 -102
- data/lib/swarm_sdk/context_compactor.rb +0 -335
- data/lib/swarm_sdk/context_management/builder.rb +0 -128
- data/lib/swarm_sdk/context_management/context.rb +0 -328
- data/lib/swarm_sdk/custom_tool_registry.rb +0 -226
- data/lib/swarm_sdk/defaults.rb +0 -251
- data/lib/swarm_sdk/events_to_messages.rb +0 -199
- data/lib/swarm_sdk/hooks/adapter.rb +0 -359
- data/lib/swarm_sdk/hooks/context.rb +0 -197
- data/lib/swarm_sdk/hooks/definition.rb +0 -80
- data/lib/swarm_sdk/hooks/error.rb +0 -29
- data/lib/swarm_sdk/hooks/executor.rb +0 -146
- data/lib/swarm_sdk/hooks/registry.rb +0 -147
- data/lib/swarm_sdk/hooks/result.rb +0 -150
- data/lib/swarm_sdk/hooks/shell_executor.rb +0 -256
- data/lib/swarm_sdk/hooks/tool_call.rb +0 -35
- data/lib/swarm_sdk/hooks/tool_result.rb +0 -62
- data/lib/swarm_sdk/log_collector.rb +0 -227
- data/lib/swarm_sdk/log_stream.rb +0 -127
- data/lib/swarm_sdk/markdown_parser.rb +0 -75
- data/lib/swarm_sdk/model_aliases.json +0 -8
- data/lib/swarm_sdk/models.json +0 -44002
- data/lib/swarm_sdk/models.rb +0 -161
- data/lib/swarm_sdk/node_context.rb +0 -245
- data/lib/swarm_sdk/observer/builder.rb +0 -81
- data/lib/swarm_sdk/observer/config.rb +0 -45
- data/lib/swarm_sdk/observer/manager.rb +0 -236
- data/lib/swarm_sdk/patterns/agent_observer.rb +0 -160
- data/lib/swarm_sdk/permissions/config.rb +0 -239
- data/lib/swarm_sdk/permissions/error_formatter.rb +0 -121
- data/lib/swarm_sdk/permissions/path_matcher.rb +0 -35
- data/lib/swarm_sdk/permissions/validator.rb +0 -173
- data/lib/swarm_sdk/permissions_builder.rb +0 -122
- data/lib/swarm_sdk/plugin.rb +0 -309
- data/lib/swarm_sdk/plugin_registry.rb +0 -101
- data/lib/swarm_sdk/proc_helpers.rb +0 -53
- data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +0 -117
- data/lib/swarm_sdk/restore_result.rb +0 -65
- data/lib/swarm_sdk/result.rb +0 -212
- data/lib/swarm_sdk/snapshot.rb +0 -156
- data/lib/swarm_sdk/snapshot_from_events.rb +0 -397
- data/lib/swarm_sdk/state_restorer.rb +0 -476
- data/lib/swarm_sdk/state_snapshot.rb +0 -334
- data/lib/swarm_sdk/swarm/agent_initializer.rb +0 -648
- data/lib/swarm_sdk/swarm/all_agents_builder.rb +0 -195
- data/lib/swarm_sdk/swarm/builder.rb +0 -256
- data/lib/swarm_sdk/swarm/executor.rb +0 -290
- data/lib/swarm_sdk/swarm/hook_triggers.rb +0 -151
- data/lib/swarm_sdk/swarm/lazy_delegate_chat.rb +0 -372
- data/lib/swarm_sdk/swarm/logging_callbacks.rb +0 -360
- data/lib/swarm_sdk/swarm/mcp_configurator.rb +0 -270
- data/lib/swarm_sdk/swarm/swarm_registry_builder.rb +0 -67
- data/lib/swarm_sdk/swarm/tool_configurator.rb +0 -392
- data/lib/swarm_sdk/swarm.rb +0 -843
- data/lib/swarm_sdk/swarm_loader.rb +0 -145
- data/lib/swarm_sdk/swarm_registry.rb +0 -136
- data/lib/swarm_sdk/tools/base.rb +0 -63
- data/lib/swarm_sdk/tools/bash.rb +0 -280
- data/lib/swarm_sdk/tools/clock.rb +0 -46
- data/lib/swarm_sdk/tools/delegate.rb +0 -389
- data/lib/swarm_sdk/tools/document_converters/base_converter.rb +0 -83
- data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +0 -99
- data/lib/swarm_sdk/tools/document_converters/html_converter.rb +0 -101
- data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +0 -78
- data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +0 -194
- data/lib/swarm_sdk/tools/edit.rb +0 -145
- data/lib/swarm_sdk/tools/glob.rb +0 -166
- data/lib/swarm_sdk/tools/grep.rb +0 -235
- data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +0 -43
- data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +0 -167
- data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +0 -65
- data/lib/swarm_sdk/tools/mcp_tool_stub.rb +0 -198
- data/lib/swarm_sdk/tools/multi_edit.rb +0 -236
- data/lib/swarm_sdk/tools/path_resolver.rb +0 -92
- data/lib/swarm_sdk/tools/read.rb +0 -261
- data/lib/swarm_sdk/tools/registry.rb +0 -205
- data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +0 -117
- data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +0 -97
- data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +0 -108
- data/lib/swarm_sdk/tools/stores/read_tracker.rb +0 -96
- data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +0 -273
- data/lib/swarm_sdk/tools/stores/storage.rb +0 -142
- data/lib/swarm_sdk/tools/stores/todo_manager.rb +0 -65
- data/lib/swarm_sdk/tools/think.rb +0 -100
- data/lib/swarm_sdk/tools/todo_write.rb +0 -237
- data/lib/swarm_sdk/tools/web_fetch.rb +0 -264
- data/lib/swarm_sdk/tools/write.rb +0 -112
- data/lib/swarm_sdk/transcript_builder.rb +0 -278
- data/lib/swarm_sdk/utils.rb +0 -68
- data/lib/swarm_sdk/validation_result.rb +0 -33
- data/lib/swarm_sdk/version.rb +0 -5
- data/lib/swarm_sdk/workflow/agent_config.rb +0 -95
- data/lib/swarm_sdk/workflow/builder.rb +0 -227
- data/lib/swarm_sdk/workflow/executor.rb +0 -497
- data/lib/swarm_sdk/workflow/node_builder.rb +0 -593
- data/lib/swarm_sdk/workflow/transformer_executor.rb +0 -250
- data/lib/swarm_sdk/workflow.rb +0 -589
- data/lib/swarm_sdk.rb +0 -718
|
@@ -0,0 +1,427 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SwarmSDK
|
|
4
|
+
module V3
|
|
5
|
+
module Memory
|
|
6
|
+
# Periodic maintenance for the memory system
|
|
7
|
+
#
|
|
8
|
+
# Performs:
|
|
9
|
+
# - Deduplication and canonicalization of similar cards
|
|
10
|
+
# - Cluster summary updates
|
|
11
|
+
# - Conflict detection between contradicting cards
|
|
12
|
+
# - Merging of redundant cards
|
|
13
|
+
#
|
|
14
|
+
# @example
|
|
15
|
+
# consolidator = Consolidator.new(adapter: adapter, embedder: embedder)
|
|
16
|
+
# consolidator.run
|
|
17
|
+
class Consolidator
|
|
18
|
+
# Edge types that indicate a supporting relationship between cards.
|
|
19
|
+
# Pairs connected by these edges should never be flagged as contradictions.
|
|
20
|
+
# Derived from Edge::TYPES so new edge types are non-conflicting by default.
|
|
21
|
+
SUPPORTING_EDGE_TYPES = (Edge::TYPES - [:contradicts]).freeze
|
|
22
|
+
|
|
23
|
+
# Build a consolidator bound to a storage adapter and a text embedder.
#
# @param adapter [Adapters::Base] Storage adapter
# @param embedder [Embedder] Text embedder
def initialize(adapter:, embedder:)
  @adapter, @embedder = adapter, embedder
  # Supplies the similarity thresholds read by the dedup and conflict passes.
  @config = Configuration.instance
end
|
|
30
|
+
|
|
31
|
+
# Execute every maintenance pass once, in order: deduplication,
# conflict detection, then cluster summary refresh.
#
# @return [Hash] Counts keyed by :duplicates_merged, :conflicts_detected
#   and :clusters_updated
def run
  # Hash literal values are evaluated top to bottom, so the passes
  # still execute in the order listed here.
  {
    duplicates_merged: deduplicate,
    conflicts_detected: detect_conflicts,
    clusters_updated: update_cluster_summaries,
  }
end
|
|
45
|
+
|
|
46
|
+
# Run full consolidation with progress events
#
# Each sub-phase emits one progress event per item it visits, numbered from
# the offset it is given. The offset passed to a phase must therefore equal
# the base plus the number of items every earlier phase visited.
#
# @param base_completed [Integer] Items completed before this phase
# @param total_items [Integer] Total items across all phases
# @return [Hash] Summary of actions taken
def run_with_progress(base_completed, total_items)
  # Count BEFORE deduplicating. The dedup phase reports progress over the
  # cards that have an embedding at its start, and merging deletes the
  # duplicates; counting afterwards under-reports the phase size and makes
  # the overall progress counter jump backwards when anything was merged.
  dedup_count = @adapter.list_cards.count(&:embedding)
  deduped = deduplicate_with_progress(base_completed, total_items)

  # The conflict phase walks the cards that survived deduplication, so its
  # size is measured on the post-dedup card list.
  conflict_types = [:constraint, :decision, :fact]
  conflict_count = @adapter.list_cards.count { |c| conflict_types.include?(c.type) && c.embedding }
  conflicts = detect_conflicts_with_progress(base_completed + dedup_count, total_items)

  clusters_updated = update_cluster_summaries_with_progress(
    base_completed + dedup_count + conflict_count,
    total_items,
  )

  {
    duplicates_merged: deduped,
    conflicts_detected: conflicts,
    clusters_updated: clusters_updated,
  }
end
|
|
72
|
+
|
|
73
|
+
# Merge near-duplicate cards into a single canonical card.
#
# Walks every stored card that has an embedding, asks the adapter for its
# nearest neighbours above the configured dedup threshold, and folds each
# not-yet-handled neighbour into the card being visited.
#
# @return [Integer] Number of duplicates merged
def deduplicate
  handled = Set.new
  merges = 0

  @adapter.list_cards.each do |card|
    # Already merged away or already used as a canonical card; cards without
    # an embedding cannot be searched for.
    next if handled.include?(card.id) || !card.embedding

    hits = @adapter.vector_search(card.embedding, top_k: 5, threshold: @config.consolidator_dedup_threshold)

    hits.each do |hit|
      hit_id = hit[:id]
      # The search can return the card itself; skip it and anything handled.
      next if hit_id == card.id || handled.include?(hit_id)

      twin = @adapter.read_card(hit_id)
      next unless twin

      merge_cards(canonical: card, duplicate: twin)
      handled << twin.id
      merges += 1
    end

    handled << card.id
  end

  merges
end
|
|
105
|
+
|
|
106
|
+
# Flag pairs of cards that may contradict each other.
#
# Only cards of type :constraint, :decision or :fact that have an embedding
# are examined. A neighbour is flagged when it is similar enough to pass the
# conflict threshold but not so similar that it counts as a duplicate, is
# itself one of those types, and is not already linked to the card by a
# `contradicts` or supporting edge. Each flagged pair gets one new
# `contradicts` edge weighted by the similarity score.
#
# @return [Integer] Number of conflicts detected
def detect_conflicts
  eligible_types = [:constraint, :decision, :fact]
  flagged_pairs = Set.new
  flagged = 0

  subjects = @adapter.list_cards.select { |c| eligible_types.include?(c.type) && c.embedding }

  subjects.each do |card|
    neighbours = @adapter.vector_search(card.embedding, top_k: 5, threshold: @config.consolidator_conflict_threshold)

    neighbours.each do |hit|
      neighbour_id = hit[:id]
      next if neighbour_id == card.id
      # Above the dedup threshold the pair is a duplicate, not a conflict.
      next if hit[:similarity] > @config.consolidator_dedup_threshold

      # Order-independent key so A/B and B/A are flagged only once per run.
      pair = [card.id, neighbour_id].sort.join(":")
      next if flagged_pairs.include?(pair)

      neighbour = @adapter.read_card(neighbour_id)
      next unless neighbour && eligible_types.include?(neighbour.type)

      # Edges that already connect this card to the neighbour, either direction.
      linking = @adapter.edges_for(card.id).select do |e|
        e.from_id == neighbour_id || e.to_id == neighbour_id
      end
      next if linking.any? { |e| e.type == :contradicts }
      # Supporting edges mean the cards restate the same point, so they
      # must not be reported as contradicting.
      next if linking.any? { |e| SUPPORTING_EDGE_TYPES.include?(e.type) }

      @adapter.write_edge(
        Edge.new(
          from_id: card.id,
          to_id: neighbour_id,
          type: :contradicts,
          weight: hit[:similarity],
        ),
      )
      flagged_pairs << pair
      flagged += 1
    end
  end

  flagged
end
|
|
167
|
+
|
|
168
|
+
# Refresh the derived fields of every non-empty cluster.
#
# For each cluster whose member cards can still be read, recomputes the ten
# most frequent entities, a rolling summary (member texts joined with " | "
# and cut to 500 characters) and, when any member has an embedding, the mean
# embedding; then stamps and persists the cluster.
#
# @return [Integer] Number of clusters updated
def update_cluster_summaries
  @adapter.list_clusters.count do |cluster|
    next false if cluster.card_ids.empty?

    # Ids whose card can no longer be read are dropped here.
    members = cluster.card_ids.filter_map { |id| @adapter.read_card(id) }
    next false if members.empty?

    entity_counts = members.flat_map(&:entities).tally
    ranked = entity_counts.sort_by { |_, occurrences| -occurrences }
    cluster.key_entities = ranked.take(10).map(&:first)

    joined_text = members.map(&:text).join(" | ")
    cluster.rolling_summary = joined_text.slice(0, 500)

    # Keep the previous embedding when no member carries one.
    vectors = members.filter_map(&:embedding)
    cluster.embedding = average_embedding(vectors) unless vectors.empty?

    cluster.updated_at = Time.now
    @adapter.write_cluster(cluster)
    true
  end
end
|
|
200
|
+
|
|
201
|
+
# Merge near-duplicate cards, emitting one progress event per embedded card.
#
# Only cards that have an embedding are walked. A "memory_defrag_progress"
# event is emitted after every such card, including cards that are skipped
# because an earlier iteration already merged or processed them.
#
# @param base_completed [Integer] Items completed before this phase
# @param total_items [Integer] Total items across all phases
# @return [Integer] Number of duplicates merged
def deduplicate_with_progress(base_completed, total_items)
  embedded = @adapter.list_cards.select(&:embedding)
  handled = Set.new
  merges = 0

  embedded.each.with_index(1) do |card, position|
    unless handled.include?(card.id)
      hits = @adapter.vector_search(card.embedding, top_k: 5, threshold: @config.consolidator_dedup_threshold)

      hits.each do |hit|
        hit_id = hit[:id]
        # The search can return the card itself; skip it and anything handled.
        next if hit_id == card.id || handled.include?(hit_id)

        twin = @adapter.read_card(hit_id)
        next unless twin

        merge_cards(canonical: card, duplicate: twin)
        handled << twin.id
        merges += 1
      end

      handled << card.id
    end

    # `position` is 1-based, so it doubles as the count of cards visited.
    EventStream.emit(
      type: "memory_defrag_progress",
      phase: "consolidate_dedup",
      description: "Finding and merging duplicate memory cards",
      phase_current: position,
      phase_total: embedded.size,
      overall_current: base_completed + position,
      overall_total: total_items,
    )
  end

  merges
end
|
|
245
|
+
|
|
246
|
+
# Flag potentially contradicting card pairs, emitting progress as it goes.
#
# Examines cards of type :constraint, :decision or :fact that have an
# embedding. A neighbour is flagged with a new `contradicts` edge when its
# similarity passes the conflict threshold without exceeding the dedup
# threshold, it is one of the same types, and no `contradicts` or supporting
# edge already links the two. One "memory_defrag_progress" event is emitted
# per examined card.
#
# @param base_completed [Integer] Items completed before this phase
# @param total_items [Integer] Total items across all phases
# @return [Integer] Number of conflicts detected
def detect_conflicts_with_progress(base_completed, total_items)
  eligible_types = [:constraint, :decision, :fact]
  flagged_pairs = Set.new
  flagged = 0

  subjects = @adapter.list_cards.select { |c| eligible_types.include?(c.type) && c.embedding }

  subjects.each.with_index(1) do |card, position|
    neighbours = @adapter.vector_search(card.embedding, top_k: 5, threshold: @config.consolidator_conflict_threshold)

    neighbours.each do |hit|
      neighbour_id = hit[:id]
      next if neighbour_id == card.id
      # Above the dedup threshold the pair is a duplicate, not a conflict.
      next if hit[:similarity] > @config.consolidator_dedup_threshold

      # Order-independent key so A/B and B/A are flagged only once per run.
      pair = [card.id, neighbour_id].sort.join(":")
      next if flagged_pairs.include?(pair)

      neighbour = @adapter.read_card(neighbour_id)
      next unless neighbour && eligible_types.include?(neighbour.type)

      # Edges that already connect this card to the neighbour, either direction.
      linking = @adapter.edges_for(card.id).select do |e|
        e.from_id == neighbour_id || e.to_id == neighbour_id
      end
      next if linking.any? { |e| e.type == :contradicts }
      next if linking.any? { |e| SUPPORTING_EDGE_TYPES.include?(e.type) }

      @adapter.write_edge(
        Edge.new(
          from_id: card.id,
          to_id: neighbour_id,
          type: :contradicts,
          weight: hit[:similarity],
        ),
      )
      flagged_pairs << pair
      flagged += 1
    end

    # `position` is 1-based, so it doubles as the count of cards examined.
    EventStream.emit(
      type: "memory_defrag_progress",
      phase: "consolidate_conflicts",
      description: "Detecting contradicting information in memory",
      phase_current: position,
      phase_total: subjects.size,
      overall_current: base_completed + position,
      overall_total: total_items,
    )
  end

  flagged
end
|
|
310
|
+
|
|
311
|
+
# Refresh the derived fields of every cluster, emitting progress as it goes.
#
# Clusters with at least one readable member card get their key entities,
# rolling summary, embedding (when any member has one) and timestamp
# recomputed and are written back. A "memory_defrag_progress" event is
# emitted for every cluster, updated or not.
#
# @param base_completed [Integer] Items completed before this phase
# @param total_items [Integer] Total items across all phases
# @return [Integer] Number of clusters updated
def update_cluster_summaries_with_progress(base_completed, total_items)
  clusters = @adapter.list_clusters
  refreshed = 0

  clusters.each.with_index(1) do |cluster, position|
    # Ids whose card can no longer be read are dropped from the member list.
    members =
      if cluster.card_ids.empty?
        []
      else
        cluster.card_ids.filter_map { |id| @adapter.read_card(id) }
      end

    unless members.empty?
      entity_counts = members.flat_map(&:entities).tally
      ranked = entity_counts.sort_by { |_, occurrences| -occurrences }
      cluster.key_entities = ranked.take(10).map(&:first)

      cluster.rolling_summary = members.map(&:text).join(" | ").slice(0, 500)

      # Keep the previous embedding when no member carries one.
      vectors = members.filter_map(&:embedding)
      cluster.embedding = average_embedding(vectors) unless vectors.empty?

      cluster.updated_at = Time.now
      @adapter.write_cluster(cluster)
      refreshed += 1
    end

    # `position` is 1-based, so it doubles as the count of clusters visited.
    EventStream.emit(
      type: "memory_defrag_progress",
      phase: "consolidate_clusters",
      description: "Updating cluster summaries and embeddings",
      phase_current: position,
      phase_total: clusters.size,
      overall_current: base_completed + position,
      overall_total: total_items,
    )
  end

  refreshed
end
|
|
348
|
+
|
|
349
|
+
private
|
|
350
|
+
|
|
351
|
+
# Fold a duplicate card into the card being kept, then remove the duplicate.
#
# The kept card gains the duplicate's source turn ids, entities and access
# count, and takes over the duplicate's text when that text is longer and
# not more compressed. Edges touching the duplicate are re-created against
# the kept card, the duplicate and its edges are deleted, and every cluster
# that listed the duplicate is updated to list the kept card instead.
#
# @param canonical [Card] Card to keep
# @param duplicate [Card] Card to merge and mark
# @return [void]
def merge_cards(canonical:, duplicate:)
  merged_turns = (canonical.source_turn_ids + duplicate.source_turn_ids).uniq
  merged_entities = (canonical.entities + duplicate.entities).uniq

  # Adopt the duplicate's wording only when it is longer and its
  # compression level does not exceed the kept card's.
  longer = duplicate.text.length > canonical.text.length
  canonical.text = duplicate.text if longer && duplicate.compression_level <= canonical.compression_level

  canonical.source_turn_ids = merged_turns
  canonical.entities = merged_entities
  canonical.access_count = canonical.access_count + duplicate.access_count
  canonical.updated_at = Time.now

  # Mark the in-memory duplicate as merged. It is deleted below, so this
  # marker is only visible to code still holding the object.
  duplicate.canonical_id = canonical.id
  duplicate.updated_at = Time.now

  # Re-point every edge of the duplicate at the kept card. This must happen
  # before the duplicate's edges are deleted further down.
  @adapter.edges_for(duplicate.id).each do |edge|
    source, target = [edge.from_id, edge.to_id].map do |endpoint|
      endpoint == duplicate.id ? canonical.id : endpoint
    end
    # An edge between the two merged cards would collapse into a self-edge.
    next if source == target

    @adapter.write_edge(
      Edge.new(from_id: source, to_id: target, type: edge.type, weight: edge.weight),
    )
  end

  @adapter.write_card(canonical)
  @adapter.delete_card(duplicate.id)
  @adapter.delete_edges_for(duplicate.id)

  # Swap the duplicate for the kept card in every cluster that listed it.
  @adapter.list_clusters.each do |cluster|
    if cluster.card_ids.include?(duplicate.id)
      cluster.remove_card(duplicate.id)
      cluster.add_card(canonical.id) unless cluster.card_ids.include?(canonical.id)
      @adapter.write_cluster(cluster)
    end
  end
end
|
|
407
|
+
|
|
408
|
+
# Average multiple embedding vectors
#
# @param embeddings [Array<Array<Float>>] Vectors to average; the length of
#   the first vector sets the dimension of the result
# @return [Array<Float>, nil] Element-wise mean as a new array, or nil when
#   no vectors are given
def average_embedding(embeddings)
  # Nothing to average: return nil instead of failing on `nil.size` below.
  return if embeddings.empty?

  # Return a copy so the result never shares storage with the input vector;
  # otherwise mutating one in place would silently change the other.
  return embeddings.first.dup if embeddings.size == 1

  dims = embeddings.first.size
  # Float accumulator keeps the division below fractional for integer input.
  sums = Array.new(dims, 0.0)

  embeddings.each do |emb|
    emb.each_with_index { |v, i| sums[i] += v }
  end

  count = embeddings.size
  sums.map { |total| total / count }
end
|
|
424
|
+
end
|
|
425
|
+
end
|
|
426
|
+
end
|
|
427
|
+
end
|