claude_memory 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/memory.sqlite3 +0 -0
- data/.claude/memory.sqlite3-shm +0 -0
- data/.claude/memory.sqlite3-wal +0 -0
- data/.claude/settings.json +78 -6
- data/.claude/settings.local.json +2 -1
- data/.claude/skills/improve/SKILL.md +113 -25
- data/.claude-plugin/commands/distill-transcripts.md +98 -0
- data/.claude-plugin/commands/memory-recall.md +67 -0
- data/.claude-plugin/marketplace.json +1 -1
- data/.claude-plugin/plugin.json +1 -1
- data/CHANGELOG.md +49 -1
- data/CLAUDE.md +29 -5
- data/docs/improvements.md +18 -56
- data/docs/quality_review.md +119 -224
- data/hooks/hooks.json +39 -7
- data/lib/claude_memory/commands/checks/distill_check.rb +61 -0
- data/lib/claude_memory/commands/checks/hooks_check.rb +2 -2
- data/lib/claude_memory/commands/checks/vec_check.rb +2 -1
- data/lib/claude_memory/commands/completion_command.rb +179 -0
- data/lib/claude_memory/commands/doctor_command.rb +2 -0
- data/lib/claude_memory/commands/help_command.rb +4 -0
- data/lib/claude_memory/commands/hook_command.rb +2 -1
- data/lib/claude_memory/commands/index_command.rb +85 -78
- data/lib/claude_memory/commands/initializers/database_ensurer.rb +16 -0
- data/lib/claude_memory/commands/initializers/global_initializer.rb +2 -1
- data/lib/claude_memory/commands/initializers/hooks_configurator.rb +55 -11
- data/lib/claude_memory/commands/initializers/project_initializer.rb +2 -1
- data/lib/claude_memory/commands/install_skill_command.rb +78 -0
- data/lib/claude_memory/commands/registry.rb +3 -1
- data/lib/claude_memory/commands/skills/distill-transcripts.md +98 -0
- data/lib/claude_memory/commands/skills/memory-recall.md +67 -0
- data/lib/claude_memory/core/fact_ranker.rb +2 -2
- data/lib/claude_memory/core/rr_fusion.rb +23 -6
- data/lib/claude_memory/core/snippet_extractor.rb +7 -3
- data/lib/claude_memory/core/text_builder.rb +11 -0
- data/lib/claude_memory/domain/provenance.rb +0 -1
- data/lib/claude_memory/embeddings/api_adapter.rb +96 -0
- data/lib/claude_memory/embeddings/dimension_check.rb +23 -0
- data/lib/claude_memory/embeddings/fastembed_adapter.rb +4 -0
- data/lib/claude_memory/embeddings/generator.rb +4 -0
- data/lib/claude_memory/embeddings/resolver.rb +18 -0
- data/lib/claude_memory/hook/context_injector.rb +58 -2
- data/lib/claude_memory/hook/distillation_runner.rb +46 -0
- data/lib/claude_memory/hook/handler.rb +11 -2
- data/lib/claude_memory/index/vector_index.rb +15 -2
- data/lib/claude_memory/infrastructure/schema_validator.rb +3 -3
- data/lib/claude_memory/mcp/handlers/context_handlers.rb +38 -0
- data/lib/claude_memory/mcp/handlers/management_handlers.rb +145 -0
- data/lib/claude_memory/mcp/handlers/query_handlers.rb +115 -0
- data/lib/claude_memory/mcp/handlers/setup_handlers.rb +211 -0
- data/lib/claude_memory/mcp/handlers/shortcut_handlers.rb +37 -0
- data/lib/claude_memory/mcp/handlers/stats_handlers.rb +202 -0
- data/lib/claude_memory/mcp/instructions_builder.rb +2 -1
- data/lib/claude_memory/mcp/query_guide.rb +10 -0
- data/lib/claude_memory/mcp/response_formatter.rb +1 -0
- data/lib/claude_memory/mcp/text_summary.rb +26 -0
- data/lib/claude_memory/mcp/tool_definitions.rb +30 -1
- data/lib/claude_memory/mcp/tool_helpers.rb +43 -0
- data/lib/claude_memory/mcp/tools.rb +39 -678
- data/lib/claude_memory/recall/dual_engine.rb +105 -0
- data/lib/claude_memory/recall/legacy_engine.rb +138 -0
- data/lib/claude_memory/recall/query_core.rb +371 -0
- data/lib/claude_memory/recall.rb +29 -662
- data/lib/claude_memory/shortcuts.rb +4 -4
- data/lib/claude_memory/store/retry_handler.rb +61 -0
- data/lib/claude_memory/store/schema_manager.rb +68 -0
- data/lib/claude_memory/store/sqlite_store.rb +85 -201
- data/lib/claude_memory/templates/hooks.example.json +26 -7
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +11 -0
- metadata +23 -1
data/lib/claude_memory/recall.rb
CHANGED
|
@@ -26,702 +26,69 @@ module ClaudeMemory
|
|
|
26
26
|
|
|
27
27
|
def initialize(store_or_manager, fts: nil, project_path: nil, env: ENV, embedding_generator: nil)
|
|
28
28
|
config = Configuration.new(env)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
if store_or_manager.is_a?(Store::StoreManager)
|
|
33
|
-
|
|
34
|
-
|
|
29
|
+
resolved_project_path = project_path || config.project_dir
|
|
30
|
+
resolved_generator = embedding_generator || Embeddings.resolve(env: env)
|
|
31
|
+
|
|
32
|
+
@engine = if store_or_manager.is_a?(Store::StoreManager)
|
|
33
|
+
DualEngine.new(
|
|
34
|
+
store_or_manager,
|
|
35
|
+
embedding_generator: resolved_generator,
|
|
36
|
+
project_path: resolved_project_path
|
|
37
|
+
)
|
|
35
38
|
else
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
+
LegacyEngine.new(
|
|
40
|
+
store_or_manager,
|
|
41
|
+
fts: fts || Index::LexicalFTS.new(store_or_manager),
|
|
42
|
+
embedding_generator: resolved_generator,
|
|
43
|
+
project_path: resolved_project_path
|
|
44
|
+
)
|
|
39
45
|
end
|
|
40
46
|
end
|
|
41
47
|
|
|
42
|
-
def query(query_text, limit: 10, scope: SCOPE_ALL, include_raw_text: false)
|
|
43
|
-
|
|
44
|
-
query_legacy(query_text, limit: limit, scope: scope)
|
|
45
|
-
else
|
|
46
|
-
query_dual(query_text, limit: limit, scope: scope, include_raw_text: include_raw_text)
|
|
47
|
-
end
|
|
48
|
+
def query(query_text, limit: 10, scope: SCOPE_ALL, include_raw_text: false, intent: nil)
|
|
49
|
+
@engine.query(query_text, limit: limit, scope: scope, include_raw_text: include_raw_text, intent: intent)
|
|
48
50
|
end
|
|
49
51
|
|
|
50
|
-
def query_index(query_text, limit: 20, scope: SCOPE_ALL)
|
|
51
|
-
|
|
52
|
-
query_index_legacy(query_text, limit: limit, scope: scope)
|
|
53
|
-
else
|
|
54
|
-
query_index_dual(query_text, limit: limit, scope: scope)
|
|
55
|
-
end
|
|
52
|
+
def query_index(query_text, limit: 20, scope: SCOPE_ALL, intent: nil)
|
|
53
|
+
@engine.query_index(query_text, limit: limit, scope: scope, intent: intent)
|
|
56
54
|
end
|
|
57
55
|
|
|
58
56
|
def fact_graph(fact_id, depth: 2, scope: nil)
|
|
59
|
-
|
|
60
|
-
Core::FactGraph.build(@legacy_store, fact_id, depth: depth)
|
|
61
|
-
else
|
|
62
|
-
scope ||= SCOPE_PROJECT
|
|
63
|
-
store = @manager.store_for_scope(scope)
|
|
64
|
-
Core::FactGraph.build(store, fact_id, depth: depth)
|
|
65
|
-
end
|
|
57
|
+
@engine.fact_graph(fact_id, depth: depth, scope: scope)
|
|
66
58
|
end
|
|
67
59
|
|
|
68
60
|
def explain(fact_id_or_docid, scope: nil)
|
|
69
|
-
|
|
70
|
-
fact_id = resolve_fact_identifier(@legacy_store, fact_id_or_docid)
|
|
71
|
-
explain_from_store(@legacy_store, fact_id)
|
|
72
|
-
else
|
|
73
|
-
scope ||= SCOPE_PROJECT
|
|
74
|
-
store = @manager.store_for_scope(scope)
|
|
75
|
-
fact_id = resolve_fact_identifier(store, fact_id_or_docid)
|
|
76
|
-
explain_from_store(store, fact_id)
|
|
77
|
-
end
|
|
61
|
+
@engine.explain(fact_id_or_docid, scope: scope)
|
|
78
62
|
end
|
|
79
63
|
|
|
80
64
|
def changes(since:, limit: 50, scope: SCOPE_ALL)
|
|
81
|
-
|
|
82
|
-
changes_legacy(since: since, limit: limit, scope: scope)
|
|
83
|
-
else
|
|
84
|
-
changes_dual(since: since, limit: limit, scope: scope)
|
|
85
|
-
end
|
|
65
|
+
@engine.changes(since: since, limit: limit, scope: scope)
|
|
86
66
|
end
|
|
87
67
|
|
|
88
68
|
def conflicts(scope: SCOPE_ALL)
|
|
89
|
-
|
|
90
|
-
conflicts_legacy(scope: scope)
|
|
91
|
-
else
|
|
92
|
-
conflicts_dual(scope: scope)
|
|
93
|
-
end
|
|
69
|
+
@engine.conflicts(scope: scope)
|
|
94
70
|
end
|
|
95
71
|
|
|
96
72
|
def facts_by_branch(branch_name, limit: 20, scope: SCOPE_ALL)
|
|
97
|
-
|
|
98
|
-
facts_by_context_legacy(:git_branch, branch_name, limit: limit, scope: scope)
|
|
99
|
-
else
|
|
100
|
-
facts_by_context_dual(:git_branch, branch_name, limit: limit, scope: scope)
|
|
101
|
-
end
|
|
73
|
+
@engine.facts_by_branch(branch_name, limit: limit, scope: scope)
|
|
102
74
|
end
|
|
103
75
|
|
|
104
76
|
def facts_by_directory(cwd, limit: 20, scope: SCOPE_ALL)
|
|
105
|
-
|
|
106
|
-
facts_by_context_legacy(:cwd, cwd, limit: limit, scope: scope)
|
|
107
|
-
else
|
|
108
|
-
facts_by_context_dual(:cwd, cwd, limit: limit, scope: scope)
|
|
109
|
-
end
|
|
77
|
+
@engine.facts_by_directory(cwd, limit: limit, scope: scope)
|
|
110
78
|
end
|
|
111
79
|
|
|
112
80
|
def facts_by_tool(tool_name, limit: 20, scope: SCOPE_ALL)
|
|
113
|
-
|
|
114
|
-
facts_by_tool_legacy(tool_name, limit: limit, scope: scope)
|
|
115
|
-
else
|
|
116
|
-
facts_by_tool_dual(tool_name, limit: limit, scope: scope)
|
|
117
|
-
end
|
|
81
|
+
@engine.facts_by_tool(tool_name, limit: limit, scope: scope)
|
|
118
82
|
end
|
|
119
83
|
|
|
120
|
-
def query_semantic(text, limit: 10, scope: SCOPE_ALL, mode: :both)
|
|
121
|
-
|
|
122
|
-
query_semantic_legacy(text, limit: limit, scope: scope, mode: mode)
|
|
123
|
-
else
|
|
124
|
-
query_semantic_dual(text, limit: limit, scope: scope, mode: mode)
|
|
125
|
-
end
|
|
84
|
+
def query_semantic(text, limit: 10, scope: SCOPE_ALL, mode: :both, explain: false, intent: nil)
|
|
85
|
+
@engine.query_semantic(text, limit: limit, scope: scope, mode: mode, explain: explain, intent: intent)
|
|
126
86
|
end
|
|
127
87
|
|
|
128
88
|
def query_concepts(concepts, limit: 10, scope: SCOPE_ALL)
|
|
129
89
|
raise ArgumentError, "Must provide 2-5 concepts" unless (2..5).cover?(concepts.size)
|
|
130
90
|
|
|
131
|
-
|
|
132
|
-
query_concepts_legacy(concepts, limit: limit, scope: scope)
|
|
133
|
-
else
|
|
134
|
-
query_concepts_dual(concepts, limit: limit, scope: scope)
|
|
135
|
-
end
|
|
136
|
-
end
|
|
137
|
-
|
|
138
|
-
private
|
|
139
|
-
|
|
140
|
-
def query_dual(query_text, limit:, scope:, include_raw_text: false)
|
|
141
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
142
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
143
|
-
query_single_store(store, query_text, limit: limit, source: source, include_raw_text: include_raw_text)
|
|
144
|
-
end
|
|
145
|
-
dedupe_and_sort(results, limit)
|
|
146
|
-
end
|
|
147
|
-
|
|
148
|
-
def query_index_dual(query_text, limit:, scope:)
|
|
149
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
150
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
151
|
-
query_index_single_store(store, query_text, limit: limit, source: source)
|
|
152
|
-
end
|
|
153
|
-
dedupe_and_sort_index(results, limit)
|
|
154
|
-
end
|
|
155
|
-
|
|
156
|
-
def query_index_single_store(store, query_text, limit:, source:)
|
|
157
|
-
options = Index::QueryOptions.new(
|
|
158
|
-
query_text: query_text,
|
|
159
|
-
limit: limit,
|
|
160
|
-
scope: :all,
|
|
161
|
-
source: source
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
query = Index::IndexQuery.new(store, options)
|
|
165
|
-
query.execute
|
|
166
|
-
end
|
|
167
|
-
|
|
168
|
-
def dedupe_and_sort_index(results, limit)
|
|
169
|
-
Core::FactRanker.dedupe_and_sort_index(results, limit)
|
|
170
|
-
end
|
|
171
|
-
|
|
172
|
-
def query_single_store(store, query_text, limit:, source:, include_raw_text: false)
|
|
173
|
-
fts = Index::LexicalFTS.new(store)
|
|
174
|
-
content_ids = fts.search(query_text, limit: limit * 3)
|
|
175
|
-
return [] if content_ids.empty?
|
|
176
|
-
|
|
177
|
-
# Batch query: fetch ALL provenance records at once using WHERE IN
|
|
178
|
-
provenance_by_content = store.provenance
|
|
179
|
-
.select(:fact_id, :content_item_id)
|
|
180
|
-
.where(content_item_id: content_ids)
|
|
181
|
-
.all
|
|
182
|
-
.group_by { |p| p[:content_item_id] }
|
|
183
|
-
|
|
184
|
-
# Collect fact IDs in content order, deduplicated
|
|
185
|
-
ordered_fact_ids = Core::FactCollector.collect_ordered_fact_ids(
|
|
186
|
-
provenance_by_content,
|
|
187
|
-
content_ids,
|
|
188
|
-
limit
|
|
189
|
-
)
|
|
190
|
-
|
|
191
|
-
return [] if ordered_fact_ids.empty?
|
|
192
|
-
|
|
193
|
-
# Batch query all facts at once
|
|
194
|
-
facts_by_id = batch_find_facts(store, ordered_fact_ids)
|
|
195
|
-
|
|
196
|
-
# Batch query all receipts at once
|
|
197
|
-
receipts_by_fact_id = batch_find_receipts(store, ordered_fact_ids, include_raw_text: include_raw_text)
|
|
198
|
-
|
|
199
|
-
# Build results maintaining order
|
|
200
|
-
Core::ResultBuilder.build_results(
|
|
201
|
-
ordered_fact_ids,
|
|
202
|
-
facts_by_id: facts_by_id,
|
|
203
|
-
receipts_by_fact_id: receipts_by_fact_id,
|
|
204
|
-
source: source
|
|
205
|
-
)
|
|
206
|
-
end
|
|
207
|
-
|
|
208
|
-
def batch_find_facts(store, fact_ids)
|
|
209
|
-
Core::FactQueryBuilder.batch_find_facts(store, fact_ids)
|
|
210
|
-
end
|
|
211
|
-
|
|
212
|
-
def batch_find_receipts(store, fact_ids, include_raw_text: false)
|
|
213
|
-
Core::FactQueryBuilder.batch_find_receipts(store, fact_ids, include_raw_text: include_raw_text)
|
|
214
|
-
end
|
|
215
|
-
|
|
216
|
-
def dedupe_and_sort(results, limit)
|
|
217
|
-
Core::FactRanker.dedupe_and_sort(results, limit)
|
|
218
|
-
end
|
|
219
|
-
|
|
220
|
-
def changes_dual(since:, limit:, scope:)
|
|
221
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
222
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
223
|
-
changes = fetch_changes(store, since, limit)
|
|
224
|
-
Core::ResultSorter.annotate_source(changes, source)
|
|
225
|
-
end
|
|
226
|
-
Core::ResultSorter.sort_by_timestamp(results, limit)
|
|
227
|
-
end
|
|
228
|
-
|
|
229
|
-
def fetch_changes(store, since, limit)
|
|
230
|
-
Core::FactQueryBuilder.fetch_changes(store, since, limit)
|
|
231
|
-
end
|
|
232
|
-
|
|
233
|
-
def conflicts_dual(scope:)
|
|
234
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
235
|
-
template.execute(scope: scope) do |store, source|
|
|
236
|
-
conflicts = store.open_conflicts
|
|
237
|
-
Core::ResultSorter.annotate_source(conflicts, source)
|
|
238
|
-
end
|
|
239
|
-
end
|
|
240
|
-
|
|
241
|
-
# Resolve a fact identifier to an integer ID
|
|
242
|
-
# Accepts either an integer ID or an 8-char docid string
|
|
243
|
-
def resolve_fact_identifier(store, identifier)
|
|
244
|
-
return identifier if identifier.is_a?(Integer)
|
|
245
|
-
|
|
246
|
-
str = identifier.to_s
|
|
247
|
-
# If it looks like a pure integer, use as ID
|
|
248
|
-
return str.to_i if str.match?(/\A\d+\z/)
|
|
249
|
-
|
|
250
|
-
# Otherwise treat as docid
|
|
251
|
-
fact = Core::FactQueryBuilder.find_fact_by_docid(store, str)
|
|
252
|
-
fact ? fact[:id] : nil
|
|
253
|
-
end
|
|
254
|
-
|
|
255
|
-
def explain_from_store(store, fact_id)
|
|
256
|
-
fact = find_fact_from_store(store, fact_id)
|
|
257
|
-
return Core::NullExplanation.new unless fact
|
|
258
|
-
|
|
259
|
-
{
|
|
260
|
-
fact: fact,
|
|
261
|
-
receipts: find_receipts_from_store(store, fact_id),
|
|
262
|
-
superseded_by: find_superseded_by_from_store(store, fact_id),
|
|
263
|
-
supersedes: find_supersedes_from_store(store, fact_id),
|
|
264
|
-
conflicts: find_conflicts_from_store(store, fact_id)
|
|
265
|
-
}
|
|
266
|
-
end
|
|
267
|
-
|
|
268
|
-
def find_fact_from_store(store, fact_id)
|
|
269
|
-
Core::FactQueryBuilder.find_fact(store, fact_id)
|
|
270
|
-
end
|
|
271
|
-
|
|
272
|
-
def find_receipts_from_store(store, fact_id)
|
|
273
|
-
Core::FactQueryBuilder.find_receipts(store, fact_id)
|
|
274
|
-
end
|
|
275
|
-
|
|
276
|
-
def find_superseded_by_from_store(store, fact_id)
|
|
277
|
-
Core::FactQueryBuilder.find_superseded_by(store, fact_id)
|
|
278
|
-
end
|
|
279
|
-
|
|
280
|
-
def find_supersedes_from_store(store, fact_id)
|
|
281
|
-
Core::FactQueryBuilder.find_supersedes(store, fact_id)
|
|
282
|
-
end
|
|
283
|
-
|
|
284
|
-
def find_conflicts_from_store(store, fact_id)
|
|
285
|
-
Core::FactQueryBuilder.find_conflicts(store, fact_id)
|
|
286
|
-
end
|
|
287
|
-
|
|
288
|
-
def query_legacy(query_text, limit:, scope:)
|
|
289
|
-
content_ids = @legacy_fts.search(query_text, limit: limit * 3)
|
|
290
|
-
return [] if content_ids.empty?
|
|
291
|
-
|
|
292
|
-
# Batch query: fetch ALL provenance records at once using WHERE IN
|
|
293
|
-
provenance_by_content = @legacy_store.provenance
|
|
294
|
-
.select(:fact_id, :content_item_id)
|
|
295
|
-
.where(content_item_id: content_ids)
|
|
296
|
-
.all
|
|
297
|
-
.group_by { |p| p[:content_item_id] }
|
|
298
|
-
|
|
299
|
-
# Collect ordered unique fact IDs from provenance
|
|
300
|
-
all_fact_ids = []
|
|
301
|
-
seen_fact_ids = Set.new
|
|
302
|
-
content_ids.each do |content_id|
|
|
303
|
-
(provenance_by_content[content_id] || []).each do |prov|
|
|
304
|
-
next if seen_fact_ids.include?(prov[:fact_id])
|
|
305
|
-
seen_fact_ids.add(prov[:fact_id])
|
|
306
|
-
all_fact_ids << prov[:fact_id]
|
|
307
|
-
end
|
|
308
|
-
end
|
|
309
|
-
|
|
310
|
-
return [] if all_fact_ids.empty?
|
|
311
|
-
|
|
312
|
-
# Batch query: fetch ALL facts at once
|
|
313
|
-
facts_by_id = batch_find_facts(@legacy_store, all_fact_ids)
|
|
314
|
-
|
|
315
|
-
# Filter by scope and apply limit
|
|
316
|
-
selected_fact_ids = []
|
|
317
|
-
all_fact_ids.each do |fact_id|
|
|
318
|
-
fact = facts_by_id[fact_id]
|
|
319
|
-
next unless fact
|
|
320
|
-
next unless fact_matches_scope?(fact, scope)
|
|
321
|
-
selected_fact_ids << fact_id
|
|
322
|
-
break if selected_fact_ids.size >= limit
|
|
323
|
-
end
|
|
324
|
-
|
|
325
|
-
return [] if selected_fact_ids.empty?
|
|
326
|
-
|
|
327
|
-
# Batch query: fetch ALL receipts at once
|
|
328
|
-
receipts_by_fact_id = batch_find_receipts(@legacy_store, selected_fact_ids)
|
|
329
|
-
|
|
330
|
-
facts_with_provenance = selected_fact_ids.map do |fact_id|
|
|
331
|
-
{
|
|
332
|
-
fact: facts_by_id[fact_id],
|
|
333
|
-
receipts: receipts_by_fact_id[fact_id] || []
|
|
334
|
-
}
|
|
335
|
-
end
|
|
336
|
-
|
|
337
|
-
sort_by_scope_priority(facts_with_provenance)
|
|
338
|
-
end
|
|
339
|
-
|
|
340
|
-
def query_index_legacy(query_text, limit:, scope:)
|
|
341
|
-
options = Index::QueryOptions.new(
|
|
342
|
-
query_text: query_text,
|
|
343
|
-
limit: limit,
|
|
344
|
-
scope: :all,
|
|
345
|
-
source: :legacy
|
|
346
|
-
)
|
|
347
|
-
|
|
348
|
-
query = Index::IndexQuery.new(@legacy_store, options)
|
|
349
|
-
results = query.execute
|
|
350
|
-
|
|
351
|
-
# Filter by scope in legacy mode
|
|
352
|
-
results.select do |result|
|
|
353
|
-
# Need to get full fact to check scope
|
|
354
|
-
fact = find_fact(result[:id])
|
|
355
|
-
fact && fact_matches_scope?(fact, scope)
|
|
356
|
-
end
|
|
357
|
-
end
|
|
358
|
-
|
|
359
|
-
def changes_legacy(since:, limit:, scope:)
|
|
360
|
-
ds = @legacy_store.facts
|
|
361
|
-
.select(:id, :docid, :subject_entity_id, :predicate, :object_literal, :status, :created_at, :scope, :project_path)
|
|
362
|
-
.where { created_at >= since }
|
|
363
|
-
.order(Sequel.desc(:created_at))
|
|
364
|
-
.limit(limit)
|
|
365
|
-
|
|
366
|
-
ds = apply_scope_filter(ds, scope)
|
|
367
|
-
ds.all
|
|
368
|
-
end
|
|
369
|
-
|
|
370
|
-
def conflicts_legacy(scope:)
|
|
371
|
-
all_conflicts = @legacy_store.open_conflicts
|
|
372
|
-
return all_conflicts if scope == SCOPE_ALL
|
|
373
|
-
|
|
374
|
-
all_conflicts.select do |conflict|
|
|
375
|
-
fact_a = find_fact(conflict[:fact_a_id])
|
|
376
|
-
fact_b = find_fact(conflict[:fact_b_id])
|
|
377
|
-
|
|
378
|
-
fact_matches_scope?(fact_a, scope) || fact_matches_scope?(fact_b, scope)
|
|
379
|
-
end
|
|
380
|
-
end
|
|
381
|
-
|
|
382
|
-
def fact_matches_scope?(fact, scope)
|
|
383
|
-
Core::ScopeFilter.matches?(fact, scope, @project_path)
|
|
384
|
-
end
|
|
385
|
-
|
|
386
|
-
def apply_scope_filter(dataset, scope)
|
|
387
|
-
Core::ScopeFilter.apply_to_dataset(dataset, scope, @project_path)
|
|
388
|
-
end
|
|
389
|
-
|
|
390
|
-
def sort_by_scope_priority(facts_with_provenance)
|
|
391
|
-
Core::FactRanker.sort_by_scope_priority(facts_with_provenance, @project_path)
|
|
392
|
-
end
|
|
393
|
-
|
|
394
|
-
def find_provenance_by_content(content_id)
|
|
395
|
-
Core::FactQueryBuilder.find_provenance_by_content(@legacy_store, content_id)
|
|
396
|
-
end
|
|
397
|
-
|
|
398
|
-
def find_fact(fact_id)
|
|
399
|
-
find_fact_from_store(@legacy_store, fact_id)
|
|
400
|
-
end
|
|
401
|
-
|
|
402
|
-
def find_receipts(fact_id)
|
|
403
|
-
find_receipts_from_store(@legacy_store, fact_id)
|
|
404
|
-
end
|
|
405
|
-
|
|
406
|
-
# Context-aware query helpers
|
|
407
|
-
|
|
408
|
-
def facts_by_context_dual(column, value, limit:, scope:)
|
|
409
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
410
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
411
|
-
facts_by_context_single(store, column, value, limit: limit, source: source)
|
|
412
|
-
end
|
|
413
|
-
dedupe_and_sort(results, limit)
|
|
414
|
-
end
|
|
415
|
-
|
|
416
|
-
def facts_by_context_legacy(column, value, limit:, scope:)
|
|
417
|
-
facts_by_context_single(@legacy_store, column, value, limit: limit, source: :legacy)
|
|
418
|
-
end
|
|
419
|
-
|
|
420
|
-
def facts_by_context_single(store, column, value, limit:, source:)
|
|
421
|
-
# Find content items matching the context
|
|
422
|
-
content_ids = store.content_items
|
|
423
|
-
.where(column => value)
|
|
424
|
-
.select(:id)
|
|
425
|
-
.map { |row| row[:id] }
|
|
426
|
-
|
|
427
|
-
return [] if content_ids.empty?
|
|
428
|
-
|
|
429
|
-
# Find facts linked to those content items via provenance
|
|
430
|
-
fact_ids = store.provenance
|
|
431
|
-
.where(content_item_id: content_ids)
|
|
432
|
-
.select(:fact_id)
|
|
433
|
-
.distinct
|
|
434
|
-
.map { |row| row[:fact_id] }
|
|
435
|
-
|
|
436
|
-
return [] if fact_ids.empty?
|
|
437
|
-
|
|
438
|
-
# Batch fetch facts and their provenance
|
|
439
|
-
facts_by_id = batch_find_facts(store, fact_ids)
|
|
440
|
-
receipts_by_fact_id = batch_find_receipts(store, fact_ids)
|
|
441
|
-
|
|
442
|
-
results = Core::ResultBuilder.build_results(
|
|
443
|
-
fact_ids,
|
|
444
|
-
facts_by_id: facts_by_id,
|
|
445
|
-
receipts_by_fact_id: receipts_by_fact_id,
|
|
446
|
-
source: source
|
|
447
|
-
)
|
|
448
|
-
results.take(limit)
|
|
449
|
-
end
|
|
450
|
-
|
|
451
|
-
def facts_by_tool_dual(tool_name, limit:, scope:)
|
|
452
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
453
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
454
|
-
facts_by_tool_single(store, tool_name, limit: limit, source: source)
|
|
455
|
-
end
|
|
456
|
-
dedupe_and_sort(results, limit)
|
|
457
|
-
end
|
|
458
|
-
|
|
459
|
-
def facts_by_tool_legacy(tool_name, limit:, scope:)
|
|
460
|
-
facts_by_tool_single(@legacy_store, tool_name, limit: limit, source: :legacy)
|
|
461
|
-
end
|
|
462
|
-
|
|
463
|
-
def facts_by_tool_single(store, tool_name, limit:, source:)
|
|
464
|
-
# Find content items where the tool was used
|
|
465
|
-
content_ids = store.tool_calls
|
|
466
|
-
.where(tool_name: tool_name)
|
|
467
|
-
.select(:content_item_id)
|
|
468
|
-
.distinct
|
|
469
|
-
.map { |row| row[:content_item_id] }
|
|
470
|
-
|
|
471
|
-
return [] if content_ids.empty?
|
|
472
|
-
|
|
473
|
-
# Find facts linked to those content items via provenance
|
|
474
|
-
fact_ids = store.provenance
|
|
475
|
-
.where(content_item_id: content_ids)
|
|
476
|
-
.select(:fact_id)
|
|
477
|
-
.distinct
|
|
478
|
-
.map { |row| row[:fact_id] }
|
|
479
|
-
|
|
480
|
-
return [] if fact_ids.empty?
|
|
481
|
-
|
|
482
|
-
# Batch fetch facts and their provenance
|
|
483
|
-
facts_by_id = batch_find_facts(store, fact_ids)
|
|
484
|
-
receipts_by_fact_id = batch_find_receipts(store, fact_ids)
|
|
485
|
-
|
|
486
|
-
results = Core::ResultBuilder.build_results(
|
|
487
|
-
fact_ids,
|
|
488
|
-
facts_by_id: facts_by_id,
|
|
489
|
-
receipts_by_fact_id: receipts_by_fact_id,
|
|
490
|
-
source: source
|
|
491
|
-
)
|
|
492
|
-
results.take(limit)
|
|
493
|
-
end
|
|
494
|
-
|
|
495
|
-
# Semantic search helpers
|
|
496
|
-
|
|
497
|
-
def query_semantic_dual(text, limit:, scope:, mode:)
|
|
498
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
499
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
500
|
-
query_semantic_single(store, text, limit: limit * 3, mode: mode, source: source)
|
|
501
|
-
end
|
|
502
|
-
# Use similarity-preserving dedupe (not source/time sort) to keep RRF ordering
|
|
503
|
-
Core::FactRanker.dedupe_by_fact_id(results, limit)
|
|
504
|
-
end
|
|
505
|
-
|
|
506
|
-
def query_semantic_legacy(text, limit:, scope:, mode:)
|
|
507
|
-
query_semantic_single(@legacy_store, text, limit: limit, mode: mode, source: :legacy)
|
|
508
|
-
end
|
|
509
|
-
|
|
510
|
-
def query_semantic_single(store, text, limit:, mode:, source:)
|
|
511
|
-
vector_results = []
|
|
512
|
-
text_results = []
|
|
513
|
-
|
|
514
|
-
# Text search mode (FTS) - run first for expansion detection
|
|
515
|
-
if mode == :text || mode == :both
|
|
516
|
-
text_results = search_by_fts(store, text, limit, source)
|
|
517
|
-
end
|
|
518
|
-
|
|
519
|
-
# Vector search mode - skip if FTS already found strong match
|
|
520
|
-
if mode == :vector || mode == :both
|
|
521
|
-
skip_vector = mode == :both && strong_fts_signal?(store, text)
|
|
522
|
-
vector_results = search_by_vector(store, text, limit, source) unless skip_vector
|
|
523
|
-
end
|
|
524
|
-
|
|
525
|
-
# Merge and deduplicate
|
|
526
|
-
merge_search_results(vector_results, text_results, limit)
|
|
527
|
-
end
|
|
528
|
-
|
|
529
|
-
def search_by_vector(store, query_text, limit, source)
|
|
530
|
-
# Generate query embedding
|
|
531
|
-
query_embedding = @embedding_generator.generate(query_text)
|
|
532
|
-
|
|
533
|
-
# Fast path: use sqlite-vec KNN when available
|
|
534
|
-
vec_index = store.vector_index
|
|
535
|
-
if vec_index.available?
|
|
536
|
-
return search_by_vector_native(store, vec_index, query_embedding, limit, source)
|
|
537
|
-
end
|
|
538
|
-
|
|
539
|
-
# Fallback: JSON + Ruby cosine similarity
|
|
540
|
-
search_by_vector_fallback(store, query_embedding, limit, source)
|
|
541
|
-
end
|
|
542
|
-
|
|
543
|
-
def search_by_vector_native(store, vec_index, query_embedding, limit, source)
|
|
544
|
-
matches = vec_index.search(query_embedding, k: limit)
|
|
545
|
-
return [] if matches.empty?
|
|
546
|
-
|
|
547
|
-
fact_ids = matches.map { |m| m[:fact_id] }
|
|
548
|
-
facts_by_id = batch_find_facts(store, fact_ids)
|
|
549
|
-
receipts_by_fact_id = batch_find_receipts(store, fact_ids)
|
|
550
|
-
|
|
551
|
-
Core::ResultBuilder.build_results_with_scores(
|
|
552
|
-
matches,
|
|
553
|
-
facts_by_id: facts_by_id,
|
|
554
|
-
receipts_by_fact_id: receipts_by_fact_id,
|
|
555
|
-
source: source
|
|
556
|
-
)
|
|
557
|
-
end
|
|
558
|
-
|
|
559
|
-
def search_by_vector_fallback(store, query_embedding, limit, source)
|
|
560
|
-
# Load facts with embeddings
|
|
561
|
-
facts_data = store.facts_with_embeddings(limit: 5000)
|
|
562
|
-
return [] if facts_data.empty?
|
|
563
|
-
|
|
564
|
-
# Deduplicate: group facts by embedding, score unique embeddings only, fan out
|
|
565
|
-
unique_candidates, fact_groups = dedup_candidates(facts_data)
|
|
566
|
-
return [] if unique_candidates.empty?
|
|
567
|
-
|
|
568
|
-
# Calculate similarities on unique embeddings only
|
|
569
|
-
top_unique = Embeddings::Similarity.top_k(query_embedding, unique_candidates, limit)
|
|
570
|
-
|
|
571
|
-
# Fan out: expand unique matches back to all fact_ids sharing that embedding
|
|
572
|
-
top_matches = fan_out_matches(top_unique, fact_groups, limit)
|
|
573
|
-
|
|
574
|
-
# Batch fetch full fact details
|
|
575
|
-
fact_ids = top_matches.map { |m| m[:candidate][:fact_id] }
|
|
576
|
-
facts_by_id = batch_find_facts(store, fact_ids)
|
|
577
|
-
receipts_by_fact_id = batch_find_receipts(store, fact_ids)
|
|
578
|
-
|
|
579
|
-
# Build results with similarity scores
|
|
580
|
-
Core::ResultBuilder.build_results_with_scores(
|
|
581
|
-
top_matches,
|
|
582
|
-
facts_by_id: facts_by_id,
|
|
583
|
-
receipts_by_fact_id: receipts_by_fact_id,
|
|
584
|
-
source: source
|
|
585
|
-
)
|
|
586
|
-
end
|
|
587
|
-
|
|
588
|
-
# Group facts by embedding_json, return unique candidates + mapping
|
|
589
|
-
def dedup_candidates(facts_data)
|
|
590
|
-
groups = {} # embedding_json → [fact_ids]
|
|
591
|
-
unique = {} # embedding_json → parsed candidate (first occurrence)
|
|
592
|
-
|
|
593
|
-
facts_data.each do |row|
|
|
594
|
-
key = row[:embedding_json]
|
|
595
|
-
if unique.key?(key)
|
|
596
|
-
groups[key] << row[:id]
|
|
597
|
-
else
|
|
598
|
-
candidate = Core::EmbeddingCandidateBuilder.parse_candidate(row)
|
|
599
|
-
next unless candidate
|
|
600
|
-
unique[key] = candidate
|
|
601
|
-
groups[key] = [row[:id]]
|
|
602
|
-
end
|
|
603
|
-
end
|
|
604
|
-
|
|
605
|
-
[unique.values, groups]
|
|
606
|
-
end
|
|
607
|
-
|
|
608
|
-
# Expand unique matches back to all fact_ids sharing the same embedding
|
|
609
|
-
def fan_out_matches(top_unique, fact_groups, limit)
|
|
610
|
-
results = []
|
|
611
|
-
top_unique.each do |match|
|
|
612
|
-
candidate = match[:candidate]
|
|
613
|
-
similarity = match[:similarity]
|
|
614
|
-
|
|
615
|
-
# Find the group key for this candidate's embedding
|
|
616
|
-
group_key = fact_groups.find { |_key, ids| ids.include?(candidate[:fact_id]) }&.first
|
|
617
|
-
next unless group_key
|
|
618
|
-
|
|
619
|
-
fact_groups[group_key].each do |fact_id|
|
|
620
|
-
results << {
|
|
621
|
-
candidate: candidate.merge(fact_id: fact_id),
|
|
622
|
-
similarity: similarity
|
|
623
|
-
}
|
|
624
|
-
break if results.size >= limit
|
|
625
|
-
end
|
|
626
|
-
break if results.size >= limit
|
|
627
|
-
end
|
|
628
|
-
|
|
629
|
-
results
|
|
630
|
-
end
|
|
631
|
-
|
|
632
|
-
def search_by_fts(store, query_text, limit, source)
|
|
633
|
-
fts = Index::LexicalFTS.new(store)
|
|
634
|
-
ranked_results = fts.search_with_ranks(query_text, limit: limit * 2)
|
|
635
|
-
|
|
636
|
-
return [] if ranked_results.empty?
|
|
637
|
-
|
|
638
|
-
content_ids = ranked_results.map { |r| r[:content_item_id] }
|
|
639
|
-
|
|
640
|
-
# Map content_item_ids to fact_ids, preserving FTS rank order
|
|
641
|
-
provenance_rows = store.provenance
|
|
642
|
-
.where(content_item_id: content_ids)
|
|
643
|
-
.select(:fact_id, :content_item_id)
|
|
644
|
-
.all
|
|
645
|
-
|
|
646
|
-
content_to_facts = provenance_rows.group_by { |r| r[:content_item_id] }
|
|
647
|
-
|
|
648
|
-
# Build ordered fact list with normalized BM25 scores
|
|
649
|
-
# FTS5 rank values are negative (more negative = better match)
|
|
650
|
-
ranks = ranked_results.map { |r| r[:rank] }
|
|
651
|
-
min_rank = ranks.min # Most negative = best
|
|
652
|
-
max_rank = ranks.max # Least negative = worst
|
|
653
|
-
range = (max_rank - min_rank).abs
|
|
654
|
-
|
|
655
|
-
seen_fact_ids = Set.new
|
|
656
|
-
scored_matches = []
|
|
657
|
-
|
|
658
|
-
ranked_results.each do |r|
|
|
659
|
-
similarity = if range > 0
|
|
660
|
-
# Normalize: best rank → 1.0, worst rank → 0.1
|
|
661
|
-
0.1 + 0.9 * ((max_rank - r[:rank]).abs / range)
|
|
662
|
-
else
|
|
663
|
-
0.8 # Single result gets a reasonable score
|
|
664
|
-
end
|
|
665
|
-
|
|
666
|
-
fact_ids = content_to_facts[r[:content_item_id]]&.map { |p| p[:fact_id] } || []
|
|
667
|
-
fact_ids.each do |fid|
|
|
668
|
-
next if seen_fact_ids.include?(fid)
|
|
669
|
-
seen_fact_ids.add(fid)
|
|
670
|
-
scored_matches << {fact_id: fid, similarity: similarity}
|
|
671
|
-
end
|
|
672
|
-
end
|
|
673
|
-
|
|
674
|
-
return [] if scored_matches.empty?
|
|
675
|
-
|
|
676
|
-
fact_ids = scored_matches.map { |m| m[:fact_id] }
|
|
677
|
-
facts_by_id = batch_find_facts(store, fact_ids)
|
|
678
|
-
receipts_by_fact_id = batch_find_receipts(store, fact_ids)
|
|
679
|
-
|
|
680
|
-
Core::ResultBuilder.build_results_with_scores(
|
|
681
|
-
scored_matches,
|
|
682
|
-
facts_by_id: facts_by_id,
|
|
683
|
-
receipts_by_fact_id: receipts_by_fact_id,
|
|
684
|
-
source: source
|
|
685
|
-
).take(limit)
|
|
686
|
-
end
|
|
687
|
-
|
|
688
|
-
def merge_search_results(vector_results, text_results, limit)
|
|
689
|
-
Core::FactRanker.merge_search_results(vector_results, text_results, limit)
|
|
690
|
-
end
|
|
691
|
-
|
|
692
|
-
def strong_fts_signal?(store, query_text)
|
|
693
|
-
fts = Index::LexicalFTS.new(store)
|
|
694
|
-
ranked_results = fts.search_with_ranks(query_text, limit: 5)
|
|
695
|
-
Recall::ExpansionDetector.strong_fts_signal?(ranked_results)
|
|
696
|
-
end
|
|
697
|
-
|
|
698
|
-
# Multi-concept search helpers
|
|
699
|
-
|
|
700
|
-
def query_concepts_dual(concepts, limit:, scope:)
|
|
701
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
702
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
703
|
-
query_concepts_single(store, concepts, limit: limit * 2, source: source)
|
|
704
|
-
end
|
|
705
|
-
# Deduplicate and sort by average similarity
|
|
706
|
-
dedupe_by_fact_id(results, limit)
|
|
707
|
-
end
|
|
708
|
-
|
|
709
|
-
def query_concepts_legacy(concepts, limit:, scope:)
|
|
710
|
-
query_concepts_single(@legacy_store, concepts, limit: limit, source: :legacy)
|
|
711
|
-
end
|
|
712
|
-
|
|
713
|
-
def query_concepts_single(store, concepts, limit:, source:)
|
|
714
|
-
# I/O: Search each concept independently with higher limit for intersection
|
|
715
|
-
concept_results = concepts.map do |concept|
|
|
716
|
-
search_by_vector(store, concept, limit * 5, source)
|
|
717
|
-
end
|
|
718
|
-
|
|
719
|
-
# Pure logic: Rank by average similarity across all concepts
|
|
720
|
-
Core::ConceptRanker.rank_by_concepts(concept_results, limit)
|
|
721
|
-
end
|
|
722
|
-
|
|
723
|
-
def dedupe_by_fact_id(results, limit)
|
|
724
|
-
Core::FactRanker.dedupe_by_fact_id(results, limit)
|
|
91
|
+
@engine.query_concepts(concepts, limit: limit, scope: scope)
|
|
725
92
|
end
|
|
726
93
|
end
|
|
727
94
|
end
|