claude_memory 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/memory.sqlite3 +0 -0
  3. data/.claude/memory.sqlite3-shm +0 -0
  4. data/.claude/memory.sqlite3-wal +0 -0
  5. data/.claude/settings.json +78 -6
  6. data/.claude/settings.local.json +2 -1
  7. data/.claude/skills/improve/SKILL.md +113 -25
  8. data/.claude-plugin/commands/distill-transcripts.md +98 -0
  9. data/.claude-plugin/commands/memory-recall.md +67 -0
  10. data/.claude-plugin/marketplace.json +1 -1
  11. data/.claude-plugin/plugin.json +1 -1
  12. data/CHANGELOG.md +49 -1
  13. data/CLAUDE.md +29 -5
  14. data/docs/improvements.md +18 -56
  15. data/docs/quality_review.md +119 -224
  16. data/hooks/hooks.json +39 -7
  17. data/lib/claude_memory/commands/checks/distill_check.rb +61 -0
  18. data/lib/claude_memory/commands/checks/hooks_check.rb +2 -2
  19. data/lib/claude_memory/commands/checks/vec_check.rb +2 -1
  20. data/lib/claude_memory/commands/completion_command.rb +179 -0
  21. data/lib/claude_memory/commands/doctor_command.rb +2 -0
  22. data/lib/claude_memory/commands/help_command.rb +4 -0
  23. data/lib/claude_memory/commands/hook_command.rb +2 -1
  24. data/lib/claude_memory/commands/index_command.rb +85 -78
  25. data/lib/claude_memory/commands/initializers/database_ensurer.rb +16 -0
  26. data/lib/claude_memory/commands/initializers/global_initializer.rb +2 -1
  27. data/lib/claude_memory/commands/initializers/hooks_configurator.rb +55 -11
  28. data/lib/claude_memory/commands/initializers/project_initializer.rb +2 -1
  29. data/lib/claude_memory/commands/install_skill_command.rb +78 -0
  30. data/lib/claude_memory/commands/registry.rb +3 -1
  31. data/lib/claude_memory/commands/skills/distill-transcripts.md +98 -0
  32. data/lib/claude_memory/commands/skills/memory-recall.md +67 -0
  33. data/lib/claude_memory/core/fact_ranker.rb +2 -2
  34. data/lib/claude_memory/core/rr_fusion.rb +23 -6
  35. data/lib/claude_memory/core/snippet_extractor.rb +7 -3
  36. data/lib/claude_memory/core/text_builder.rb +11 -0
  37. data/lib/claude_memory/domain/provenance.rb +0 -1
  38. data/lib/claude_memory/embeddings/api_adapter.rb +96 -0
  39. data/lib/claude_memory/embeddings/dimension_check.rb +23 -0
  40. data/lib/claude_memory/embeddings/fastembed_adapter.rb +4 -0
  41. data/lib/claude_memory/embeddings/generator.rb +4 -0
  42. data/lib/claude_memory/embeddings/resolver.rb +18 -0
  43. data/lib/claude_memory/hook/context_injector.rb +58 -2
  44. data/lib/claude_memory/hook/distillation_runner.rb +46 -0
  45. data/lib/claude_memory/hook/handler.rb +11 -2
  46. data/lib/claude_memory/index/vector_index.rb +15 -2
  47. data/lib/claude_memory/infrastructure/schema_validator.rb +3 -3
  48. data/lib/claude_memory/mcp/handlers/context_handlers.rb +38 -0
  49. data/lib/claude_memory/mcp/handlers/management_handlers.rb +145 -0
  50. data/lib/claude_memory/mcp/handlers/query_handlers.rb +115 -0
  51. data/lib/claude_memory/mcp/handlers/setup_handlers.rb +211 -0
  52. data/lib/claude_memory/mcp/handlers/shortcut_handlers.rb +37 -0
  53. data/lib/claude_memory/mcp/handlers/stats_handlers.rb +202 -0
  54. data/lib/claude_memory/mcp/instructions_builder.rb +2 -1
  55. data/lib/claude_memory/mcp/query_guide.rb +10 -0
  56. data/lib/claude_memory/mcp/response_formatter.rb +1 -0
  57. data/lib/claude_memory/mcp/text_summary.rb +26 -0
  58. data/lib/claude_memory/mcp/tool_definitions.rb +30 -1
  59. data/lib/claude_memory/mcp/tool_helpers.rb +43 -0
  60. data/lib/claude_memory/mcp/tools.rb +39 -678
  61. data/lib/claude_memory/recall/dual_engine.rb +105 -0
  62. data/lib/claude_memory/recall/legacy_engine.rb +138 -0
  63. data/lib/claude_memory/recall/query_core.rb +371 -0
  64. data/lib/claude_memory/recall.rb +29 -662
  65. data/lib/claude_memory/shortcuts.rb +4 -4
  66. data/lib/claude_memory/store/retry_handler.rb +61 -0
  67. data/lib/claude_memory/store/schema_manager.rb +68 -0
  68. data/lib/claude_memory/store/sqlite_store.rb +85 -201
  69. data/lib/claude_memory/templates/hooks.example.json +26 -7
  70. data/lib/claude_memory/version.rb +1 -1
  71. data/lib/claude_memory.rb +11 -0
  72. metadata +23 -1
@@ -26,702 +26,69 @@ module ClaudeMemory
26
26
 
27
27
  def initialize(store_or_manager, fts: nil, project_path: nil, env: ENV, embedding_generator: nil)
28
28
  config = Configuration.new(env)
29
- @project_path = project_path || config.project_dir
30
- @embedding_generator = embedding_generator || Embeddings::Generator.new
31
-
32
- if store_or_manager.is_a?(Store::StoreManager)
33
- @manager = store_or_manager
34
- @legacy_mode = false
29
+ resolved_project_path = project_path || config.project_dir
30
+ resolved_generator = embedding_generator || Embeddings.resolve(env: env)
31
+
32
+ @engine = if store_or_manager.is_a?(Store::StoreManager)
33
+ DualEngine.new(
34
+ store_or_manager,
35
+ embedding_generator: resolved_generator,
36
+ project_path: resolved_project_path
37
+ )
35
38
  else
36
- @legacy_store = store_or_manager
37
- @legacy_fts = fts || Index::LexicalFTS.new(store_or_manager)
38
- @legacy_mode = true
39
+ LegacyEngine.new(
40
+ store_or_manager,
41
+ fts: fts || Index::LexicalFTS.new(store_or_manager),
42
+ embedding_generator: resolved_generator,
43
+ project_path: resolved_project_path
44
+ )
39
45
  end
40
46
  end
41
47
 
42
- def query(query_text, limit: 10, scope: SCOPE_ALL, include_raw_text: false)
43
- if @legacy_mode
44
- query_legacy(query_text, limit: limit, scope: scope)
45
- else
46
- query_dual(query_text, limit: limit, scope: scope, include_raw_text: include_raw_text)
47
- end
48
+ def query(query_text, limit: 10, scope: SCOPE_ALL, include_raw_text: false, intent: nil)
49
+ @engine.query(query_text, limit: limit, scope: scope, include_raw_text: include_raw_text, intent: intent)
48
50
  end
49
51
 
50
- def query_index(query_text, limit: 20, scope: SCOPE_ALL)
51
- if @legacy_mode
52
- query_index_legacy(query_text, limit: limit, scope: scope)
53
- else
54
- query_index_dual(query_text, limit: limit, scope: scope)
55
- end
52
+ def query_index(query_text, limit: 20, scope: SCOPE_ALL, intent: nil)
53
+ @engine.query_index(query_text, limit: limit, scope: scope, intent: intent)
56
54
  end
57
55
 
58
56
  def fact_graph(fact_id, depth: 2, scope: nil)
59
- if @legacy_mode
60
- Core::FactGraph.build(@legacy_store, fact_id, depth: depth)
61
- else
62
- scope ||= SCOPE_PROJECT
63
- store = @manager.store_for_scope(scope)
64
- Core::FactGraph.build(store, fact_id, depth: depth)
65
- end
57
+ @engine.fact_graph(fact_id, depth: depth, scope: scope)
66
58
  end
67
59
 
68
60
  def explain(fact_id_or_docid, scope: nil)
69
- if @legacy_mode
70
- fact_id = resolve_fact_identifier(@legacy_store, fact_id_or_docid)
71
- explain_from_store(@legacy_store, fact_id)
72
- else
73
- scope ||= SCOPE_PROJECT
74
- store = @manager.store_for_scope(scope)
75
- fact_id = resolve_fact_identifier(store, fact_id_or_docid)
76
- explain_from_store(store, fact_id)
77
- end
61
+ @engine.explain(fact_id_or_docid, scope: scope)
78
62
  end
79
63
 
80
64
  def changes(since:, limit: 50, scope: SCOPE_ALL)
81
- if @legacy_mode
82
- changes_legacy(since: since, limit: limit, scope: scope)
83
- else
84
- changes_dual(since: since, limit: limit, scope: scope)
85
- end
65
+ @engine.changes(since: since, limit: limit, scope: scope)
86
66
  end
87
67
 
88
68
  def conflicts(scope: SCOPE_ALL)
89
- if @legacy_mode
90
- conflicts_legacy(scope: scope)
91
- else
92
- conflicts_dual(scope: scope)
93
- end
69
+ @engine.conflicts(scope: scope)
94
70
  end
95
71
 
96
72
  def facts_by_branch(branch_name, limit: 20, scope: SCOPE_ALL)
97
- if @legacy_mode
98
- facts_by_context_legacy(:git_branch, branch_name, limit: limit, scope: scope)
99
- else
100
- facts_by_context_dual(:git_branch, branch_name, limit: limit, scope: scope)
101
- end
73
+ @engine.facts_by_branch(branch_name, limit: limit, scope: scope)
102
74
  end
103
75
 
104
76
  def facts_by_directory(cwd, limit: 20, scope: SCOPE_ALL)
105
- if @legacy_mode
106
- facts_by_context_legacy(:cwd, cwd, limit: limit, scope: scope)
107
- else
108
- facts_by_context_dual(:cwd, cwd, limit: limit, scope: scope)
109
- end
77
+ @engine.facts_by_directory(cwd, limit: limit, scope: scope)
110
78
  end
111
79
 
112
80
  def facts_by_tool(tool_name, limit: 20, scope: SCOPE_ALL)
113
- if @legacy_mode
114
- facts_by_tool_legacy(tool_name, limit: limit, scope: scope)
115
- else
116
- facts_by_tool_dual(tool_name, limit: limit, scope: scope)
117
- end
81
+ @engine.facts_by_tool(tool_name, limit: limit, scope: scope)
118
82
  end
119
83
 
120
- def query_semantic(text, limit: 10, scope: SCOPE_ALL, mode: :both)
121
- if @legacy_mode
122
- query_semantic_legacy(text, limit: limit, scope: scope, mode: mode)
123
- else
124
- query_semantic_dual(text, limit: limit, scope: scope, mode: mode)
125
- end
84
+ def query_semantic(text, limit: 10, scope: SCOPE_ALL, mode: :both, explain: false, intent: nil)
85
+ @engine.query_semantic(text, limit: limit, scope: scope, mode: mode, explain: explain, intent: intent)
126
86
  end
127
87
 
128
88
  def query_concepts(concepts, limit: 10, scope: SCOPE_ALL)
129
89
  raise ArgumentError, "Must provide 2-5 concepts" unless (2..5).cover?(concepts.size)
130
90
 
131
- if @legacy_mode
132
- query_concepts_legacy(concepts, limit: limit, scope: scope)
133
- else
134
- query_concepts_dual(concepts, limit: limit, scope: scope)
135
- end
136
- end
137
-
138
- private
139
-
140
- def query_dual(query_text, limit:, scope:, include_raw_text: false)
141
- template = Recall::DualQueryTemplate.new(@manager)
142
- results = template.execute(scope: scope, limit: limit) do |store, source|
143
- query_single_store(store, query_text, limit: limit, source: source, include_raw_text: include_raw_text)
144
- end
145
- dedupe_and_sort(results, limit)
146
- end
147
-
148
- def query_index_dual(query_text, limit:, scope:)
149
- template = Recall::DualQueryTemplate.new(@manager)
150
- results = template.execute(scope: scope, limit: limit) do |store, source|
151
- query_index_single_store(store, query_text, limit: limit, source: source)
152
- end
153
- dedupe_and_sort_index(results, limit)
154
- end
155
-
156
- def query_index_single_store(store, query_text, limit:, source:)
157
- options = Index::QueryOptions.new(
158
- query_text: query_text,
159
- limit: limit,
160
- scope: :all,
161
- source: source
162
- )
163
-
164
- query = Index::IndexQuery.new(store, options)
165
- query.execute
166
- end
167
-
168
- def dedupe_and_sort_index(results, limit)
169
- Core::FactRanker.dedupe_and_sort_index(results, limit)
170
- end
171
-
172
- def query_single_store(store, query_text, limit:, source:, include_raw_text: false)
173
- fts = Index::LexicalFTS.new(store)
174
- content_ids = fts.search(query_text, limit: limit * 3)
175
- return [] if content_ids.empty?
176
-
177
- # Batch query: fetch ALL provenance records at once using WHERE IN
178
- provenance_by_content = store.provenance
179
- .select(:fact_id, :content_item_id)
180
- .where(content_item_id: content_ids)
181
- .all
182
- .group_by { |p| p[:content_item_id] }
183
-
184
- # Collect fact IDs in content order, deduplicated
185
- ordered_fact_ids = Core::FactCollector.collect_ordered_fact_ids(
186
- provenance_by_content,
187
- content_ids,
188
- limit
189
- )
190
-
191
- return [] if ordered_fact_ids.empty?
192
-
193
- # Batch query all facts at once
194
- facts_by_id = batch_find_facts(store, ordered_fact_ids)
195
-
196
- # Batch query all receipts at once
197
- receipts_by_fact_id = batch_find_receipts(store, ordered_fact_ids, include_raw_text: include_raw_text)
198
-
199
- # Build results maintaining order
200
- Core::ResultBuilder.build_results(
201
- ordered_fact_ids,
202
- facts_by_id: facts_by_id,
203
- receipts_by_fact_id: receipts_by_fact_id,
204
- source: source
205
- )
206
- end
207
-
208
- def batch_find_facts(store, fact_ids)
209
- Core::FactQueryBuilder.batch_find_facts(store, fact_ids)
210
- end
211
-
212
- def batch_find_receipts(store, fact_ids, include_raw_text: false)
213
- Core::FactQueryBuilder.batch_find_receipts(store, fact_ids, include_raw_text: include_raw_text)
214
- end
215
-
216
- def dedupe_and_sort(results, limit)
217
- Core::FactRanker.dedupe_and_sort(results, limit)
218
- end
219
-
220
- def changes_dual(since:, limit:, scope:)
221
- template = Recall::DualQueryTemplate.new(@manager)
222
- results = template.execute(scope: scope, limit: limit) do |store, source|
223
- changes = fetch_changes(store, since, limit)
224
- Core::ResultSorter.annotate_source(changes, source)
225
- end
226
- Core::ResultSorter.sort_by_timestamp(results, limit)
227
- end
228
-
229
- def fetch_changes(store, since, limit)
230
- Core::FactQueryBuilder.fetch_changes(store, since, limit)
231
- end
232
-
233
- def conflicts_dual(scope:)
234
- template = Recall::DualQueryTemplate.new(@manager)
235
- template.execute(scope: scope) do |store, source|
236
- conflicts = store.open_conflicts
237
- Core::ResultSorter.annotate_source(conflicts, source)
238
- end
239
- end
240
-
241
- # Resolve a fact identifier to an integer ID
242
- # Accepts either an integer ID or an 8-char docid string
243
- def resolve_fact_identifier(store, identifier)
244
- return identifier if identifier.is_a?(Integer)
245
-
246
- str = identifier.to_s
247
- # If it looks like a pure integer, use as ID
248
- return str.to_i if str.match?(/\A\d+\z/)
249
-
250
- # Otherwise treat as docid
251
- fact = Core::FactQueryBuilder.find_fact_by_docid(store, str)
252
- fact ? fact[:id] : nil
253
- end
254
-
255
- def explain_from_store(store, fact_id)
256
- fact = find_fact_from_store(store, fact_id)
257
- return Core::NullExplanation.new unless fact
258
-
259
- {
260
- fact: fact,
261
- receipts: find_receipts_from_store(store, fact_id),
262
- superseded_by: find_superseded_by_from_store(store, fact_id),
263
- supersedes: find_supersedes_from_store(store, fact_id),
264
- conflicts: find_conflicts_from_store(store, fact_id)
265
- }
266
- end
267
-
268
- def find_fact_from_store(store, fact_id)
269
- Core::FactQueryBuilder.find_fact(store, fact_id)
270
- end
271
-
272
- def find_receipts_from_store(store, fact_id)
273
- Core::FactQueryBuilder.find_receipts(store, fact_id)
274
- end
275
-
276
- def find_superseded_by_from_store(store, fact_id)
277
- Core::FactQueryBuilder.find_superseded_by(store, fact_id)
278
- end
279
-
280
- def find_supersedes_from_store(store, fact_id)
281
- Core::FactQueryBuilder.find_supersedes(store, fact_id)
282
- end
283
-
284
- def find_conflicts_from_store(store, fact_id)
285
- Core::FactQueryBuilder.find_conflicts(store, fact_id)
286
- end
287
-
288
- def query_legacy(query_text, limit:, scope:)
289
- content_ids = @legacy_fts.search(query_text, limit: limit * 3)
290
- return [] if content_ids.empty?
291
-
292
- # Batch query: fetch ALL provenance records at once using WHERE IN
293
- provenance_by_content = @legacy_store.provenance
294
- .select(:fact_id, :content_item_id)
295
- .where(content_item_id: content_ids)
296
- .all
297
- .group_by { |p| p[:content_item_id] }
298
-
299
- # Collect ordered unique fact IDs from provenance
300
- all_fact_ids = []
301
- seen_fact_ids = Set.new
302
- content_ids.each do |content_id|
303
- (provenance_by_content[content_id] || []).each do |prov|
304
- next if seen_fact_ids.include?(prov[:fact_id])
305
- seen_fact_ids.add(prov[:fact_id])
306
- all_fact_ids << prov[:fact_id]
307
- end
308
- end
309
-
310
- return [] if all_fact_ids.empty?
311
-
312
- # Batch query: fetch ALL facts at once
313
- facts_by_id = batch_find_facts(@legacy_store, all_fact_ids)
314
-
315
- # Filter by scope and apply limit
316
- selected_fact_ids = []
317
- all_fact_ids.each do |fact_id|
318
- fact = facts_by_id[fact_id]
319
- next unless fact
320
- next unless fact_matches_scope?(fact, scope)
321
- selected_fact_ids << fact_id
322
- break if selected_fact_ids.size >= limit
323
- end
324
-
325
- return [] if selected_fact_ids.empty?
326
-
327
- # Batch query: fetch ALL receipts at once
328
- receipts_by_fact_id = batch_find_receipts(@legacy_store, selected_fact_ids)
329
-
330
- facts_with_provenance = selected_fact_ids.map do |fact_id|
331
- {
332
- fact: facts_by_id[fact_id],
333
- receipts: receipts_by_fact_id[fact_id] || []
334
- }
335
- end
336
-
337
- sort_by_scope_priority(facts_with_provenance)
338
- end
339
-
340
- def query_index_legacy(query_text, limit:, scope:)
341
- options = Index::QueryOptions.new(
342
- query_text: query_text,
343
- limit: limit,
344
- scope: :all,
345
- source: :legacy
346
- )
347
-
348
- query = Index::IndexQuery.new(@legacy_store, options)
349
- results = query.execute
350
-
351
- # Filter by scope in legacy mode
352
- results.select do |result|
353
- # Need to get full fact to check scope
354
- fact = find_fact(result[:id])
355
- fact && fact_matches_scope?(fact, scope)
356
- end
357
- end
358
-
359
- def changes_legacy(since:, limit:, scope:)
360
- ds = @legacy_store.facts
361
- .select(:id, :docid, :subject_entity_id, :predicate, :object_literal, :status, :created_at, :scope, :project_path)
362
- .where { created_at >= since }
363
- .order(Sequel.desc(:created_at))
364
- .limit(limit)
365
-
366
- ds = apply_scope_filter(ds, scope)
367
- ds.all
368
- end
369
-
370
- def conflicts_legacy(scope:)
371
- all_conflicts = @legacy_store.open_conflicts
372
- return all_conflicts if scope == SCOPE_ALL
373
-
374
- all_conflicts.select do |conflict|
375
- fact_a = find_fact(conflict[:fact_a_id])
376
- fact_b = find_fact(conflict[:fact_b_id])
377
-
378
- fact_matches_scope?(fact_a, scope) || fact_matches_scope?(fact_b, scope)
379
- end
380
- end
381
-
382
- def fact_matches_scope?(fact, scope)
383
- Core::ScopeFilter.matches?(fact, scope, @project_path)
384
- end
385
-
386
- def apply_scope_filter(dataset, scope)
387
- Core::ScopeFilter.apply_to_dataset(dataset, scope, @project_path)
388
- end
389
-
390
- def sort_by_scope_priority(facts_with_provenance)
391
- Core::FactRanker.sort_by_scope_priority(facts_with_provenance, @project_path)
392
- end
393
-
394
- def find_provenance_by_content(content_id)
395
- Core::FactQueryBuilder.find_provenance_by_content(@legacy_store, content_id)
396
- end
397
-
398
- def find_fact(fact_id)
399
- find_fact_from_store(@legacy_store, fact_id)
400
- end
401
-
402
- def find_receipts(fact_id)
403
- find_receipts_from_store(@legacy_store, fact_id)
404
- end
405
-
406
- # Context-aware query helpers
407
-
408
- def facts_by_context_dual(column, value, limit:, scope:)
409
- template = Recall::DualQueryTemplate.new(@manager)
410
- results = template.execute(scope: scope, limit: limit) do |store, source|
411
- facts_by_context_single(store, column, value, limit: limit, source: source)
412
- end
413
- dedupe_and_sort(results, limit)
414
- end
415
-
416
- def facts_by_context_legacy(column, value, limit:, scope:)
417
- facts_by_context_single(@legacy_store, column, value, limit: limit, source: :legacy)
418
- end
419
-
420
- def facts_by_context_single(store, column, value, limit:, source:)
421
- # Find content items matching the context
422
- content_ids = store.content_items
423
- .where(column => value)
424
- .select(:id)
425
- .map { |row| row[:id] }
426
-
427
- return [] if content_ids.empty?
428
-
429
- # Find facts linked to those content items via provenance
430
- fact_ids = store.provenance
431
- .where(content_item_id: content_ids)
432
- .select(:fact_id)
433
- .distinct
434
- .map { |row| row[:fact_id] }
435
-
436
- return [] if fact_ids.empty?
437
-
438
- # Batch fetch facts and their provenance
439
- facts_by_id = batch_find_facts(store, fact_ids)
440
- receipts_by_fact_id = batch_find_receipts(store, fact_ids)
441
-
442
- results = Core::ResultBuilder.build_results(
443
- fact_ids,
444
- facts_by_id: facts_by_id,
445
- receipts_by_fact_id: receipts_by_fact_id,
446
- source: source
447
- )
448
- results.take(limit)
449
- end
450
-
451
- def facts_by_tool_dual(tool_name, limit:, scope:)
452
- template = Recall::DualQueryTemplate.new(@manager)
453
- results = template.execute(scope: scope, limit: limit) do |store, source|
454
- facts_by_tool_single(store, tool_name, limit: limit, source: source)
455
- end
456
- dedupe_and_sort(results, limit)
457
- end
458
-
459
- def facts_by_tool_legacy(tool_name, limit:, scope:)
460
- facts_by_tool_single(@legacy_store, tool_name, limit: limit, source: :legacy)
461
- end
462
-
463
- def facts_by_tool_single(store, tool_name, limit:, source:)
464
- # Find content items where the tool was used
465
- content_ids = store.tool_calls
466
- .where(tool_name: tool_name)
467
- .select(:content_item_id)
468
- .distinct
469
- .map { |row| row[:content_item_id] }
470
-
471
- return [] if content_ids.empty?
472
-
473
- # Find facts linked to those content items via provenance
474
- fact_ids = store.provenance
475
- .where(content_item_id: content_ids)
476
- .select(:fact_id)
477
- .distinct
478
- .map { |row| row[:fact_id] }
479
-
480
- return [] if fact_ids.empty?
481
-
482
- # Batch fetch facts and their provenance
483
- facts_by_id = batch_find_facts(store, fact_ids)
484
- receipts_by_fact_id = batch_find_receipts(store, fact_ids)
485
-
486
- results = Core::ResultBuilder.build_results(
487
- fact_ids,
488
- facts_by_id: facts_by_id,
489
- receipts_by_fact_id: receipts_by_fact_id,
490
- source: source
491
- )
492
- results.take(limit)
493
- end
494
-
495
- # Semantic search helpers
496
-
497
- def query_semantic_dual(text, limit:, scope:, mode:)
498
- template = Recall::DualQueryTemplate.new(@manager)
499
- results = template.execute(scope: scope, limit: limit) do |store, source|
500
- query_semantic_single(store, text, limit: limit * 3, mode: mode, source: source)
501
- end
502
- # Use similarity-preserving dedupe (not source/time sort) to keep RRF ordering
503
- Core::FactRanker.dedupe_by_fact_id(results, limit)
504
- end
505
-
506
- def query_semantic_legacy(text, limit:, scope:, mode:)
507
- query_semantic_single(@legacy_store, text, limit: limit, mode: mode, source: :legacy)
508
- end
509
-
510
- def query_semantic_single(store, text, limit:, mode:, source:)
511
- vector_results = []
512
- text_results = []
513
-
514
- # Text search mode (FTS) - run first for expansion detection
515
- if mode == :text || mode == :both
516
- text_results = search_by_fts(store, text, limit, source)
517
- end
518
-
519
- # Vector search mode - skip if FTS already found strong match
520
- if mode == :vector || mode == :both
521
- skip_vector = mode == :both && strong_fts_signal?(store, text)
522
- vector_results = search_by_vector(store, text, limit, source) unless skip_vector
523
- end
524
-
525
- # Merge and deduplicate
526
- merge_search_results(vector_results, text_results, limit)
527
- end
528
-
529
- def search_by_vector(store, query_text, limit, source)
530
- # Generate query embedding
531
- query_embedding = @embedding_generator.generate(query_text)
532
-
533
- # Fast path: use sqlite-vec KNN when available
534
- vec_index = store.vector_index
535
- if vec_index.available?
536
- return search_by_vector_native(store, vec_index, query_embedding, limit, source)
537
- end
538
-
539
- # Fallback: JSON + Ruby cosine similarity
540
- search_by_vector_fallback(store, query_embedding, limit, source)
541
- end
542
-
543
- def search_by_vector_native(store, vec_index, query_embedding, limit, source)
544
- matches = vec_index.search(query_embedding, k: limit)
545
- return [] if matches.empty?
546
-
547
- fact_ids = matches.map { |m| m[:fact_id] }
548
- facts_by_id = batch_find_facts(store, fact_ids)
549
- receipts_by_fact_id = batch_find_receipts(store, fact_ids)
550
-
551
- Core::ResultBuilder.build_results_with_scores(
552
- matches,
553
- facts_by_id: facts_by_id,
554
- receipts_by_fact_id: receipts_by_fact_id,
555
- source: source
556
- )
557
- end
558
-
559
- def search_by_vector_fallback(store, query_embedding, limit, source)
560
- # Load facts with embeddings
561
- facts_data = store.facts_with_embeddings(limit: 5000)
562
- return [] if facts_data.empty?
563
-
564
- # Deduplicate: group facts by embedding, score unique embeddings only, fan out
565
- unique_candidates, fact_groups = dedup_candidates(facts_data)
566
- return [] if unique_candidates.empty?
567
-
568
- # Calculate similarities on unique embeddings only
569
- top_unique = Embeddings::Similarity.top_k(query_embedding, unique_candidates, limit)
570
-
571
- # Fan out: expand unique matches back to all fact_ids sharing that embedding
572
- top_matches = fan_out_matches(top_unique, fact_groups, limit)
573
-
574
- # Batch fetch full fact details
575
- fact_ids = top_matches.map { |m| m[:candidate][:fact_id] }
576
- facts_by_id = batch_find_facts(store, fact_ids)
577
- receipts_by_fact_id = batch_find_receipts(store, fact_ids)
578
-
579
- # Build results with similarity scores
580
- Core::ResultBuilder.build_results_with_scores(
581
- top_matches,
582
- facts_by_id: facts_by_id,
583
- receipts_by_fact_id: receipts_by_fact_id,
584
- source: source
585
- )
586
- end
587
-
588
- # Group facts by embedding_json, return unique candidates + mapping
589
- def dedup_candidates(facts_data)
590
- groups = {} # embedding_json → [fact_ids]
591
- unique = {} # embedding_json → parsed candidate (first occurrence)
592
-
593
- facts_data.each do |row|
594
- key = row[:embedding_json]
595
- if unique.key?(key)
596
- groups[key] << row[:id]
597
- else
598
- candidate = Core::EmbeddingCandidateBuilder.parse_candidate(row)
599
- next unless candidate
600
- unique[key] = candidate
601
- groups[key] = [row[:id]]
602
- end
603
- end
604
-
605
- [unique.values, groups]
606
- end
607
-
608
- # Expand unique matches back to all fact_ids sharing the same embedding
609
- def fan_out_matches(top_unique, fact_groups, limit)
610
- results = []
611
- top_unique.each do |match|
612
- candidate = match[:candidate]
613
- similarity = match[:similarity]
614
-
615
- # Find the group key for this candidate's embedding
616
- group_key = fact_groups.find { |_key, ids| ids.include?(candidate[:fact_id]) }&.first
617
- next unless group_key
618
-
619
- fact_groups[group_key].each do |fact_id|
620
- results << {
621
- candidate: candidate.merge(fact_id: fact_id),
622
- similarity: similarity
623
- }
624
- break if results.size >= limit
625
- end
626
- break if results.size >= limit
627
- end
628
-
629
- results
630
- end
631
-
632
- def search_by_fts(store, query_text, limit, source)
633
- fts = Index::LexicalFTS.new(store)
634
- ranked_results = fts.search_with_ranks(query_text, limit: limit * 2)
635
-
636
- return [] if ranked_results.empty?
637
-
638
- content_ids = ranked_results.map { |r| r[:content_item_id] }
639
-
640
- # Map content_item_ids to fact_ids, preserving FTS rank order
641
- provenance_rows = store.provenance
642
- .where(content_item_id: content_ids)
643
- .select(:fact_id, :content_item_id)
644
- .all
645
-
646
- content_to_facts = provenance_rows.group_by { |r| r[:content_item_id] }
647
-
648
- # Build ordered fact list with normalized BM25 scores
649
- # FTS5 rank values are negative (more negative = better match)
650
- ranks = ranked_results.map { |r| r[:rank] }
651
- min_rank = ranks.min # Most negative = best
652
- max_rank = ranks.max # Least negative = worst
653
- range = (max_rank - min_rank).abs
654
-
655
- seen_fact_ids = Set.new
656
- scored_matches = []
657
-
658
- ranked_results.each do |r|
659
- similarity = if range > 0
660
- # Normalize: best rank → 1.0, worst rank → 0.1
661
- 0.1 + 0.9 * ((max_rank - r[:rank]).abs / range)
662
- else
663
- 0.8 # Single result gets a reasonable score
664
- end
665
-
666
- fact_ids = content_to_facts[r[:content_item_id]]&.map { |p| p[:fact_id] } || []
667
- fact_ids.each do |fid|
668
- next if seen_fact_ids.include?(fid)
669
- seen_fact_ids.add(fid)
670
- scored_matches << {fact_id: fid, similarity: similarity}
671
- end
672
- end
673
-
674
- return [] if scored_matches.empty?
675
-
676
- fact_ids = scored_matches.map { |m| m[:fact_id] }
677
- facts_by_id = batch_find_facts(store, fact_ids)
678
- receipts_by_fact_id = batch_find_receipts(store, fact_ids)
679
-
680
- Core::ResultBuilder.build_results_with_scores(
681
- scored_matches,
682
- facts_by_id: facts_by_id,
683
- receipts_by_fact_id: receipts_by_fact_id,
684
- source: source
685
- ).take(limit)
686
- end
687
-
688
- def merge_search_results(vector_results, text_results, limit)
689
- Core::FactRanker.merge_search_results(vector_results, text_results, limit)
690
- end
691
-
692
- def strong_fts_signal?(store, query_text)
693
- fts = Index::LexicalFTS.new(store)
694
- ranked_results = fts.search_with_ranks(query_text, limit: 5)
695
- Recall::ExpansionDetector.strong_fts_signal?(ranked_results)
696
- end
697
-
698
- # Multi-concept search helpers
699
-
700
- def query_concepts_dual(concepts, limit:, scope:)
701
- template = Recall::DualQueryTemplate.new(@manager)
702
- results = template.execute(scope: scope, limit: limit) do |store, source|
703
- query_concepts_single(store, concepts, limit: limit * 2, source: source)
704
- end
705
- # Deduplicate and sort by average similarity
706
- dedupe_by_fact_id(results, limit)
707
- end
708
-
709
- def query_concepts_legacy(concepts, limit:, scope:)
710
- query_concepts_single(@legacy_store, concepts, limit: limit, source: :legacy)
711
- end
712
-
713
- def query_concepts_single(store, concepts, limit:, source:)
714
- # I/O: Search each concept independently with higher limit for intersection
715
- concept_results = concepts.map do |concept|
716
- search_by_vector(store, concept, limit * 5, source)
717
- end
718
-
719
- # Pure logic: Rank by average similarity across all concepts
720
- Core::ConceptRanker.rank_by_concepts(concept_results, limit)
721
- end
722
-
723
- def dedupe_by_fact_id(results, limit)
724
- Core::FactRanker.dedupe_by_fact_id(results, limit)
91
+ @engine.query_concepts(concepts, limit: limit, scope: scope)
725
92
  end
726
93
  end
727
94
  end