claude_memory 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. checksums.yaml +4 -4
  2. data/.claude/memory.sqlite3 +0 -0
  3. data/.claude/rules/claude_memory.generated.md +32 -2
  4. data/.claude/settings.json +65 -15
  5. data/.claude/settings.local.json +5 -2
  6. data/.claude/skills/improve/SKILL.md +113 -25
  7. data/.claude/skills/upgrade-dependencies/SKILL.md +154 -0
  8. data/.claude-plugin/commands/distill-transcripts.md +98 -0
  9. data/.claude-plugin/commands/memory-recall.md +67 -0
  10. data/.claude-plugin/marketplace.json +2 -2
  11. data/.claude-plugin/plugin.json +3 -3
  12. data/.claude-plugin/scripts/hook-runner.sh +14 -0
  13. data/.claude-plugin/scripts/serve-mcp.sh +14 -0
  14. data/.ruby-version +1 -1
  15. data/CHANGELOG.md +90 -1
  16. data/CLAUDE.md +56 -18
  17. data/README.md +35 -0
  18. data/db/migrations/013_add_mcp_tool_calls.rb +26 -0
  19. data/db/migrations/014_canonicalize_predicates.rb +30 -0
  20. data/docs/improvements.md +74 -74
  21. data/docs/influence/claude-mem.md +1 -0
  22. data/docs/influence/claude-supermemory.md +1 -0
  23. data/docs/influence/episodic-memory.md +1 -0
  24. data/docs/influence/grepai.md +1 -0
  25. data/docs/influence/kbs.md +1 -0
  26. data/docs/influence/lossless-claw.md +1 -0
  27. data/docs/influence/qmd.md +1 -0
  28. data/docs/quality_review.md +119 -224
  29. data/hooks/hooks.json +39 -7
  30. data/lib/claude_memory/commands/checks/distill_check.rb +61 -0
  31. data/lib/claude_memory/commands/checks/hooks_check.rb +2 -2
  32. data/lib/claude_memory/commands/checks/vec_check.rb +2 -1
  33. data/lib/claude_memory/commands/completion_command.rb +149 -0
  34. data/lib/claude_memory/commands/doctor_command.rb +2 -0
  35. data/lib/claude_memory/commands/embeddings_command.rb +198 -0
  36. data/lib/claude_memory/commands/help_command.rb +12 -1
  37. data/lib/claude_memory/commands/hook_command.rb +2 -1
  38. data/lib/claude_memory/commands/index_command.rb +85 -78
  39. data/lib/claude_memory/commands/initializers/database_ensurer.rb +16 -0
  40. data/lib/claude_memory/commands/initializers/global_initializer.rb +2 -1
  41. data/lib/claude_memory/commands/initializers/hooks_configurator.rb +55 -11
  42. data/lib/claude_memory/commands/initializers/project_initializer.rb +2 -1
  43. data/lib/claude_memory/commands/install_skill_command.rb +78 -0
  44. data/lib/claude_memory/commands/registry.rb +47 -32
  45. data/lib/claude_memory/commands/reject_command.rb +62 -0
  46. data/lib/claude_memory/commands/restore_command.rb +77 -0
  47. data/lib/claude_memory/commands/skills/distill-transcripts.md +102 -0
  48. data/lib/claude_memory/commands/skills/memory-recall.md +67 -0
  49. data/lib/claude_memory/commands/stats_command.rb +98 -2
  50. data/lib/claude_memory/configuration.rb +14 -1
  51. data/lib/claude_memory/core/fact_ranker.rb +2 -2
  52. data/lib/claude_memory/core/rr_fusion.rb +23 -6
  53. data/lib/claude_memory/core/snippet_extractor.rb +7 -3
  54. data/lib/claude_memory/core/text_builder.rb +11 -0
  55. data/lib/claude_memory/distill/json_schema.md +8 -4
  56. data/lib/claude_memory/distill/null_distiller.rb +2 -0
  57. data/lib/claude_memory/domain/entity.rb +13 -1
  58. data/lib/claude_memory/domain/fact.rb +26 -2
  59. data/lib/claude_memory/domain/provenance.rb +0 -1
  60. data/lib/claude_memory/embeddings/api_adapter.rb +97 -0
  61. data/lib/claude_memory/embeddings/dimension_check.rb +23 -0
  62. data/lib/claude_memory/embeddings/fastembed_adapter.rb +46 -12
  63. data/lib/claude_memory/embeddings/generator.rb +4 -0
  64. data/lib/claude_memory/embeddings/inspector.rb +91 -0
  65. data/lib/claude_memory/embeddings/model_registry.rb +210 -0
  66. data/lib/claude_memory/embeddings/resolver.rb +44 -0
  67. data/lib/claude_memory/hook/context_injector.rb +58 -2
  68. data/lib/claude_memory/hook/distillation_runner.rb +46 -0
  69. data/lib/claude_memory/hook/handler.rb +11 -2
  70. data/lib/claude_memory/index/vector_index.rb +15 -2
  71. data/lib/claude_memory/infrastructure/schema_validator.rb +3 -3
  72. data/lib/claude_memory/ingest/ingester.rb +17 -0
  73. data/lib/claude_memory/mcp/handlers/context_handlers.rb +38 -0
  74. data/lib/claude_memory/mcp/handlers/management_handlers.rb +169 -0
  75. data/lib/claude_memory/mcp/handlers/query_handlers.rb +115 -0
  76. data/lib/claude_memory/mcp/handlers/setup_handlers.rb +211 -0
  77. data/lib/claude_memory/mcp/handlers/shortcut_handlers.rb +37 -0
  78. data/lib/claude_memory/mcp/handlers/stats_handlers.rb +205 -0
  79. data/lib/claude_memory/mcp/instructions_builder.rb +19 -1
  80. data/lib/claude_memory/mcp/query_guide.rb +10 -0
  81. data/lib/claude_memory/mcp/response_formatter.rb +1 -0
  82. data/lib/claude_memory/mcp/server.rb +22 -1
  83. data/lib/claude_memory/mcp/telemetry.rb +86 -0
  84. data/lib/claude_memory/mcp/text_summary.rb +26 -0
  85. data/lib/claude_memory/mcp/tool_definitions.rb +116 -4
  86. data/lib/claude_memory/mcp/tool_helpers.rb +43 -0
  87. data/lib/claude_memory/mcp/tools.rb +50 -679
  88. data/lib/claude_memory/publish.rb +40 -5
  89. data/lib/claude_memory/recall/dual_engine.rb +105 -0
  90. data/lib/claude_memory/recall/legacy_engine.rb +138 -0
  91. data/lib/claude_memory/recall/query_core.rb +371 -0
  92. data/lib/claude_memory/recall.rb +121 -673
  93. data/lib/claude_memory/resolve/predicate_policy.rb +63 -3
  94. data/lib/claude_memory/resolve/resolver.rb +43 -0
  95. data/lib/claude_memory/shortcuts.rb +4 -4
  96. data/lib/claude_memory/store/retry_handler.rb +61 -0
  97. data/lib/claude_memory/store/schema_manager.rb +68 -0
  98. data/lib/claude_memory/store/sqlite_store.rb +334 -201
  99. data/lib/claude_memory/store/store_manager.rb +50 -1
  100. data/lib/claude_memory/sweep/maintenance.rb +115 -1
  101. data/lib/claude_memory/sweep/sweeper.rb +3 -0
  102. data/lib/claude_memory/templates/hooks.example.json +26 -7
  103. data/lib/claude_memory/version.rb +1 -1
  104. data/lib/claude_memory.rb +16 -0
  105. metadata +48 -8
  106. data/.claude/memory.sqlite3-shm +0 -0
  107. data/.claude/memory.sqlite3-wal +0 -0
@@ -1,727 +1,175 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ClaudeMemory
4
+ # Query interface for facts across dual databases (global + project).
5
+ # Delegates to DualEngine or LegacyEngine depending on the store type.
4
6
  class Recall
7
+ # @return [String] query only project-scoped facts
5
8
  SCOPE_PROJECT = "project"
9
+ # @return [String] query only global-scoped facts
6
10
  SCOPE_GLOBAL = "global"
11
+ # @return [String] query both project and global facts (default)
7
12
  SCOPE_ALL = "all"
8
13
 
9
14
  class << self
15
+ # @param manager [Store::StoreManager] dual-database manager
16
+ # @param limit [Integer] max results
17
+ # @return [Array<Hash>] recent decision facts
10
18
  def recent_decisions(manager, limit: 10)
11
19
  Shortcuts.for(:decisions, manager, limit: limit)
12
20
  end
13
21
 
22
+ # @param manager [Store::StoreManager] dual-database manager
23
+ # @param limit [Integer] max results
24
+ # @return [Array<Hash>] architecture-related facts
14
25
  def architecture_choices(manager, limit: 10)
15
26
  Shortcuts.for(:architecture, manager, limit: limit)
16
27
  end
17
28
 
29
+ # @param manager [Store::StoreManager] dual-database manager
30
+ # @param limit [Integer] max results
31
+ # @return [Array<Hash>] convention facts
18
32
  def conventions(manager, limit: 20)
19
33
  Shortcuts.for(:conventions, manager, limit: limit)
20
34
  end
21
35
 
36
+ # @param manager [Store::StoreManager] dual-database manager
37
+ # @param limit [Integer] max results
38
+ # @return [Array<Hash>] project configuration facts
22
39
  def project_config(manager, limit: 10)
23
40
  Shortcuts.for(:project_config, manager, limit: limit)
24
41
  end
25
42
  end
26
43
 
44
+ # @param store_or_manager [Store::SQLiteStore, Store::StoreManager] database store or dual-database manager
45
+ # @param fts [Index::LexicalFTS, nil] full-text search index (used only with legacy single-store)
46
+ # @param project_path [String, nil] project root path (defaults to Configuration#project_dir)
47
+ # @param env [Hash] environment variables
48
+ # @param embedding_generator [Object, nil] vector embedding generator for semantic search
27
49
  def initialize(store_or_manager, fts: nil, project_path: nil, env: ENV, embedding_generator: nil)
28
50
  config = Configuration.new(env)
29
- @project_path = project_path || config.project_dir
30
- @embedding_generator = embedding_generator || Embeddings::Generator.new
31
-
32
- if store_or_manager.is_a?(Store::StoreManager)
33
- @manager = store_or_manager
34
- @legacy_mode = false
35
- else
36
- @legacy_store = store_or_manager
37
- @legacy_fts = fts || Index::LexicalFTS.new(store_or_manager)
38
- @legacy_mode = true
39
- end
40
- end
41
-
42
- def query(query_text, limit: 10, scope: SCOPE_ALL, include_raw_text: false)
43
- if @legacy_mode
44
- query_legacy(query_text, limit: limit, scope: scope)
45
- else
46
- query_dual(query_text, limit: limit, scope: scope, include_raw_text: include_raw_text)
47
- end
48
- end
49
-
50
- def query_index(query_text, limit: 20, scope: SCOPE_ALL)
51
- if @legacy_mode
52
- query_index_legacy(query_text, limit: limit, scope: scope)
51
+ resolved_project_path = project_path || config.project_dir
52
+ resolved_generator = embedding_generator || Embeddings.resolve(env: env)
53
+
54
+ @engine = if store_or_manager.is_a?(Store::StoreManager)
55
+ DualEngine.new(
56
+ store_or_manager,
57
+ embedding_generator: resolved_generator,
58
+ project_path: resolved_project_path
59
+ )
53
60
  else
54
- query_index_dual(query_text, limit: limit, scope: scope)
55
- end
56
- end
57
-
61
+ LegacyEngine.new(
62
+ store_or_manager,
63
+ fts: fts || Index::LexicalFTS.new(store_or_manager),
64
+ embedding_generator: resolved_generator,
65
+ project_path: resolved_project_path
66
+ )
67
+ end
68
+ end
69
+
70
+ # Search facts by text query using FTS5
71
+ # @param query_text [String] search terms
72
+ # @param limit [Integer] max results
73
+ # @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
74
+ # @param include_raw_text [Boolean] include source content text in results
75
+ # @param intent [String, nil] query intent hint for ranking
76
+ # @return [Array<Hash>] matching facts with provenance
77
+ def query(query_text, limit: 10, scope: SCOPE_ALL, include_raw_text: false, intent: nil)
78
+ @engine.query(query_text, limit: limit, scope: scope, include_raw_text: include_raw_text, intent: intent)
79
+ end
80
+
81
+ # Search content items (not facts) via FTS5 index
82
+ # @param query_text [String] search terms
83
+ # @param limit [Integer] max results
84
+ # @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
85
+ # @param intent [String, nil] query intent hint for ranking
86
+ # @return [Array<Hash>] matching content items
87
+ def query_index(query_text, limit: 20, scope: SCOPE_ALL, intent: nil)
88
+ @engine.query_index(query_text, limit: limit, scope: scope, intent: intent)
89
+ end
90
+
91
+ # Traverse fact relationships (supersessions, conflicts) as a graph
92
+ # @param fact_id [Integer] starting fact ID
93
+ # @param depth [Integer] traversal depth
94
+ # @param scope [String, nil] optional scope filter
95
+ # @return [Hash] graph with nodes and edges
58
96
  def fact_graph(fact_id, depth: 2, scope: nil)
59
- if @legacy_mode
60
- Core::FactGraph.build(@legacy_store, fact_id, depth: depth)
61
- else
62
- scope ||= SCOPE_PROJECT
63
- store = @manager.store_for_scope(scope)
64
- Core::FactGraph.build(store, fact_id, depth: depth)
65
- end
97
+ @engine.fact_graph(fact_id, depth: depth, scope: scope)
66
98
  end
67
99
 
100
+ # Show provenance chain for a fact
101
+ # @param fact_id_or_docid [Integer, String] fact ID or document ID
102
+ # @param scope [String, nil] optional scope filter
103
+ # @return [Hash] provenance details including source content
68
104
  def explain(fact_id_or_docid, scope: nil)
69
- if @legacy_mode
70
- fact_id = resolve_fact_identifier(@legacy_store, fact_id_or_docid)
71
- explain_from_store(@legacy_store, fact_id)
72
- else
73
- scope ||= SCOPE_PROJECT
74
- store = @manager.store_for_scope(scope)
75
- fact_id = resolve_fact_identifier(store, fact_id_or_docid)
76
- explain_from_store(store, fact_id)
77
- end
105
+ @engine.explain(fact_id_or_docid, scope: scope)
78
106
  end
79
107
 
108
+ # List facts created or modified since a given time
109
+ # @param since [String] ISO 8601 timestamp
110
+ # @param limit [Integer] max results
111
+ # @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
112
+ # @return [Array<Hash>] recently changed facts
80
113
  def changes(since:, limit: 50, scope: SCOPE_ALL)
81
- if @legacy_mode
82
- changes_legacy(since: since, limit: limit, scope: scope)
83
- else
84
- changes_dual(since: since, limit: limit, scope: scope)
85
- end
114
+ @engine.changes(since: since, limit: limit, scope: scope)
86
115
  end
87
116
 
117
+ # List open fact conflicts
118
+ # @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
119
+ # @return [Array<Hash>] unresolved conflicts
88
120
  def conflicts(scope: SCOPE_ALL)
89
- if @legacy_mode
90
- conflicts_legacy(scope: scope)
91
- else
92
- conflicts_dual(scope: scope)
93
- end
121
+ @engine.conflicts(scope: scope)
94
122
  end
95
123
 
124
+ # Find facts associated with a git branch
125
+ # @param branch_name [String] git branch name
126
+ # @param limit [Integer] max results
127
+ # @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
128
+ # @return [Array<Hash>] facts from the given branch
96
129
  def facts_by_branch(branch_name, limit: 20, scope: SCOPE_ALL)
97
- if @legacy_mode
98
- facts_by_context_legacy(:git_branch, branch_name, limit: limit, scope: scope)
99
- else
100
- facts_by_context_dual(:git_branch, branch_name, limit: limit, scope: scope)
101
- end
130
+ @engine.facts_by_branch(branch_name, limit: limit, scope: scope)
102
131
  end
103
132
 
133
+ # Find facts associated with a working directory
134
+ # @param cwd [String] directory path
135
+ # @param limit [Integer] max results
136
+ # @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
137
+ # @return [Array<Hash>] facts from the given directory
104
138
  def facts_by_directory(cwd, limit: 20, scope: SCOPE_ALL)
105
- if @legacy_mode
106
- facts_by_context_legacy(:cwd, cwd, limit: limit, scope: scope)
107
- else
108
- facts_by_context_dual(:cwd, cwd, limit: limit, scope: scope)
109
- end
139
+ @engine.facts_by_directory(cwd, limit: limit, scope: scope)
110
140
  end
111
141
 
142
+ # Find facts associated with a specific tool
143
+ # @param tool_name [String] tool name (e.g., "Read", "Bash")
144
+ # @param limit [Integer] max results
145
+ # @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
146
+ # @return [Array<Hash>] facts from sessions using the given tool
112
147
  def facts_by_tool(tool_name, limit: 20, scope: SCOPE_ALL)
113
- if @legacy_mode
114
- facts_by_tool_legacy(tool_name, limit: limit, scope: scope)
115
- else
116
- facts_by_tool_dual(tool_name, limit: limit, scope: scope)
117
- end
118
- end
119
-
120
- def query_semantic(text, limit: 10, scope: SCOPE_ALL, mode: :both)
121
- if @legacy_mode
122
- query_semantic_legacy(text, limit: limit, scope: scope, mode: mode)
123
- else
124
- query_semantic_dual(text, limit: limit, scope: scope, mode: mode)
125
- end
126
- end
127
-
148
+ @engine.facts_by_tool(tool_name, limit: limit, scope: scope)
149
+ end
150
+
151
+ # Search facts using vector embeddings (semantic similarity)
152
+ # @param text [String] natural language query
153
+ # @param limit [Integer] max results
154
+ # @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
155
+ # @param mode [Symbol] :vector, :lexical, or :both (hybrid RRF)
156
+ # @param explain [Boolean] include scoring breakdown in results
157
+ # @param intent [String, nil] query intent hint for ranking
158
+ # @return [Array<Hash>] semantically similar facts
159
+ def query_semantic(text, limit: 10, scope: SCOPE_ALL, mode: :both, explain: false, intent: nil)
160
+ @engine.query_semantic(text, limit: limit, scope: scope, mode: mode, explain: explain, intent: intent)
161
+ end
162
+
163
+ # Find facts at the intersection of multiple concepts
164
+ # @param concepts [Array<String>] 2-5 concept terms to intersect
165
+ # @param limit [Integer] max results
166
+ # @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
167
+ # @return [Array<Hash>] facts matching all given concepts
168
+ # @raise [ArgumentError] if concepts count is not 2-5
128
169
  def query_concepts(concepts, limit: 10, scope: SCOPE_ALL)
129
170
  raise ArgumentError, "Must provide 2-5 concepts" unless (2..5).cover?(concepts.size)
130
171
 
131
- if @legacy_mode
132
- query_concepts_legacy(concepts, limit: limit, scope: scope)
133
- else
134
- query_concepts_dual(concepts, limit: limit, scope: scope)
135
- end
136
- end
137
-
138
- private
139
-
140
- def query_dual(query_text, limit:, scope:, include_raw_text: false)
141
- template = Recall::DualQueryTemplate.new(@manager)
142
- results = template.execute(scope: scope, limit: limit) do |store, source|
143
- query_single_store(store, query_text, limit: limit, source: source, include_raw_text: include_raw_text)
144
- end
145
- dedupe_and_sort(results, limit)
146
- end
147
-
148
- def query_index_dual(query_text, limit:, scope:)
149
- template = Recall::DualQueryTemplate.new(@manager)
150
- results = template.execute(scope: scope, limit: limit) do |store, source|
151
- query_index_single_store(store, query_text, limit: limit, source: source)
152
- end
153
- dedupe_and_sort_index(results, limit)
154
- end
155
-
156
- def query_index_single_store(store, query_text, limit:, source:)
157
- options = Index::QueryOptions.new(
158
- query_text: query_text,
159
- limit: limit,
160
- scope: :all,
161
- source: source
162
- )
163
-
164
- query = Index::IndexQuery.new(store, options)
165
- query.execute
166
- end
167
-
168
- def dedupe_and_sort_index(results, limit)
169
- Core::FactRanker.dedupe_and_sort_index(results, limit)
170
- end
171
-
172
- def query_single_store(store, query_text, limit:, source:, include_raw_text: false)
173
- fts = Index::LexicalFTS.new(store)
174
- content_ids = fts.search(query_text, limit: limit * 3)
175
- return [] if content_ids.empty?
176
-
177
- # Batch query: fetch ALL provenance records at once using WHERE IN
178
- provenance_by_content = store.provenance
179
- .select(:fact_id, :content_item_id)
180
- .where(content_item_id: content_ids)
181
- .all
182
- .group_by { |p| p[:content_item_id] }
183
-
184
- # Collect fact IDs in content order, deduplicated
185
- ordered_fact_ids = Core::FactCollector.collect_ordered_fact_ids(
186
- provenance_by_content,
187
- content_ids,
188
- limit
189
- )
190
-
191
- return [] if ordered_fact_ids.empty?
192
-
193
- # Batch query all facts at once
194
- facts_by_id = batch_find_facts(store, ordered_fact_ids)
195
-
196
- # Batch query all receipts at once
197
- receipts_by_fact_id = batch_find_receipts(store, ordered_fact_ids, include_raw_text: include_raw_text)
198
-
199
- # Build results maintaining order
200
- Core::ResultBuilder.build_results(
201
- ordered_fact_ids,
202
- facts_by_id: facts_by_id,
203
- receipts_by_fact_id: receipts_by_fact_id,
204
- source: source
205
- )
206
- end
207
-
208
- def batch_find_facts(store, fact_ids)
209
- Core::FactQueryBuilder.batch_find_facts(store, fact_ids)
210
- end
211
-
212
- def batch_find_receipts(store, fact_ids, include_raw_text: false)
213
- Core::FactQueryBuilder.batch_find_receipts(store, fact_ids, include_raw_text: include_raw_text)
214
- end
215
-
216
- def dedupe_and_sort(results, limit)
217
- Core::FactRanker.dedupe_and_sort(results, limit)
218
- end
219
-
220
- def changes_dual(since:, limit:, scope:)
221
- template = Recall::DualQueryTemplate.new(@manager)
222
- results = template.execute(scope: scope, limit: limit) do |store, source|
223
- changes = fetch_changes(store, since, limit)
224
- Core::ResultSorter.annotate_source(changes, source)
225
- end
226
- Core::ResultSorter.sort_by_timestamp(results, limit)
227
- end
228
-
229
- def fetch_changes(store, since, limit)
230
- Core::FactQueryBuilder.fetch_changes(store, since, limit)
231
- end
232
-
233
- def conflicts_dual(scope:)
234
- template = Recall::DualQueryTemplate.new(@manager)
235
- template.execute(scope: scope) do |store, source|
236
- conflicts = store.open_conflicts
237
- Core::ResultSorter.annotate_source(conflicts, source)
238
- end
239
- end
240
-
241
- # Resolve a fact identifier to an integer ID
242
- # Accepts either an integer ID or an 8-char docid string
243
- def resolve_fact_identifier(store, identifier)
244
- return identifier if identifier.is_a?(Integer)
245
-
246
- str = identifier.to_s
247
- # If it looks like a pure integer, use as ID
248
- return str.to_i if str.match?(/\A\d+\z/)
249
-
250
- # Otherwise treat as docid
251
- fact = Core::FactQueryBuilder.find_fact_by_docid(store, str)
252
- fact ? fact[:id] : nil
253
- end
254
-
255
- def explain_from_store(store, fact_id)
256
- fact = find_fact_from_store(store, fact_id)
257
- return Core::NullExplanation.new unless fact
258
-
259
- {
260
- fact: fact,
261
- receipts: find_receipts_from_store(store, fact_id),
262
- superseded_by: find_superseded_by_from_store(store, fact_id),
263
- supersedes: find_supersedes_from_store(store, fact_id),
264
- conflicts: find_conflicts_from_store(store, fact_id)
265
- }
266
- end
267
-
268
- def find_fact_from_store(store, fact_id)
269
- Core::FactQueryBuilder.find_fact(store, fact_id)
270
- end
271
-
272
- def find_receipts_from_store(store, fact_id)
273
- Core::FactQueryBuilder.find_receipts(store, fact_id)
274
- end
275
-
276
- def find_superseded_by_from_store(store, fact_id)
277
- Core::FactQueryBuilder.find_superseded_by(store, fact_id)
278
- end
279
-
280
- def find_supersedes_from_store(store, fact_id)
281
- Core::FactQueryBuilder.find_supersedes(store, fact_id)
282
- end
283
-
284
- def find_conflicts_from_store(store, fact_id)
285
- Core::FactQueryBuilder.find_conflicts(store, fact_id)
286
- end
287
-
288
- def query_legacy(query_text, limit:, scope:)
289
- content_ids = @legacy_fts.search(query_text, limit: limit * 3)
290
- return [] if content_ids.empty?
291
-
292
- # Batch query: fetch ALL provenance records at once using WHERE IN
293
- provenance_by_content = @legacy_store.provenance
294
- .select(:fact_id, :content_item_id)
295
- .where(content_item_id: content_ids)
296
- .all
297
- .group_by { |p| p[:content_item_id] }
298
-
299
- # Collect ordered unique fact IDs from provenance
300
- all_fact_ids = []
301
- seen_fact_ids = Set.new
302
- content_ids.each do |content_id|
303
- (provenance_by_content[content_id] || []).each do |prov|
304
- next if seen_fact_ids.include?(prov[:fact_id])
305
- seen_fact_ids.add(prov[:fact_id])
306
- all_fact_ids << prov[:fact_id]
307
- end
308
- end
309
-
310
- return [] if all_fact_ids.empty?
311
-
312
- # Batch query: fetch ALL facts at once
313
- facts_by_id = batch_find_facts(@legacy_store, all_fact_ids)
314
-
315
- # Filter by scope and apply limit
316
- selected_fact_ids = []
317
- all_fact_ids.each do |fact_id|
318
- fact = facts_by_id[fact_id]
319
- next unless fact
320
- next unless fact_matches_scope?(fact, scope)
321
- selected_fact_ids << fact_id
322
- break if selected_fact_ids.size >= limit
323
- end
324
-
325
- return [] if selected_fact_ids.empty?
326
-
327
- # Batch query: fetch ALL receipts at once
328
- receipts_by_fact_id = batch_find_receipts(@legacy_store, selected_fact_ids)
329
-
330
- facts_with_provenance = selected_fact_ids.map do |fact_id|
331
- {
332
- fact: facts_by_id[fact_id],
333
- receipts: receipts_by_fact_id[fact_id] || []
334
- }
335
- end
336
-
337
- sort_by_scope_priority(facts_with_provenance)
338
- end
339
-
340
- def query_index_legacy(query_text, limit:, scope:)
341
- options = Index::QueryOptions.new(
342
- query_text: query_text,
343
- limit: limit,
344
- scope: :all,
345
- source: :legacy
346
- )
347
-
348
- query = Index::IndexQuery.new(@legacy_store, options)
349
- results = query.execute
350
-
351
- # Filter by scope in legacy mode
352
- results.select do |result|
353
- # Need to get full fact to check scope
354
- fact = find_fact(result[:id])
355
- fact && fact_matches_scope?(fact, scope)
356
- end
357
- end
358
-
359
- def changes_legacy(since:, limit:, scope:)
360
- ds = @legacy_store.facts
361
- .select(:id, :docid, :subject_entity_id, :predicate, :object_literal, :status, :created_at, :scope, :project_path)
362
- .where { created_at >= since }
363
- .order(Sequel.desc(:created_at))
364
- .limit(limit)
365
-
366
- ds = apply_scope_filter(ds, scope)
367
- ds.all
368
- end
369
-
370
- def conflicts_legacy(scope:)
371
- all_conflicts = @legacy_store.open_conflicts
372
- return all_conflicts if scope == SCOPE_ALL
373
-
374
- all_conflicts.select do |conflict|
375
- fact_a = find_fact(conflict[:fact_a_id])
376
- fact_b = find_fact(conflict[:fact_b_id])
377
-
378
- fact_matches_scope?(fact_a, scope) || fact_matches_scope?(fact_b, scope)
379
- end
380
- end
381
-
382
- def fact_matches_scope?(fact, scope)
383
- Core::ScopeFilter.matches?(fact, scope, @project_path)
384
- end
385
-
386
- def apply_scope_filter(dataset, scope)
387
- Core::ScopeFilter.apply_to_dataset(dataset, scope, @project_path)
388
- end
389
-
390
- def sort_by_scope_priority(facts_with_provenance)
391
- Core::FactRanker.sort_by_scope_priority(facts_with_provenance, @project_path)
392
- end
393
-
394
- def find_provenance_by_content(content_id)
395
- Core::FactQueryBuilder.find_provenance_by_content(@legacy_store, content_id)
396
- end
397
-
398
- def find_fact(fact_id)
399
- find_fact_from_store(@legacy_store, fact_id)
400
- end
401
-
402
- def find_receipts(fact_id)
403
- find_receipts_from_store(@legacy_store, fact_id)
404
- end
405
-
406
- # Context-aware query helpers
407
-
408
- def facts_by_context_dual(column, value, limit:, scope:)
409
- template = Recall::DualQueryTemplate.new(@manager)
410
- results = template.execute(scope: scope, limit: limit) do |store, source|
411
- facts_by_context_single(store, column, value, limit: limit, source: source)
412
- end
413
- dedupe_and_sort(results, limit)
414
- end
415
-
416
- def facts_by_context_legacy(column, value, limit:, scope:)
417
- facts_by_context_single(@legacy_store, column, value, limit: limit, source: :legacy)
418
- end
419
-
420
- def facts_by_context_single(store, column, value, limit:, source:)
421
- # Find content items matching the context
422
- content_ids = store.content_items
423
- .where(column => value)
424
- .select(:id)
425
- .map { |row| row[:id] }
426
-
427
- return [] if content_ids.empty?
428
-
429
- # Find facts linked to those content items via provenance
430
- fact_ids = store.provenance
431
- .where(content_item_id: content_ids)
432
- .select(:fact_id)
433
- .distinct
434
- .map { |row| row[:fact_id] }
435
-
436
- return [] if fact_ids.empty?
437
-
438
- # Batch fetch facts and their provenance
439
- facts_by_id = batch_find_facts(store, fact_ids)
440
- receipts_by_fact_id = batch_find_receipts(store, fact_ids)
441
-
442
- results = Core::ResultBuilder.build_results(
443
- fact_ids,
444
- facts_by_id: facts_by_id,
445
- receipts_by_fact_id: receipts_by_fact_id,
446
- source: source
447
- )
448
- results.take(limit)
449
- end
450
-
451
- def facts_by_tool_dual(tool_name, limit:, scope:)
452
- template = Recall::DualQueryTemplate.new(@manager)
453
- results = template.execute(scope: scope, limit: limit) do |store, source|
454
- facts_by_tool_single(store, tool_name, limit: limit, source: source)
455
- end
456
- dedupe_and_sort(results, limit)
457
- end
458
-
459
- def facts_by_tool_legacy(tool_name, limit:, scope:)
460
- facts_by_tool_single(@legacy_store, tool_name, limit: limit, source: :legacy)
461
- end
462
-
463
- def facts_by_tool_single(store, tool_name, limit:, source:)
464
- # Find content items where the tool was used
465
- content_ids = store.tool_calls
466
- .where(tool_name: tool_name)
467
- .select(:content_item_id)
468
- .distinct
469
- .map { |row| row[:content_item_id] }
470
-
471
- return [] if content_ids.empty?
472
-
473
- # Find facts linked to those content items via provenance
474
- fact_ids = store.provenance
475
- .where(content_item_id: content_ids)
476
- .select(:fact_id)
477
- .distinct
478
- .map { |row| row[:fact_id] }
479
-
480
- return [] if fact_ids.empty?
481
-
482
- # Batch fetch facts and their provenance
483
- facts_by_id = batch_find_facts(store, fact_ids)
484
- receipts_by_fact_id = batch_find_receipts(store, fact_ids)
485
-
486
- results = Core::ResultBuilder.build_results(
487
- fact_ids,
488
- facts_by_id: facts_by_id,
489
- receipts_by_fact_id: receipts_by_fact_id,
490
- source: source
491
- )
492
- results.take(limit)
493
- end
494
-
495
- # Semantic search helpers
496
-
497
- def query_semantic_dual(text, limit:, scope:, mode:)
498
- template = Recall::DualQueryTemplate.new(@manager)
499
- results = template.execute(scope: scope, limit: limit) do |store, source|
500
- query_semantic_single(store, text, limit: limit * 3, mode: mode, source: source)
501
- end
502
- # Use similarity-preserving dedupe (not source/time sort) to keep RRF ordering
503
- Core::FactRanker.dedupe_by_fact_id(results, limit)
504
- end
505
-
506
- def query_semantic_legacy(text, limit:, scope:, mode:)
507
- query_semantic_single(@legacy_store, text, limit: limit, mode: mode, source: :legacy)
508
- end
509
-
510
- def query_semantic_single(store, text, limit:, mode:, source:)
511
- vector_results = []
512
- text_results = []
513
-
514
- # Text search mode (FTS) - run first for expansion detection
515
- if mode == :text || mode == :both
516
- text_results = search_by_fts(store, text, limit, source)
517
- end
518
-
519
- # Vector search mode - skip if FTS already found strong match
520
- if mode == :vector || mode == :both
521
- skip_vector = mode == :both && strong_fts_signal?(store, text)
522
- vector_results = search_by_vector(store, text, limit, source) unless skip_vector
523
- end
524
-
525
- # Merge and deduplicate
526
- merge_search_results(vector_results, text_results, limit)
527
- end
528
-
529
- def search_by_vector(store, query_text, limit, source)
530
- # Generate query embedding
531
- query_embedding = @embedding_generator.generate(query_text)
532
-
533
- # Fast path: use sqlite-vec KNN when available
534
- vec_index = store.vector_index
535
- if vec_index.available?
536
- return search_by_vector_native(store, vec_index, query_embedding, limit, source)
537
- end
538
-
539
- # Fallback: JSON + Ruby cosine similarity
540
- search_by_vector_fallback(store, query_embedding, limit, source)
541
- end
542
-
543
- def search_by_vector_native(store, vec_index, query_embedding, limit, source)
544
- matches = vec_index.search(query_embedding, k: limit)
545
- return [] if matches.empty?
546
-
547
- fact_ids = matches.map { |m| m[:fact_id] }
548
- facts_by_id = batch_find_facts(store, fact_ids)
549
- receipts_by_fact_id = batch_find_receipts(store, fact_ids)
550
-
551
- Core::ResultBuilder.build_results_with_scores(
552
- matches,
553
- facts_by_id: facts_by_id,
554
- receipts_by_fact_id: receipts_by_fact_id,
555
- source: source
556
- )
557
- end
558
-
559
- def search_by_vector_fallback(store, query_embedding, limit, source)
560
- # Load facts with embeddings
561
- facts_data = store.facts_with_embeddings(limit: 5000)
562
- return [] if facts_data.empty?
563
-
564
- # Deduplicate: group facts by embedding, score unique embeddings only, fan out
565
- unique_candidates, fact_groups = dedup_candidates(facts_data)
566
- return [] if unique_candidates.empty?
567
-
568
- # Calculate similarities on unique embeddings only
569
- top_unique = Embeddings::Similarity.top_k(query_embedding, unique_candidates, limit)
570
-
571
- # Fan out: expand unique matches back to all fact_ids sharing that embedding
572
- top_matches = fan_out_matches(top_unique, fact_groups, limit)
573
-
574
- # Batch fetch full fact details
575
- fact_ids = top_matches.map { |m| m[:candidate][:fact_id] }
576
- facts_by_id = batch_find_facts(store, fact_ids)
577
- receipts_by_fact_id = batch_find_receipts(store, fact_ids)
578
-
579
- # Build results with similarity scores
580
- Core::ResultBuilder.build_results_with_scores(
581
- top_matches,
582
- facts_by_id: facts_by_id,
583
- receipts_by_fact_id: receipts_by_fact_id,
584
- source: source
585
- )
586
- end
587
-
588
- # Group facts by embedding_json, return unique candidates + mapping
589
- def dedup_candidates(facts_data)
590
- groups = {} # embedding_json → [fact_ids]
591
- unique = {} # embedding_json → parsed candidate (first occurrence)
592
-
593
- facts_data.each do |row|
594
- key = row[:embedding_json]
595
- if unique.key?(key)
596
- groups[key] << row[:id]
597
- else
598
- candidate = Core::EmbeddingCandidateBuilder.parse_candidate(row)
599
- next unless candidate
600
- unique[key] = candidate
601
- groups[key] = [row[:id]]
602
- end
603
- end
604
-
605
- [unique.values, groups]
606
- end
607
-
608
- # Expand unique matches back to all fact_ids sharing the same embedding
609
- def fan_out_matches(top_unique, fact_groups, limit)
610
- results = []
611
- top_unique.each do |match|
612
- candidate = match[:candidate]
613
- similarity = match[:similarity]
614
-
615
- # Find the group key for this candidate's embedding
616
- group_key = fact_groups.find { |_key, ids| ids.include?(candidate[:fact_id]) }&.first
617
- next unless group_key
618
-
619
- fact_groups[group_key].each do |fact_id|
620
- results << {
621
- candidate: candidate.merge(fact_id: fact_id),
622
- similarity: similarity
623
- }
624
- break if results.size >= limit
625
- end
626
- break if results.size >= limit
627
- end
628
-
629
- results
630
- end
631
-
632
- def search_by_fts(store, query_text, limit, source)
633
- fts = Index::LexicalFTS.new(store)
634
- ranked_results = fts.search_with_ranks(query_text, limit: limit * 2)
635
-
636
- return [] if ranked_results.empty?
637
-
638
- content_ids = ranked_results.map { |r| r[:content_item_id] }
639
-
640
- # Map content_item_ids to fact_ids, preserving FTS rank order
641
- provenance_rows = store.provenance
642
- .where(content_item_id: content_ids)
643
- .select(:fact_id, :content_item_id)
644
- .all
645
-
646
- content_to_facts = provenance_rows.group_by { |r| r[:content_item_id] }
647
-
648
- # Build ordered fact list with normalized BM25 scores
649
- # FTS5 rank values are negative (more negative = better match)
650
- ranks = ranked_results.map { |r| r[:rank] }
651
- min_rank = ranks.min # Most negative = best
652
- max_rank = ranks.max # Least negative = worst
653
- range = (max_rank - min_rank).abs
654
-
655
- seen_fact_ids = Set.new
656
- scored_matches = []
657
-
658
- ranked_results.each do |r|
659
- similarity = if range > 0
660
- # Normalize: best rank → 1.0, worst rank → 0.1
661
- 0.1 + 0.9 * ((max_rank - r[:rank]).abs / range)
662
- else
663
- 0.8 # Single result gets a reasonable score
664
- end
665
-
666
- fact_ids = content_to_facts[r[:content_item_id]]&.map { |p| p[:fact_id] } || []
667
- fact_ids.each do |fid|
668
- next if seen_fact_ids.include?(fid)
669
- seen_fact_ids.add(fid)
670
- scored_matches << {fact_id: fid, similarity: similarity}
671
- end
672
- end
673
-
674
- return [] if scored_matches.empty?
675
-
676
- fact_ids = scored_matches.map { |m| m[:fact_id] }
677
- facts_by_id = batch_find_facts(store, fact_ids)
678
- receipts_by_fact_id = batch_find_receipts(store, fact_ids)
679
-
680
- Core::ResultBuilder.build_results_with_scores(
681
- scored_matches,
682
- facts_by_id: facts_by_id,
683
- receipts_by_fact_id: receipts_by_fact_id,
684
- source: source
685
- ).take(limit)
686
- end
687
-
688
- def merge_search_results(vector_results, text_results, limit)
689
- Core::FactRanker.merge_search_results(vector_results, text_results, limit)
690
- end
691
-
692
- def strong_fts_signal?(store, query_text)
693
- fts = Index::LexicalFTS.new(store)
694
- ranked_results = fts.search_with_ranks(query_text, limit: 5)
695
- Recall::ExpansionDetector.strong_fts_signal?(ranked_results)
696
- end
697
-
698
- # Multi-concept search helpers
699
-
700
- def query_concepts_dual(concepts, limit:, scope:)
701
- template = Recall::DualQueryTemplate.new(@manager)
702
- results = template.execute(scope: scope, limit: limit) do |store, source|
703
- query_concepts_single(store, concepts, limit: limit * 2, source: source)
704
- end
705
- # Deduplicate and sort by average similarity
706
- dedupe_by_fact_id(results, limit)
707
- end
708
-
709
- def query_concepts_legacy(concepts, limit:, scope:)
710
- query_concepts_single(@legacy_store, concepts, limit: limit, source: :legacy)
711
- end
712
-
713
- def query_concepts_single(store, concepts, limit:, source:)
714
- # I/O: Search each concept independently with higher limit for intersection
715
- concept_results = concepts.map do |concept|
716
- search_by_vector(store, concept, limit * 5, source)
717
- end
718
-
719
- # Pure logic: Rank by average similarity across all concepts
720
- Core::ConceptRanker.rank_by_concepts(concept_results, limit)
721
- end
722
-
723
- def dedupe_by_fact_id(results, limit)
724
- Core::FactRanker.dedupe_by_fact_id(results, limit)
172
+ @engine.query_concepts(concepts, limit: limit, scope: scope)
725
173
  end
726
174
  end
727
175
  end