claude_memory 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/CLAUDE.md +1 -1
  3. data/.claude/memory.sqlite3 +0 -0
  4. data/.claude/memory.sqlite3-shm +0 -0
  5. data/.claude/memory.sqlite3-wal +0 -0
  6. data/.claude/rules/claude_memory.generated.md +1 -1
  7. data/.claude/settings.json +5 -0
  8. data/.claude/settings.local.json +19 -1
  9. data/.claude-plugin/marketplace.json +5 -2
  10. data/.claude-plugin/plugin.json +16 -3
  11. data/.gitattributes +1 -0
  12. data/CHANGELOG.md +91 -0
  13. data/CLAUDE.md +28 -14
  14. data/README.md +6 -2
  15. data/Rakefile +22 -0
  16. data/db/migrations/011_add_tool_call_summaries.rb +18 -0
  17. data/db/migrations/012_add_vec_indexing_support.rb +19 -0
  18. data/docs/improvements.md +225 -61
  19. data/docs/influence/claude-mem.md +253 -0
  20. data/docs/influence/claude-supermemory.md +158 -430
  21. data/docs/influence/episodic-memory.md +217 -0
  22. data/docs/influence/grepai.md +163 -839
  23. data/docs/influence/kbs.md +437 -0
  24. data/docs/influence/qmd.md +210 -481
  25. data/docs/quality_review.md +344 -56
  26. data/hooks/hooks.json +19 -15
  27. data/lefthook.yml +4 -0
  28. data/lib/claude_memory/commands/checks/database_check.rb +7 -0
  29. data/lib/claude_memory/commands/checks/vec_check.rb +73 -0
  30. data/lib/claude_memory/commands/compact_command.rb +104 -0
  31. data/lib/claude_memory/commands/doctor_command.rb +1 -0
  32. data/lib/claude_memory/commands/export_command.rb +116 -0
  33. data/lib/claude_memory/commands/git_lfs_command.rb +117 -0
  34. data/lib/claude_memory/commands/help_command.rb +2 -0
  35. data/lib/claude_memory/commands/hook_command.rb +110 -9
  36. data/lib/claude_memory/commands/index_command.rb +63 -8
  37. data/lib/claude_memory/commands/initializers/global_initializer.rb +26 -7
  38. data/lib/claude_memory/commands/initializers/project_initializer.rb +35 -12
  39. data/lib/claude_memory/commands/registry.rb +4 -1
  40. data/lib/claude_memory/commands/serve_mcp_command.rb +10 -1
  41. data/lib/claude_memory/commands/stats_command.rb +12 -1
  42. data/lib/claude_memory/configuration.rb +40 -1
  43. data/lib/claude_memory/core/snippet_extractor.rb +21 -19
  44. data/lib/claude_memory/hook/context_injector.rb +75 -0
  45. data/lib/claude_memory/hook/error_classifier.rb +67 -0
  46. data/lib/claude_memory/hook/handler.rb +21 -1
  47. data/lib/claude_memory/index/lexical_fts.rb +88 -16
  48. data/lib/claude_memory/index/vector_index.rb +171 -0
  49. data/lib/claude_memory/infrastructure/schema_validator.rb +5 -1
  50. data/lib/claude_memory/ingest/ingester.rb +26 -1
  51. data/lib/claude_memory/ingest/observation_compressor.rb +177 -0
  52. data/lib/claude_memory/mcp/instructions_builder.rb +76 -0
  53. data/lib/claude_memory/mcp/server.rb +3 -1
  54. data/lib/claude_memory/mcp/tool_definitions.rb +65 -27
  55. data/lib/claude_memory/mcp/tools.rb +137 -2
  56. data/lib/claude_memory/publish.rb +28 -27
  57. data/lib/claude_memory/recall/dual_query_template.rb +1 -12
  58. data/lib/claude_memory/recall.rb +71 -17
  59. data/lib/claude_memory/resolve/resolver.rb +22 -18
  60. data/lib/claude_memory/store/sqlite_store.rb +17 -1
  61. data/lib/claude_memory/store/store_manager.rb +19 -24
  62. data/lib/claude_memory/sweep/sweeper.rb +41 -2
  63. data/lib/claude_memory/version.rb +1 -1
  64. data/lib/claude_memory.rb +15 -0
  65. data/scripts/hook-runner.sh +14 -0
  66. data/scripts/serve-mcp.sh +14 -0
  67. data/skills/setup-memory/SKILL.md +6 -0
  68. metadata +36 -2
@@ -5,27 +5,37 @@ module ClaudeMemory
5
5
  # MCP tool definitions for Claude Memory
6
6
  # Pure data structure - no logic, just tool schemas
7
7
  module ToolDefinitions
8
+ # Annotations for read-only query tools (safe to call anytime)
9
+ READ_ONLY = {readOnlyHint: true, idempotentHint: true, destructiveHint: false}.freeze
10
+
11
+ # Annotations for state-changing but non-destructive tools
12
+ WRITE = {readOnlyHint: false, idempotentHint: false, destructiveHint: false}.freeze
13
+
14
+ # Annotations for idempotent writes (safe to retry)
15
+ WRITE_IDEMPOTENT = {readOnlyHint: false, idempotentHint: true, destructiveHint: false}.freeze
16
+
8
17
  # Returns array of tool definitions for MCP protocol
9
18
  # @return [Array<Hash>] Tool definitions with name, description, and inputSchema
10
19
  def self.all
11
20
  [
12
21
  {
13
22
  name: "memory.recall",
14
- description: "Search facts matching a query from both global and project memory databases.",
23
+ description: "Search facts matching a query from both global and project memory databases. Returns full facts with provenance (~800 tokens/result, ~300 with compact: true). For token-efficient browsing, use memory.recall_index first (~200 tokens/result), then memory.recall_details for selected facts.",
15
24
  inputSchema: {
16
25
  type: "object",
17
26
  properties: {
18
27
  query: {type: "string", description: "Search query for existing knowledge (e.g., 'authentication flow', 'error handling', 'database setup')"},
19
28
  limit: {type: "integer", description: "Max results", default: 10},
20
29
  scope: {type: "string", enum: ["all", "global", "project"], description: "Filter by scope: 'all' (default), 'global', or 'project'", default: "all"},
21
- compact: {type: "boolean", description: "Omit provenance receipts for ~60% smaller responses", default: false}
30
+ compact: {type: "boolean", description: "Omit provenance receipts for ~60% smaller responses (~800 → ~300 tokens/result)", default: false}
22
31
  },
23
32
  required: ["query"]
24
- }
33
+ },
34
+ annotations: READ_ONLY
25
35
  },
26
36
  {
27
37
  name: "memory.recall_index",
28
- description: "Lightweight search returning fact previews, IDs, and token costs. Follow up with memory.recall_details for full information.",
38
+ description: "Lightweight search returning fact previews, IDs, and token costs (~200 tokens/result). Step 1 of progressive disclosure: browse results here, then call memory.recall_details with selected fact IDs for full information (~500 tokens/fact). Saves ~60% tokens vs memory.recall when you only need a few facts.",
29
39
  inputSchema: {
30
40
  type: "object",
31
41
  properties: {
@@ -34,11 +44,12 @@ module ClaudeMemory
34
44
  scope: {type: "string", enum: ["all", "global", "project"], description: "Scope: 'all' (both), 'global' (user-wide), 'project' (current only)", default: "all"}
35
45
  },
36
46
  required: ["query"]
37
- }
47
+ },
48
+ annotations: READ_ONLY
38
49
  },
39
50
  {
40
51
  name: "memory.recall_details",
41
- description: "Fetch full details for specific fact IDs. Use after memory.recall_index.",
52
+ description: "Fetch full details for specific fact IDs (~500 tokens/fact). Step 2 of progressive disclosure: use after memory.recall_index to get provenance and metadata for selected facts only.",
42
53
  inputSchema: {
43
54
  type: "object",
44
55
  properties: {
@@ -46,7 +57,8 @@ module ClaudeMemory
46
57
  scope: {type: "string", enum: ["project", "global"], description: "Database to query", default: "project"}
47
58
  },
48
59
  required: ["fact_ids"]
49
- }
60
+ },
61
+ annotations: READ_ONLY
50
62
  },
51
63
  {
52
64
  name: "memory.explain",
@@ -58,7 +70,8 @@ module ClaudeMemory
58
70
  scope: {type: "string", enum: ["global", "project"], description: "Which database to look in", default: "project"}
59
71
  },
60
72
  required: ["fact_id"]
61
- }
73
+ },
74
+ annotations: READ_ONLY
62
75
  },
63
76
  {
64
77
  name: "memory.changes",
@@ -70,7 +83,8 @@ module ClaudeMemory
70
83
  limit: {type: "integer", default: 20},
71
84
  scope: {type: "string", enum: ["all", "global", "project"], default: "all"}
72
85
  }
73
- }
86
+ },
87
+ annotations: READ_ONLY
74
88
  },
75
89
  {
76
90
  name: "memory.conflicts",
@@ -80,7 +94,8 @@ module ClaudeMemory
80
94
  properties: {
81
95
  scope: {type: "string", enum: ["all", "global", "project"], default: "all"}
82
96
  }
83
- }
97
+ },
98
+ annotations: READ_ONLY
84
99
  },
85
100
  {
86
101
  name: "memory.sweep_now",
@@ -91,7 +106,8 @@ module ClaudeMemory
91
106
  budget_seconds: {type: "integer", default: 5},
92
107
  scope: {type: "string", enum: ["global", "project"], default: "project"}
93
108
  }
94
- }
109
+ },
110
+ annotations: WRITE
95
111
  },
96
112
  {
97
113
  name: "memory.status",
@@ -99,7 +115,8 @@ module ClaudeMemory
99
115
  inputSchema: {
100
116
  type: "object",
101
117
  properties: {}
102
- }
118
+ },
119
+ annotations: READ_ONLY
103
120
  },
104
121
  {
105
122
  name: "memory.stats",
@@ -109,7 +126,8 @@ module ClaudeMemory
109
126
  properties: {
110
127
  scope: {type: "string", enum: ["all", "global", "project"], description: "Show stats for: all (default), global, or project", default: "all"}
111
128
  }
112
- }
129
+ },
130
+ annotations: READ_ONLY
113
131
  },
114
132
  {
115
133
  name: "memory.promote",
@@ -120,7 +138,8 @@ module ClaudeMemory
120
138
  fact_id: {type: "integer", description: "Project fact ID to promote to global"}
121
139
  },
122
140
  required: ["fact_id"]
123
- }
141
+ },
142
+ annotations: WRITE_IDEMPOTENT
124
143
  },
125
144
  {
126
145
  name: "memory.store_extraction",
@@ -174,7 +193,8 @@ module ClaudeMemory
174
193
  scope: {type: "string", enum: ["global", "project"], description: "Default scope for facts", default: "project"}
175
194
  },
176
195
  required: ["facts"]
177
- }
196
+ },
197
+ annotations: WRITE
178
198
  },
179
199
  {
180
200
  name: "memory.decisions",
@@ -184,7 +204,8 @@ module ClaudeMemory
184
204
  properties: {
185
205
  limit: {type: "integer", default: 10, description: "Maximum results to return"}
186
206
  }
187
- }
207
+ },
208
+ annotations: READ_ONLY
188
209
  },
189
210
  {
190
211
  name: "memory.conventions",
@@ -194,7 +215,8 @@ module ClaudeMemory
194
215
  properties: {
195
216
  limit: {type: "integer", default: 20, description: "Maximum results to return"}
196
217
  }
197
- }
218
+ },
219
+ annotations: READ_ONLY
198
220
  },
199
221
  {
200
222
  name: "memory.architecture",
@@ -204,7 +226,8 @@ module ClaudeMemory
204
226
  properties: {
205
227
  limit: {type: "integer", default: 10, description: "Maximum results to return"}
206
228
  }
207
- }
229
+ },
230
+ annotations: READ_ONLY
208
231
  },
209
232
  {
210
233
  name: "memory.facts_by_tool",
@@ -217,7 +240,8 @@ module ClaudeMemory
217
240
  scope: {type: "string", enum: ["all", "global", "project"], default: "all", description: "Filter by scope"}
218
241
  },
219
242
  required: ["tool_name"]
220
- }
243
+ },
244
+ annotations: READ_ONLY
221
245
  },
222
246
  {
223
247
  name: "memory.facts_by_context",
@@ -230,11 +254,12 @@ module ClaudeMemory
230
254
  limit: {type: "integer", default: 20, description: "Maximum results to return"},
231
255
  scope: {type: "string", enum: ["all", "global", "project"], default: "all", description: "Filter by scope"}
232
256
  }
233
- }
257
+ },
258
+ annotations: READ_ONLY
234
259
  },
235
260
  {
236
261
  name: "memory.recall_semantic",
237
- description: "Search facts using semantic similarity (finds conceptually related facts using vector embeddings)",
262
+ description: "Search facts using semantic similarity (finds conceptually related facts using vector embeddings). ~800 tokens/result, ~300 with compact: true.",
238
263
  inputSchema: {
239
264
  type: "object",
240
265
  properties: {
@@ -242,10 +267,11 @@ module ClaudeMemory
242
267
  mode: {type: "string", enum: ["vector", "text", "both"], default: "both", description: "Search mode: vector (embeddings), text (FTS), or both (hybrid)"},
243
268
  limit: {type: "integer", default: 10, description: "Maximum results to return"},
244
269
  scope: {type: "string", enum: ["all", "global", "project"], default: "all", description: "Filter by scope"},
245
- compact: {type: "boolean", description: "Omit provenance receipts for ~60% smaller responses", default: false}
270
+ compact: {type: "boolean", description: "Omit provenance receipts for ~60% smaller responses (~800 → ~300 tokens/result)", default: false}
246
271
  },
247
272
  required: ["query"]
248
- }
273
+ },
274
+ annotations: READ_ONLY
249
275
  },
250
276
  {
251
277
  name: "memory.search_concepts",
@@ -262,10 +288,11 @@ module ClaudeMemory
262
288
  },
263
289
  limit: {type: "integer", default: 10, description: "Maximum results to return"},
264
290
  scope: {type: "string", enum: ["all", "global", "project"], default: "all", description: "Filter by scope"},
265
- compact: {type: "boolean", description: "Omit provenance receipts for ~60% smaller responses", default: false}
291
+ compact: {type: "boolean", description: "Omit provenance receipts for ~60% smaller responses (~800 → ~300 tokens/result)", default: false}
266
292
  },
267
293
  required: ["concepts"]
268
- }
294
+ },
295
+ annotations: READ_ONLY
269
296
  },
270
297
  {
271
298
  name: "memory.fact_graph",
@@ -278,7 +305,8 @@ module ClaudeMemory
278
305
  scope: {type: "string", enum: ["global", "project"], description: "Which database to search", default: "project"}
279
306
  },
280
307
  required: ["fact_id"]
281
- }
308
+ },
309
+ annotations: READ_ONLY
282
310
  },
283
311
  {
284
312
  name: "memory.check_setup",
@@ -286,7 +314,17 @@ module ClaudeMemory
286
314
  inputSchema: {
287
315
  type: "object",
288
316
  properties: {}
289
- }
317
+ },
318
+ annotations: READ_ONLY
319
+ },
320
+ {
321
+ name: "memory.list_projects",
322
+ description: "List all known memory databases with fact counts and status. Shows global database, current project, and other projects discovered from promoted facts. Helps discover available search scopes before querying.",
323
+ inputSchema: {
324
+ type: "object",
325
+ properties: {}
326
+ },
327
+ annotations: READ_ONLY
290
328
  }
291
329
  ]
292
330
  end
@@ -68,6 +68,8 @@ module ClaudeMemory
68
68
  fact_graph(arguments)
69
69
  when "memory.check_setup"
70
70
  check_setup
71
+ when "memory.list_projects"
72
+ list_projects
71
73
  else
72
74
  {error: "Unknown tool: #{name}"}
73
75
  end
@@ -507,8 +509,112 @@ module ClaudeMemory
507
509
  }
508
510
  end
509
511
 
512
+ def list_projects
513
+ result = {global: nil, current_project: nil, other_projects: []}
514
+
515
+ if @manager
516
+ result[:global] = list_global_database
517
+ result[:current_project] = list_current_project
518
+ result[:other_projects] = discover_other_projects
519
+ elsif @legacy_store
520
+ result[:global] = {
521
+ exists: true,
522
+ path: @legacy_store.db.opts[:database],
523
+ facts_active: @legacy_store.facts.where(status: "active").count,
524
+ entities: @legacy_store.entities.count
525
+ }
526
+ end
527
+
528
+ result[:project_count] = 1 + result[:other_projects].size
529
+ result
530
+ end
531
+
532
+ def list_global_database
533
+ if @manager.global_exists?
534
+ @manager.ensure_global!
535
+ store = @manager.global_store
536
+ {
537
+ exists: true,
538
+ path: @manager.global_db_path,
539
+ facts_active: store.facts.where(status: "active").count,
540
+ facts_total: store.facts.count,
541
+ entities: store.entities.count
542
+ }
543
+ else
544
+ {exists: false, path: @manager.global_db_path}
545
+ end
546
+ end
547
+
548
+ def list_current_project
549
+ if @manager.project_exists?
550
+ @manager.ensure_project!
551
+ store = @manager.project_store
552
+ {
553
+ exists: true,
554
+ path: @manager.project_path,
555
+ db_path: @manager.project_db_path,
556
+ facts_active: store.facts.where(status: "active").count,
557
+ facts_total: store.facts.count,
558
+ entities: store.entities.count
559
+ }
560
+ else
561
+ {exists: false, path: @manager.project_path, db_path: @manager.project_db_path}
562
+ end
563
+ end
564
+
565
+ def discover_other_projects
566
+ return [] unless @manager.global_exists?
567
+
568
+ @manager.ensure_global!
569
+ global = @manager.global_store
570
+
571
+ # Find project paths from promoted facts
572
+ promoted_paths = global.facts
573
+ .where(Sequel.like(:created_from, "promoted:%"))
574
+ .select(:created_from)
575
+ .distinct
576
+ .all
577
+ .filter_map { |f|
578
+ match = f[:created_from]&.match(/\Apromoted:(.+):\d+\z/)
579
+ match[1] if match
580
+ }
581
+ .uniq
582
+
583
+ # Also check for project_path values on facts
584
+ fact_paths = global.facts
585
+ .exclude(project_path: nil)
586
+ .select(:project_path)
587
+ .distinct
588
+ .all
589
+ .map { |f| f[:project_path] }
590
+
591
+ all_paths = (promoted_paths + fact_paths).uniq
592
+ current = @manager.project_path
593
+
594
+ all_paths.filter_map { |path|
595
+ next if path == current
596
+
597
+ db_path = File.join(path, ".claude", "memory.sqlite3")
598
+ entry = {path: path, db_path: db_path, exists: File.exist?(db_path)}
599
+
600
+ if entry[:exists]
601
+ begin
602
+ temp_store = Store::SQLiteStore.new(db_path)
603
+ entry[:facts_active] = temp_store.facts.where(status: "active").count
604
+ entry[:facts_total] = temp_store.facts.count
605
+ entry[:entities] = temp_store.entities.count
606
+ temp_store.close
607
+ rescue Sequel::DatabaseError, Extralite::Error, IOError => _e
608
+ entry[:error] = "Could not read database"
609
+ end
610
+ end
611
+
612
+ entry
613
+ }
614
+ end
615
+
510
616
  def db_stats(store)
511
- {
617
+ stats = {
512
618
  exists: true,
513
619
  facts_total: store.facts.count,
514
620
  facts_active: store.facts.where(status: "active").count,
@@ -516,12 +622,30 @@ module ClaudeMemory
516
622
  open_conflicts: store.conflicts.where(status: "open").count,
517
623
  schema_version: store.schema_version
518
624
  }
625
+
626
+ vec_index = store.vector_index
627
+ stats[:vec_available] = vec_index.available?
628
+ stats[:vec_indexed] = vec_index.coverage_stats[:vec_indexed] if vec_index.available?
629
+
630
+ if fts_legacy?(store)
631
+ stats[:fts_legacy] = true
632
+ stats[:optimization_hint] = "Run 'claude-memory compact' to reduce database size by ~40%"
633
+ end
634
+
635
+ stats
636
+ end
637
+
638
+ def fts_legacy?(store)
639
+ row = store.db.fetch("SELECT sql FROM sqlite_master WHERE name = 'content_fts' AND type = 'table'").first
640
+ row && !row[:sql].to_s.include?("content=''")
641
+ rescue
642
+ false
519
643
  end
520
644
 
521
645
  def detailed_stats(store)
522
646
  active_facts = store.facts.where(status: "active").count
523
647
 
524
- {
648
+ stats = {
525
649
  exists: true,
526
650
  facts: fact_stats(store, active_facts),
527
651
  entities: entity_stats(store),
@@ -530,6 +654,10 @@ module ClaudeMemory
530
654
  conflicts: conflict_stats(store),
531
655
  schema_version: store.schema_version
532
656
  }
657
+
658
+ stats[:vec] = vec_stats(store, active_facts)
659
+
660
+ stats
533
661
  end
534
662
 
535
663
  def fact_stats(store, active_facts)
@@ -594,6 +722,13 @@ module ClaudeMemory
594
722
  }
595
723
  end
596
724
 
725
+ def vec_stats(store, _active_facts)
726
+ vec_index = store.vector_index
727
+ result = {available: vec_index.available?}
728
+ result.merge!(vec_index.coverage_stats) if vec_index.available?
729
+ result
730
+ end
731
+
597
732
  def conflict_stats(store)
598
733
  open = store.conflicts.where(status: "open").count
599
734
  resolved = store.conflicts.where(status: "resolved").count
@@ -14,19 +14,8 @@ module ClaudeMemory
14
14
  end
15
15
 
16
16
  def generate_snapshot(since: nil)
17
- facts = fetch_active_facts
18
- conflicts = @store.open_conflicts
19
- recent_supersessions = fetch_recent_supersessions(since)
20
-
21
- sections = []
22
- sections << generate_decisions_section(facts)
23
- sections << generate_conventions_section(facts)
24
- sections << generate_constraints_section(facts)
25
- sections << generate_conflicts_section(conflicts) if conflicts.any?
26
- sections << generate_supersessions_section(recent_supersessions) if recent_supersessions.any?
27
-
28
17
  header = <<~HEADER
29
- <!--
18
+ <!--
30
19
  This file is auto-generated by claude-memory.
31
20
  Do not edit manually - changes will be overwritten.
32
21
  Generated: #{Time.now.utc.iso8601}
@@ -36,14 +25,15 @@ module ClaudeMemory
36
25
 
37
26
  HEADER
38
27
 
39
- header + sections.compact.reject(&:empty?).join("\n")
28
+ header + generate_body(since: since)
40
29
  end
41
30
 
42
31
  def publish!(mode: :shared, granularity: :repo, since: nil, rules_dir: nil)
43
- content = generate_snapshot(since: since)
44
32
  path = output_path(mode, rules_dir: rules_dir)
33
+ body = generate_body(since: since)
45
34
 
46
- if should_write?(path, content)
35
+ if should_write?(path, body)
36
+ content = generate_snapshot(since: since)
47
37
  @fs.write(path, content)
48
38
  ensure_import_exists(mode, path, rules_dir: rules_dir)
49
39
  {status: :updated, path: path}
@@ -163,22 +153,33 @@ module ClaudeMemory
163
153
  lines.join("\n") + "\n"
164
154
  end
165
155
 
166
- def should_write?(path, content)
167
- return true unless @fs.exist?(path)
156
+ def generate_body(since: nil)
157
+ facts = fetch_active_facts
158
+ conflicts = @store.open_conflicts
159
+ recent_supersessions = fetch_recent_supersessions(since)
160
+
161
+ sections = []
162
+ sections << generate_decisions_section(facts)
163
+ sections << generate_conventions_section(facts)
164
+ sections << generate_constraints_section(facts)
165
+ sections << generate_conflicts_section(conflicts) if conflicts.any?
166
+ sections << generate_supersessions_section(recent_supersessions) if recent_supersessions.any?
168
167
 
169
- # Compare content without timestamp to avoid unnecessary rewrites
170
- existing_content = @fs.read(path)
171
- existing_normalized = normalize_for_comparison(existing_content)
172
- new_normalized = normalize_for_comparison(content)
168
+ sections.compact.reject(&:empty?).join("\n")
169
+ end
170
+
171
+ def should_write?(path, new_body)
172
+ return true unless @fs.exist?(path)
173
173
 
174
- existing_hash = Digest::SHA256.hexdigest(existing_normalized)
175
- new_hash = Digest::SHA256.hexdigest(new_normalized)
176
- existing_hash != new_hash
174
+ existing_body = extract_body(@fs.read(path))
175
+ existing_body != new_body
177
176
  end
178
177
 
179
- def normalize_for_comparison(content)
180
- # Remove timestamp line for comparison to prevent churn on timestamp-only changes
181
- content.gsub(/^ Generated: .+$/, "")
178
+ def extract_body(content)
179
+ # Strip the HTML comment header and "# Project Memory" heading
180
+ content
181
+ .sub(/\A<!--.*?-->\s*/m, "")
182
+ .sub(/\A# Project Memory\s*/m, "")
182
183
  end
183
184
 
184
185
  def ensure_import_exists(mode, path, rules_dir: nil)
@@ -44,20 +44,9 @@ module ClaudeMemory
44
44
  end
45
45
 
46
46
  def query_store(source_label, &operation)
47
- store = (source_label == :project) ? @manager.project_store : @manager.global_store
48
- return [] unless store
49
-
50
- ensure_store!(source_label)
47
+ store = @manager.store_for_scope(source_label.to_s)
51
48
  operation.call(store, source_label)
52
49
  end
53
-
54
- def ensure_store!(source_label)
55
- if source_label == :project
56
- @manager.ensure_project!
57
- else
58
- @manager.ensure_global!
59
- end
60
- end
61
50
  end
62
51
  end
63
52
  end
@@ -499,7 +499,8 @@ module ClaudeMemory
499
499
  results = template.execute(scope: scope, limit: limit) do |store, source|
500
500
  query_semantic_single(store, text, limit: limit * 3, mode: mode, source: source)
501
501
  end
502
- dedupe_and_sort(results, limit)
502
+ # Use similarity-preserving dedupe (not source/time sort) to keep RRF ordering
503
+ Core::FactRanker.dedupe_by_fact_id(results, limit)
503
504
  end
504
505
 
505
506
  def query_semantic_legacy(text, limit:, scope:, mode:)
@@ -529,6 +530,33 @@ module ClaudeMemory
529
530
  # Generate query embedding
530
531
  query_embedding = @embedding_generator.generate(query_text)
531
532
 
533
+ # Fast path: use sqlite-vec KNN when available
534
+ vec_index = store.vector_index
535
+ if vec_index.available?
536
+ return search_by_vector_native(store, vec_index, query_embedding, limit, source)
537
+ end
538
+
539
+ # Fallback: JSON + Ruby cosine similarity
540
+ search_by_vector_fallback(store, query_embedding, limit, source)
541
+ end
542
+
543
+ def search_by_vector_native(store, vec_index, query_embedding, limit, source)
544
+ matches = vec_index.search(query_embedding, k: limit)
545
+ return [] if matches.empty?
546
+
547
+ fact_ids = matches.map { |m| m[:fact_id] }
548
+ facts_by_id = batch_find_facts(store, fact_ids)
549
+ receipts_by_fact_id = batch_find_receipts(store, fact_ids)
550
+
551
+ Core::ResultBuilder.build_results_with_scores(
552
+ matches,
553
+ facts_by_id: facts_by_id,
554
+ receipts_by_fact_id: receipts_by_fact_id,
555
+ source: source
556
+ )
557
+ end
558
+
559
+ def search_by_vector_fallback(store, query_embedding, limit, source)
532
560
  # Load facts with embeddings
533
561
  facts_data = store.facts_with_embeddings(limit: 5000)
534
562
  return [] if facts_data.empty?
@@ -556,33 +584,59 @@ module ClaudeMemory
556
584
  end
557
585
 
558
586
  def search_by_fts(store, query_text, limit, source)
559
- # Use existing FTS search infrastructure
560
587
  fts = Index::LexicalFTS.new(store)
561
- content_ids = fts.search(query_text, limit: limit * 2)
588
+ ranked_results = fts.search_with_ranks(query_text, limit: limit * 2)
562
589
 
563
- return [] if content_ids.empty?
590
+ return [] if ranked_results.empty?
564
591
 
565
- # Find facts from content items
566
- fact_ids = store.provenance
592
+ content_ids = ranked_results.map { |r| r[:content_item_id] }
593
+
594
+ # Map content_item_ids to fact_ids, preserving FTS rank order
595
+ provenance_rows = store.provenance
567
596
  .where(content_item_id: content_ids)
568
- .select(:fact_id)
569
- .distinct
570
- .map { |row| row[:fact_id] }
597
+ .select(:fact_id, :content_item_id)
598
+ .all
571
599
 
572
- return [] if fact_ids.empty?
600
+ content_to_facts = provenance_rows.group_by { |r| r[:content_item_id] }
601
+
602
+ # Build ordered fact list with normalized BM25 scores
603
+ # FTS5 rank values are negative (more negative = better match)
604
+ ranks = ranked_results.map { |r| r[:rank] }
605
+ min_rank = ranks.min # Most negative = best
606
+ max_rank = ranks.max # Least negative = worst
607
+ range = (max_rank - min_rank).abs
608
+
609
+ seen_fact_ids = Set.new
610
+ scored_matches = []
611
+
612
+ ranked_results.each do |r|
613
+ similarity = if range > 0
614
+ # Normalize: best rank → 1.0, worst rank → 0.1
615
+ 0.1 + 0.9 * ((max_rank - r[:rank]).abs / range)
616
+ else
617
+ 0.8 # Single result gets a reasonable score
618
+ end
573
619
 
574
- # Batch fetch facts
620
+ fact_ids = content_to_facts[r[:content_item_id]]&.map { |p| p[:fact_id] } || []
621
+ fact_ids.each do |fid|
622
+ next if seen_fact_ids.include?(fid)
623
+ seen_fact_ids.add(fid)
624
+ scored_matches << {fact_id: fid, similarity: similarity}
625
+ end
626
+ end
627
+
628
+ return [] if scored_matches.empty?
629
+
630
+ fact_ids = scored_matches.map { |m| m[:fact_id] }
575
631
  facts_by_id = batch_find_facts(store, fact_ids)
576
632
  receipts_by_fact_id = batch_find_receipts(store, fact_ids)
577
633
 
578
- results = Core::ResultBuilder.build_results(
579
- fact_ids,
634
+ Core::ResultBuilder.build_results_with_scores(
635
+ scored_matches,
580
636
  facts_by_id: facts_by_id,
581
637
  receipts_by_fact_id: receipts_by_fact_id,
582
- source: source,
583
- similarity: 0.5 # Default score for FTS results
584
- )
585
- results.take(limit)
638
+ source: source
639
+ ).take(limit)
586
640
  end
587
641
 
588
642
  def merge_search_results(vector_results, text_results, limit)