claude_memory 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/.claude/CLAUDE.md +1 -1
  3. data/.claude/memory.sqlite3 +0 -0
  4. data/.claude/memory.sqlite3-shm +0 -0
  5. data/.claude/memory.sqlite3-wal +0 -0
  6. data/.claude/settings.local.json +13 -1
  7. data/.claude-plugin/marketplace.json +1 -1
  8. data/.claude-plugin/plugin.json +1 -2
  9. data/.gitattributes +1 -0
  10. data/CHANGELOG.md +61 -0
  11. data/CLAUDE.md +4 -2
  12. data/README.md +1 -1
  13. data/docs/improvements.md +164 -22
  14. data/docs/influence/lossless-claw.md +409 -0
  15. data/docs/influence/qmd.md +201 -130
  16. data/docs/quality_review.md +344 -56
  17. data/lib/claude_memory/commands/checks/database_check.rb +7 -0
  18. data/lib/claude_memory/commands/compact_command.rb +10 -0
  19. data/lib/claude_memory/commands/export_command.rb +14 -6
  20. data/lib/claude_memory/commands/git_lfs_command.rb +117 -0
  21. data/lib/claude_memory/commands/index_command.rb +30 -2
  22. data/lib/claude_memory/commands/registry.rb +2 -1
  23. data/lib/claude_memory/commands/serve_mcp_command.rb +10 -1
  24. data/lib/claude_memory/commands/stats_command.rb +12 -1
  25. data/lib/claude_memory/configuration.rb +40 -1
  26. data/lib/claude_memory/core/snippet_extractor.rb +21 -19
  27. data/lib/claude_memory/index/lexical_fts.rb +88 -16
  28. data/lib/claude_memory/ingest/ingester.rb +1 -1
  29. data/lib/claude_memory/mcp/error_classifier.rb +171 -0
  30. data/lib/claude_memory/mcp/instructions_builder.rb +62 -4
  31. data/lib/claude_memory/mcp/query_guide.rb +41 -22
  32. data/lib/claude_memory/mcp/response_formatter.rb +3 -1
  33. data/lib/claude_memory/mcp/server.rb +1 -0
  34. data/lib/claude_memory/mcp/text_summary.rb +2 -1
  35. data/lib/claude_memory/mcp/tool_definitions.rb +54 -23
  36. data/lib/claude_memory/mcp/tools.rb +33 -16
  37. data/lib/claude_memory/recall.rb +51 -5
  38. data/lib/claude_memory/resolve/resolver.rb +22 -18
  39. data/lib/claude_memory/store/store_manager.rb +19 -24
  40. data/lib/claude_memory/sweep/maintenance.rb +126 -0
  41. data/lib/claude_memory/sweep/sweeper.rb +82 -67
  42. data/lib/claude_memory/version.rb +1 -1
  43. data/lib/claude_memory.rb +8 -0
  44. data/v0.6.0.ANNOUNCE +32 -0
  45. metadata +10 -1
@@ -11,50 +11,69 @@ module ClaudeMemory
11
11
  PROMPT_TEXT = <<~GUIDE
12
12
  # ClaudeMemory Search Strategy Guide
13
13
 
14
- ## Tool Selection
14
+ ## Tool Escalation — Cheap to Expensive
15
15
 
16
- **memory.recall** — Full-text keyword search (fastest)
16
+ Start with fast, cheap tools. Escalate only when you need more detail.
17
+
18
+ ### Tier 1: Fast Lookup (< 50ms, low tokens)
19
+
20
+ **memory.recall** — Full-text keyword search
17
21
  - Use for: exact terms, known predicates, specific entity names
18
22
  - Example: "PostgreSQL", "authentication", "deployment"
19
23
  - Returns: facts with provenance receipts
24
+ - Cost: ~200-500 tokens per call
25
+
26
+ **memory.decisions** / **memory.conventions** / **memory.architecture**
27
+ - Use for: quick access to known categories
28
+ - Cost: ~100-300 tokens per call
29
+
30
+ ### Tier 2: Broad Search (< 200ms, moderate tokens)
20
31
 
21
32
  **memory.recall_semantic** — Vector similarity search
22
33
  - Use for: conceptual queries, paraphrased questions, "find things like X"
23
34
  - Modes: `vector` (embeddings only), `text` (FTS only), `both` (hybrid, recommended)
24
- - Example: "how does the app handle user sessions" (no exact keyword match needed)
35
+ - Example: "how does the app handle user sessions"
25
36
  - Returns: facts ranked by similarity score (0.0-1.0)
37
+ - Cost: ~300-800 tokens per call
26
38
 
27
39
  **memory.search_concepts** — Multi-concept AND query
28
40
  - Use for: intersection of 2-5 concepts that must ALL be present
29
41
  - Example: concepts=["authentication", "JWT", "middleware"]
30
- - Returns: facts matching all concepts, ranked by average similarity
42
+ - Cost: ~300-800 tokens per call
31
43
 
32
- **memory.recall_index** → **memory.recall_details** — Progressive disclosure
33
- - Use for: browsing large result sets efficiently
34
- - Step 1: `recall_index` returns lightweight previews with token estimates
35
- - Step 2: `recall_details` fetches full data for selected fact IDs
36
- - Saves tokens when you only need a few facts from many matches
44
+ **memory.recall_index** — Lightweight previews
45
+ - Use for: browsing large result sets before committing to full details
46
+ - Cost: ~100-200 tokens (compact previews)
37
47
 
38
- ## Shortcut Tools
48
+ ### Tier 3: Targeted Deep Dive (moderate tokens)
39
49
 
40
- **memory.decisions** — Architectural decisions and constraints
41
- **memory.conventions** — Coding style preferences and rules
42
- **memory.architecture** — Framework choices and patterns
50
+ **memory.recall_details** — Full details for selected fact IDs
51
+ - Use after: `recall_index` to fetch only the facts you need
52
+ - Cost: ~200-600 tokens per call
43
53
 
44
- ## Context-Aware Tools
54
+ **memory.explain** — Detailed provenance for a specific fact
55
+ - Use when: you need to know where a fact came from and how confident it is
56
+ - Cost: ~300-500 tokens per call
57
+
58
+ ### Tier 4: Relationship Exploration (higher tokens)
59
+
60
+ **memory.fact_graph** — Dependency graph visualization
61
+ - Use when: you need to understand how facts relate (supersession chains, conflicts)
62
+ - Cost: ~400-1000 tokens per call
45
63
 
46
64
  **memory.facts_by_tool** — Facts discovered via specific tool (Read, Edit, Bash)
47
65
  **memory.facts_by_context** — Facts from specific git branch or directory
66
+ - Use when: you need facts from a specific workflow context
67
+ - Cost: ~300-800 tokens per call
68
+
69
+ ## Recommended Workflow
48
70
 
49
- ## Decision Tree
71
+ 1. **Start broad**: `memory.recall` or shortcut tools (decisions/conventions/architecture)
72
+ 2. **Refine if needed**: `memory.recall_semantic` for fuzzy matches
73
+ 3. **Drill into specifics**: `memory.recall_details` or `memory.explain` for selected facts
74
+ 4. **Explore relationships**: `memory.fact_graph` only when you need lineage/conflicts
50
75
 
51
- 1. Know the exact keyword? → `memory.recall`
52
- 2. Conceptual/fuzzy question? → `memory.recall_semantic` (mode: both)
53
- 3. Need intersection of topics? → `memory.search_concepts`
54
- 4. Looking for decisions? → `memory.decisions`
55
- 5. Looking for conventions? → `memory.conventions`
56
- 6. Many results expected? → `memory.recall_index` then `memory.recall_details`
57
- 7. Need provenance? → `memory.explain` with fact ID
76
+ Do NOT jump to Tier 3-4 tools first. Tier 1 tools answer most questions.
58
77
 
59
78
  ## Score Interpretation (semantic search)
60
79
 
@@ -233,7 +233,7 @@ module ClaudeMemory
233
233
  # @param stats [Hash] Sweeper stats
234
234
  # @return [Hash] Formatted sweep response
235
235
  def self.format_sweep_stats(scope, stats)
236
- {
236
+ result = {
237
237
  scope: scope,
238
238
  proposed_expired: stats[:proposed_facts_expired],
239
239
  disputed_expired: stats[:disputed_facts_expired],
@@ -241,6 +241,8 @@ module ClaudeMemory
241
241
  content_pruned: stats[:old_content_pruned],
242
242
  elapsed_seconds: stats[:elapsed_seconds].round(3)
243
243
  }
244
+ result[:escalation_level] = stats[:escalation_level].to_s if stats[:escalation_level]
245
+ result
244
246
  end
245
247
 
246
248
  # Format semantic search results with similarity scores
@@ -4,6 +4,7 @@ require "json"
4
4
  require_relative "instructions_builder"
5
5
  require_relative "query_guide"
6
6
  require_relative "text_summary"
7
+ require_relative "error_classifier"
7
8
 
8
9
  module ClaudeMemory
9
10
  module MCP
@@ -104,7 +104,8 @@ module ClaudeMemory
104
104
  end
105
105
 
106
106
  def self.summarize_sweep(result)
107
- "Sweep (#{result[:scope]}): #{result[:proposed_expired]} proposed expired, " \
107
+ escalation = result[:escalation_level] ? " [#{result[:escalation_level]}]" : ""
108
+ "Sweep (#{result[:scope]})#{escalation}: #{result[:proposed_expired]} proposed expired, " \
108
109
  "#{result[:disputed_expired]} disputed expired, " \
109
110
  "#{result[:orphaned_deleted]} orphaned deleted, " \
110
111
  "#{result[:content_pruned]} content pruned " \
@@ -5,6 +5,15 @@ module ClaudeMemory
5
5
  # MCP tool definitions for Claude Memory
6
6
  # Pure data structure - no logic, just tool schemas
7
7
  module ToolDefinitions
8
+ # Annotations for read-only query tools (safe to call anytime)
9
+ READ_ONLY = {readOnlyHint: true, idempotentHint: true, destructiveHint: false}.freeze
10
+
11
+ # Annotations for state-changing but non-destructive tools
12
+ WRITE = {readOnlyHint: false, idempotentHint: false, destructiveHint: false}.freeze
13
+
14
+ # Annotations for idempotent writes (safe to retry)
15
+ WRITE_IDEMPOTENT = {readOnlyHint: false, idempotentHint: true, destructiveHint: false}.freeze
16
+
8
17
  # Returns array of tool definitions for MCP protocol
9
18
  # @return [Array<Hash>] Tool definitions with name, description, and inputSchema
10
19
  def self.all
@@ -21,7 +30,8 @@ module ClaudeMemory
21
30
  compact: {type: "boolean", description: "Omit provenance receipts for ~60% smaller responses (~800 → ~300 tokens/result)", default: false}
22
31
  },
23
32
  required: ["query"]
24
- }
33
+ },
34
+ annotations: READ_ONLY
25
35
  },
26
36
  {
27
37
  name: "memory.recall_index",
@@ -34,7 +44,8 @@ module ClaudeMemory
34
44
  scope: {type: "string", enum: ["all", "global", "project"], description: "Scope: 'all' (both), 'global' (user-wide), 'project' (current only)", default: "all"}
35
45
  },
36
46
  required: ["query"]
37
- }
47
+ },
48
+ annotations: READ_ONLY
38
49
  },
39
50
  {
40
51
  name: "memory.recall_details",
@@ -46,7 +57,8 @@ module ClaudeMemory
46
57
  scope: {type: "string", enum: ["project", "global"], description: "Database to query", default: "project"}
47
58
  },
48
59
  required: ["fact_ids"]
49
- }
60
+ },
61
+ annotations: READ_ONLY
50
62
  },
51
63
  {
52
64
  name: "memory.explain",
@@ -58,7 +70,8 @@ module ClaudeMemory
58
70
  scope: {type: "string", enum: ["global", "project"], description: "Which database to look in", default: "project"}
59
71
  },
60
72
  required: ["fact_id"]
61
- }
73
+ },
74
+ annotations: READ_ONLY
62
75
  },
63
76
  {
64
77
  name: "memory.changes",
@@ -70,7 +83,8 @@ module ClaudeMemory
70
83
  limit: {type: "integer", default: 20},
71
84
  scope: {type: "string", enum: ["all", "global", "project"], default: "all"}
72
85
  }
73
- }
86
+ },
87
+ annotations: READ_ONLY
74
88
  },
75
89
  {
76
90
  name: "memory.conflicts",
@@ -80,18 +94,21 @@ module ClaudeMemory
80
94
  properties: {
81
95
  scope: {type: "string", enum: ["all", "global", "project"], default: "all"}
82
96
  }
83
- }
97
+ },
98
+ annotations: READ_ONLY
84
99
  },
85
100
  {
86
101
  name: "memory.sweep_now",
87
- description: "Run maintenance sweep on a database",
102
+ description: "Run maintenance sweep on a database. Use escalate: true for guaranteed progress (normal → aggressive → fallback).",
88
103
  inputSchema: {
89
104
  type: "object",
90
105
  properties: {
91
106
  budget_seconds: {type: "integer", default: 5},
92
- scope: {type: "string", enum: ["global", "project"], default: "project"}
107
+ scope: {type: "string", enum: ["global", "project"], default: "project"},
108
+ escalate: {type: "boolean", default: false, description: "Enable three-level escalation (normal → aggressive → fallback) to guarantee progress"}
93
109
  }
94
- }
110
+ },
111
+ annotations: WRITE
95
112
  },
96
113
  {
97
114
  name: "memory.status",
@@ -99,7 +116,8 @@ module ClaudeMemory
99
116
  inputSchema: {
100
117
  type: "object",
101
118
  properties: {}
102
- }
119
+ },
120
+ annotations: READ_ONLY
103
121
  },
104
122
  {
105
123
  name: "memory.stats",
@@ -109,7 +127,8 @@ module ClaudeMemory
109
127
  properties: {
110
128
  scope: {type: "string", enum: ["all", "global", "project"], description: "Show stats for: all (default), global, or project", default: "all"}
111
129
  }
112
- }
130
+ },
131
+ annotations: READ_ONLY
113
132
  },
114
133
  {
115
134
  name: "memory.promote",
@@ -120,7 +139,8 @@ module ClaudeMemory
120
139
  fact_id: {type: "integer", description: "Project fact ID to promote to global"}
121
140
  },
122
141
  required: ["fact_id"]
123
- }
142
+ },
143
+ annotations: WRITE_IDEMPOTENT
124
144
  },
125
145
  {
126
146
  name: "memory.store_extraction",
@@ -174,7 +194,8 @@ module ClaudeMemory
174
194
  scope: {type: "string", enum: ["global", "project"], description: "Default scope for facts", default: "project"}
175
195
  },
176
196
  required: ["facts"]
177
- }
197
+ },
198
+ annotations: WRITE
178
199
  },
179
200
  {
180
201
  name: "memory.decisions",
@@ -184,7 +205,8 @@ module ClaudeMemory
184
205
  properties: {
185
206
  limit: {type: "integer", default: 10, description: "Maximum results to return"}
186
207
  }
187
- }
208
+ },
209
+ annotations: READ_ONLY
188
210
  },
189
211
  {
190
212
  name: "memory.conventions",
@@ -194,7 +216,8 @@ module ClaudeMemory
194
216
  properties: {
195
217
  limit: {type: "integer", default: 20, description: "Maximum results to return"}
196
218
  }
197
- }
219
+ },
220
+ annotations: READ_ONLY
198
221
  },
199
222
  {
200
223
  name: "memory.architecture",
@@ -204,7 +227,8 @@ module ClaudeMemory
204
227
  properties: {
205
228
  limit: {type: "integer", default: 10, description: "Maximum results to return"}
206
229
  }
207
- }
230
+ },
231
+ annotations: READ_ONLY
208
232
  },
209
233
  {
210
234
  name: "memory.facts_by_tool",
@@ -217,7 +241,8 @@ module ClaudeMemory
217
241
  scope: {type: "string", enum: ["all", "global", "project"], default: "all", description: "Filter by scope"}
218
242
  },
219
243
  required: ["tool_name"]
220
- }
244
+ },
245
+ annotations: READ_ONLY
221
246
  },
222
247
  {
223
248
  name: "memory.facts_by_context",
@@ -230,7 +255,8 @@ module ClaudeMemory
230
255
  limit: {type: "integer", default: 20, description: "Maximum results to return"},
231
256
  scope: {type: "string", enum: ["all", "global", "project"], default: "all", description: "Filter by scope"}
232
257
  }
233
- }
258
+ },
259
+ annotations: READ_ONLY
234
260
  },
235
261
  {
236
262
  name: "memory.recall_semantic",
@@ -245,7 +271,8 @@ module ClaudeMemory
245
271
  compact: {type: "boolean", description: "Omit provenance receipts for ~60% smaller responses (~800 → ~300 tokens/result)", default: false}
246
272
  },
247
273
  required: ["query"]
248
- }
274
+ },
275
+ annotations: READ_ONLY
249
276
  },
250
277
  {
251
278
  name: "memory.search_concepts",
@@ -265,7 +292,8 @@ module ClaudeMemory
265
292
  compact: {type: "boolean", description: "Omit provenance receipts for ~60% smaller responses (~800 → ~300 tokens/result)", default: false}
266
293
  },
267
294
  required: ["concepts"]
268
- }
295
+ },
296
+ annotations: READ_ONLY
269
297
  },
270
298
  {
271
299
  name: "memory.fact_graph",
@@ -278,7 +306,8 @@ module ClaudeMemory
278
306
  scope: {type: "string", enum: ["global", "project"], description: "Which database to search", default: "project"}
279
307
  },
280
308
  required: ["fact_id"]
281
- }
309
+ },
310
+ annotations: READ_ONLY
282
311
  },
283
312
  {
284
313
  name: "memory.check_setup",
@@ -286,7 +315,8 @@ module ClaudeMemory
286
315
  inputSchema: {
287
316
  type: "object",
288
317
  properties: {}
289
- }
318
+ },
319
+ annotations: READ_ONLY
290
320
  },
291
321
  {
292
322
  name: "memory.list_projects",
@@ -294,7 +324,8 @@ module ClaudeMemory
294
324
  inputSchema: {
295
325
  type: "object",
296
326
  properties: {}
297
- }
327
+ },
328
+ annotations: READ_ONLY
298
329
  }
299
330
  ]
300
331
  end
@@ -6,6 +6,7 @@ require_relative "tool_helpers"
6
6
  require_relative "response_formatter"
7
7
  require_relative "tool_definitions"
8
8
  require_relative "setup_status_analyzer"
9
+ require_relative "error_classifier"
9
10
 
10
11
  module ClaudeMemory
11
12
  module MCP
@@ -79,7 +80,7 @@ module ClaudeMemory
79
80
 
80
81
  def recall(args)
81
82
  # Check if databases exist before querying
82
- return database_not_found_error(StandardError.new("Database not initialized")) unless databases_exist?
83
+ return database_not_found_error unless databases_exist?
83
84
 
84
85
  scope = extract_scope(args)
85
86
  limit = extract_limit(args)
@@ -87,8 +88,8 @@ module ClaudeMemory
87
88
  query = args["query"]
88
89
  results = @recall.query(query, limit: limit, scope: scope, include_raw_text: !compact)
89
90
  ResponseFormatter.format_recall_results(results, compact: compact, query: query)
90
- rescue Sequel::DatabaseError, Sequel::DatabaseConnectionError, SQLite3::CantOpenException, Errno::ENOENT => e
91
- database_not_found_error(e)
91
+ rescue Sequel::DatabaseError, Sequel::DatabaseConnectionError, Errno::ENOENT => e
92
+ classified_error(e, tool_name: "memory.recall")
92
93
  end
93
94
 
94
95
  def recall_index(args)
@@ -143,7 +144,12 @@ module ClaudeMemory
143
144
  return {error: "Database not available"} unless store
144
145
 
145
146
  sweeper = Sweep::Sweeper.new(store)
146
- stats = sweeper.run!(budget_seconds: args["budget_seconds"] || 5)
147
+ budget = args["budget_seconds"] || 5
148
+ stats = if args["escalate"]
149
+ sweeper.run_with_escalation!(budget_seconds: budget)
150
+ else
151
+ sweeper.run!(budget_seconds: budget)
152
+ end
147
153
  ResponseFormatter.format_sweep_stats(scope, stats)
148
154
  end
149
155
 
@@ -399,17 +405,16 @@ module ClaudeMemory
399
405
  end
400
406
  end
401
407
 
402
- def database_not_found_error(error)
403
- {
404
- error: "Database not found or not accessible",
405
- message: "ClaudeMemory may not be initialized. Run memory.check_setup for detailed status.",
406
- details: error.message,
407
- recommendations: [
408
- "Run memory.check_setup to diagnose the issue",
409
- "If not initialized, run: claude-memory init",
410
- "For help: claude-memory doctor"
411
- ]
412
- }
408
+ def database_not_found_error(error = nil)
409
+ if error
410
+ ErrorClassifier.build_error_response(error, tool_name: "recall")
411
+ else
412
+ ErrorClassifier.build_benign_response(:not_initialized, tool_name: "recall")
413
+ end
414
+ end
415
+
416
+ def classified_error(error, tool_name: nil)
417
+ ErrorClassifier.build_error_response(error, tool_name: tool_name)
413
418
  end
414
419
 
415
420
  def check_setup
@@ -604,7 +609,7 @@ module ClaudeMemory
604
609
  entry[:facts_total] = temp_store.facts.count
605
610
  entry[:entities] = temp_store.entities.count
606
611
  temp_store.close
607
- rescue => _e
612
+ rescue Sequel::DatabaseError, Extralite::Error, IOError => _e
608
613
  entry[:error] = "Could not read database"
609
614
  end
610
615
  end
@@ -627,9 +632,21 @@ module ClaudeMemory
627
632
  stats[:vec_available] = vec_index.available?
628
633
  stats[:vec_indexed] = vec_index.coverage_stats[:vec_indexed] if vec_index.available?
629
634
 
635
+ if fts_legacy?(store)
636
+ stats[:fts_legacy] = true
637
+ stats[:optimization_hint] = "Run 'claude-memory compact' to reduce database size by ~40%"
638
+ end
639
+
630
640
  stats
631
641
  end
632
642
 
643
+ def fts_legacy?(store)
644
+ row = store.db.fetch("SELECT sql FROM sqlite_master WHERE name = 'content_fts' AND type = 'table'").first
645
+ row && !row[:sql].to_s.include?("content=''")
646
+ rescue
647
+ false
648
+ end
649
+
633
650
  def detailed_stats(store)
634
651
  active_facts = store.facts.where(status: "active").count
635
652
 
@@ -561,13 +561,15 @@ module ClaudeMemory
561
561
  facts_data = store.facts_with_embeddings(limit: 5000)
562
562
  return [] if facts_data.empty?
563
563
 
564
- # Parse embeddings and prepare candidates
565
- candidates = Core::EmbeddingCandidateBuilder.build_candidates(facts_data)
564
+ # Deduplicate: group facts by embedding, score unique embeddings only, fan out
565
+ unique_candidates, fact_groups = dedup_candidates(facts_data)
566
+ return [] if unique_candidates.empty?
566
567
 
567
- return [] if candidates.empty?
568
+ # Calculate similarities on unique embeddings only
569
+ top_unique = Embeddings::Similarity.top_k(query_embedding, unique_candidates, limit)
568
570
 
569
- # Calculate similarities and rank
570
- top_matches = Embeddings::Similarity.top_k(query_embedding, candidates, limit)
571
+ # Fan out: expand unique matches back to all fact_ids sharing that embedding
572
+ top_matches = fan_out_matches(top_unique, fact_groups, limit)
571
573
 
572
574
  # Batch fetch full fact details
573
575
  fact_ids = top_matches.map { |m| m[:candidate][:fact_id] }
@@ -583,6 +585,50 @@ module ClaudeMemory
583
585
  )
584
586
  end
585
587
 
588
+ # Group facts by embedding_json, return unique candidates + mapping
589
+ def dedup_candidates(facts_data)
590
+ groups = {} # embedding_json → [fact_ids]
591
+ unique = {} # embedding_json → parsed candidate (first occurrence)
592
+
593
+ facts_data.each do |row|
594
+ key = row[:embedding_json]
595
+ if unique.key?(key)
596
+ groups[key] << row[:id]
597
+ else
598
+ candidate = Core::EmbeddingCandidateBuilder.parse_candidate(row)
599
+ next unless candidate
600
+ unique[key] = candidate
601
+ groups[key] = [row[:id]]
602
+ end
603
+ end
604
+
605
+ [unique.values, groups]
606
+ end
607
+
608
+ # Expand unique matches back to all fact_ids sharing the same embedding
609
+ def fan_out_matches(top_unique, fact_groups, limit)
610
+ results = []
611
+ top_unique.each do |match|
612
+ candidate = match[:candidate]
613
+ similarity = match[:similarity]
614
+
615
+ # Find the group key for this candidate's embedding
616
+ group_key = fact_groups.find { |_key, ids| ids.include?(candidate[:fact_id]) }&.first
617
+ next unless group_key
618
+
619
+ fact_groups[group_key].each do |fact_id|
620
+ results << {
621
+ candidate: candidate.merge(fact_id: fact_id),
622
+ similarity: similarity
623
+ }
624
+ break if results.size >= limit
625
+ end
626
+ break if results.size >= limit
627
+ end
628
+
629
+ results
630
+ end
631
+
586
632
  def search_by_fts(store, query_text, limit, source)
587
633
  fts = Index::LexicalFTS.new(store)
588
634
  ranked_results = fts.search_with_ranks(query_text, limit: limit * 2)
@@ -9,8 +9,6 @@ module ClaudeMemory
9
9
 
10
10
  def apply(extraction, content_item_id: nil, occurred_at: nil, project_path: nil, scope: "project")
11
11
  occurred_at ||= Time.now.utc.iso8601
12
- @current_project_path = project_path
13
- @current_scope = scope
14
12
 
15
13
  result = {
16
14
  entities_created: 0,
@@ -27,7 +25,8 @@ module ClaudeMemory
27
25
  result[:entities_created] = entity_ids.size
28
26
 
29
27
  extraction.facts.each do |fact_data|
30
- outcome = resolve_fact(fact_data, entity_ids, content_item_id, occurred_at)
28
+ outcome = resolve_fact(fact_data, entity_ids, content_item_id, occurred_at,
29
+ project_path: project_path, scope: scope)
31
30
  result[:facts_created] += outcome[:created]
32
31
  result[:facts_superseded] += outcome[:superseded]
33
32
  result[:conflicts_created] += outcome[:conflicts]
@@ -49,12 +48,13 @@ module ClaudeMemory
49
48
  entity_ids
50
49
  end
51
50
 
52
- def resolve_fact(fact_data, entity_ids, content_item_id, occurred_at)
51
+ def resolve_fact(fact_data, entity_ids, content_item_id, occurred_at, project_path:, scope:)
53
52
  subject_id = resolve_subject(fact_data, entity_ids)
54
53
  existing_facts = @store.facts_for_slot(subject_id, fact_data[:predicate])
55
54
  resolution = determine_resolution(existing_facts, fact_data, entity_ids)
56
55
 
57
- apply_resolution(resolution, fact_data, subject_id, entity_ids, content_item_id, occurred_at, existing_facts)
56
+ apply_resolution(resolution, fact_data, subject_id, entity_ids, content_item_id, occurred_at, existing_facts,
57
+ project_path: project_path, scope: scope)
58
58
  end
59
59
 
60
60
  def resolve_subject(fact_data, entity_ids)
@@ -77,14 +77,16 @@ module ClaudeMemory
77
77
  end
78
78
  end
79
79
 
80
- def apply_resolution(resolution, fact_data, subject_id, entity_ids, content_item_id, occurred_at, existing_facts)
80
+ def apply_resolution(resolution, fact_data, subject_id, entity_ids, content_item_id, occurred_at, existing_facts, project_path:, scope:)
81
81
  case resolution
82
82
  when :reinforce
83
83
  apply_reinforcement(existing_facts, fact_data, entity_ids, content_item_id)
84
84
  when :conflict
85
- apply_conflict(existing_facts, fact_data, subject_id, content_item_id, occurred_at)
85
+ apply_conflict(existing_facts, fact_data, subject_id, content_item_id, occurred_at,
86
+ project_path: project_path, scope: scope)
86
87
  else
87
- apply_insert(fact_data, subject_id, entity_ids, content_item_id, occurred_at, existing_facts, resolution)
88
+ apply_insert(fact_data, subject_id, entity_ids, content_item_id, occurred_at, existing_facts, resolution,
89
+ project_path: project_path, scope: scope)
88
90
  end
89
91
  end
90
92
 
@@ -95,28 +97,30 @@ module ClaudeMemory
95
97
  {created: 0, superseded: 0, conflicts: 0, provenance: 1}
96
98
  end
97
99
 
98
- def apply_conflict(existing_facts, fact_data, subject_id, content_item_id, occurred_at)
99
- create_conflict(existing_facts.first[:id], fact_data, subject_id, content_item_id, occurred_at)
100
+ def apply_conflict(existing_facts, fact_data, subject_id, content_item_id, occurred_at, project_path:, scope:)
101
+ create_conflict(existing_facts.first[:id], fact_data, subject_id, content_item_id, occurred_at,
102
+ project_path: project_path, scope: scope)
100
103
  {created: 0, superseded: 0, conflicts: 1, provenance: 0}
101
104
  end
102
105
 
103
- def apply_insert(fact_data, subject_id, entity_ids, content_item_id, occurred_at, existing_facts, resolution)
106
+ def apply_insert(fact_data, subject_id, entity_ids, content_item_id, occurred_at, existing_facts, resolution, project_path:, scope:)
104
107
  superseded_count = 0
105
108
  if resolution == :supersede
106
109
  supersede_facts(existing_facts, occurred_at)
107
110
  superseded_count = existing_facts.size
108
111
  end
109
112
 
110
- fact_id = insert_new_fact(fact_data, subject_id, entity_ids, occurred_at)
113
+ fact_id = insert_new_fact(fact_data, subject_id, entity_ids, occurred_at,
114
+ project_path: project_path, scope: scope)
111
115
  link_superseded_facts(fact_id, existing_facts) if superseded_count > 0
112
116
  add_provenance(fact_id, content_item_id, fact_data)
113
117
 
114
118
  {created: 1, superseded: superseded_count, conflicts: 0, provenance: 1}
115
119
  end
116
120
 
117
- def insert_new_fact(fact_data, subject_id, entity_ids, occurred_at)
118
- fact_scope = fact_data[:scope_hint] || @current_scope
119
- fact_project = (fact_scope == "global") ? nil : @current_project_path
121
+ def insert_new_fact(fact_data, subject_id, entity_ids, occurred_at, project_path:, scope:)
122
+ fact_scope = fact_data[:scope_hint] || scope
123
+ fact_project = (fact_scope == "global") ? nil : project_path
120
124
 
121
125
  @store.insert_fact(
122
126
  subject_entity_id: subject_id,
@@ -153,7 +157,7 @@ module ClaudeMemory
153
157
  end
154
158
  end
155
159
 
156
- def create_conflict(existing_fact_id, new_fact_data, subject_id, content_item_id, occurred_at)
160
+ def create_conflict(existing_fact_id, new_fact_data, subject_id, content_item_id, occurred_at, project_path:, scope:)
157
161
  # Already within transaction from resolve_fact
158
162
  new_fact_id = @store.insert_fact(
159
163
  subject_entity_id: subject_id,
@@ -163,8 +167,8 @@ module ClaudeMemory
163
167
  confidence: new_fact_data[:confidence] || 1.0,
164
168
  status: "disputed",
165
169
  valid_from: occurred_at,
166
- scope: @current_scope,
167
- project_path: @current_project_path
170
+ scope: scope,
171
+ project_path: project_path
168
172
  )
169
173
 
170
174
  @store.insert_conflict(