claude_memory 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/memory.sqlite3 +0 -0
  3. data/.claude/rules/claude_memory.generated.md +32 -2
  4. data/.claude/settings.json +65 -15
  5. data/.claude/settings.local.json +5 -2
  6. data/.claude/skills/improve/SKILL.md +113 -25
  7. data/.claude/skills/upgrade-dependencies/SKILL.md +154 -0
  8. data/.claude-plugin/commands/distill-transcripts.md +98 -0
  9. data/.claude-plugin/commands/memory-recall.md +67 -0
  10. data/.claude-plugin/marketplace.json +2 -2
  11. data/.claude-plugin/plugin.json +3 -3
  12. data/.claude-plugin/scripts/hook-runner.sh +14 -0
  13. data/.claude-plugin/scripts/serve-mcp.sh +14 -0
  14. data/.ruby-version +1 -1
  15. data/CHANGELOG.md +90 -1
  16. data/CLAUDE.md +56 -18
  17. data/README.md +35 -0
  18. data/db/migrations/013_add_mcp_tool_calls.rb +26 -0
  19. data/db/migrations/014_canonicalize_predicates.rb +30 -0
  20. data/docs/improvements.md +74 -74
  21. data/docs/influence/claude-mem.md +1 -0
  22. data/docs/influence/claude-supermemory.md +1 -0
  23. data/docs/influence/episodic-memory.md +1 -0
  24. data/docs/influence/grepai.md +1 -0
  25. data/docs/influence/kbs.md +1 -0
  26. data/docs/influence/lossless-claw.md +1 -0
  27. data/docs/influence/qmd.md +1 -0
  28. data/docs/quality_review.md +119 -224
  29. data/hooks/hooks.json +39 -7
  30. data/lib/claude_memory/commands/checks/distill_check.rb +61 -0
  31. data/lib/claude_memory/commands/checks/hooks_check.rb +2 -2
  32. data/lib/claude_memory/commands/checks/vec_check.rb +2 -1
  33. data/lib/claude_memory/commands/completion_command.rb +149 -0
  34. data/lib/claude_memory/commands/doctor_command.rb +2 -0
  35. data/lib/claude_memory/commands/embeddings_command.rb +198 -0
  36. data/lib/claude_memory/commands/help_command.rb +12 -1
  37. data/lib/claude_memory/commands/hook_command.rb +2 -1
  38. data/lib/claude_memory/commands/index_command.rb +85 -78
  39. data/lib/claude_memory/commands/initializers/database_ensurer.rb +16 -0
  40. data/lib/claude_memory/commands/initializers/global_initializer.rb +2 -1
  41. data/lib/claude_memory/commands/initializers/hooks_configurator.rb +55 -11
  42. data/lib/claude_memory/commands/initializers/project_initializer.rb +2 -1
  43. data/lib/claude_memory/commands/install_skill_command.rb +78 -0
  44. data/lib/claude_memory/commands/registry.rb +47 -32
  45. data/lib/claude_memory/commands/reject_command.rb +62 -0
  46. data/lib/claude_memory/commands/restore_command.rb +77 -0
  47. data/lib/claude_memory/commands/skills/distill-transcripts.md +102 -0
  48. data/lib/claude_memory/commands/skills/memory-recall.md +67 -0
  49. data/lib/claude_memory/commands/stats_command.rb +98 -2
  50. data/lib/claude_memory/configuration.rb +14 -1
  51. data/lib/claude_memory/core/fact_ranker.rb +2 -2
  52. data/lib/claude_memory/core/rr_fusion.rb +23 -6
  53. data/lib/claude_memory/core/snippet_extractor.rb +7 -3
  54. data/lib/claude_memory/core/text_builder.rb +11 -0
  55. data/lib/claude_memory/distill/json_schema.md +8 -4
  56. data/lib/claude_memory/distill/null_distiller.rb +2 -0
  57. data/lib/claude_memory/domain/entity.rb +13 -1
  58. data/lib/claude_memory/domain/fact.rb +26 -2
  59. data/lib/claude_memory/domain/provenance.rb +0 -1
  60. data/lib/claude_memory/embeddings/api_adapter.rb +97 -0
  61. data/lib/claude_memory/embeddings/dimension_check.rb +23 -0
  62. data/lib/claude_memory/embeddings/fastembed_adapter.rb +46 -12
  63. data/lib/claude_memory/embeddings/generator.rb +4 -0
  64. data/lib/claude_memory/embeddings/inspector.rb +91 -0
  65. data/lib/claude_memory/embeddings/model_registry.rb +210 -0
  66. data/lib/claude_memory/embeddings/resolver.rb +44 -0
  67. data/lib/claude_memory/hook/context_injector.rb +58 -2
  68. data/lib/claude_memory/hook/distillation_runner.rb +46 -0
  69. data/lib/claude_memory/hook/handler.rb +11 -2
  70. data/lib/claude_memory/index/vector_index.rb +15 -2
  71. data/lib/claude_memory/infrastructure/schema_validator.rb +3 -3
  72. data/lib/claude_memory/ingest/ingester.rb +17 -0
  73. data/lib/claude_memory/mcp/handlers/context_handlers.rb +38 -0
  74. data/lib/claude_memory/mcp/handlers/management_handlers.rb +169 -0
  75. data/lib/claude_memory/mcp/handlers/query_handlers.rb +115 -0
  76. data/lib/claude_memory/mcp/handlers/setup_handlers.rb +211 -0
  77. data/lib/claude_memory/mcp/handlers/shortcut_handlers.rb +37 -0
  78. data/lib/claude_memory/mcp/handlers/stats_handlers.rb +205 -0
  79. data/lib/claude_memory/mcp/instructions_builder.rb +19 -1
  80. data/lib/claude_memory/mcp/query_guide.rb +10 -0
  81. data/lib/claude_memory/mcp/response_formatter.rb +1 -0
  82. data/lib/claude_memory/mcp/server.rb +22 -1
  83. data/lib/claude_memory/mcp/telemetry.rb +86 -0
  84. data/lib/claude_memory/mcp/text_summary.rb +26 -0
  85. data/lib/claude_memory/mcp/tool_definitions.rb +116 -4
  86. data/lib/claude_memory/mcp/tool_helpers.rb +43 -0
  87. data/lib/claude_memory/mcp/tools.rb +50 -679
  88. data/lib/claude_memory/publish.rb +40 -5
  89. data/lib/claude_memory/recall/dual_engine.rb +105 -0
  90. data/lib/claude_memory/recall/legacy_engine.rb +138 -0
  91. data/lib/claude_memory/recall/query_core.rb +371 -0
  92. data/lib/claude_memory/recall.rb +121 -673
  93. data/lib/claude_memory/resolve/predicate_policy.rb +63 -3
  94. data/lib/claude_memory/resolve/resolver.rb +43 -0
  95. data/lib/claude_memory/shortcuts.rb +4 -4
  96. data/lib/claude_memory/store/retry_handler.rb +61 -0
  97. data/lib/claude_memory/store/schema_manager.rb +68 -0
  98. data/lib/claude_memory/store/sqlite_store.rb +334 -201
  99. data/lib/claude_memory/store/store_manager.rb +50 -1
  100. data/lib/claude_memory/sweep/maintenance.rb +115 -1
  101. data/lib/claude_memory/sweep/sweeper.rb +3 -0
  102. data/lib/claude_memory/templates/hooks.example.json +26 -7
  103. data/lib/claude_memory/version.rb +1 -1
  104. data/lib/claude_memory.rb +16 -0
  105. metadata +48 -8
  106. data/.claude/memory.sqlite3-shm +0 -0
  107. data/.claude/memory.sqlite3-wal +0 -0
@@ -13,15 +13,21 @@ module ClaudeMemory
13
13
  SCOPE_PROJECT = "project"
14
14
 
15
15
  def call(args)
16
- opts = parse_options(args, {scope: SCOPE_ALL}) do |o|
16
+ opts = parse_options(args, {scope: SCOPE_ALL, tools: false, since_days: nil}) do |o|
17
17
  OptionParser.new do |parser|
18
18
  parser.banner = "Usage: claude-memory stats [options]"
19
19
  parser.on("--scope SCOPE", ["all", "global", "project"],
20
20
  "Show stats for: all (default), global, or project") { |v| o[:scope] = v }
21
+ parser.on("--tools", "Show MCP tool-call usage stats") { o[:tools] = true }
22
+ parser.on("--since DAYS", Integer, "Limit --tools to last N days") { |v| o[:since_days] = v }
21
23
  end
22
24
  end
23
25
  return 1 if opts.nil?
24
26
 
27
+ if opts[:tools]
28
+ return print_mcp_tool_call_stats(opts[:since_days])
29
+ end
30
+
25
31
  manager = ClaudeMemory::Store::StoreManager.new
26
32
 
27
33
  stdout.puts "ClaudeMemory Statistics"
@@ -42,6 +48,10 @@ module ClaudeMemory
42
48
 
43
49
  private
44
50
 
51
+ def open_readonly(db_path)
52
+ Sequel.connect("extralite://#{db_path}")
53
+ end
54
+
45
55
  def print_database_stats(label, db_path)
46
56
  stdout.puts "## #{label} DATABASE"
47
57
  stdout.puts
@@ -53,7 +63,7 @@ module ClaudeMemory
53
63
  end
54
64
 
55
65
  begin
56
- db = Sequel.sqlite(db_path, readonly: true)
66
+ db = open_readonly(db_path)
57
67
 
58
68
  # Facts statistics
59
69
  print_fact_stats(db)
@@ -245,6 +255,92 @@ module ClaudeMemory
245
255
  # Format number with comma separators (e.g., 1234567 => "1,234,567")
246
256
  num.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
247
257
  end
258
+
259
+ def print_mcp_tool_call_stats(since_days)
260
+ manager = ClaudeMemory::Store::StoreManager.new
261
+ db_path = manager.project_db_path
262
+
263
+ stdout.puts "MCP Tool Call Statistics"
264
+ stdout.puts "=" * 50
265
+
266
+ unless File.exist?(db_path)
267
+ stdout.puts "Project database does not exist: #{db_path}"
268
+ manager.close
269
+ return 0
270
+ end
271
+
272
+ db = open_readonly(db_path)
273
+
274
+ unless db.table_exists?(:mcp_tool_calls)
275
+ stdout.puts "No telemetry recorded yet (run MCP server first)."
276
+ db.disconnect
277
+ manager.close
278
+ return 0
279
+ end
280
+
281
+ dataset = db[:mcp_tool_calls]
282
+ if since_days
283
+ cutoff = (Time.now - since_days * 86400).utc.iso8601
284
+ dataset = dataset.where { called_at >= cutoff }
285
+ stdout.puts "Window: last #{since_days} day#{"s" unless since_days == 1}"
286
+ else
287
+ stdout.puts "Window: all time"
288
+ end
289
+ stdout.puts
290
+
291
+ total = dataset.count
292
+ if total.zero?
293
+ stdout.puts "No tool calls recorded in window."
294
+ db.disconnect
295
+ manager.close
296
+ return 0
297
+ end
298
+
299
+ errors = dataset.exclude(error_class: nil).count
300
+ error_rate = (errors * 100.0 / total).round(1)
301
+ stdout.puts "Total calls: #{format_number(total)}"
302
+ stdout.puts "Errors: #{format_number(errors)} (#{error_rate}%)"
303
+ stdout.puts
304
+
305
+ print_per_tool_breakdown(dataset)
306
+
307
+ db.disconnect
308
+ manager.close
309
+ 0
310
+ rescue Sequel::DatabaseError, Extralite::Error => e
311
+ stderr.puts "Error reading telemetry: #{e.message}"
312
+ 1
313
+ end
314
+
315
+ def print_per_tool_breakdown(dataset)
316
+ stdout.puts "Per-tool breakdown:"
317
+ stdout.puts " #{"Tool".ljust(28)} #{"Calls".rjust(7)} #{"Avg ms".rjust(8)} #{"P95 ms".rjust(8)} #{"Err %".rjust(6)}"
318
+
319
+ rows = dataset
320
+ .group_and_count(:tool_name)
321
+ .order(Sequel.desc(:count))
322
+ .all
323
+
324
+ rows.each do |row|
325
+ tool = row[:tool_name]
326
+ calls = row[:count]
327
+ durations = dataset.where(tool_name: tool).select_map(:duration_ms).sort
328
+ avg = (durations.sum.to_f / calls).round(1)
329
+ p95 = percentile(durations, 0.95)
330
+ tool_errors = dataset.where(tool_name: tool).exclude(error_class: nil).count
331
+ tool_err_rate = (tool_errors * 100.0 / calls).round(1)
332
+
333
+ stdout.puts " #{tool.to_s.ljust(28)} #{calls.to_s.rjust(7)} #{avg.to_s.rjust(8)} #{p95.to_s.rjust(8)} #{tool_err_rate.to_s.rjust(6)}"
334
+ end
335
+ end
336
+
337
+ def percentile(sorted, pct)
338
+ return 0 if sorted.empty?
339
+ idx = (sorted.size * pct).ceil - 1
340
+ idx = 0 if idx < 0
341
+ idx = sorted.size - 1 if idx >= sorted.size
342
+ sorted[idx]
343
+ end
248
344
  end
249
345
  end
250
346
  end
@@ -8,31 +8,44 @@ module ClaudeMemory
8
8
  class Configuration
9
9
  attr_reader :env
10
10
 
11
+ # @param env [Hash] environment variables (default: ENV)
11
12
  def initialize(env = ENV)
12
13
  @env = env
13
14
  end
14
15
 
16
+ # @return [String] user home directory
15
17
  def home_dir
16
18
  env["HOME"] || File.expand_path("~")
17
19
  end
18
20
 
21
+ # @return [String] project root directory (resolves git worktrees)
19
22
  def project_dir
20
23
  env["CLAUDE_PROJECT_DIR"] || resolve_project_dir
21
24
  end
22
25
 
26
+ # @return [String] Claude config directory (default: ~/.claude)
27
+ def claude_config_dir
28
+ env["CLAUDE_CONFIG_DIR"] || File.join(home_dir, ".claude")
29
+ end
30
+
31
+ # @return [String] path to global memory database
23
32
  def global_db_path
24
- File.join(home_dir, ".claude", "memory.sqlite3")
33
+ File.join(claude_config_dir, "memory.sqlite3")
25
34
  end
26
35
 
36
+ # @param project_path [String, nil] override project root (defaults to project_dir)
37
+ # @return [String] path to project memory database
27
38
  def project_db_path(project_path = nil)
28
39
  path = project_path || project_dir
29
40
  File.join(path, ".claude", "memory.sqlite3")
30
41
  end
31
42
 
43
+ # @return [String, nil] current Claude session ID from CLAUDE_SESSION_ID
32
44
  def session_id
33
45
  env["CLAUDE_SESSION_ID"]
34
46
  end
35
47
 
48
+ # @return [String, nil] path to current transcript from CLAUDE_TRANSCRIPT_PATH
36
49
  def transcript_path
37
50
  env["CLAUDE_TRANSCRIPT_PATH"]
38
51
  end
@@ -88,8 +88,8 @@ module ClaudeMemory
88
88
  # @param text_results [Array<Hash>] Results from text search with :fact and :similarity
89
89
  # @param limit [Integer] Maximum results to return
90
90
  # @return [Array<Hash>] Merged results sorted by RRF score descending
91
- def self.merge_search_results(vector_results, text_results, limit)
92
- RRFusion.fuse(vector_results, text_results, limit)
91
+ def self.merge_search_results(vector_results, text_results, limit, explain: false)
92
+ RRFusion.fuse(vector_results, text_results, limit, explain: explain)
93
93
  end
94
94
  end
95
95
  end
@@ -22,16 +22,23 @@ module ClaudeMemory
22
22
  # @param vector_weight [Float] Weight multiplier for vector rankings (default 1.0)
23
23
  # @param text_weight [Float] Weight multiplier for text rankings (default 1.0)
24
24
  # @return [Array<Hash>] Fused results sorted by RRF score, with :similarity set to RRF score
25
- def self.fuse(vector_results, text_results, limit, vector_weight: 1.0, text_weight: 1.0)
25
+ def self.fuse(vector_results, text_results, limit, vector_weight: 1.0, text_weight: 1.0, explain: false)
26
26
  scores = {}
27
+ traces = {} if explain
27
28
  fact_data = {}
28
29
 
29
30
  # Score vector results by rank position
30
31
  vector_results.each_with_index do |result, idx|
31
32
  fact_id = result[:fact][:id]
32
33
  rank = idx + 1 # 1-based rank
33
- scores[fact_id] = (scores[fact_id] || 0.0) + (vector_weight / (K + rank))
34
- scores[fact_id] += TOP_BONUS.fetch(rank, 0.0)
34
+ contribution = (vector_weight / (K + rank)) + TOP_BONUS.fetch(rank, 0.0)
35
+ scores[fact_id] = (scores[fact_id] || 0.0) + contribution
36
+ if explain
37
+ traces[fact_id] ||= {vec_rank: nil, vec_score: nil, fts_rank: nil, fts_score: nil, vec_rrf: nil, fts_rrf: nil}
38
+ traces[fact_id][:vec_rank] = rank
39
+ traces[fact_id][:vec_score] = result[:similarity]
40
+ traces[fact_id][:vec_rrf] = contribution.round(6)
41
+ end
35
42
  # Prefer vector result data (has real similarity score)
36
43
  fact_data[fact_id] = result
37
44
  end
@@ -40,8 +47,14 @@ module ClaudeMemory
40
47
  text_results.each_with_index do |result, idx|
41
48
  fact_id = result[:fact][:id]
42
49
  rank = idx + 1
43
- scores[fact_id] = (scores[fact_id] || 0.0) + (text_weight / (K + rank))
44
- scores[fact_id] += TOP_BONUS.fetch(rank, 0.0)
50
+ contribution = (text_weight / (K + rank)) + TOP_BONUS.fetch(rank, 0.0)
51
+ scores[fact_id] = (scores[fact_id] || 0.0) + contribution
52
+ if explain
53
+ traces[fact_id] ||= {vec_rank: nil, vec_score: nil, fts_rank: nil, fts_score: nil, vec_rrf: nil, fts_rrf: nil}
54
+ traces[fact_id][:fts_rank] = rank
55
+ traces[fact_id][:fts_score] = result[:similarity]
56
+ traces[fact_id][:fts_rrf] = contribution.round(6)
57
+ end
45
58
  # Only use text data if not already present from vector
46
59
  fact_data[fact_id] ||= result
47
60
  end
@@ -50,7 +63,11 @@ module ClaudeMemory
50
63
  scores
51
64
  .sort_by { |_id, score| -score }
52
65
  .take(limit)
53
- .map { |fact_id, score| fact_data[fact_id].merge(similarity: score) }
66
+ .map do |fact_id, score|
67
+ merged = fact_data[fact_id].merge(similarity: score)
68
+ merged[:score_trace] = traces[fact_id].merge(rrf_final: score.round(6)) if explain
69
+ merged
70
+ end
54
71
  end
55
72
  end
56
73
  end
@@ -32,8 +32,7 @@ module ClaudeMemory
32
32
 
33
33
  lines = parsed[:lines]
34
34
  best_line_idx = parsed[:best_line_idx]
35
- start_idx = [best_line_idx - CONTEXT_BEFORE, 0].max
36
- end_idx = [best_line_idx + CONTEXT_AFTER, lines.size - 1].min
35
+ start_idx, end_idx = snippet_range(lines, best_line_idx)
37
36
 
38
37
  {
39
38
  snippet: build_snippet(lines, best_line_idx),
@@ -81,10 +80,15 @@ module ClaudeMemory
81
80
  end
82
81
 
83
82
  # @api private
84
- def self.build_snippet(lines, center_idx)
83
+ def self.snippet_range(lines, center_idx)
85
84
  start_idx = [center_idx - CONTEXT_BEFORE, 0].max
86
85
  end_idx = [center_idx + CONTEXT_AFTER, lines.size - 1].min
86
+ [start_idx, end_idx]
87
+ end
87
88
 
89
+ # @api private
90
+ def self.build_snippet(lines, center_idx)
91
+ start_idx, end_idx = snippet_range(lines, center_idx)
88
92
  snippet = lines[start_idx..end_idx].join("\n")
89
93
  truncate(snippet)
90
94
  end
@@ -18,6 +18,17 @@ module ClaudeMemory
18
18
  parts.join(" ").strip
19
19
  end
20
20
 
21
+ # Truncate text to a maximum length with a suffix
22
+ # @param text [String, nil] Text to truncate
23
+ # @param max_length [Integer] Maximum length before truncation
24
+ # @param suffix [String] Suffix to append when truncated
25
+ # @return [String] Truncated text or original if within limit
26
+ def self.truncate(text, max_length, suffix: "...")
27
+ return "" if text.nil?
28
+ return text if text.length <= max_length
29
+ text[0, max_length] + suffix
30
+ end
31
+
21
32
  # Transform hash keys from strings to symbols
22
33
  # @param hash [Hash] Hash with string or symbol keys
23
34
  # @return [Hash] Hash with symbolized keys
@@ -66,13 +66,17 @@ This document defines the schema for extracted knowledge from transcripts.
66
66
  - **conflict**: `{kind: "conflict", value: true}` - indicates contradictory information detected
67
67
  - **time_boundary**: `{kind: "time_boundary", value: "2024-01-15"}` - temporal boundary marker
68
68
 
69
- ## Predicate Types (MVP)
69
+ ## Predicate Types
70
+
71
+ Canonical vocabulary defined in `lib/claude_memory/resolve/predicate_policy.rb`.
70
72
 
71
73
  | Predicate | Cardinality | Exclusive |
72
74
  |-----------|-------------|-----------|
73
75
  | convention | multi | no |
74
- | decision | multi (by scope) | no |
75
- | auth_method | single | yes |
76
+ | decision | multi | no |
77
+ | architecture | multi | no |
78
+ | uses_framework | multi | no |
79
+ | uses_language | multi | no |
76
80
  | uses_database | single | yes |
77
- | uses_framework | single | yes |
78
81
  | deployment_platform | single | yes |
82
+ | auth_method | single | yes |
@@ -73,6 +73,8 @@ module ClaudeMemory
73
73
  facts << build_fact("uses_framework", entity[:name], text, scope_hint)
74
74
  when "platform"
75
75
  facts << build_fact("deployment_platform", entity[:name], text, scope_hint)
76
+ when "language"
77
+ facts << build_fact("uses_language", entity[:name], text, scope_hint)
76
78
  end
77
79
  end
78
80
 
@@ -2,10 +2,18 @@
2
2
 
3
3
  module ClaudeMemory
4
4
  module Domain
5
- # Domain model representing an entity (database, framework, person, etc.)
5
+ # Domain model representing an entity (database, framework, person, etc.).
6
+ # Instances are immutable (frozen).
6
7
  class Entity
7
8
  attr_reader :id, :type, :canonical_name, :slug, :created_at
8
9
 
10
+ # @param attributes [Hash] entity attributes
11
+ # @option attributes [Integer] :id database primary key
12
+ # @option attributes [String] :type entity category (required, e.g. "database", "framework", "person")
13
+ # @option attributes [String] :canonical_name display name (required)
14
+ # @option attributes [String] :slug URL-safe identifier (required)
15
+ # @option attributes [String] :created_at ISO 8601 creation timestamp
16
+ # @raise [ArgumentError] if type, canonical_name, or slug is blank
9
17
  def initialize(attributes)
10
18
  @id = attributes[:id]
11
19
  @type = attributes[:type]
@@ -17,18 +25,22 @@ module ClaudeMemory
17
25
  freeze
18
26
  end
19
27
 
28
+ # @return [Boolean] true when type is "database"
20
29
  def database?
21
30
  type == "database"
22
31
  end
23
32
 
33
+ # @return [Boolean] true when type is "framework"
24
34
  def framework?
25
35
  type == "framework"
26
36
  end
27
37
 
38
+ # @return [Boolean] true when type is "person"
28
39
  def person?
29
40
  type == "person"
30
41
  end
31
42
 
43
+ # @return [Hash] all attributes as a plain hash
32
44
  def to_h
33
45
  {
34
46
  id: id,
@@ -2,13 +2,27 @@
2
2
 
3
3
  module ClaudeMemory
4
4
  module Domain
5
- # Domain model representing a fact in the memory system
6
- # Encapsulates business logic and validation
5
+ # Domain model representing a fact in the memory system.
6
+ # Encapsulates business logic and validation. Instances are immutable (frozen).
7
7
  class Fact
8
8
  attr_reader :id, :docid, :subject_name, :predicate, :object_literal,
9
9
  :status, :confidence, :scope, :project_path,
10
10
  :valid_from, :valid_to, :created_at
11
11
 
12
+ # @param attributes [Hash] fact attributes
13
+ # @option attributes [Integer] :id database primary key
14
+ # @option attributes [Integer] :docid FTS document id
15
+ # @option attributes [String] :subject_name entity name of the subject
16
+ # @option attributes [String] :predicate relationship type (required)
17
+ # @option attributes [String] :object_literal literal value (required)
18
+ # @option attributes [String] :status one of "active", "superseded", "rejected", "disputed"
19
+ # @option attributes [Float] :confidence score between 0 and 1 (default: 1.0)
20
+ # @option attributes [String] :scope "project" or "global" (default: "project")
21
+ # @option attributes [String] :project_path path for project-scoped facts
22
+ # @option attributes [String] :valid_from ISO 8601 start of validity
23
+ # @option attributes [String] :valid_to ISO 8601 end of validity (nil if current)
24
+ # @option attributes [String] :created_at ISO 8601 creation timestamp
25
+ # @raise [ArgumentError] if predicate, object_literal, or confidence is invalid
12
26
  def initialize(attributes)
13
27
  @id = attributes[:id]
14
28
  @docid = attributes[:docid]
@@ -27,22 +41,32 @@ module ClaudeMemory
27
41
  freeze
28
42
  end
29
43
 
44
+ # @return [Boolean] true when status is "active"
30
45
  def active?
31
46
  status == "active"
32
47
  end
33
48
 
49
+ # @return [Boolean] true when status is "superseded"
34
50
  def superseded?
35
51
  status == "superseded"
36
52
  end
37
53
 
54
+ # @return [Boolean] true when status is "rejected"
55
+ def rejected?
56
+ status == "rejected"
57
+ end
58
+
59
+ # @return [Boolean] true when scope is "global"
38
60
  def global?
39
61
  scope == "global"
40
62
  end
41
63
 
64
+ # @return [Boolean] true when scope is "project"
42
65
  def project?
43
66
  scope == "project"
44
67
  end
45
68
 
69
+ # @return [Hash] all attributes as a plain hash
46
70
  def to_h
47
71
  {
48
72
  id: id,
@@ -41,7 +41,6 @@ module ClaudeMemory
41
41
 
42
42
  def validate!
43
43
  raise ArgumentError, "fact_id required" if fact_id.nil?
44
- raise ArgumentError, "content_item_id required" if content_item_id.nil?
45
44
  end
46
45
  end
47
46
  end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+
7
+ module ClaudeMemory
8
+ module Embeddings
9
+ # Adapter for any OpenAI-compatible /v1/embeddings endpoint.
10
+ # Works with OpenAI, Voyage, Ollama, LiteLLM, etc.
11
+ #
12
+ # Required ENV:
13
+ # CLAUDE_MEMORY_EMBEDDING_API_KEY or OPENAI_API_KEY
14
+ #
15
+ # Optional ENV:
16
+ # CLAUDE_MEMORY_EMBEDDING_API_URL (default: https://api.openai.com/v1/embeddings)
17
+ # CLAUDE_MEMORY_EMBEDDING_MODEL (default: text-embedding-3-small)
18
+ #
19
+ class ApiAdapter
20
+ class ApiError < StandardError; end
21
+
22
+ DEFAULT_API_URL = "https://api.openai.com/v1/embeddings"
23
+ DEFAULT_MODEL = "text-embedding-3-small"
24
+
25
+ def initialize(model: nil, env: ENV)
26
+ @api_key = env["CLAUDE_MEMORY_EMBEDDING_API_KEY"] || env["OPENAI_API_KEY"]
27
+ @api_url = env["CLAUDE_MEMORY_EMBEDDING_API_URL"] || DEFAULT_API_URL
28
+ @model = model || env["CLAUDE_MEMORY_EMBEDDING_MODEL"] || DEFAULT_MODEL
29
+ @known_dimensions = ModelRegistry.dimensions_for(@model)
30
+
31
+ raise ArgumentError, "Set CLAUDE_MEMORY_EMBEDDING_API_KEY or OPENAI_API_KEY" unless @api_key
32
+ end
33
+
34
+ def name = "api"
35
+
36
+ # Dimensions resolved from registry if known, otherwise lazy from first API response.
37
+ def dimensions
38
+ @dimensions ||= @known_dimensions || fetch_dimensions
39
+ end
40
+
41
+ # Generate embedding for a query text.
42
+ # @param text [String] input text to embed
43
+ # @return [Array<Float>] embedding vector
44
+ def generate(text)
45
+ return zero_vector if text.nil? || text.empty?
46
+
47
+ response = call_api(text)
48
+ embedding = response.dig("data", 0, "embedding")
49
+
50
+ raise ApiError, "No embedding returned in API response" unless embedding
51
+
52
+ @dimensions ||= embedding.size
53
+ embedding
54
+ end
55
+
56
+ # Alias for passage encoding — API providers don't distinguish query vs passage
57
+ alias_method :generate_passage, :generate
58
+
59
+ private
60
+
61
+ def fetch_dimensions
62
+ # Make a minimal API call to discover dimensions
63
+ embedding = generate("dimension probe")
64
+ embedding.size
65
+ end
66
+
67
+ def call_api(text)
68
+ uri = URI(@api_url)
69
+ http = Net::HTTP.new(uri.host, uri.port)
70
+ http.use_ssl = uri.scheme == "https"
71
+ http.open_timeout = 10
72
+ http.read_timeout = 30
73
+
74
+ request = Net::HTTP::Post.new(uri.path)
75
+ request["Authorization"] = "Bearer #{@api_key}"
76
+ request["Content-Type"] = "application/json"
77
+ request.body = JSON.generate({input: text, model: @model})
78
+
79
+ response = http.request(request)
80
+
81
+ unless response.is_a?(Net::HTTPSuccess)
82
+ raise ApiError, "HTTP #{response.code}: #{response.body}"
83
+ end
84
+
85
+ JSON.parse(response.body)
86
+ end
87
+
88
+ def zero_vector
89
+ # If dimensions haven't been discovered yet, we can't return a properly-sized zero vector.
90
+ # Return empty array; callers handle nil/empty gracefully.
91
+ return [] unless @dimensions
92
+
93
+ Array.new(@dimensions, 0.0)
94
+ end
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClaudeMemory
4
+ module Embeddings
5
+ # Value object that detects embedding dimension mismatches.
6
+ # Returns a Result so the caller decides how to handle mismatches —
7
+ # no hidden side effects like dropping tables.
8
+ class DimensionCheck
9
+ Result = Data.define(:status, :stored, :current)
10
+
11
+ # @param store [Store::SQLiteStore] database to check meta against
12
+ # @param provider [#dimensions] embedding provider
13
+ # @return [Result] status is :fresh, :match, or :mismatch
14
+ def self.call(store, provider)
15
+ stored = store.get_meta("embedding_dimensions")&.to_i
16
+ return Result.new(status: :fresh, stored: nil, current: provider.dimensions) unless stored
17
+ return Result.new(status: :match, stored: stored, current: provider.dimensions) if stored == provider.dimensions
18
+
19
+ Result.new(status: :mismatch, stored: stored, current: provider.dimensions)
20
+ end
21
+ end
22
+ end
23
+ end
@@ -2,33 +2,50 @@
2
2
 
3
3
  module ClaudeMemory
4
4
  module Embeddings
5
- # Adapter wrapping fastembed-rb for high-quality local embeddings
6
- # Uses BAAI/bge-small-en-v1.5 by default (384-dim, ~67MB ONNX model)
5
+ # Adapter wrapping fastembed-rb for high-quality local embeddings.
6
+ # Supports any model available in fastembed-rb's SUPPORTED_MODELS.
7
7
  #
8
- # Implements the same generate(text) interface as Generator for DI compatibility.
9
- # Supports asymmetric query/passage encoding for better retrieval accuracy.
8
+ # Model selection (in priority order):
9
+ # 1. Explicit model_name parameter
10
+ # 2. CLAUDE_MEMORY_EMBEDDING_MODEL env var
11
+ # 3. Default: BAAI/bge-small-en-v1.5 (384-dim, ~67MB ONNX)
12
+ #
13
+ # Dimensions are resolved from the ModelRegistry for known models,
14
+ # or probed from fastembed's ModelInfo for unknown models.
10
15
  #
11
16
  # Usage:
12
17
  # adapter = FastembedAdapter.new
13
18
  # query_vec = adapter.generate("What database?") # query encoding
14
19
  # passage_vec = adapter.generate_passage("Uses PostgreSQL") # passage encoding
15
20
  #
21
+ # # Use a larger model:
22
+ # adapter = FastembedAdapter.new(model_name: "BAAI/bge-base-en-v1.5")
23
+ # adapter.dimensions # => 768
24
+ #
16
25
  class FastembedAdapter
17
- EMBEDDING_DIM = 384
18
26
  DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
19
27
 
20
- def initialize(model_name: DEFAULT_MODEL)
28
+ attr_reader :model_name, :dimensions
29
+
30
+ def name = "fastembed"
31
+
32
+ def initialize(model_name: nil, env: ENV)
33
+ @model_name = model_name || env["CLAUDE_MEMORY_EMBEDDING_MODEL"] || DEFAULT_MODEL
34
+ @dimensions = resolve_dimensions(@model_name)
35
+
21
36
  require "fastembed"
22
- @model = Fastembed::TextEmbedding.new(model_name: model_name)
37
+ @model = Fastembed::TextEmbedding.new(model_name: @model_name)
38
+
39
+ # If dimensions weren't known from registry, probe from fastembed
40
+ @dimensions ||= probe_dimensions_from_fastembed
23
41
  rescue LoadError
24
42
  raise LoadError,
25
43
  "fastembed gem is required for FastembedAdapter. Add `gem 'fastembed'` to your Gemfile."
26
44
  end
27
45
 
28
46
  # Generate query embedding (optimized for search queries)
29
- # Compatible with Recall's embedding_generator interface
30
47
  # @param text [String] query text to embed
31
- # @return [Array<Float>] normalized 384-dimensional vector
48
+ # @return [Array<Float>] normalized embedding vector
32
49
  def generate(text)
33
50
  return zero_vector if text.nil? || text.empty?
34
51
 
@@ -36,9 +53,8 @@ module ClaudeMemory
36
53
  end
37
54
 
38
55
  # Generate passage embedding (optimized for document/fact indexing)
39
- # Use this when storing embeddings for facts
40
56
  # @param text [String] passage text to embed
41
- # @return [Array<Float>] normalized 384-dimensional vector
57
+ # @return [Array<Float>] normalized embedding vector
42
58
  def generate_passage(text)
43
59
  return zero_vector if text.nil? || text.empty?
44
60
 
@@ -47,8 +63,26 @@ module ClaudeMemory
47
63
 
48
64
  private
49
65
 
66
+ # Resolve dimensions from the model registry (fast, no I/O).
67
+ # Returns nil if the model isn't in the registry.
68
+ def resolve_dimensions(model)
69
+ ModelRegistry.dimensions_for(model)
70
+ end
71
+
72
+ # Fallback: probe fastembed's SUPPORTED_MODELS for dimension info.
73
+ # This handles models added to fastembed-rb but not yet in our registry.
74
+ def probe_dimensions_from_fastembed
75
+ if defined?(Fastembed::SUPPORTED_MODELS)
76
+ info = Fastembed::SUPPORTED_MODELS[@model_name]
77
+ return info.dim if info
78
+ end
79
+
80
+ # Last resort: generate a test embedding and measure its size
81
+ @model.query_embed("dimension probe").first.size
82
+ end
83
+
50
84
  def zero_vector
51
- Array.new(EMBEDDING_DIM, 0.0)
85
+ Array.new(@dimensions, 0.0)
52
86
  end
53
87
  end
54
88
  end
@@ -12,6 +12,10 @@ module ClaudeMemory
12
12
  class Generator
13
13
  EMBEDDING_DIM = 384
14
14
 
15
+ def name = "tfidf"
16
+
17
+ def dimensions = EMBEDDING_DIM
18
+
15
19
  # Common technical terms and programming concepts for vocabulary
16
20
  VOCABULARY = %w[
17
21
  database framework library module class function method