claude_memory 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/memory.sqlite3 +0 -0
  3. data/.claude/rules/claude_memory.generated.md +32 -2
  4. data/.claude/settings.json +65 -15
  5. data/.claude/settings.local.json +5 -2
  6. data/.claude/skills/improve/SKILL.md +113 -25
  7. data/.claude/skills/upgrade-dependencies/SKILL.md +154 -0
  8. data/.claude-plugin/commands/distill-transcripts.md +98 -0
  9. data/.claude-plugin/commands/memory-recall.md +67 -0
  10. data/.claude-plugin/marketplace.json +2 -2
  11. data/.claude-plugin/plugin.json +3 -3
  12. data/.claude-plugin/scripts/hook-runner.sh +14 -0
  13. data/.claude-plugin/scripts/serve-mcp.sh +14 -0
  14. data/.ruby-version +1 -1
  15. data/CHANGELOG.md +90 -1
  16. data/CLAUDE.md +56 -18
  17. data/README.md +35 -0
  18. data/db/migrations/013_add_mcp_tool_calls.rb +26 -0
  19. data/db/migrations/014_canonicalize_predicates.rb +30 -0
  20. data/docs/improvements.md +74 -74
  21. data/docs/influence/claude-mem.md +1 -0
  22. data/docs/influence/claude-supermemory.md +1 -0
  23. data/docs/influence/episodic-memory.md +1 -0
  24. data/docs/influence/grepai.md +1 -0
  25. data/docs/influence/kbs.md +1 -0
  26. data/docs/influence/lossless-claw.md +1 -0
  27. data/docs/influence/qmd.md +1 -0
  28. data/docs/quality_review.md +119 -224
  29. data/hooks/hooks.json +39 -7
  30. data/lib/claude_memory/commands/checks/distill_check.rb +61 -0
  31. data/lib/claude_memory/commands/checks/hooks_check.rb +2 -2
  32. data/lib/claude_memory/commands/checks/vec_check.rb +2 -1
  33. data/lib/claude_memory/commands/completion_command.rb +149 -0
  34. data/lib/claude_memory/commands/doctor_command.rb +2 -0
  35. data/lib/claude_memory/commands/embeddings_command.rb +198 -0
  36. data/lib/claude_memory/commands/help_command.rb +12 -1
  37. data/lib/claude_memory/commands/hook_command.rb +2 -1
  38. data/lib/claude_memory/commands/index_command.rb +85 -78
  39. data/lib/claude_memory/commands/initializers/database_ensurer.rb +16 -0
  40. data/lib/claude_memory/commands/initializers/global_initializer.rb +2 -1
  41. data/lib/claude_memory/commands/initializers/hooks_configurator.rb +55 -11
  42. data/lib/claude_memory/commands/initializers/project_initializer.rb +2 -1
  43. data/lib/claude_memory/commands/install_skill_command.rb +78 -0
  44. data/lib/claude_memory/commands/registry.rb +47 -32
  45. data/lib/claude_memory/commands/reject_command.rb +62 -0
  46. data/lib/claude_memory/commands/restore_command.rb +77 -0
  47. data/lib/claude_memory/commands/skills/distill-transcripts.md +102 -0
  48. data/lib/claude_memory/commands/skills/memory-recall.md +67 -0
  49. data/lib/claude_memory/commands/stats_command.rb +98 -2
  50. data/lib/claude_memory/configuration.rb +14 -1
  51. data/lib/claude_memory/core/fact_ranker.rb +2 -2
  52. data/lib/claude_memory/core/rr_fusion.rb +23 -6
  53. data/lib/claude_memory/core/snippet_extractor.rb +7 -3
  54. data/lib/claude_memory/core/text_builder.rb +11 -0
  55. data/lib/claude_memory/distill/json_schema.md +8 -4
  56. data/lib/claude_memory/distill/null_distiller.rb +2 -0
  57. data/lib/claude_memory/domain/entity.rb +13 -1
  58. data/lib/claude_memory/domain/fact.rb +26 -2
  59. data/lib/claude_memory/domain/provenance.rb +0 -1
  60. data/lib/claude_memory/embeddings/api_adapter.rb +97 -0
  61. data/lib/claude_memory/embeddings/dimension_check.rb +23 -0
  62. data/lib/claude_memory/embeddings/fastembed_adapter.rb +46 -12
  63. data/lib/claude_memory/embeddings/generator.rb +4 -0
  64. data/lib/claude_memory/embeddings/inspector.rb +91 -0
  65. data/lib/claude_memory/embeddings/model_registry.rb +210 -0
  66. data/lib/claude_memory/embeddings/resolver.rb +44 -0
  67. data/lib/claude_memory/hook/context_injector.rb +58 -2
  68. data/lib/claude_memory/hook/distillation_runner.rb +46 -0
  69. data/lib/claude_memory/hook/handler.rb +11 -2
  70. data/lib/claude_memory/index/vector_index.rb +15 -2
  71. data/lib/claude_memory/infrastructure/schema_validator.rb +3 -3
  72. data/lib/claude_memory/ingest/ingester.rb +17 -0
  73. data/lib/claude_memory/mcp/handlers/context_handlers.rb +38 -0
  74. data/lib/claude_memory/mcp/handlers/management_handlers.rb +169 -0
  75. data/lib/claude_memory/mcp/handlers/query_handlers.rb +115 -0
  76. data/lib/claude_memory/mcp/handlers/setup_handlers.rb +211 -0
  77. data/lib/claude_memory/mcp/handlers/shortcut_handlers.rb +37 -0
  78. data/lib/claude_memory/mcp/handlers/stats_handlers.rb +205 -0
  79. data/lib/claude_memory/mcp/instructions_builder.rb +19 -1
  80. data/lib/claude_memory/mcp/query_guide.rb +10 -0
  81. data/lib/claude_memory/mcp/response_formatter.rb +1 -0
  82. data/lib/claude_memory/mcp/server.rb +22 -1
  83. data/lib/claude_memory/mcp/telemetry.rb +86 -0
  84. data/lib/claude_memory/mcp/text_summary.rb +26 -0
  85. data/lib/claude_memory/mcp/tool_definitions.rb +116 -4
  86. data/lib/claude_memory/mcp/tool_helpers.rb +43 -0
  87. data/lib/claude_memory/mcp/tools.rb +50 -679
  88. data/lib/claude_memory/publish.rb +40 -5
  89. data/lib/claude_memory/recall/dual_engine.rb +105 -0
  90. data/lib/claude_memory/recall/legacy_engine.rb +138 -0
  91. data/lib/claude_memory/recall/query_core.rb +371 -0
  92. data/lib/claude_memory/recall.rb +121 -673
  93. data/lib/claude_memory/resolve/predicate_policy.rb +63 -3
  94. data/lib/claude_memory/resolve/resolver.rb +43 -0
  95. data/lib/claude_memory/shortcuts.rb +4 -4
  96. data/lib/claude_memory/store/retry_handler.rb +61 -0
  97. data/lib/claude_memory/store/schema_manager.rb +68 -0
  98. data/lib/claude_memory/store/sqlite_store.rb +334 -201
  99. data/lib/claude_memory/store/store_manager.rb +50 -1
  100. data/lib/claude_memory/sweep/maintenance.rb +115 -1
  101. data/lib/claude_memory/sweep/sweeper.rb +3 -0
  102. data/lib/claude_memory/templates/hooks.example.json +26 -7
  103. data/lib/claude_memory/version.rb +1 -1
  104. data/lib/claude_memory.rb +16 -0
  105. metadata +48 -8
  106. data/.claude/memory.sqlite3-shm +0 -0
  107. data/.claude/memory.sqlite3-wal +0 -0
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClaudeMemory
4
+ module Embeddings
5
# Reads embedding metadata from the global and project databases.
# Returns structured data only -- no formatting and nothing written to stdout.
#
# Used by EmbeddingsCommand to separate DB concerns from presentation.
class Inspector
  # Embedding metadata recorded in one database. Values are returned exactly
  # as `get_meta` provides them (no numeric conversion is applied here).
  DatabaseState = Data.define(:label, :provider, :dimensions)

  # Result of comparing one database's stored dimensions with the dimensions
  # of the requested provider/model. `status` is :match, :mismatch or :fresh.
  DimensionResult = Data.define(:label, :status, :stored_dims, :stored_provider, :current_dims)

  # Lists the embedding metadata of every existing database that has a
  # provider or dimensions recorded.
  #
  # @return [Array<DatabaseState>] in "global", "project" order
  def database_states
    with_each_store do |label, store|
      provider = store.get_meta("embedding_provider")
      dims = store.get_meta("embedding_dimensions")
      next unless provider || dims

      DatabaseState.new(label: label, provider: provider, dimensions: dims)
    end
  end

  # Compares each existing database's stored dimensions with the dimensions
  # of the requested model (or of the provider's default model).
  #
  # A database without stored dimensions is reported as :fresh. When the
  # current dimensions cannot be determined (model unknown to the registry)
  # the database is reported as :match, because no mismatch can be proven.
  #
  # @param provider_name [String] provider used when model_name is nil
  # @param model_name [String, nil] explicit model name
  # @return [Array<DimensionResult>] in "global", "project" order
  def dimension_checks(provider_name, model_name)
    # The answer does not depend on the store, so resolve it once up front.
    current_dims = resolve_current_dimensions(provider_name, model_name)

    with_each_store do |label, store|
      stored_dims = store.get_meta("embedding_dimensions")&.to_i

      if stored_dims
        mismatch = current_dims && current_dims != stored_dims
        DimensionResult.new(
          label: label,
          status: mismatch ? :mismatch : :match,
          stored_dims: stored_dims,
          stored_provider: store.get_meta("embedding_provider"),
          current_dims: current_dims
        )
      else
        DimensionResult.new(
          label: label,
          status: :fresh,
          stored_dims: nil,
          stored_provider: nil,
          current_dims: nil
        )
      end
    end
  end

  private

  # Dimensions of the explicit model, else of the provider's default model.
  # Returns nil when the registry has no answer.
  def resolve_current_dimensions(provider_name, model_name)
    if model_name
      ModelRegistry.dimensions_for(model_name)
    else
      ModelRegistry.default_for_provider(provider_name)&.dimensions
    end
  end

  # Opens each existing database in turn, yields (label, store) and closes
  # the store afterwards even if the block raises.
  #
  # @return [Array] the truthy block results, in iteration order
  def with_each_store
    config = Configuration.new
    databases = {"global" => config.global_db_path, "project" => config.project_db_path}

    databases.filter_map do |label, path|
      # A missing (nil) path is treated like a missing file rather than
      # letting File.exist? raise a TypeError.
      next unless path && File.exist?(path)

      store = Store::SQLiteStore.new(path)
      begin
        yield label, store
      ensure
        store.close
      end
    end
  end
end
90
+ end
91
+ end
@@ -0,0 +1,210 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClaudeMemory
4
+ module Embeddings
5
# Catalog of the embedding models this gem knows about.
# Supports model validation, dimension lookup and discoverability.
#
# Each model is keyed by its canonical name (e.g. "BAAI/bge-small-en-v1.5")
# and carries its provider, dimensions, description, download size and
# token limit.
#
# Usage:
#   ModelRegistry.find("BAAI/bge-small-en-v1.5")
#   # => #<data ModelInfo name="BAAI/bge-small-en-v1.5", provider="fastembed", dimensions=384, ...>
#
#   ModelRegistry.models_for_provider("fastembed")
#   # => [#<data ModelInfo ...>, ...]
#
class ModelRegistry
  ModelInfo = Data.define(:name, :provider, :dimensions, :description, :size_mb, :max_tokens)

  # Known models with validated dimensions.
  # Fastembed models sourced from fastembed-rb SUPPORTED_MODELS.
  # API models sourced from provider documentation.
  #
  # Row layout follows ModelInfo's members:
  #   name, provider, dimensions, description, size_mb, max_tokens
  MODELS = [
    # --- fastembed: local ONNX models (no API key needed) ---
    ["BAAI/bge-small-en-v1.5", "fastembed", 384, "Fast English embedding (default)", 67, 512],
    ["BAAI/bge-base-en-v1.5", "fastembed", 768, "Balanced English embedding, higher accuracy", 210, 512],
    ["BAAI/bge-large-en-v1.5", "fastembed", 1024, "High accuracy English embedding", 1200, 512],
    ["sentence-transformers/all-MiniLM-L6-v2", "fastembed", 384, "Lightweight general-purpose sentence embedding", 90, 512],
    ["intfloat/multilingual-e5-small", "fastembed", 384, "Multilingual embedding, 100+ languages", 450, 512],
    ["intfloat/multilingual-e5-base", "fastembed", 768, "Larger multilingual embedding", 1110, 512],
    ["nomic-ai/nomic-embed-text-v1.5", "fastembed", 768, "Long context (8192 tokens) with Matryoshka support", 520, 8192],
    ["jinaai/jina-embeddings-v2-small-en", "fastembed", 512, "Small English embedding, 8192 token context", 60, 8192],
    ["jinaai/jina-embeddings-v2-base-en", "fastembed", 768, "Base English embedding, 8192 token context", 520, 8192],

    # --- api: OpenAI-compatible endpoints ---
    ["text-embedding-3-small", "api", 1536, "OpenAI small embedding (default API model)", nil, 8191],
    ["text-embedding-3-large", "api", 3072, "OpenAI large embedding, highest accuracy", nil, 8191],
    ["text-embedding-ada-002", "api", 1536, "OpenAI legacy embedding", nil, 8191],
    ["voyage-3", "api", 1024, "Voyage AI general-purpose embedding", nil, 32000],
    ["voyage-3-lite", "api", 512, "Voyage AI lightweight embedding", nil, 32000],
    ["voyage-code-3", "api", 1024, "Voyage AI code-optimized embedding", nil, 32000],

    # --- tfidf: built-in, no dependencies ---
    ["tfidf", "tfidf", 384, "Built-in TF-IDF embedding (no dependencies)", 0, nil]
  ].map { |row| ModelInfo.new(*row) }.freeze

  # Name => ModelInfo index used by the lookup methods.
  MODELS_BY_NAME = MODELS.to_h { |model| [model.name, model] }.freeze

  # Default model name for each provider.
  DEFAULTS = {
    "fastembed" => "BAAI/bge-small-en-v1.5",
    "api" => "text-embedding-3-small",
    "tfidf" => "tfidf"
  }.freeze

  class << self
    # Find a model by name.
    # @param name [String] model name (e.g., "BAAI/bge-small-en-v1.5")
    # @return [ModelInfo, nil] nil when the name is not registered
    def find(name)
      MODELS_BY_NAME[name]
    end

    # List all models for a given provider.
    # @param provider [String] "fastembed", "api", or "tfidf"
    # @return [Array<ModelInfo>] empty when the provider has no models
    def models_for_provider(provider)
      MODELS.filter { |model| model.provider == provider }
    end

    # All known model names, in registration order.
    # @return [Array<String>]
    def model_names
      MODELS.map { |model| model.name }
    end

    # All provider names, each listed once in order of first appearance.
    # @return [Array<String>]
    def providers
      MODELS.uniq(&:provider).map(&:provider)
    end

    # Look up dimensions for a model name.
    # @param name [String] model name
    # @return [Integer, nil] nil if the model is unknown
    def dimensions_for(name)
      MODELS_BY_NAME[name]&.dimensions
    end

    # Return the default ModelInfo for a provider.
    # @param provider [String] "fastembed", "api", or "tfidf"
    # @return [ModelInfo, nil] nil when the provider has no default
    def default_for_provider(provider)
      # An unknown provider yields a nil name, which find maps to nil.
      find(DEFAULTS[provider])
    end
  end
end
209
+ end
210
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClaudeMemory
4
module Embeddings
  # Resolves an embedding provider by name, model, or ENV.
  #
  # Provider selection (in priority order):
  #   1. Explicit name parameter
  #   2. CLAUDE_MEMORY_EMBEDDING_PROVIDER env var
  #   3. Provider implied by the model, when the registry knows the model:
  #      - "BAAI/bge-small-en-v1.5" → fastembed
  #      - "text-embedding-3-small" → api
  #   4. Default: "tfidf"
  #
  # Model selection uses the model parameter, falling back to
  # CLAUDE_MEMORY_EMBEDDING_MODEL, and is forwarded to the provider.
  #
  # Blank values (nil, "", whitespace only) are treated as "not set", so an
  # exported-but-empty environment variable falls through to the next
  # source instead of raising "Unknown embedding provider: ".
  #
  # Examples:
  #   Embeddings.resolve                              # tfidf default
  #   Embeddings.resolve("fastembed")                 # fastembed with default model
  #   Embeddings.resolve("fastembed", model: "BAAI/bge-base-en-v1.5")
  #   Embeddings.resolve(model: "text-embedding-3-small") # auto-detects api provider
  #
  # @param name [String, Symbol, nil] explicit provider name
  # @param model [String, nil] explicit model name
  # @param env [#[]] environment lookup (defaults to ENV)
  # @return [Generator, FastembedAdapter, ApiAdapter]
  # @raise [ArgumentError] if the resolved provider is not tfidf, fastembed or api
  def self.resolve(name = nil, model: nil, env: ENV)
    model = blank_to_nil(model) || blank_to_nil(env["CLAUDE_MEMORY_EMBEDDING_MODEL"])
    provider = blank_to_nil(name) ||
      blank_to_nil(env["CLAUDE_MEMORY_EMBEDDING_PROVIDER"]) ||
      infer_provider(model) ||
      "tfidf"

    case provider
    when "tfidf" then Generator.new
    when "fastembed" then FastembedAdapter.new(model_name: model, env: env)
    when "api" then ApiAdapter.new(model: model, env: env)
    else raise ArgumentError, "Unknown embedding provider: #{provider}. Available: tfidf, fastembed, api"
    end
  end

  # Infer provider from a model name using the registry.
  # Returns nil if the model is unknown.
  def self.infer_provider(model)
    return nil unless model

    ModelRegistry.find(model)&.provider
  end
  private_class_method :infer_provider

  # Converts a setting to a stripped String; nil when nothing meaningful
  # remains. Accepts symbols so `resolve(:fastembed)` works.
  def self.blank_to_nil(value)
    text = value.to_s.strip
    text unless text.empty?
  end
  private_class_method :blank_to_nil
end
44
+ end
@@ -9,6 +9,10 @@ module ClaudeMemory
9
9
  MAX_DECISIONS = 5
10
10
  MAX_CONVENTIONS = 5
11
11
  MAX_ARCHITECTURE = 5
12
+ MAX_UNDISTILLED = 3
13
+ MAX_TEXT_PER_ITEM = 1500
14
+
15
+ FRESH_SESSION_SOURCES = %w[startup resume clear].freeze
12
16
 
13
17
  QUERIES = {
14
18
  decisions: {query: "decision constraint rule requirement", scope: "all"},
@@ -16,8 +20,9 @@ module ClaudeMemory
16
20
  architecture: {query: "uses framework implements architecture pattern", scope: "all"}
17
21
  }.freeze
18
22
 
19
- def initialize(manager)
23
+ def initialize(manager, source: nil)
20
24
  @manager = manager
25
+ @source = source
21
26
  @recall = Recall.new(manager)
22
27
  end
23
28
 
@@ -33,6 +38,11 @@ module ClaudeMemory
33
38
  architecture = fetch(:architecture, MAX_ARCHITECTURE)
34
39
  sections << format_section("Architecture", architecture) if architecture.any?
35
40
 
41
+ if fresh_session?
42
+ undistilled = fetch_undistilled(MAX_UNDISTILLED)
43
+ sections << format_distillation_prompt(undistilled) if undistilled.any?
44
+ end
45
+
36
46
  return nil if sections.empty?
37
47
 
38
48
  sections.join("\n")
@@ -40,11 +50,16 @@ module ClaudeMemory
40
50
 
41
51
  private
42
52
 
53
# True when no session source was given, or when the source is one of
# FRESH_SESSION_SOURCES.
def fresh_session?
  return true if @source.nil?

  FRESH_SESSION_SOURCES.include?(@source)
end
56
+
43
57
  def fetch(category, limit)
44
58
  config = QUERIES.fetch(category)
45
59
  results = @recall.query(config[:query], limit: limit, scope: config[:scope])
46
60
  results.map { |r| format_fact(r[:fact]) }
47
- rescue => _e
61
+ rescue => e
62
+ ClaudeMemory.logger.debug("ContextInjector#fetch(#{category}) failed: #{e.message}")
48
63
  []
49
64
  end
50
65
 
@@ -62,6 +77,47 @@ module ClaudeMemory
62
77
  end
63
78
  end
64
79
 
80
+ def fetch_undistilled(limit)
81
+ stores = []
82
+ stores << @manager.project_store if @manager.project_store
83
+ stores << @manager.global_store if @manager.global_store
84
+
85
+ items = stores.flat_map { |s|
86
+ s.undistilled_content_items(limit: limit, min_length: 200)
87
+ }
88
+
89
+ items
90
+ .sort_by { |i| i[:occurred_at] || "" }
91
+ .reverse
92
+ .first(limit)
93
+ rescue => e
94
+ ClaudeMemory.logger.warn("ContextInjector#fetch_undistilled failed: #{e.message}")
95
+ []
96
+ end
97
+
98
# Renders the "Pending Knowledge Extraction" section: fixed instructions
# followed by one "### Content Item <id> (<age>)" entry per item holding the
# item's text truncated to MAX_TEXT_PER_ITEM.
#
# @param items [Array<Hash>] items with :id, :occurred_at and :raw_text
# @return [String] newline-joined markdown
def format_distillation_prompt(items)
  instructions = [
    "## Pending Knowledge Extraction",
    "",
    "The following transcript segments haven't been deeply analyzed yet.",
    "Extract facts, entities, and decisions, then call `memory.store_extraction`",
    "followed by `memory.mark_distilled` for each item.",
    "",
    "**What to extract:** technology decisions, conventions, preferences, architecture",
    "**What to skip:** debugging steps, code output, transient errors"
  ]

  entries = items.flat_map do |entry|
    # Falls back to "unknown" when no relative time can be produced.
    age = Core::RelativeTime.format(entry[:occurred_at]) || "unknown"
    excerpt = Core::TextBuilder.truncate(entry[:raw_text], MAX_TEXT_PER_ITEM)

    ["", "### Content Item #{entry[:id]} (#{age})", excerpt]
  end

  (instructions + entries).join("\n")
end
120
+
65
121
  def format_section(title, items)
66
122
  items = items.compact.uniq
67
123
  return nil if items.empty?
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClaudeMemory
4
+ module Hook
5
# Runs a distiller over stored content items and applies the resulting
# extraction through Resolve::Resolver, recording ingestion metrics in the
# same database transaction.
#
# Distillation is best-effort: a failure for one item is logged as a warning
# and never raised to the caller.
class DistillationRunner
  # Items whose raw text is shorter than this are skipped.
  MIN_TEXT_LENGTH = 200

  # @param store [Object] store providing content items, `db` and metrics recording
  # @param distiller [#distill] extraction strategy (defaults to Distill::NullDistiller)
  def initialize(store, distiller: Distill::NullDistiller.new)
    @store = store
    @distiller = distiller
  end

  # Distills one content item and persists the result.
  #
  # Nothing happens when the item is missing, its text is absent or shorter
  # than MIN_TEXT_LENGTH, or the distiller yields nothing (nil or empty).
  # The return value is not meaningful.
  #
  # @param content_id [Integer] id of the content item
  # @param project_path [String] forwarded to the resolver
  # @param scope [String] forwarded to the resolver
  def distill_item(content_id, project_path:, scope: "project")
    item = @store.get_content_item(content_id)
    return unless item

    raw_text = item[:raw_text]
    return unless raw_text && raw_text.length >= MIN_TEXT_LENGTH

    extraction = @distiller.distill(raw_text, content_item_id: content_id)
    # A distiller with nothing to report may return nil; that is not a failure.
    return if extraction.nil? || extraction.empty?

    resolver = Resolve::Resolver.new(@store)
    @store.db.transaction do
      resolve_result = resolver.apply(
        extraction, content_item_id: content_id,
        project_path: project_path, scope: scope
      )
      @store.record_ingestion_metrics(
        content_item_id: content_id, input_tokens: 0,
        output_tokens: 0, facts_extracted: resolve_result[:facts_created]
      )
    end
  rescue => e
    log_failure(content_id, e)
  end

  # Distills up to `limit` undistilled items of at least MIN_TEXT_LENGTH.
  #
  # @param project_path [String] forwarded to each distill_item call
  # @param limit [Integer] maximum number of items to fetch
  # @param scope [String] forwarded to each distill_item call
  # @return [Integer] number of items attempted (failed items are included)
  def distill_batch(project_path:, limit: 5, scope: "project")
    items = @store.undistilled_content_items(limit: limit, min_length: MIN_TEXT_LENGTH)
    items.each { |item| distill_item(item[:id], project_path: project_path, scope: scope) }
    items.size
  end

  private

  # Logs the failure plus the first backtrace lines, if there are any.
  # Array() guards against a nil backtrace so the handler itself cannot raise.
  def log_failure(content_id, error)
    ClaudeMemory.logger.warn("DistillationRunner#distill_item(#{content_id}) failed: #{error.class} - #{error.message}")

    trace = Array(error.backtrace).first(5)
    ClaudeMemory.logger.warn(trace.join("\n")) unless trace.empty?
  end
end
45
+ end
46
+ end
@@ -23,12 +23,20 @@ module ClaudeMemory
23
23
  raise PayloadError, "Missing required field: transcript_path" if transcript_path.nil? || transcript_path.empty?
24
24
 
25
25
  ingester = Ingest::Ingester.new(@store, env: @env)
26
- ingester.ingest(
26
+ result = ingester.ingest(
27
27
  source: "claude_code",
28
28
  session_id: session_id,
29
29
  transcript_path: transcript_path,
30
30
  project_path: project_path
31
31
  )
32
+
33
+ if result[:status] == :ingested && result[:content_id]
34
+ DistillationRunner.new(@store).distill_item(
35
+ result[:content_id], project_path: project_path
36
+ )
37
+ end
38
+
39
+ result
32
40
  rescue Ingest::TranscriptReader::FileNotFoundError => e
33
41
  # Transcript file doesn't exist (e.g., headless Claude session)
34
42
  # This is expected, not an error - return success with no-op status
@@ -56,7 +64,8 @@ module ClaudeMemory
56
64
  manager = @manager || build_manager(payload)
57
65
  manager.ensure_both!
58
66
 
59
- injector = ContextInjector.new(manager)
67
+ source = payload["source"]
68
+ injector = ContextInjector.new(manager, source: source)
60
69
  context_text = injector.generate_context
61
70
 
62
71
  {status: :ok, context: context_text}
@@ -6,13 +6,16 @@ module ClaudeMemory
6
6
  # Follows the same lazy-init pattern as LexicalFTS:
7
7
  # the extension and virtual table are created on first use.
8
8
  class VectorIndex
9
- EMBEDDING_DIMENSIONS = 384
9
+ DEFAULT_DIMENSIONS = 384
10
+
11
+ attr_reader :dimensions
10
12
 
11
13
  def initialize(store)
12
14
  @store = store
13
15
  @db = store.db
14
16
  @available = nil
15
17
  @vec_table_ensured = false
18
+ @dimensions = store.get_meta("embedding_dimensions")&.to_i || DEFAULT_DIMENSIONS
16
19
  end
17
20
 
18
21
  # Is the sqlite-vec extension loadable?
@@ -121,6 +124,16 @@ module ClaudeMemory
121
124
  indexed_ids.size
122
125
  end
123
126
 
127
# Removes every row from the facts_vec virtual table.
# Intended for discarding stale embeddings after a dimension change.
#
# NOTE(review): only rows are deleted; an already existing facts_vec table
# presumably keeps the vector width it was created with -- confirm callers
# recreate the table when the dimension really changes.
#
# @return [Boolean] false when the index is unavailable, true otherwise
def clear!
  if available?
    ensure_vec_table!
    @db.run("DELETE FROM facts_vec")
    true
  else
    false
  end
end
136
+
124
137
  # Number of entries in the vec0 virtual table
125
138
  def count
126
139
  return 0 unless available?
@@ -162,7 +175,7 @@ module ClaudeMemory
162
175
 
163
176
  @db.run(<<~SQL)
164
177
  CREATE VIRTUAL TABLE IF NOT EXISTS facts_vec
165
- USING vec0(fact_id INTEGER PRIMARY KEY, embedding float[#{EMBEDDING_DIMENSIONS}] distance_metric=cosine)
178
+ USING vec0(fact_id INTEGER PRIMARY KEY, embedding float[#{@dimensions}] distance_metric=cosine)
166
179
  SQL
167
180
  @vec_table_ensured = true
168
181
  end
@@ -166,7 +166,7 @@ module ClaudeMemory
166
166
  end
167
167
 
168
168
  def check_embedding_dimensions(issues)
169
- # Check that all embeddings have correct dimensions (384)
169
+ expected = @store.get_meta("embedding_dimensions")&.to_i || 384
170
170
  facts_with_embeddings = @store.facts
171
171
  .where(Sequel.~(embedding_json: nil))
172
172
  .select(:id, :embedding_json)
@@ -174,8 +174,8 @@ module ClaudeMemory
174
174
 
175
175
  facts_with_embeddings.each do |fact|
176
176
  embedding = JSON.parse(fact[:embedding_json])
177
- if embedding.size != 384
178
- issues << {severity: "error", message: "Fact #{fact[:id]} has embedding with incorrect dimensions (#{embedding.size}, expected 384)"}
177
+ if embedding.size != expected
178
+ issues << {severity: "error", message: "Fact #{fact[:id]} has embedding with incorrect dimensions (#{embedding.size}, expected #{expected})"}
179
179
  break # Only report first occurrence
180
180
  end
181
181
  end
@@ -4,7 +4,17 @@ require "digest"
4
4
 
5
5
  module ClaudeMemory
6
6
  module Ingest
7
+ # Delta-based transcript ingestion with cursor tracking.
8
+ # Reads new content from transcripts, extracts metadata and tool calls,
9
+ # sanitizes private tags, and persists to the content_items table with FTS indexing.
7
10
  class Ingester
11
+ # @param store [Store::SQLiteStore] database store for persistence
12
+ # @param fts [Index::LexicalFTS, nil] full-text search index (default: new from store)
13
+ # @param env [Hash] environment variables
14
+ # @param metadata_extractor [MetadataExtractor, nil] extracts git branch, cwd, etc.
15
+ # @param tool_extractor [ToolExtractor, nil] extracts tool calls from transcript text
16
+ # @param tool_filter [ToolFilter, nil] filters irrelevant tool calls
17
+ # @param observation_compressor [ObservationCompressor, nil] compresses tool observations
8
18
  def initialize(store, fts: nil, env: ENV, metadata_extractor: nil, tool_extractor: nil, tool_filter: nil, observation_compressor: nil)
9
19
  @store = store
10
20
  @fts = fts || Index::LexicalFTS.new(store)
@@ -15,6 +25,13 @@ module ClaudeMemory
15
25
  @observation_compressor = observation_compressor || ObservationCompressor.new
16
26
  end
17
27
 
28
+ # Ingest new content from a transcript file
29
+ # @param source [String] content source identifier (e.g., "hook", "cli")
30
+ # @param session_id [String] Claude session ID
31
+ # @param transcript_path [String] path to the transcript file
32
+ # @param project_path [String, nil] project root (defaults to detected path)
33
+ # @return [Hash] result with :status (:ingested, :skipped, or :no_change),
34
+ # :content_id, :bytes_read, and optional :reason
18
35
  def ingest(source:, session_id:, transcript_path:, project_path: nil)
19
36
  unless should_ingest?(transcript_path)
20
37
  ClaudeMemory.logger.debug("ingest", message: "Skipped unchanged file", transcript_path: transcript_path)
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClaudeMemory
4
+ module MCP
5
+ module Handlers
6
# MCP handlers for context-based fact lookups: by tool name, by git branch,
# or by working directory. The including object supplies @recall,
# extract_scope and extract_limit.
module ContextHandlers
  # Facts associated with the tool named in args["tool_name"].
  # @param args [Hash] tool arguments
  # @return [Object] whatever ResponseFormatter.format_tool_facts produces
  def facts_by_tool(args)
    tool = args["tool_name"]
    scope = extract_scope(args)
    facts = @recall.facts_by_tool(tool, limit: extract_limit(args, default: 20), scope: scope)

    ResponseFormatter.format_tool_facts(tool, scope, facts)
  end

  # Facts associated with a git branch or a working directory.
  # args["git_branch"] wins when both it and args["cwd"] are present.
  # @param args [Hash] tool arguments
  # @return [Object, Hash] formatted facts, or {error: ...} when neither
  #   parameter was provided
  def facts_by_context(args)
    scope = extract_scope(args)
    limit = extract_limit(args, default: 20)

    branch = args["git_branch"]
    directory = args["cwd"]
    return {error: "Must provide either git_branch or cwd parameter"} unless branch || directory

    context_type, context_value, facts =
      if branch
        ["git_branch", branch, @recall.facts_by_branch(branch, limit: limit, scope: scope)]
      else
        ["cwd", directory, @recall.facts_by_directory(directory, limit: limit, scope: scope)]
      end

    ResponseFormatter.format_context_facts(context_type, context_value, scope, facts)
  end
end
36
+ end
37
+ end
38
+ end