claude_memory 0.7.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/memory.sqlite3 +0 -0
- data/.claude/rules/claude_memory.generated.md +32 -2
- data/.claude/settings.json +65 -15
- data/.claude/settings.local.json +5 -2
- data/.claude/skills/improve/SKILL.md +113 -25
- data/.claude/skills/upgrade-dependencies/SKILL.md +154 -0
- data/.claude-plugin/commands/distill-transcripts.md +98 -0
- data/.claude-plugin/commands/memory-recall.md +67 -0
- data/.claude-plugin/marketplace.json +2 -2
- data/.claude-plugin/plugin.json +3 -3
- data/.claude-plugin/scripts/hook-runner.sh +14 -0
- data/.claude-plugin/scripts/serve-mcp.sh +14 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +90 -1
- data/CLAUDE.md +56 -18
- data/README.md +35 -0
- data/db/migrations/013_add_mcp_tool_calls.rb +26 -0
- data/db/migrations/014_canonicalize_predicates.rb +30 -0
- data/docs/improvements.md +74 -74
- data/docs/influence/claude-mem.md +1 -0
- data/docs/influence/claude-supermemory.md +1 -0
- data/docs/influence/episodic-memory.md +1 -0
- data/docs/influence/grepai.md +1 -0
- data/docs/influence/kbs.md +1 -0
- data/docs/influence/lossless-claw.md +1 -0
- data/docs/influence/qmd.md +1 -0
- data/docs/quality_review.md +119 -224
- data/hooks/hooks.json +39 -7
- data/lib/claude_memory/commands/checks/distill_check.rb +61 -0
- data/lib/claude_memory/commands/checks/hooks_check.rb +2 -2
- data/lib/claude_memory/commands/checks/vec_check.rb +2 -1
- data/lib/claude_memory/commands/completion_command.rb +149 -0
- data/lib/claude_memory/commands/doctor_command.rb +2 -0
- data/lib/claude_memory/commands/embeddings_command.rb +198 -0
- data/lib/claude_memory/commands/help_command.rb +12 -1
- data/lib/claude_memory/commands/hook_command.rb +2 -1
- data/lib/claude_memory/commands/index_command.rb +85 -78
- data/lib/claude_memory/commands/initializers/database_ensurer.rb +16 -0
- data/lib/claude_memory/commands/initializers/global_initializer.rb +2 -1
- data/lib/claude_memory/commands/initializers/hooks_configurator.rb +55 -11
- data/lib/claude_memory/commands/initializers/project_initializer.rb +2 -1
- data/lib/claude_memory/commands/install_skill_command.rb +78 -0
- data/lib/claude_memory/commands/registry.rb +47 -32
- data/lib/claude_memory/commands/reject_command.rb +62 -0
- data/lib/claude_memory/commands/restore_command.rb +77 -0
- data/lib/claude_memory/commands/skills/distill-transcripts.md +102 -0
- data/lib/claude_memory/commands/skills/memory-recall.md +67 -0
- data/lib/claude_memory/commands/stats_command.rb +98 -2
- data/lib/claude_memory/configuration.rb +14 -1
- data/lib/claude_memory/core/fact_ranker.rb +2 -2
- data/lib/claude_memory/core/rr_fusion.rb +23 -6
- data/lib/claude_memory/core/snippet_extractor.rb +7 -3
- data/lib/claude_memory/core/text_builder.rb +11 -0
- data/lib/claude_memory/distill/json_schema.md +8 -4
- data/lib/claude_memory/distill/null_distiller.rb +2 -0
- data/lib/claude_memory/domain/entity.rb +13 -1
- data/lib/claude_memory/domain/fact.rb +26 -2
- data/lib/claude_memory/domain/provenance.rb +0 -1
- data/lib/claude_memory/embeddings/api_adapter.rb +97 -0
- data/lib/claude_memory/embeddings/dimension_check.rb +23 -0
- data/lib/claude_memory/embeddings/fastembed_adapter.rb +46 -12
- data/lib/claude_memory/embeddings/generator.rb +4 -0
- data/lib/claude_memory/embeddings/inspector.rb +91 -0
- data/lib/claude_memory/embeddings/model_registry.rb +210 -0
- data/lib/claude_memory/embeddings/resolver.rb +44 -0
- data/lib/claude_memory/hook/context_injector.rb +58 -2
- data/lib/claude_memory/hook/distillation_runner.rb +46 -0
- data/lib/claude_memory/hook/handler.rb +11 -2
- data/lib/claude_memory/index/vector_index.rb +15 -2
- data/lib/claude_memory/infrastructure/schema_validator.rb +3 -3
- data/lib/claude_memory/ingest/ingester.rb +17 -0
- data/lib/claude_memory/mcp/handlers/context_handlers.rb +38 -0
- data/lib/claude_memory/mcp/handlers/management_handlers.rb +169 -0
- data/lib/claude_memory/mcp/handlers/query_handlers.rb +115 -0
- data/lib/claude_memory/mcp/handlers/setup_handlers.rb +211 -0
- data/lib/claude_memory/mcp/handlers/shortcut_handlers.rb +37 -0
- data/lib/claude_memory/mcp/handlers/stats_handlers.rb +205 -0
- data/lib/claude_memory/mcp/instructions_builder.rb +19 -1
- data/lib/claude_memory/mcp/query_guide.rb +10 -0
- data/lib/claude_memory/mcp/response_formatter.rb +1 -0
- data/lib/claude_memory/mcp/server.rb +22 -1
- data/lib/claude_memory/mcp/telemetry.rb +86 -0
- data/lib/claude_memory/mcp/text_summary.rb +26 -0
- data/lib/claude_memory/mcp/tool_definitions.rb +116 -4
- data/lib/claude_memory/mcp/tool_helpers.rb +43 -0
- data/lib/claude_memory/mcp/tools.rb +50 -679
- data/lib/claude_memory/publish.rb +40 -5
- data/lib/claude_memory/recall/dual_engine.rb +105 -0
- data/lib/claude_memory/recall/legacy_engine.rb +138 -0
- data/lib/claude_memory/recall/query_core.rb +371 -0
- data/lib/claude_memory/recall.rb +121 -673
- data/lib/claude_memory/resolve/predicate_policy.rb +63 -3
- data/lib/claude_memory/resolve/resolver.rb +43 -0
- data/lib/claude_memory/shortcuts.rb +4 -4
- data/lib/claude_memory/store/retry_handler.rb +61 -0
- data/lib/claude_memory/store/schema_manager.rb +68 -0
- data/lib/claude_memory/store/sqlite_store.rb +334 -201
- data/lib/claude_memory/store/store_manager.rb +50 -1
- data/lib/claude_memory/sweep/maintenance.rb +115 -1
- data/lib/claude_memory/sweep/sweeper.rb +3 -0
- data/lib/claude_memory/templates/hooks.example.json +26 -7
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +16 -0
- metadata +48 -8
- data/.claude/memory.sqlite3-shm +0 -0
- data/.claude/memory.sqlite3-wal +0 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Embeddings
    # Collects embedding metadata (provider name and vector dimensions) from
    # the global and project databases and returns it as plain value objects.
    # Nothing in this class prints or formats output.
    #
    # EmbeddingsCommand uses it to keep database access apart from presentation.
    class Inspector
      DatabaseState = Data.define(:label, :provider, :dimensions)
      DimensionResult = Data.define(:label, :status, :stored_dims, :stored_provider, :current_dims)

      # Reports the embedding metadata recorded in every database file that
      # exists. A database with neither a provider nor dimensions stored is
      # left out of the result.
      #
      # @return [Array<DatabaseState>] dimensions are returned as stored (not coerced)
      def database_states
        states = []

        with_each_store do |label, store|
          provider = store.get_meta("embedding_provider")
          dimensions = store.get_meta("embedding_dimensions")

          if provider || dimensions
            states.push(DatabaseState.new(label: label, provider: provider, dimensions: dimensions))
          end
        end

        states
      end

      # Compares the dimensions stored in each existing database with the
      # dimensions of the requested provider/model.
      #
      # Status values:
      # - :fresh    — the database has no stored dimensions
      # - :mismatch — the current dimensions are known and differ from the stored ones
      # - :match    — otherwise (including when the current dimensions are unknown)
      #
      # @param provider_name [String] provider used when no model name is given
      # @param model_name [String, nil] explicit model name, if any
      # @return [Array<DimensionResult>]
      def dimension_checks(provider_name, model_name)
        checks = []

        with_each_store do |label, store|
          stored_dims = store.get_meta("embedding_dimensions")&.to_i
          stored_provider = store.get_meta("embedding_provider")

          checks.push(
            if stored_dims
              compared_result(label, stored_dims, stored_provider, resolve_current_dimensions(provider_name, model_name))
            else
              DimensionResult.new(label: label, status: :fresh, stored_dims: nil, stored_provider: nil, current_dims: nil)
            end
          )
        end

        checks
      end

      private

      # Builds the result for a database that already has stored dimensions.
      def compared_result(label, stored_dims, stored_provider, current_dims)
        differs = current_dims && current_dims != stored_dims

        DimensionResult.new(
          label: label,
          status: differs ? :mismatch : :match,
          stored_dims: stored_dims,
          stored_provider: stored_provider,
          current_dims: current_dims
        )
      end

      # Dimensions of the explicit model when one is named, otherwise of the
      # provider's default model. Nil when the registry has no answer.
      def resolve_current_dimensions(provider_name, model_name)
        return ModelRegistry.dimensions_for(model_name) if model_name

        ModelRegistry.default_for_provider(provider_name)&.dimensions
      end

      # Yields (label, store) for the global and then the project database,
      # skipping any whose file does not exist. Each store is closed after the
      # block returns, even if the block raises.
      def with_each_store
        config = Configuration.new
        locations = {"global" => config.global_db_path, "project" => config.project_db_path}

        locations.each do |label, path|
          next unless File.exist?(path)

          store = Store::SQLiteStore.new(path)
          begin
            yield label, store
          ensure
            store.close
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Embeddings
    # Catalogue of known embedding models, used for model validation,
    # dimension lookup, and discoverability.
    #
    # Every entry is keyed by its canonical name (for example
    # "BAAI/bge-small-en-v1.5") and carries the provider type, vector
    # dimensions, a description, download size, and token limit.
    #
    # Usage:
    #   ModelRegistry.find("BAAI/bge-small-en-v1.5")
    #   # => #<data ModelInfo name="BAAI/bge-small-en-v1.5", provider="fastembed", dimensions=384, ...>
    #
    #   ModelRegistry.models_for_provider("fastembed")
    #   # => [...]
    #
    class ModelRegistry
      ModelInfo = Data.define(:name, :provider, :dimensions, :description, :size_mb, :max_tokens)

      # Known models with validated dimensions.
      # Fastembed models sourced from fastembed-rb SUPPORTED_MODELS.
      # API models sourced from provider documentation.
      #
      # Row layout: name, provider, dimensions, description, size_mb, max_tokens
      MODELS = [
        # --- fastembed: local ONNX models (no API key needed) ---
        ["BAAI/bge-small-en-v1.5", "fastembed", 384, "Fast English embedding (default)", 67, 512],
        ["BAAI/bge-base-en-v1.5", "fastembed", 768, "Balanced English embedding, higher accuracy", 210, 512],
        ["BAAI/bge-large-en-v1.5", "fastembed", 1024, "High accuracy English embedding", 1200, 512],
        ["sentence-transformers/all-MiniLM-L6-v2", "fastembed", 384, "Lightweight general-purpose sentence embedding", 90, 512],
        ["intfloat/multilingual-e5-small", "fastembed", 384, "Multilingual embedding, 100+ languages", 450, 512],
        ["intfloat/multilingual-e5-base", "fastembed", 768, "Larger multilingual embedding", 1110, 512],
        ["nomic-ai/nomic-embed-text-v1.5", "fastembed", 768, "Long context (8192 tokens) with Matryoshka support", 520, 8192],
        ["jinaai/jina-embeddings-v2-small-en", "fastembed", 512, "Small English embedding, 8192 token context", 60, 8192],
        ["jinaai/jina-embeddings-v2-base-en", "fastembed", 768, "Base English embedding, 8192 token context", 520, 8192],

        # --- api: OpenAI-compatible endpoints ---
        ["text-embedding-3-small", "api", 1536, "OpenAI small embedding (default API model)", nil, 8191],
        ["text-embedding-3-large", "api", 3072, "OpenAI large embedding, highest accuracy", nil, 8191],
        ["text-embedding-ada-002", "api", 1536, "OpenAI legacy embedding", nil, 8191],
        ["voyage-3", "api", 1024, "Voyage AI general-purpose embedding", nil, 32000],
        ["voyage-3-lite", "api", 512, "Voyage AI lightweight embedding", nil, 32000],
        ["voyage-code-3", "api", 1024, "Voyage AI code-optimized embedding", nil, 32000],

        # --- tfidf: built-in, no dependencies ---
        ["tfidf", "tfidf", 384, "Built-in TF-IDF embedding (no dependencies)", 0, nil]
      ].map { |row| ModelInfo.new(*row) }.freeze

      # Name → ModelInfo index, in the same order as MODELS.
      MODELS_BY_NAME = MODELS.to_h { |info| [info.name, info] }.freeze

      # Default model name for each provider.
      DEFAULTS = {
        "fastembed" => "BAAI/bge-small-en-v1.5",
        "api" => "text-embedding-3-small",
        "tfidf" => "tfidf"
      }.freeze

      class << self
        # Look up a model by its canonical name.
        # @param name [String] model name (e.g., "BAAI/bge-small-en-v1.5")
        # @return [ModelInfo, nil] nil when the name is not registered
        def find(name)
          MODELS_BY_NAME.fetch(name, nil)
        end

        # Every registered model that belongs to the given provider.
        # @param provider [String] "fastembed", "api", or "tfidf"
        # @return [Array<ModelInfo>]
        def models_for_provider(provider)
          MODELS.reject { |info| info.provider != provider }
        end

        # Names of all registered models, in registration order.
        # @return [Array<String>]
        def model_names
          MODELS.map { |info| info.name }
        end

        # Distinct provider names, in order of first appearance.
        # @return [Array<String>]
        def providers
          MODELS.each_with_object([]) do |info, seen|
            seen << info.provider unless seen.include?(info.provider)
          end
        end

        # Vector dimensions of the named model.
        # @param name [String] model name
        # @return [Integer, nil] nil when the model is unknown
        def dimensions_for(name)
          info = find(name)
          info ? info.dimensions : nil
        end

        # The default model registered for a provider.
        # @param provider [String] "fastembed", "api", or "tfidf"
        # @return [ModelInfo, nil] nil when the provider has no default
        def default_for_provider(provider)
          default_name = DEFAULTS[provider]
          return nil unless default_name

          find(default_name)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Embeddings
    # Builds the embedding provider selected by name, model, or environment.
    #
    # The provider is taken from the first of these that is set:
    #   1. the explicit name argument
    #   2. the CLAUDE_MEMORY_EMBEDDING_PROVIDER env var
    #   3. the provider implied by the model name, when the registry knows it
    #      - "BAAI/bge-small-en-v1.5" → fastembed
    #      - "text-embedding-3-small" → api
    #   4. "tfidf"
    #
    # The model comes from the model argument, falling back to
    # CLAUDE_MEMORY_EMBEDDING_MODEL, and is handed to the chosen adapter.
    #
    # Examples:
    #   Embeddings.resolve                           # tfidf default
    #   Embeddings.resolve("fastembed")              # fastembed with default model
    #   Embeddings.resolve("fastembed", model: "BAAI/bge-base-en-v1.5")
    #   Embeddings.resolve(model: "text-embedding-3-small")  # auto-detects api provider
    #
    # @param name [String, nil] explicit provider name
    # @param model [String, nil] explicit model name
    # @param env [#[]] environment lookup (defaults to ENV)
    # @raise [ArgumentError] when the resolved provider name is not recognised
    def self.resolve(name = nil, model: nil, env: ENV)
      model = model || env["CLAUDE_MEMORY_EMBEDDING_MODEL"]

      # Each fallback is consulted only when the previous one produced nothing.
      provider = name
      provider ||= env["CLAUDE_MEMORY_EMBEDDING_PROVIDER"]
      provider ||= infer_provider(model)
      provider ||= "tfidf"

      case provider
      when "tfidf"
        Generator.new
      when "fastembed"
        FastembedAdapter.new(model_name: model, env: env)
      when "api"
        ApiAdapter.new(model: model, env: env)
      else
        raise ArgumentError, "Unknown embedding provider: #{provider}. Available: tfidf, fastembed, api"
      end
    end

    # Provider registered for the given model name, or nil when no model is
    # given or the registry does not know it.
    def self.infer_provider(model)
      model ? ModelRegistry.find(model)&.provider : nil
    end
    private_class_method :infer_provider
  end
end
|
|
@@ -9,6 +9,10 @@ module ClaudeMemory
|
|
|
9
9
|
MAX_DECISIONS = 5
|
|
10
10
|
MAX_CONVENTIONS = 5
|
|
11
11
|
MAX_ARCHITECTURE = 5
|
|
12
|
+
MAX_UNDISTILLED = 3
|
|
13
|
+
MAX_TEXT_PER_ITEM = 1500
|
|
14
|
+
|
|
15
|
+
FRESH_SESSION_SOURCES = %w[startup resume clear].freeze
|
|
12
16
|
|
|
13
17
|
QUERIES = {
|
|
14
18
|
decisions: {query: "decision constraint rule requirement", scope: "all"},
|
|
@@ -16,8 +20,9 @@ module ClaudeMemory
|
|
|
16
20
|
architecture: {query: "uses framework implements architecture pattern", scope: "all"}
|
|
17
21
|
}.freeze
|
|
18
22
|
|
|
19
|
-
def initialize(manager)
|
|
23
|
+
def initialize(manager, source: nil)
|
|
20
24
|
@manager = manager
|
|
25
|
+
@source = source
|
|
21
26
|
@recall = Recall.new(manager)
|
|
22
27
|
end
|
|
23
28
|
|
|
@@ -33,6 +38,11 @@ module ClaudeMemory
|
|
|
33
38
|
architecture = fetch(:architecture, MAX_ARCHITECTURE)
|
|
34
39
|
sections << format_section("Architecture", architecture) if architecture.any?
|
|
35
40
|
|
|
41
|
+
if fresh_session?
|
|
42
|
+
undistilled = fetch_undistilled(MAX_UNDISTILLED)
|
|
43
|
+
sections << format_distillation_prompt(undistilled) if undistilled.any?
|
|
44
|
+
end
|
|
45
|
+
|
|
36
46
|
return nil if sections.empty?
|
|
37
47
|
|
|
38
48
|
sections.join("\n")
|
|
@@ -40,11 +50,16 @@ module ClaudeMemory
|
|
|
40
50
|
|
|
41
51
|
private
|
|
42
52
|
|
|
53
|
+
# True when no session source was supplied, or when the supplied source is
# one of FRESH_SESSION_SOURCES.
def fresh_session?
  return true if @source.nil?

  FRESH_SESSION_SOURCES.include?(@source)
end
|
|
56
|
+
|
|
43
57
|
def fetch(category, limit)
|
|
44
58
|
config = QUERIES.fetch(category)
|
|
45
59
|
results = @recall.query(config[:query], limit: limit, scope: config[:scope])
|
|
46
60
|
results.map { |r| format_fact(r[:fact]) }
|
|
47
|
-
rescue =>
|
|
61
|
+
rescue => e
|
|
62
|
+
ClaudeMemory.logger.debug("ContextInjector#fetch(#{category}) failed: #{e.message}")
|
|
48
63
|
[]
|
|
49
64
|
end
|
|
50
65
|
|
|
@@ -62,6 +77,47 @@ module ClaudeMemory
|
|
|
62
77
|
end
|
|
63
78
|
end
|
|
64
79
|
|
|
80
|
+
# Collects undistilled content items (minimum length 200) from the project
# and global stores, whichever are present, and returns the `limit` entries
# with the greatest :occurred_at, newest first. Items without :occurred_at
# sort as the empty string. Any failure is logged as a warning and yields [].
def fetch_undistilled(limit)
  begin
    available_stores = %i[project_store global_store].filter_map { |reader| @manager.public_send(reader) }

    candidates = available_stores.flat_map do |store|
      store.undistilled_content_items(limit: limit, min_length: 200)
    end

    oldest_first = candidates.sort_by { |entry| entry[:occurred_at] || "" }
    # Taking the tail and flipping it equals reversing and taking the head.
    oldest_first.last(limit).reverse
  rescue => e
    ClaudeMemory.logger.warn("ContextInjector#fetch_undistilled failed: #{e.message}")
    []
  end
end
|
|
97
|
+
|
|
98
|
+
# Renders the "Pending Knowledge Extraction" section: a fixed instruction
# header followed, for each item, by a blank line, a heading with the item id
# and its relative age ("unknown" when no age is available), and the item's
# raw text truncated to MAX_TEXT_PER_ITEM. Returns one newline-joined string.
def format_distillation_prompt(items)
  header = [
    "## Pending Knowledge Extraction",
    "",
    "The following transcript segments haven't been deeply analyzed yet.",
    "Extract facts, entities, and decisions, then call `memory.store_extraction`",
    "followed by `memory.mark_distilled` for each item.",
    "",
    "**What to extract:** technology decisions, conventions, preferences, architecture",
    "**What to skip:** debugging steps, code output, transient errors"
  ]

  sections = items.flat_map do |entry|
    age = Core::RelativeTime.format(entry[:occurred_at]) || "unknown"
    body = Core::TextBuilder.truncate(entry[:raw_text], MAX_TEXT_PER_ITEM)
    ["", "### Content Item #{entry[:id]} (#{age})", body]
  end

  (header + sections).join("\n")
end
|
|
120
|
+
|
|
65
121
|
def format_section(title, items)
|
|
66
122
|
items = items.compact.uniq
|
|
67
123
|
return nil if items.empty?
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Hook
    # Runs a distiller over stored content items and applies the resulting
    # extraction to the store through Resolve::Resolver.
    class DistillationRunner
      # Items whose raw text is shorter than this are not distilled.
      MIN_TEXT_LENGTH = 200

      # @param store [Object] store providing content items, a `db` handle and metrics recording
      # @param distiller [#distill] extraction strategy (defaults to Distill::NullDistiller)
      def initialize(store, distiller: Distill::NullDistiller.new)
        @distiller = distiller
        @store = store
      end

      # Distills one content item and records the outcome.
      #
      # Returns nil without doing anything when the item is missing, has no
      # raw text, or its text is shorter than MIN_TEXT_LENGTH, and also when
      # the distiller yields an empty extraction. Resolution and metrics
      # recording run inside one `@store.db.transaction` block. Any
      # StandardError is logged as a warning (message plus the first five
      # backtrace lines) instead of being raised.
      #
      # @param content_id [Integer] id of the content item
      # @param project_path [String] forwarded to the resolver
      # @param scope [String] forwarded to the resolver (default "project")
      def distill_item(content_id, project_path:, scope: "project")
        begin
          item = @store.get_content_item(content_id)
          return unless item

          raw_text = item[:raw_text]
          return if !raw_text || raw_text.length < MIN_TEXT_LENGTH

          extraction = @distiller.distill(raw_text, content_item_id: content_id)
          return if extraction.empty?

          resolver = Resolve::Resolver.new(@store)
          @store.db.transaction do
            outcome = resolver.apply(
              extraction,
              content_item_id: content_id,
              project_path: project_path,
              scope: scope
            )
            @store.record_ingestion_metrics(
              content_item_id: content_id,
              input_tokens: 0,
              output_tokens: 0,
              facts_extracted: outcome[:facts_created]
            )
          end
        rescue => e
          ClaudeMemory.logger.warn("DistillationRunner#distill_item(#{content_id}) failed: #{e.class} - #{e.message}")
          ClaudeMemory.logger.warn(e.backtrace.first(5).join("\n"))
        end
      end

      # Distills up to `limit` undistilled items of at least MIN_TEXT_LENGTH
      # characters, using the default scope.
      #
      # @param project_path [String] forwarded to #distill_item
      # @param limit [Integer] maximum number of items to fetch
      # @return [Integer] number of items fetched (not the number that succeeded)
      def distill_batch(project_path:, limit: 5)
        pending = @store.undistilled_content_items(limit: limit, min_length: MIN_TEXT_LENGTH)
        pending.each do |entry|
          distill_item(entry[:id], project_path: project_path)
        end
        pending.size
      end
    end
  end
end
|
|
@@ -23,12 +23,20 @@ module ClaudeMemory
|
|
|
23
23
|
raise PayloadError, "Missing required field: transcript_path" if transcript_path.nil? || transcript_path.empty?
|
|
24
24
|
|
|
25
25
|
ingester = Ingest::Ingester.new(@store, env: @env)
|
|
26
|
-
ingester.ingest(
|
|
26
|
+
result = ingester.ingest(
|
|
27
27
|
source: "claude_code",
|
|
28
28
|
session_id: session_id,
|
|
29
29
|
transcript_path: transcript_path,
|
|
30
30
|
project_path: project_path
|
|
31
31
|
)
|
|
32
|
+
|
|
33
|
+
if result[:status] == :ingested && result[:content_id]
|
|
34
|
+
DistillationRunner.new(@store).distill_item(
|
|
35
|
+
result[:content_id], project_path: project_path
|
|
36
|
+
)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
result
|
|
32
40
|
rescue Ingest::TranscriptReader::FileNotFoundError => e
|
|
33
41
|
# Transcript file doesn't exist (e.g., headless Claude session)
|
|
34
42
|
# This is expected, not an error - return success with no-op status
|
|
@@ -56,7 +64,8 @@ module ClaudeMemory
|
|
|
56
64
|
manager = @manager || build_manager(payload)
|
|
57
65
|
manager.ensure_both!
|
|
58
66
|
|
|
59
|
-
|
|
67
|
+
source = payload["source"]
|
|
68
|
+
injector = ContextInjector.new(manager, source: source)
|
|
60
69
|
context_text = injector.generate_context
|
|
61
70
|
|
|
62
71
|
{status: :ok, context: context_text}
|
|
@@ -6,13 +6,16 @@ module ClaudeMemory
|
|
|
6
6
|
# Follows the same lazy-init pattern as LexicalFTS:
|
|
7
7
|
# the extension and virtual table are created on first use.
|
|
8
8
|
class VectorIndex
|
|
9
|
-
|
|
9
|
+
DEFAULT_DIMENSIONS = 384
|
|
10
|
+
|
|
11
|
+
attr_reader :dimensions
|
|
10
12
|
|
|
11
13
|
def initialize(store)
|
|
12
14
|
@store = store
|
|
13
15
|
@db = store.db
|
|
14
16
|
@available = nil
|
|
15
17
|
@vec_table_ensured = false
|
|
18
|
+
@dimensions = store.get_meta("embedding_dimensions")&.to_i || DEFAULT_DIMENSIONS
|
|
16
19
|
end
|
|
17
20
|
|
|
18
21
|
# Is the sqlite-vec extension loadable?
|
|
@@ -121,6 +124,16 @@ module ClaudeMemory
|
|
|
121
124
|
indexed_ids.size
|
|
122
125
|
end
|
|
123
126
|
|
|
127
|
+
# Removes every row from the facts_vec virtual table.
# Used when clearing stale embeddings after a dimension change.
#
# @return [Boolean] false when the index is not available (nothing is
#   touched), true once the delete statement has run
def clear!
  if available?
    ensure_vec_table!
    @db.run("DELETE FROM facts_vec")
    true
  else
    false
  end
end
|
|
136
|
+
|
|
124
137
|
# Number of entries in the vec0 virtual table
|
|
125
138
|
def count
|
|
126
139
|
return 0 unless available?
|
|
@@ -162,7 +175,7 @@ module ClaudeMemory
|
|
|
162
175
|
|
|
163
176
|
@db.run(<<~SQL)
|
|
164
177
|
CREATE VIRTUAL TABLE IF NOT EXISTS facts_vec
|
|
165
|
-
USING vec0(fact_id INTEGER PRIMARY KEY, embedding float[#{
|
|
178
|
+
USING vec0(fact_id INTEGER PRIMARY KEY, embedding float[#{@dimensions}] distance_metric=cosine)
|
|
166
179
|
SQL
|
|
167
180
|
@vec_table_ensured = true
|
|
168
181
|
end
|
|
@@ -166,7 +166,7 @@ module ClaudeMemory
|
|
|
166
166
|
end
|
|
167
167
|
|
|
168
168
|
def check_embedding_dimensions(issues)
|
|
169
|
-
|
|
169
|
+
expected = @store.get_meta("embedding_dimensions")&.to_i || 384
|
|
170
170
|
facts_with_embeddings = @store.facts
|
|
171
171
|
.where(Sequel.~(embedding_json: nil))
|
|
172
172
|
.select(:id, :embedding_json)
|
|
@@ -174,8 +174,8 @@ module ClaudeMemory
|
|
|
174
174
|
|
|
175
175
|
facts_with_embeddings.each do |fact|
|
|
176
176
|
embedding = JSON.parse(fact[:embedding_json])
|
|
177
|
-
if embedding.size !=
|
|
178
|
-
issues << {severity: "error", message: "Fact #{fact[:id]} has embedding with incorrect dimensions (#{embedding.size}, expected
|
|
177
|
+
if embedding.size != expected
|
|
178
|
+
issues << {severity: "error", message: "Fact #{fact[:id]} has embedding with incorrect dimensions (#{embedding.size}, expected #{expected})"}
|
|
179
179
|
break # Only report first occurrence
|
|
180
180
|
end
|
|
181
181
|
end
|
|
@@ -4,7 +4,17 @@ require "digest"
|
|
|
4
4
|
|
|
5
5
|
module ClaudeMemory
|
|
6
6
|
module Ingest
|
|
7
|
+
# Delta-based transcript ingestion with cursor tracking.
|
|
8
|
+
# Reads new content from transcripts, extracts metadata and tool calls,
|
|
9
|
+
# sanitizes private tags, and persists to the content_items table with FTS indexing.
|
|
7
10
|
class Ingester
|
|
11
|
+
# @param store [Store::SQLiteStore] database store for persistence
|
|
12
|
+
# @param fts [Index::LexicalFTS, nil] full-text search index (default: new from store)
|
|
13
|
+
# @param env [Hash] environment variables
|
|
14
|
+
# @param metadata_extractor [MetadataExtractor, nil] extracts git branch, cwd, etc.
|
|
15
|
+
# @param tool_extractor [ToolExtractor, nil] extracts tool calls from transcript text
|
|
16
|
+
# @param tool_filter [ToolFilter, nil] filters irrelevant tool calls
|
|
17
|
+
# @param observation_compressor [ObservationCompressor, nil] compresses tool observations
|
|
8
18
|
def initialize(store, fts: nil, env: ENV, metadata_extractor: nil, tool_extractor: nil, tool_filter: nil, observation_compressor: nil)
|
|
9
19
|
@store = store
|
|
10
20
|
@fts = fts || Index::LexicalFTS.new(store)
|
|
@@ -15,6 +25,13 @@ module ClaudeMemory
|
|
|
15
25
|
@observation_compressor = observation_compressor || ObservationCompressor.new
|
|
16
26
|
end
|
|
17
27
|
|
|
28
|
+
# Ingest new content from a transcript file
|
|
29
|
+
# @param source [String] content source identifier (e.g., "hook", "cli")
|
|
30
|
+
# @param session_id [String] Claude session ID
|
|
31
|
+
# @param transcript_path [String] path to the transcript file
|
|
32
|
+
# @param project_path [String, nil] project root (defaults to detected path)
|
|
33
|
+
# @return [Hash] result with :status (:ingested, :skipped, or :no_change),
|
|
34
|
+
# :content_id, :bytes_read, and optional :reason
|
|
18
35
|
def ingest(source:, session_id:, transcript_path:, project_path: nil)
|
|
19
36
|
unless should_ingest?(transcript_path)
|
|
20
37
|
ClaudeMemory.logger.debug("ingest", message: "Skipped unchanged file", transcript_path: transcript_path)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module MCP
    module Handlers
      # Query handlers that look facts up by context: the tool that produced
      # them, the git branch, or the working directory. The including object
      # is expected to provide @recall, extract_scope and extract_limit.
      module ContextHandlers
        # Facts associated with a tool name.
        #
        # @param args [Hash] reads "tool_name"; scope and limit (default 20)
        #   come from extract_scope / extract_limit
        # @return the ResponseFormatter.format_tool_facts result
        def facts_by_tool(args)
          tool = args["tool_name"]
          scope = extract_scope(args)
          limit = extract_limit(args, default: 20)

          ResponseFormatter.format_tool_facts(
            tool,
            scope,
            @recall.facts_by_tool(tool, limit: limit, scope: scope)
          )
        end

        # Facts associated with a git branch or a working directory.
        # "git_branch" takes precedence when both are supplied.
        #
        # @param args [Hash] reads "git_branch" or "cwd"; scope and limit
        #   (default 20) come from extract_scope / extract_limit
        # @return the ResponseFormatter.format_context_facts result, or an
        #   {error: ...} hash when neither key is present
        def facts_by_context(args)
          scope = extract_scope(args)
          limit = extract_limit(args, default: 20)

          if (branch = args["git_branch"])
            matches = @recall.facts_by_branch(branch, limit: limit, scope: scope)
            ResponseFormatter.format_context_facts("git_branch", branch, scope, matches)
          elsif (directory = args["cwd"])
            matches = @recall.facts_by_directory(directory, limit: limit, scope: scope)
            ResponseFormatter.format_context_facts("cwd", directory, scope, matches)
          else
            {error: "Must provide either git_branch or cwd parameter"}
          end
        end
      end
    end
  end
end
|