claude_memory 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/memory.sqlite3 +0 -0
- data/.claude/rules/claude_memory.generated.md +32 -2
- data/.claude/settings.json +30 -52
- data/.claude/settings.local.json +3 -1
- data/.claude/skills/upgrade-dependencies/SKILL.md +154 -0
- data/.claude-plugin/marketplace.json +2 -2
- data/.claude-plugin/plugin.json +3 -3
- data/.claude-plugin/scripts/hook-runner.sh +14 -0
- data/.claude-plugin/scripts/serve-mcp.sh +14 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +41 -0
- data/CLAUDE.md +31 -17
- data/README.md +35 -0
- data/db/migrations/013_add_mcp_tool_calls.rb +26 -0
- data/db/migrations/014_canonicalize_predicates.rb +30 -0
- data/docs/improvements.md +58 -20
- data/docs/influence/claude-mem.md +1 -0
- data/docs/influence/claude-supermemory.md +1 -0
- data/docs/influence/episodic-memory.md +1 -0
- data/docs/influence/grepai.md +1 -0
- data/docs/influence/kbs.md +1 -0
- data/docs/influence/lossless-claw.md +1 -0
- data/docs/influence/qmd.md +1 -0
- data/lib/claude_memory/commands/completion_command.rb +1 -31
- data/lib/claude_memory/commands/embeddings_command.rb +198 -0
- data/lib/claude_memory/commands/help_command.rb +8 -1
- data/lib/claude_memory/commands/registry.rb +47 -34
- data/lib/claude_memory/commands/reject_command.rb +62 -0
- data/lib/claude_memory/commands/restore_command.rb +77 -0
- data/lib/claude_memory/commands/skills/distill-transcripts.md +5 -1
- data/lib/claude_memory/commands/stats_command.rb +98 -2
- data/lib/claude_memory/configuration.rb +14 -1
- data/lib/claude_memory/distill/json_schema.md +8 -4
- data/lib/claude_memory/distill/null_distiller.rb +2 -0
- data/lib/claude_memory/domain/entity.rb +13 -1
- data/lib/claude_memory/domain/fact.rb +26 -2
- data/lib/claude_memory/embeddings/api_adapter.rb +5 -4
- data/lib/claude_memory/embeddings/fastembed_adapter.rb +43 -13
- data/lib/claude_memory/embeddings/inspector.rb +91 -0
- data/lib/claude_memory/embeddings/model_registry.rb +210 -0
- data/lib/claude_memory/embeddings/resolver.rb +32 -6
- data/lib/claude_memory/ingest/ingester.rb +17 -0
- data/lib/claude_memory/mcp/handlers/management_handlers.rb +24 -0
- data/lib/claude_memory/mcp/handlers/stats_handlers.rb +5 -2
- data/lib/claude_memory/mcp/instructions_builder.rb +17 -0
- data/lib/claude_memory/mcp/server.rb +22 -1
- data/lib/claude_memory/mcp/telemetry.rb +86 -0
- data/lib/claude_memory/mcp/tool_definitions.rb +86 -3
- data/lib/claude_memory/mcp/tools.rb +10 -0
- data/lib/claude_memory/publish.rb +40 -5
- data/lib/claude_memory/recall.rb +81 -0
- data/lib/claude_memory/resolve/predicate_policy.rb +63 -3
- data/lib/claude_memory/resolve/resolver.rb +43 -0
- data/lib/claude_memory/store/schema_manager.rb +1 -1
- data/lib/claude_memory/store/sqlite_store.rb +250 -1
- data/lib/claude_memory/store/store_manager.rb +50 -1
- data/lib/claude_memory/sweep/maintenance.rb +115 -1
- data/lib/claude_memory/sweep/sweeper.rb +3 -0
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +5 -0
- metadata +26 -8
- data/.claude/memory.sqlite3-shm +0 -0
- data/.claude/memory.sqlite3-wal +0 -0
|
@@ -2,37 +2,50 @@
|
|
|
2
2
|
|
|
3
3
|
module ClaudeMemory
|
|
4
4
|
module Embeddings
|
|
5
|
-
# Adapter wrapping fastembed-rb for high-quality local embeddings
|
|
6
|
-
#
|
|
5
|
+
# Adapter wrapping fastembed-rb for high-quality local embeddings.
|
|
6
|
+
# Supports any model available in fastembed-rb's SUPPORTED_MODELS.
|
|
7
7
|
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
8
|
+
# Model selection (in priority order):
|
|
9
|
+
# 1. Explicit model_name parameter
|
|
10
|
+
# 2. CLAUDE_MEMORY_EMBEDDING_MODEL env var
|
|
11
|
+
# 3. Default: BAAI/bge-small-en-v1.5 (384-dim, ~67MB ONNX)
|
|
12
|
+
#
|
|
13
|
+
# Dimensions are resolved from the ModelRegistry for known models,
|
|
14
|
+
# or probed from fastembed's ModelInfo for unknown models.
|
|
10
15
|
#
|
|
11
16
|
# Usage:
|
|
12
17
|
# adapter = FastembedAdapter.new
|
|
13
18
|
# query_vec = adapter.generate("What database?") # query encoding
|
|
14
19
|
# passage_vec = adapter.generate_passage("Uses PostgreSQL") # passage encoding
|
|
15
20
|
#
|
|
21
|
+
# # Use a larger model:
|
|
22
|
+
# adapter = FastembedAdapter.new(model_name: "BAAI/bge-base-en-v1.5")
|
|
23
|
+
# adapter.dimensions # => 768
|
|
24
|
+
#
|
|
16
25
|
class FastembedAdapter
|
|
17
|
-
EMBEDDING_DIM = 384
|
|
18
26
|
DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
|
|
19
27
|
|
|
28
|
+
attr_reader :model_name, :dimensions
|
|
29
|
+
|
|
20
30
|
def name = "fastembed"
|
|
21
31
|
|
|
22
|
-
def
|
|
32
|
+
def initialize(model_name: nil, env: ENV)
|
|
33
|
+
@model_name = model_name || env["CLAUDE_MEMORY_EMBEDDING_MODEL"] || DEFAULT_MODEL
|
|
34
|
+
@dimensions = resolve_dimensions(@model_name)
|
|
23
35
|
|
|
24
|
-
def initialize(model_name: DEFAULT_MODEL)
|
|
25
36
|
require "fastembed"
|
|
26
|
-
@model = Fastembed::TextEmbedding.new(model_name: model_name)
|
|
37
|
+
@model = Fastembed::TextEmbedding.new(model_name: @model_name)
|
|
38
|
+
|
|
39
|
+
# If dimensions weren't known from registry, probe from fastembed
|
|
40
|
+
@dimensions ||= probe_dimensions_from_fastembed
|
|
27
41
|
rescue LoadError
|
|
28
42
|
raise LoadError,
|
|
29
43
|
"fastembed gem is required for FastembedAdapter. Add `gem 'fastembed'` to your Gemfile."
|
|
30
44
|
end
|
|
31
45
|
|
|
32
46
|
# Generate query embedding (optimized for search queries)
|
|
33
|
-
# Compatible with Recall's embedding_generator interface
|
|
34
47
|
# @param text [String] query text to embed
|
|
35
|
-
# @return [Array<Float>] normalized
|
|
48
|
+
# @return [Array<Float>] normalized embedding vector
|
|
36
49
|
def generate(text)
|
|
37
50
|
return zero_vector if text.nil? || text.empty?
|
|
38
51
|
|
|
@@ -40,9 +53,8 @@ module ClaudeMemory
|
|
|
40
53
|
end
|
|
41
54
|
|
|
42
55
|
# Generate passage embedding (optimized for document/fact indexing)
|
|
43
|
-
# Use this when storing embeddings for facts
|
|
44
56
|
# @param text [String] passage text to embed
|
|
45
|
-
# @return [Array<Float>] normalized
|
|
57
|
+
# @return [Array<Float>] normalized embedding vector
|
|
46
58
|
def generate_passage(text)
|
|
47
59
|
return zero_vector if text.nil? || text.empty?
|
|
48
60
|
|
|
@@ -51,8 +63,26 @@ module ClaudeMemory
|
|
|
51
63
|
|
|
52
64
|
private
|
|
53
65
|
|
|
66
|
+
# Resolve dimensions from the model registry (fast, no I/O).
|
|
67
|
+
# Returns nil if the model isn't in the registry.
|
|
68
|
+
def resolve_dimensions(model)
|
|
69
|
+
ModelRegistry.dimensions_for(model)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Fallback: probe fastembed's SUPPORTED_MODELS for dimension info.
|
|
73
|
+
# This handles models added to fastembed-rb but not yet in our registry.
|
|
74
|
+
def probe_dimensions_from_fastembed
|
|
75
|
+
if defined?(Fastembed::SUPPORTED_MODELS)
|
|
76
|
+
info = Fastembed::SUPPORTED_MODELS[@model_name]
|
|
77
|
+
return info.dim if info
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Last resort: generate a test embedding and measure its size
|
|
81
|
+
@model.query_embed("dimension probe").first.size
|
|
82
|
+
end
|
|
83
|
+
|
|
54
84
|
def zero_vector
|
|
55
|
-
Array.new(
|
|
85
|
+
Array.new(@dimensions, 0.0)
|
|
56
86
|
end
|
|
57
87
|
end
|
|
58
88
|
end
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
|
|
4
|
+
module Embeddings
|
|
5
|
+
# Reads embedding metadata from the global and project databases.
# Returns structured data — no I/O formatting or stdout output.
#
# Used by EmbeddingsCommand to separate DB concerns from presentation.
class Inspector
  # Raw embedding metadata for one database. `dimensions` is the value
  # exactly as returned by `get_meta` (it is not coerced to an Integer).
  DatabaseState = Data.define(:label, :provider, :dimensions)

  # Outcome of comparing a database's stored dimensions against the
  # dimensions of the currently selected provider/model.
  # `status` is one of :match, :mismatch, or :fresh.
  DimensionResult = Data.define(:label, :status, :stored_dims, :stored_provider, :current_dims)

  # Collect the stored embedding provider/dimensions for each database file
  # that exists. Databases that have neither value recorded are omitted.
  # @return [Array<DatabaseState>]
  def database_states
    states = []

    with_each_store do |label, store|
      provider = store.get_meta("embedding_provider")
      dims = store.get_meta("embedding_dimensions")

      next unless provider || dims

      states << DatabaseState.new(label: label, provider: provider, dimensions: dims)
    end

    states
  end

  # Compare each existing database's stored dimensions with the dimensions
  # the given provider/model would produce.
  #
  # Status per database:
  # - :fresh    — no dimensions are stored
  # - :mismatch — current dimensions are known and differ from the stored ones
  # - :match    — otherwise (including when current dimensions are unknown)
  #
  # @param provider_name [String] provider used when model_name is nil
  # @param model_name [String, nil] explicit model name, if any
  # @return [Array<DimensionResult>]
  def dimension_checks(provider_name, model_name)
    # The lookup depends only on the arguments, so resolve it once rather
    # than once per database.
    current_dims = resolve_current_dimensions(provider_name, model_name)
    results = []

    with_each_store do |label, store|
      stored_dims = store.get_meta("embedding_dimensions")&.to_i
      stored_provider = store.get_meta("embedding_provider")

      results << build_dimension_result(label, stored_dims, stored_provider, current_dims)
    end

    results
  end

  private

  # Build the DimensionResult for a single database.
  def build_dimension_result(label, stored_dims, stored_provider, current_dims)
    unless stored_dims
      return DimensionResult.new(
        label: label,
        status: :fresh,
        stored_dims: nil,
        stored_provider: nil,
        current_dims: nil
      )
    end

    status = (current_dims && current_dims != stored_dims) ? :mismatch : :match

    DimensionResult.new(
      label: label,
      status: status,
      stored_dims: stored_dims,
      stored_provider: stored_provider,
      current_dims: current_dims
    )
  end

  # Dimensions for the explicit model when one is given, otherwise for the
  # provider's default model. Returns nil when the registry has no entry.
  def resolve_current_dimensions(provider_name, model_name)
    if model_name
      ModelRegistry.dimensions_for(model_name)
    else
      ModelRegistry.default_for_provider(provider_name)&.dimensions
    end
  end

  # Yield (label, store) for the global and project databases whose files
  # exist on disk. Each store is closed after the block returns or raises.
  def with_each_store
    config = Configuration.new

    [["global", config.global_db_path], ["project", config.project_db_path]].each do |label, path|
      next unless File.exist?(path)

      store = Store::SQLiteStore.new(path)
      begin
        yield label, store
      ensure
        store.close
      end
    end
  end
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
|
|
4
|
+
module Embeddings
|
|
5
|
+
# Registry of known embedding models with their properties.
# Enables model validation, dimension lookup, and discoverability.
#
# Models are registered by canonical name (e.g., "BAAI/bge-small-en-v1.5")
# with provider type, dimensions, and description.
#
# Usage:
#   info = ModelRegistry.find("BAAI/bge-small-en-v1.5")
#   # => a ModelInfo value (not a Hash); read fields via accessors:
#   info.provider   # => "fastembed"
#   info.dimensions # => 384
#
#   ModelRegistry.models_for_provider("fastembed")
#   # => [ModelInfo, ...]
#
class ModelRegistry
  # Immutable description of one embedding model.
  # size_mb is nil for API-hosted models; max_tokens is nil for tfidf.
  ModelInfo = Data.define(:name, :provider, :dimensions, :description, :size_mb, :max_tokens)

  # Known models with validated dimensions.
  # Fastembed models sourced from fastembed-rb SUPPORTED_MODELS.
  # API models sourced from provider documentation.
  MODELS = [
    # --- fastembed: local ONNX models (no API key needed) ---
    ModelInfo.new(
      name: "BAAI/bge-small-en-v1.5",
      provider: "fastembed",
      dimensions: 384,
      description: "Fast English embedding (default)",
      size_mb: 67,
      max_tokens: 512
    ),
    ModelInfo.new(
      name: "BAAI/bge-base-en-v1.5",
      provider: "fastembed",
      dimensions: 768,
      description: "Balanced English embedding, higher accuracy",
      size_mb: 210,
      max_tokens: 512
    ),
    ModelInfo.new(
      name: "BAAI/bge-large-en-v1.5",
      provider: "fastembed",
      dimensions: 1024,
      description: "High accuracy English embedding",
      size_mb: 1200,
      max_tokens: 512
    ),
    ModelInfo.new(
      name: "sentence-transformers/all-MiniLM-L6-v2",
      provider: "fastembed",
      dimensions: 384,
      description: "Lightweight general-purpose sentence embedding",
      size_mb: 90,
      max_tokens: 512
    ),
    ModelInfo.new(
      name: "intfloat/multilingual-e5-small",
      provider: "fastembed",
      dimensions: 384,
      description: "Multilingual embedding, 100+ languages",
      size_mb: 450,
      max_tokens: 512
    ),
    ModelInfo.new(
      name: "intfloat/multilingual-e5-base",
      provider: "fastembed",
      dimensions: 768,
      description: "Larger multilingual embedding",
      size_mb: 1110,
      max_tokens: 512
    ),
    ModelInfo.new(
      name: "nomic-ai/nomic-embed-text-v1.5",
      provider: "fastembed",
      dimensions: 768,
      description: "Long context (8192 tokens) with Matryoshka support",
      size_mb: 520,
      max_tokens: 8192
    ),
    ModelInfo.new(
      name: "jinaai/jina-embeddings-v2-small-en",
      provider: "fastembed",
      dimensions: 512,
      description: "Small English embedding, 8192 token context",
      size_mb: 60,
      max_tokens: 8192
    ),
    ModelInfo.new(
      name: "jinaai/jina-embeddings-v2-base-en",
      provider: "fastembed",
      dimensions: 768,
      description: "Base English embedding, 8192 token context",
      size_mb: 520,
      max_tokens: 8192
    ),

    # --- api: OpenAI-compatible endpoints ---
    ModelInfo.new(
      name: "text-embedding-3-small",
      provider: "api",
      dimensions: 1536,
      description: "OpenAI small embedding (default API model)",
      size_mb: nil,
      max_tokens: 8191
    ),
    ModelInfo.new(
      name: "text-embedding-3-large",
      provider: "api",
      dimensions: 3072,
      description: "OpenAI large embedding, highest accuracy",
      size_mb: nil,
      max_tokens: 8191
    ),
    ModelInfo.new(
      name: "text-embedding-ada-002",
      provider: "api",
      dimensions: 1536,
      description: "OpenAI legacy embedding",
      size_mb: nil,
      max_tokens: 8191
    ),
    ModelInfo.new(
      name: "voyage-3",
      provider: "api",
      dimensions: 1024,
      description: "Voyage AI general-purpose embedding",
      size_mb: nil,
      max_tokens: 32000
    ),
    ModelInfo.new(
      name: "voyage-3-lite",
      provider: "api",
      dimensions: 512,
      description: "Voyage AI lightweight embedding",
      size_mb: nil,
      max_tokens: 32000
    ),
    ModelInfo.new(
      name: "voyage-code-3",
      provider: "api",
      dimensions: 1024,
      description: "Voyage AI code-optimized embedding",
      size_mb: nil,
      max_tokens: 32000
    ),

    # --- tfidf: built-in, no dependencies ---
    ModelInfo.new(
      name: "tfidf",
      provider: "tfidf",
      dimensions: 384,
      description: "Built-in TF-IDF embedding (no dependencies)",
      size_mb: 0,
      max_tokens: nil
    )
  ].freeze

  # Name → ModelInfo index used by .find.
  MODELS_BY_NAME = MODELS.to_h { |model| [model.name, model] }.freeze

  # Default model name for each provider.
  DEFAULTS = {
    "fastembed" => "BAAI/bge-small-en-v1.5",
    "api" => "text-embedding-3-small",
    "tfidf" => "tfidf"
  }.freeze

  # Find a model by name.
  # @param name [String] model name (e.g., "BAAI/bge-small-en-v1.5")
  # @return [ModelInfo, nil] nil when the name is not registered
  def self.find(name)
    MODELS_BY_NAME[name]
  end

  # List all models for a given provider, in registration order.
  # @param provider [String] "fastembed", "api", or "tfidf"
  # @return [Array<ModelInfo>] empty when the provider is unknown
  def self.models_for_provider(provider)
    MODELS.select { |model| model.provider == provider }
  end

  # All known model names.
  # @return [Array<String>]
  def self.model_names
    MODELS.map(&:name)
  end

  # All provider names, in order of first appearance in MODELS.
  # @return [Array<String>]
  def self.providers
    MODELS.map(&:provider).uniq
  end

  # Look up dimensions for a model name. Returns nil if unknown.
  # @param name [String] model name
  # @return [Integer, nil]
  def self.dimensions_for(name)
    find(name)&.dimensions
  end

  # Return the default ModelInfo for a provider.
  # @param provider [String] "fastembed", "api", or "tfidf"
  # @return [ModelInfo, nil] nil when the provider has no default
  def self.default_for_provider(provider)
    default_name = DEFAULTS[provider]
    find(default_name) if default_name
  end
end
|
|
209
|
+
end
|
|
210
|
+
end
|
|
@@ -2,17 +2,43 @@
|
|
|
2
2
|
|
|
3
3
|
module ClaudeMemory
|
|
4
4
|
module Embeddings
|
|
5
|
-
# Resolves an embedding provider by name or ENV.
|
|
6
|
-
#
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
# Resolves an embedding provider by name, model, or ENV.
|
|
6
|
+
#
|
|
7
|
+
# Provider selection (in priority order):
|
|
8
|
+
# 1. Explicit name parameter
|
|
9
|
+
# 2. CLAUDE_MEMORY_EMBEDDING_PROVIDER env var
|
|
10
|
+
# 3. Default: "tfidf"
|
|
11
|
+
#
|
|
12
|
+
# Model selection is forwarded to the provider via CLAUDE_MEMORY_EMBEDDING_MODEL
|
|
13
|
+
# or the model parameter. The model can also imply the provider:
|
|
14
|
+
# - "BAAI/bge-small-en-v1.5" → fastembed
|
|
15
|
+
# - "text-embedding-3-small" → api
|
|
16
|
+
#
|
|
17
|
+
# Examples:
|
|
18
|
+
# Embeddings.resolve # tfidf default
|
|
19
|
+
# Embeddings.resolve("fastembed") # fastembed with default model
|
|
20
|
+
# Embeddings.resolve("fastembed", model: "BAAI/bge-base-en-v1.5")
|
|
21
|
+
# Embeddings.resolve(model: "text-embedding-3-small") # auto-detects api provider
|
|
22
|
+
#
|
|
23
|
+
def self.resolve(name = nil, model: nil, env: ENV)
|
|
24
|
+
model ||= env["CLAUDE_MEMORY_EMBEDDING_MODEL"]
|
|
25
|
+
provider = name || env["CLAUDE_MEMORY_EMBEDDING_PROVIDER"] || infer_provider(model) || "tfidf"
|
|
9
26
|
|
|
10
27
|
case provider
|
|
11
28
|
when "tfidf" then Generator.new
|
|
12
|
-
when "fastembed" then FastembedAdapter.new
|
|
13
|
-
when "api" then ApiAdapter.new(env: env)
|
|
29
|
+
when "fastembed" then FastembedAdapter.new(model_name: model, env: env)
|
|
30
|
+
when "api" then ApiAdapter.new(model: model, env: env)
|
|
14
31
|
else raise ArgumentError, "Unknown embedding provider: #{provider}. Available: tfidf, fastembed, api"
|
|
15
32
|
end
|
|
16
33
|
end
|
|
34
|
+
|
|
35
|
+
# Infer provider from a model name using the registry.
|
|
36
|
+
# Returns nil if the model is unknown.
|
|
37
|
+
def self.infer_provider(model)
|
|
38
|
+
return nil unless model
|
|
39
|
+
|
|
40
|
+
ModelRegistry.find(model)&.provider
|
|
41
|
+
end
|
|
42
|
+
private_class_method :infer_provider
|
|
17
43
|
end
|
|
18
44
|
end
|
|
@@ -4,7 +4,17 @@ require "digest"
|
|
|
4
4
|
|
|
5
5
|
module ClaudeMemory
|
|
6
6
|
module Ingest
|
|
7
|
+
# Delta-based transcript ingestion with cursor tracking.
|
|
8
|
+
# Reads new content from transcripts, extracts metadata and tool calls,
|
|
9
|
+
# sanitizes private tags, and persists to the content_items table with FTS indexing.
|
|
7
10
|
class Ingester
|
|
11
|
+
# @param store [Store::SQLiteStore] database store for persistence
|
|
12
|
+
# @param fts [Index::LexicalFTS, nil] full-text search index (default: new from store)
|
|
13
|
+
# @param env [Hash] environment variables
|
|
14
|
+
# @param metadata_extractor [MetadataExtractor, nil] extracts git branch, cwd, etc.
|
|
15
|
+
# @param tool_extractor [ToolExtractor, nil] extracts tool calls from transcript text
|
|
16
|
+
# @param tool_filter [ToolFilter, nil] filters irrelevant tool calls
|
|
17
|
+
# @param observation_compressor [ObservationCompressor, nil] compresses tool observations
|
|
8
18
|
def initialize(store, fts: nil, env: ENV, metadata_extractor: nil, tool_extractor: nil, tool_filter: nil, observation_compressor: nil)
|
|
9
19
|
@store = store
|
|
10
20
|
@fts = fts || Index::LexicalFTS.new(store)
|
|
@@ -15,6 +25,13 @@ module ClaudeMemory
|
|
|
15
25
|
@observation_compressor = observation_compressor || ObservationCompressor.new
|
|
16
26
|
end
|
|
17
27
|
|
|
28
|
+
# Ingest new content from a transcript file
|
|
29
|
+
# @param source [String] content source identifier (e.g., "hook", "cli")
|
|
30
|
+
# @param session_id [String] Claude session ID
|
|
31
|
+
# @param transcript_path [String] path to the transcript file
|
|
32
|
+
# @param project_path [String, nil] project root (defaults to detected path)
|
|
33
|
+
# @return [Hash] result with :status (:ingested, :skipped, or :no_change),
|
|
34
|
+
# :content_id, :bytes_read, and optional :reason
|
|
18
35
|
def ingest(source:, session_id:, transcript_path:, project_path: nil)
|
|
19
36
|
unless should_ingest?(transcript_path)
|
|
20
37
|
ClaudeMemory.logger.debug("ingest", message: "Skipped unchanged file", transcript_path: transcript_path)
|
|
@@ -66,6 +66,30 @@ module ClaudeMemory
|
|
|
66
66
|
end
|
|
67
67
|
end
|
|
68
68
|
|
|
69
|
+
# Reject a fact identified by "fact_id" or, when that is absent, by "docid".
#
# @param args [Hash] string-keyed tool arguments:
#   "scope" (defaults to "project"), "fact_id", "docid", "reason"
# @return [Hash] on success: :success, :scope, :fact_id, :conflicts_resolved
#   (passed through from the store's result) and :message; otherwise a hash
#   with a single :error message
def reject_fact(args)
  scope = args["scope"] || "project"
  store = get_store_for_scope(scope)
  return {error: "Database not available"} unless store

  fact_id = args["fact_id"]
  docid = args["docid"]

  if fact_id.nil? && docid
    row = store.find_fact_by_docid(docid)
    # A docid was supplied but matched nothing: say so, rather than claiming
    # that no identifier was given.
    return {error: "No fact found for docid #{docid} in #{scope} database"} unless row

    fact_id = row[:id]
  end
  return {error: "fact_id or docid required"} if fact_id.nil?

  result = store.reject_fact(fact_id, reason: args["reason"])
  return {error: "Fact #{fact_id} not found in #{scope} database"} if result.nil?

  {
    success: true,
    scope: scope,
    fact_id: fact_id,
    conflicts_resolved: result[:conflicts_resolved],
    message: "Fact rejected"
  }
end
|
|
92
|
+
|
|
69
93
|
def sweep_now(args)
|
|
70
94
|
scope = args["scope"] || "project"
|
|
71
95
|
store = get_store_for_scope(scope)
|
|
@@ -129,13 +129,16 @@ module ClaudeMemory
|
|
|
129
129
|
}
|
|
130
130
|
|
|
131
131
|
if active_facts > 0
|
|
132
|
-
|
|
132
|
+
all_predicates = store.db[:facts]
|
|
133
133
|
.where(status: "active")
|
|
134
134
|
.group_and_count(:predicate)
|
|
135
135
|
.order(Sequel.desc(:count))
|
|
136
|
-
.limit(10)
|
|
137
136
|
.all
|
|
138
137
|
.map { |row| {predicate: row[:predicate], count: row[:count]} }
|
|
138
|
+
|
|
139
|
+
stats[:top_predicates] = all_predicates.first(10)
|
|
140
|
+
stats[:predicates_known], stats[:predicates_novel] =
|
|
141
|
+
all_predicates.partition { |row| Resolve::PredicatePolicy.known_predicates.include?(row[:predicate]) }
|
|
139
142
|
end
|
|
140
143
|
|
|
141
144
|
stats
|
|
@@ -108,9 +108,26 @@ module ClaudeMemory
|
|
|
108
108
|
|
|
109
109
|
escalation = vec ? "recall_semantic, explain, or fact_graph" : "explain or fact_graph"
|
|
110
110
|
lines << "Start with fast tools (recall, decisions, conventions) before escalating to #{escalation}."
|
|
111
|
+
|
|
112
|
+
lines << proactive_recall_guidance
|
|
111
113
|
lines.join("\n")
|
|
112
114
|
end
|
|
113
115
|
|
|
116
|
+
# Directive guidance for when Claude should proactively consult memory.
|
|
117
|
+
# Validated by A/B testing: without these directives, Claude writes code
|
|
118
|
+
# using known-dangerous patterns (e.g. Sequel.sqlite) and hallucinates
|
|
119
|
+
# file paths instead of consulting memory for the correct structure.
|
|
120
|
+
def proactive_recall_guidance
|
|
121
|
+
<<~GUIDANCE.strip
|
|
122
|
+
IMPORTANT — check memory proactively in these situations:
|
|
123
|
+
- Before writing code: call memory.conventions to verify project patterns and avoid known gotchas
|
|
124
|
+
- Before explaining architecture: call memory.architecture for structural knowledge without file traversal
|
|
125
|
+
- Before refactoring: call memory.decisions to understand why past choices were made
|
|
126
|
+
- When asked about preferences: global facts store user environment and style preferences across all projects
|
|
127
|
+
- When adding to the codebase: recall which files and patterns to follow (memory knows correct paths and relationships)
|
|
128
|
+
GUIDANCE
|
|
129
|
+
end
|
|
130
|
+
|
|
114
131
|
def count_by_predicates(store, predicates)
|
|
115
132
|
store.facts
|
|
116
133
|
.where(status: "active")
|
|
@@ -5,20 +5,30 @@ require_relative "instructions_builder"
|
|
|
5
5
|
require_relative "query_guide"
|
|
6
6
|
require_relative "text_summary"
|
|
7
7
|
require_relative "error_classifier"
|
|
8
|
+
require_relative "telemetry"
|
|
8
9
|
|
|
9
10
|
module ClaudeMemory
|
|
10
11
|
module MCP
|
|
12
|
+
# MCP JSON-RPC server over stdio.
|
|
13
|
+
# Reads newline-delimited JSON requests from input, dispatches to Tools,
|
|
14
|
+
# and writes JSON responses to output.
|
|
11
15
|
class Server
|
|
12
16
|
PROTOCOL_VERSION = "2024-11-05"
|
|
13
17
|
|
|
18
|
+
# @param store_or_manager [Store::SQLiteStore, Store::StoreManager] database backend
|
|
19
|
+
# @param input [IO] input stream for JSON-RPC requests (default: $stdin)
|
|
20
|
+
# @param output [IO] output stream for JSON-RPC responses (default: $stdout)
|
|
14
21
|
def initialize(store_or_manager, input: $stdin, output: $stdout)
|
|
15
22
|
@store_or_manager = store_or_manager
|
|
16
23
|
@tools = Tools.new(store_or_manager)
|
|
24
|
+
@telemetry = Telemetry.new(store_or_manager)
|
|
17
25
|
@input = input
|
|
18
26
|
@output = output
|
|
19
27
|
@running = false
|
|
20
28
|
end
|
|
21
29
|
|
|
30
|
+
# Start the read loop, blocking until input is exhausted or stop is called.
|
|
31
|
+
# @return [void]
|
|
22
32
|
def run
|
|
23
33
|
@running = true
|
|
24
34
|
while @running
|
|
@@ -29,12 +39,15 @@ module ClaudeMemory
|
|
|
29
39
|
end
|
|
30
40
|
end
|
|
31
41
|
|
|
42
|
+
# Signal the read loop to exit after the current message.
|
|
43
|
+
# @return [void]
|
|
32
44
|
def stop
|
|
33
45
|
@running = false
|
|
34
46
|
end
|
|
35
47
|
|
|
36
48
|
private
|
|
37
49
|
|
|
50
|
+
# @return [void]
|
|
38
51
|
def handle_message(line)
|
|
39
52
|
return if line.empty?
|
|
40
53
|
|
|
@@ -51,6 +64,7 @@ module ClaudeMemory
|
|
|
51
64
|
end
|
|
52
65
|
end
|
|
53
66
|
|
|
67
|
+
# @return [Hash, nil] JSON-RPC response hash, or nil for notifications
|
|
54
68
|
def process_request(request)
|
|
55
69
|
id = request["id"]
|
|
56
70
|
method = request["method"]
|
|
@@ -74,6 +88,7 @@ module ClaudeMemory
|
|
|
74
88
|
end
|
|
75
89
|
end
|
|
76
90
|
|
|
91
|
+
# @return [Hash] initialize response with capabilities and server info
|
|
77
92
|
def handle_initialize(id, _params)
|
|
78
93
|
{
|
|
79
94
|
jsonrpc: "2.0",
|
|
@@ -93,6 +108,7 @@ module ClaudeMemory
|
|
|
93
108
|
}
|
|
94
109
|
end
|
|
95
110
|
|
|
111
|
+
# @return [Hash] list of available tool definitions
|
|
96
112
|
def handle_tools_list(id)
|
|
97
113
|
{
|
|
98
114
|
jsonrpc: "2.0",
|
|
@@ -103,11 +119,14 @@ module ClaudeMemory
|
|
|
103
119
|
}
|
|
104
120
|
end
|
|
105
121
|
|
|
122
|
+
# @return [Hash] tool result with dual content/structuredContent
|
|
106
123
|
def handle_tools_call(id, params)
|
|
107
124
|
name = params["name"]
|
|
108
125
|
arguments = params["arguments"] || {}
|
|
109
126
|
|
|
110
|
-
result = @
|
|
127
|
+
result = @telemetry.record(name, arguments) do
|
|
128
|
+
@tools.call(name, arguments)
|
|
129
|
+
end
|
|
111
130
|
|
|
112
131
|
# Release database connections after each tool call
|
|
113
132
|
# This prevents lock contention with hook commands
|
|
@@ -128,6 +147,7 @@ module ClaudeMemory
|
|
|
128
147
|
}
|
|
129
148
|
end
|
|
130
149
|
|
|
150
|
+
# @return [Hash] list of available prompt definitions
|
|
131
151
|
def handle_prompts_list(id)
|
|
132
152
|
{
|
|
133
153
|
jsonrpc: "2.0",
|
|
@@ -138,6 +158,7 @@ module ClaudeMemory
|
|
|
138
158
|
}
|
|
139
159
|
end
|
|
140
160
|
|
|
161
|
+
# @return [Hash] prompt content or error if unknown
|
|
141
162
|
def handle_prompts_get(id, params)
|
|
142
163
|
name = params&.dig("name")
|
|
143
164
|
|