claude_memory 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/memory.sqlite3 +0 -0
- data/.claude/rules/claude_memory.generated.md +94 -2
- data/.claude/settings.json +30 -52
- data/.claude/settings.local.json +3 -1
- data/.claude/skills/release/SKILL.md +168 -0
- data/.claude/skills/upgrade-dependencies/SKILL.md +154 -0
- data/.claude-plugin/marketplace.json +2 -2
- data/.claude-plugin/plugin.json +3 -3
- data/.claude-plugin/scripts/hook-runner.sh +14 -0
- data/.claude-plugin/scripts/serve-mcp.sh +14 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +47 -0
- data/CLAUDE.md +31 -17
- data/README.md +35 -0
- data/db/migrations/013_add_mcp_tool_calls.rb +26 -0
- data/db/migrations/014_canonicalize_predicates.rb +30 -0
- data/docs/improvements.md +58 -20
- data/docs/influence/claude-mem.md +1 -0
- data/docs/influence/claude-supermemory.md +1 -0
- data/docs/influence/episodic-memory.md +1 -0
- data/docs/influence/grepai.md +1 -0
- data/docs/influence/kbs.md +1 -0
- data/docs/influence/lossless-claw.md +1 -0
- data/docs/influence/qmd.md +1 -0
- data/lib/claude_memory/commands/completion_command.rb +1 -31
- data/lib/claude_memory/commands/embeddings_command.rb +198 -0
- data/lib/claude_memory/commands/help_command.rb +8 -1
- data/lib/claude_memory/commands/registry.rb +47 -34
- data/lib/claude_memory/commands/reject_command.rb +62 -0
- data/lib/claude_memory/commands/restore_command.rb +77 -0
- data/lib/claude_memory/commands/skills/distill-transcripts.md +5 -1
- data/lib/claude_memory/commands/stats_command.rb +98 -2
- data/lib/claude_memory/configuration.rb +14 -1
- data/lib/claude_memory/distill/json_schema.md +8 -4
- data/lib/claude_memory/distill/null_distiller.rb +2 -0
- data/lib/claude_memory/domain/entity.rb +13 -1
- data/lib/claude_memory/domain/fact.rb +26 -2
- data/lib/claude_memory/embeddings/api_adapter.rb +5 -4
- data/lib/claude_memory/embeddings/fastembed_adapter.rb +43 -13
- data/lib/claude_memory/embeddings/inspector.rb +91 -0
- data/lib/claude_memory/embeddings/model_registry.rb +210 -0
- data/lib/claude_memory/embeddings/resolver.rb +32 -6
- data/lib/claude_memory/ingest/ingester.rb +17 -0
- data/lib/claude_memory/mcp/handlers/management_handlers.rb +24 -0
- data/lib/claude_memory/mcp/handlers/stats_handlers.rb +5 -2
- data/lib/claude_memory/mcp/instructions_builder.rb +17 -0
- data/lib/claude_memory/mcp/server.rb +30 -3
- data/lib/claude_memory/mcp/telemetry.rb +86 -0
- data/lib/claude_memory/mcp/tool_definitions.rb +86 -3
- data/lib/claude_memory/mcp/tools.rb +10 -0
- data/lib/claude_memory/publish.rb +40 -5
- data/lib/claude_memory/recall.rb +81 -0
- data/lib/claude_memory/resolve/predicate_policy.rb +63 -3
- data/lib/claude_memory/resolve/resolver.rb +43 -0
- data/lib/claude_memory/store/schema_manager.rb +1 -1
- data/lib/claude_memory/store/sqlite_store.rb +250 -1
- data/lib/claude_memory/store/store_manager.rb +50 -1
- data/lib/claude_memory/sweep/maintenance.rb +115 -1
- data/lib/claude_memory/sweep/sweeper.rb +3 -0
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +5 -0
- metadata +27 -8
- data/.claude/memory.sqlite3-shm +0 -0
- data/.claude/memory.sqlite3-wal +0 -0
|
@@ -66,13 +66,17 @@ This document defines the schema for extracted knowledge from transcripts.
|
|
|
66
66
|
- **conflict**: `{kind: "conflict", value: true}` - indicates contradictory information detected
|
|
67
67
|
- **time_boundary**: `{kind: "time_boundary", value: "2024-01-15"}` - temporal boundary marker
|
|
68
68
|
|
|
69
|
-
## Predicate Types
|
|
69
|
+
## Predicate Types
|
|
70
|
+
|
|
71
|
+
Canonical vocabulary defined in `lib/claude_memory/resolve/predicate_policy.rb`.
|
|
70
72
|
|
|
71
73
|
| Predicate | Cardinality | Exclusive |
|
|
72
74
|
|-----------|-------------|-----------|
|
|
73
75
|
| convention | multi | no |
|
|
74
|
-
| decision | multi
|
|
75
|
-
|
|
|
76
|
+
| decision | multi | no |
|
|
77
|
+
| architecture | multi | no |
|
|
78
|
+
| uses_framework | multi | no |
|
|
79
|
+
| uses_language | multi | no |
|
|
76
80
|
| uses_database | single | yes |
|
|
77
|
-
| uses_framework | single | yes |
|
|
78
81
|
| deployment_platform | single | yes |
|
|
82
|
+
| auth_method | single | yes |
|
|
@@ -73,6 +73,8 @@ module ClaudeMemory
|
|
|
73
73
|
facts << build_fact("uses_framework", entity[:name], text, scope_hint)
|
|
74
74
|
when "platform"
|
|
75
75
|
facts << build_fact("deployment_platform", entity[:name], text, scope_hint)
|
|
76
|
+
when "language"
|
|
77
|
+
facts << build_fact("uses_language", entity[:name], text, scope_hint)
|
|
76
78
|
end
|
|
77
79
|
end
|
|
78
80
|
|
|
@@ -2,10 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
module ClaudeMemory
|
|
4
4
|
module Domain
|
|
5
|
-
# Domain model representing an entity (database, framework, person, etc.)
|
|
5
|
+
# Domain model representing an entity (database, framework, person, etc.).
|
|
6
|
+
# Instances are immutable (frozen).
|
|
6
7
|
class Entity
|
|
7
8
|
attr_reader :id, :type, :canonical_name, :slug, :created_at
|
|
8
9
|
|
|
10
|
+
# @param attributes [Hash] entity attributes
|
|
11
|
+
# @option attributes [Integer] :id database primary key
|
|
12
|
+
# @option attributes [String] :type entity category (required, e.g. "database", "framework", "person")
|
|
13
|
+
# @option attributes [String] :canonical_name display name (required)
|
|
14
|
+
# @option attributes [String] :slug URL-safe identifier (required)
|
|
15
|
+
# @option attributes [String] :created_at ISO 8601 creation timestamp
|
|
16
|
+
# @raise [ArgumentError] if type, canonical_name, or slug is blank
|
|
9
17
|
def initialize(attributes)
|
|
10
18
|
@id = attributes[:id]
|
|
11
19
|
@type = attributes[:type]
|
|
@@ -17,18 +25,22 @@ module ClaudeMemory
|
|
|
17
25
|
freeze
|
|
18
26
|
end
|
|
19
27
|
|
|
28
|
+
# @return [Boolean] true when type is "database"
|
|
20
29
|
def database?
|
|
21
30
|
type == "database"
|
|
22
31
|
end
|
|
23
32
|
|
|
33
|
+
# @return [Boolean] true when type is "framework"
|
|
24
34
|
def framework?
|
|
25
35
|
type == "framework"
|
|
26
36
|
end
|
|
27
37
|
|
|
38
|
+
# @return [Boolean] true when type is "person"
|
|
28
39
|
def person?
|
|
29
40
|
type == "person"
|
|
30
41
|
end
|
|
31
42
|
|
|
43
|
+
# @return [Hash] all attributes as a plain hash
|
|
32
44
|
def to_h
|
|
33
45
|
{
|
|
34
46
|
id: id,
|
|
@@ -2,13 +2,27 @@
|
|
|
2
2
|
|
|
3
3
|
module ClaudeMemory
|
|
4
4
|
module Domain
|
|
5
|
-
# Domain model representing a fact in the memory system
|
|
6
|
-
# Encapsulates business logic and validation
|
|
5
|
+
# Domain model representing a fact in the memory system.
|
|
6
|
+
# Encapsulates business logic and validation. Instances are immutable (frozen).
|
|
7
7
|
class Fact
|
|
8
8
|
attr_reader :id, :docid, :subject_name, :predicate, :object_literal,
|
|
9
9
|
:status, :confidence, :scope, :project_path,
|
|
10
10
|
:valid_from, :valid_to, :created_at
|
|
11
11
|
|
|
12
|
+
# @param attributes [Hash] fact attributes
|
|
13
|
+
# @option attributes [Integer] :id database primary key
|
|
14
|
+
# @option attributes [Integer] :docid FTS document id
|
|
15
|
+
# @option attributes [String] :subject_name entity name of the subject
|
|
16
|
+
# @option attributes [String] :predicate relationship type (required)
|
|
17
|
+
# @option attributes [String] :object_literal literal value (required)
|
|
18
|
+
# @option attributes [String] :status one of "active", "superseded", "rejected", "disputed"
|
|
19
|
+
# @option attributes [Float] :confidence score between 0 and 1 (default: 1.0)
|
|
20
|
+
# @option attributes [String] :scope "project" or "global" (default: "project")
|
|
21
|
+
# @option attributes [String] :project_path path for project-scoped facts
|
|
22
|
+
# @option attributes [String] :valid_from ISO 8601 start of validity
|
|
23
|
+
# @option attributes [String] :valid_to ISO 8601 end of validity (nil if current)
|
|
24
|
+
# @option attributes [String] :created_at ISO 8601 creation timestamp
|
|
25
|
+
# @raise [ArgumentError] if predicate, object_literal, or confidence is invalid
|
|
12
26
|
def initialize(attributes)
|
|
13
27
|
@id = attributes[:id]
|
|
14
28
|
@docid = attributes[:docid]
|
|
@@ -27,22 +41,32 @@ module ClaudeMemory
|
|
|
27
41
|
freeze
|
|
28
42
|
end
|
|
29
43
|
|
|
44
|
+
# @return [Boolean] true when status is "active"
|
|
30
45
|
def active?
|
|
31
46
|
status == "active"
|
|
32
47
|
end
|
|
33
48
|
|
|
49
|
+
# @return [Boolean] true when status is "superseded"
|
|
34
50
|
def superseded?
|
|
35
51
|
status == "superseded"
|
|
36
52
|
end
|
|
37
53
|
|
|
54
|
+
# @return [Boolean] true when status is "rejected"
|
|
55
|
+
def rejected?
|
|
56
|
+
status == "rejected"
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# @return [Boolean] true when scope is "global"
|
|
38
60
|
def global?
|
|
39
61
|
scope == "global"
|
|
40
62
|
end
|
|
41
63
|
|
|
64
|
+
# @return [Boolean] true when scope is "project"
|
|
42
65
|
def project?
|
|
43
66
|
scope == "project"
|
|
44
67
|
end
|
|
45
68
|
|
|
69
|
+
# @return [Hash] all attributes as a plain hash
|
|
46
70
|
def to_h
|
|
47
71
|
{
|
|
48
72
|
id: id,
|
|
@@ -22,19 +22,20 @@ module ClaudeMemory
|
|
|
22
22
|
DEFAULT_API_URL = "https://api.openai.com/v1/embeddings"
|
|
23
23
|
DEFAULT_MODEL = "text-embedding-3-small"
|
|
24
24
|
|
|
25
|
-
def initialize(env: ENV)
|
|
25
|
+
def initialize(model: nil, env: ENV)
|
|
26
26
|
@api_key = env["CLAUDE_MEMORY_EMBEDDING_API_KEY"] || env["OPENAI_API_KEY"]
|
|
27
27
|
@api_url = env["CLAUDE_MEMORY_EMBEDDING_API_URL"] || DEFAULT_API_URL
|
|
28
|
-
@model = env["CLAUDE_MEMORY_EMBEDDING_MODEL"] || DEFAULT_MODEL
|
|
28
|
+
@model = model || env["CLAUDE_MEMORY_EMBEDDING_MODEL"] || DEFAULT_MODEL
|
|
29
|
+
@known_dimensions = ModelRegistry.dimensions_for(@model)
|
|
29
30
|
|
|
30
31
|
raise ArgumentError, "Set CLAUDE_MEMORY_EMBEDDING_API_KEY or OPENAI_API_KEY" unless @api_key
|
|
31
32
|
end
|
|
32
33
|
|
|
33
34
|
def name = "api"
|
|
34
35
|
|
|
35
|
-
# Dimensions
|
|
36
|
+
# Dimensions are resolved from the registry if known; otherwise fetched lazily from the first API response.
|
|
36
37
|
def dimensions
|
|
37
|
-
@dimensions ||= fetch_dimensions
|
|
38
|
+
@dimensions ||= @known_dimensions || fetch_dimensions
|
|
38
39
|
end
|
|
39
40
|
|
|
40
41
|
# Generate embedding for a query text.
|
|
@@ -2,37 +2,50 @@
|
|
|
2
2
|
|
|
3
3
|
module ClaudeMemory
|
|
4
4
|
module Embeddings
|
|
5
|
-
# Adapter wrapping fastembed-rb for high-quality local embeddings
|
|
6
|
-
#
|
|
5
|
+
# Adapter wrapping fastembed-rb for high-quality local embeddings.
|
|
6
|
+
# Supports any model available in fastembed-rb's SUPPORTED_MODELS.
|
|
7
7
|
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
8
|
+
# Model selection (in priority order):
|
|
9
|
+
# 1. Explicit model_name parameter
|
|
10
|
+
# 2. CLAUDE_MEMORY_EMBEDDING_MODEL env var
|
|
11
|
+
# 3. Default: BAAI/bge-small-en-v1.5 (384-dim, ~67MB ONNX)
|
|
12
|
+
#
|
|
13
|
+
# Dimensions are resolved from the ModelRegistry for known models,
|
|
14
|
+
# or probed from fastembed's SUPPORTED_MODELS (falling back to a test embedding) for unknown models.
|
|
10
15
|
#
|
|
11
16
|
# Usage:
|
|
12
17
|
# adapter = FastembedAdapter.new
|
|
13
18
|
# query_vec = adapter.generate("What database?") # query encoding
|
|
14
19
|
# passage_vec = adapter.generate_passage("Uses PostgreSQL") # passage encoding
|
|
15
20
|
#
|
|
21
|
+
# # Use a larger model:
|
|
22
|
+
# adapter = FastembedAdapter.new(model_name: "BAAI/bge-base-en-v1.5")
|
|
23
|
+
# adapter.dimensions # => 768
|
|
24
|
+
#
|
|
16
25
|
class FastembedAdapter
|
|
17
|
-
EMBEDDING_DIM = 384
|
|
18
26
|
DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
|
|
19
27
|
|
|
28
|
+
attr_reader :model_name, :dimensions
|
|
29
|
+
|
|
20
30
|
def name = "fastembed"
|
|
21
31
|
|
|
22
|
-
def
|
|
32
|
+
def initialize(model_name: nil, env: ENV)
|
|
33
|
+
@model_name = model_name || env["CLAUDE_MEMORY_EMBEDDING_MODEL"] || DEFAULT_MODEL
|
|
34
|
+
@dimensions = resolve_dimensions(@model_name)
|
|
23
35
|
|
|
24
|
-
def initialize(model_name: DEFAULT_MODEL)
|
|
25
36
|
require "fastembed"
|
|
26
|
-
@model = Fastembed::TextEmbedding.new(model_name: model_name)
|
|
37
|
+
@model = Fastembed::TextEmbedding.new(model_name: @model_name)
|
|
38
|
+
|
|
39
|
+
# If dimensions weren't known from registry, probe from fastembed
|
|
40
|
+
@dimensions ||= probe_dimensions_from_fastembed
|
|
27
41
|
rescue LoadError
|
|
28
42
|
raise LoadError,
|
|
29
43
|
"fastembed gem is required for FastembedAdapter. Add `gem 'fastembed'` to your Gemfile."
|
|
30
44
|
end
|
|
31
45
|
|
|
32
46
|
# Generate query embedding (optimized for search queries)
|
|
33
|
-
# Compatible with Recall's embedding_generator interface
|
|
34
47
|
# @param text [String] query text to embed
|
|
35
|
-
# @return [Array<Float>] normalized
|
|
48
|
+
# @return [Array<Float>] normalized embedding vector
|
|
36
49
|
def generate(text)
|
|
37
50
|
return zero_vector if text.nil? || text.empty?
|
|
38
51
|
|
|
@@ -40,9 +53,8 @@ module ClaudeMemory
|
|
|
40
53
|
end
|
|
41
54
|
|
|
42
55
|
# Generate passage embedding (optimized for document/fact indexing)
|
|
43
|
-
# Use this when storing embeddings for facts
|
|
44
56
|
# @param text [String] passage text to embed
|
|
45
|
-
# @return [Array<Float>] normalized
|
|
57
|
+
# @return [Array<Float>] normalized embedding vector
|
|
46
58
|
def generate_passage(text)
|
|
47
59
|
return zero_vector if text.nil? || text.empty?
|
|
48
60
|
|
|
@@ -51,8 +63,26 @@ module ClaudeMemory
|
|
|
51
63
|
|
|
52
64
|
private
|
|
53
65
|
|
|
66
|
+
# Resolve dimensions from the model registry (fast, no I/O).
|
|
67
|
+
# Returns nil if the model isn't in the registry.
|
|
68
|
+
def resolve_dimensions(model)
|
|
69
|
+
ModelRegistry.dimensions_for(model)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Fallback: probe fastembed's SUPPORTED_MODELS for dimension info.
|
|
73
|
+
# This handles models added to fastembed-rb but not yet in our registry.
|
|
74
|
+
def probe_dimensions_from_fastembed
|
|
75
|
+
if defined?(Fastembed::SUPPORTED_MODELS)
|
|
76
|
+
info = Fastembed::SUPPORTED_MODELS[@model_name]
|
|
77
|
+
return info.dim if info
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Last resort: generate a test embedding and measure its size
|
|
81
|
+
@model.query_embed("dimension probe").first.size
|
|
82
|
+
end
|
|
83
|
+
|
|
54
84
|
def zero_vector
|
|
55
|
-
Array.new(
|
|
85
|
+
Array.new(@dimensions, 0.0)
|
|
56
86
|
end
|
|
57
87
|
end
|
|
58
88
|
end
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
|
|
4
|
+
module Embeddings
|
|
5
|
+
# Reads embedding metadata from global and project databases.
|
|
6
|
+
# Returns structured data — performs no presentation formatting and writes nothing to stdout.
|
|
7
|
+
#
|
|
8
|
+
# Used by EmbeddingsCommand to separate DB concerns from presentation.
|
|
9
|
+
class Inspector
|
|
10
|
+
DatabaseState = Data.define(:label, :provider, :dimensions)
|
|
11
|
+
DimensionResult = Data.define(:label, :status, :stored_dims, :stored_provider, :current_dims)
|
|
12
|
+
|
|
13
|
+
def database_states
|
|
14
|
+
results = []
|
|
15
|
+
|
|
16
|
+
with_each_store do |label, store|
|
|
17
|
+
provider = store.get_meta("embedding_provider")
|
|
18
|
+
dims = store.get_meta("embedding_dimensions")
|
|
19
|
+
|
|
20
|
+
next unless provider || dims
|
|
21
|
+
|
|
22
|
+
results << DatabaseState.new(label: label, provider: provider, dimensions: dims)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
results
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def dimension_checks(provider_name, model_name)
|
|
29
|
+
results = []
|
|
30
|
+
|
|
31
|
+
with_each_store do |label, store|
|
|
32
|
+
stored_dims = store.get_meta("embedding_dimensions")&.to_i
|
|
33
|
+
stored_provider = store.get_meta("embedding_provider")
|
|
34
|
+
|
|
35
|
+
if stored_dims
|
|
36
|
+
current_dims = resolve_current_dimensions(provider_name, model_name)
|
|
37
|
+
|
|
38
|
+
status = if current_dims && current_dims != stored_dims
|
|
39
|
+
:mismatch
|
|
40
|
+
else
|
|
41
|
+
:match
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
results << DimensionResult.new(
|
|
45
|
+
label: label,
|
|
46
|
+
status: status,
|
|
47
|
+
stored_dims: stored_dims,
|
|
48
|
+
stored_provider: stored_provider,
|
|
49
|
+
current_dims: current_dims
|
|
50
|
+
)
|
|
51
|
+
else
|
|
52
|
+
results << DimensionResult.new(
|
|
53
|
+
label: label,
|
|
54
|
+
status: :fresh,
|
|
55
|
+
stored_dims: nil,
|
|
56
|
+
stored_provider: nil,
|
|
57
|
+
current_dims: nil
|
|
58
|
+
)
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
results
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
private
|
|
66
|
+
|
|
67
|
+
def resolve_current_dimensions(provider_name, model_name)
|
|
68
|
+
if model_name
|
|
69
|
+
ModelRegistry.dimensions_for(model_name)
|
|
70
|
+
else
|
|
71
|
+
ModelRegistry.default_for_provider(provider_name)&.dimensions
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def with_each_store
|
|
76
|
+
config = Configuration.new
|
|
77
|
+
|
|
78
|
+
[["global", config.global_db_path], ["project", config.project_db_path]].each do |label, path|
|
|
79
|
+
next unless File.exist?(path)
|
|
80
|
+
|
|
81
|
+
store = Store::SQLiteStore.new(path)
|
|
82
|
+
begin
|
|
83
|
+
yield label, store
|
|
84
|
+
ensure
|
|
85
|
+
store.close
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
|
|
4
|
+
module Embeddings
|
|
5
|
+
# Registry of known embedding models with their properties.
|
|
6
|
+
# Enables model validation, dimension lookup, and discoverability.
|
|
7
|
+
#
|
|
8
|
+
# Models are registered by canonical name (e.g., "BAAI/bge-small-en-v1.5")
|
|
9
|
+
# with provider type, dimensions, and description.
|
|
10
|
+
#
|
|
11
|
+
# Usage:
|
|
12
|
+
# ModelRegistry.find("BAAI/bge-small-en-v1.5")
|
|
13
|
+
# # => #<data ModelInfo name="BAAI/bge-small-en-v1.5", provider="fastembed", dimensions=384, ...>
|
|
14
|
+
#
|
|
15
|
+
# ModelRegistry.models_for_provider("fastembed")
|
|
16
|
+
# # => [...]
|
|
17
|
+
#
|
|
18
|
+
class ModelRegistry
|
|
19
|
+
ModelInfo = Data.define(:name, :provider, :dimensions, :description, :size_mb, :max_tokens)
|
|
20
|
+
|
|
21
|
+
# Known models with validated dimensions.
|
|
22
|
+
# Fastembed models sourced from fastembed-rb SUPPORTED_MODELS.
|
|
23
|
+
# API models sourced from provider documentation.
|
|
24
|
+
MODELS = [
|
|
25
|
+
# --- fastembed: local ONNX models (no API key needed) ---
|
|
26
|
+
ModelInfo.new(
|
|
27
|
+
name: "BAAI/bge-small-en-v1.5",
|
|
28
|
+
provider: "fastembed",
|
|
29
|
+
dimensions: 384,
|
|
30
|
+
description: "Fast English embedding (default)",
|
|
31
|
+
size_mb: 67,
|
|
32
|
+
max_tokens: 512
|
|
33
|
+
),
|
|
34
|
+
ModelInfo.new(
|
|
35
|
+
name: "BAAI/bge-base-en-v1.5",
|
|
36
|
+
provider: "fastembed",
|
|
37
|
+
dimensions: 768,
|
|
38
|
+
description: "Balanced English embedding, higher accuracy",
|
|
39
|
+
size_mb: 210,
|
|
40
|
+
max_tokens: 512
|
|
41
|
+
),
|
|
42
|
+
ModelInfo.new(
|
|
43
|
+
name: "BAAI/bge-large-en-v1.5",
|
|
44
|
+
provider: "fastembed",
|
|
45
|
+
dimensions: 1024,
|
|
46
|
+
description: "High accuracy English embedding",
|
|
47
|
+
size_mb: 1200,
|
|
48
|
+
max_tokens: 512
|
|
49
|
+
),
|
|
50
|
+
ModelInfo.new(
|
|
51
|
+
name: "sentence-transformers/all-MiniLM-L6-v2",
|
|
52
|
+
provider: "fastembed",
|
|
53
|
+
dimensions: 384,
|
|
54
|
+
description: "Lightweight general-purpose sentence embedding",
|
|
55
|
+
size_mb: 90,
|
|
56
|
+
max_tokens: 512
|
|
57
|
+
),
|
|
58
|
+
ModelInfo.new(
|
|
59
|
+
name: "intfloat/multilingual-e5-small",
|
|
60
|
+
provider: "fastembed",
|
|
61
|
+
dimensions: 384,
|
|
62
|
+
description: "Multilingual embedding, 100+ languages",
|
|
63
|
+
size_mb: 450,
|
|
64
|
+
max_tokens: 512
|
|
65
|
+
),
|
|
66
|
+
ModelInfo.new(
|
|
67
|
+
name: "intfloat/multilingual-e5-base",
|
|
68
|
+
provider: "fastembed",
|
|
69
|
+
dimensions: 768,
|
|
70
|
+
description: "Larger multilingual embedding",
|
|
71
|
+
size_mb: 1110,
|
|
72
|
+
max_tokens: 512
|
|
73
|
+
),
|
|
74
|
+
ModelInfo.new(
|
|
75
|
+
name: "nomic-ai/nomic-embed-text-v1.5",
|
|
76
|
+
provider: "fastembed",
|
|
77
|
+
dimensions: 768,
|
|
78
|
+
description: "Long context (8192 tokens) with Matryoshka support",
|
|
79
|
+
size_mb: 520,
|
|
80
|
+
max_tokens: 8192
|
|
81
|
+
),
|
|
82
|
+
ModelInfo.new(
|
|
83
|
+
name: "jinaai/jina-embeddings-v2-small-en",
|
|
84
|
+
provider: "fastembed",
|
|
85
|
+
dimensions: 512,
|
|
86
|
+
description: "Small English embedding, 8192 token context",
|
|
87
|
+
size_mb: 60,
|
|
88
|
+
max_tokens: 8192
|
|
89
|
+
),
|
|
90
|
+
ModelInfo.new(
|
|
91
|
+
name: "jinaai/jina-embeddings-v2-base-en",
|
|
92
|
+
provider: "fastembed",
|
|
93
|
+
dimensions: 768,
|
|
94
|
+
description: "Base English embedding, 8192 token context",
|
|
95
|
+
size_mb: 520,
|
|
96
|
+
max_tokens: 8192
|
|
97
|
+
),
|
|
98
|
+
|
|
99
|
+
# --- api: OpenAI-compatible endpoints ---
|
|
100
|
+
ModelInfo.new(
|
|
101
|
+
name: "text-embedding-3-small",
|
|
102
|
+
provider: "api",
|
|
103
|
+
dimensions: 1536,
|
|
104
|
+
description: "OpenAI small embedding (default API model)",
|
|
105
|
+
size_mb: nil,
|
|
106
|
+
max_tokens: 8191
|
|
107
|
+
),
|
|
108
|
+
ModelInfo.new(
|
|
109
|
+
name: "text-embedding-3-large",
|
|
110
|
+
provider: "api",
|
|
111
|
+
dimensions: 3072,
|
|
112
|
+
description: "OpenAI large embedding, highest accuracy",
|
|
113
|
+
size_mb: nil,
|
|
114
|
+
max_tokens: 8191
|
|
115
|
+
),
|
|
116
|
+
ModelInfo.new(
|
|
117
|
+
name: "text-embedding-ada-002",
|
|
118
|
+
provider: "api",
|
|
119
|
+
dimensions: 1536,
|
|
120
|
+
description: "OpenAI legacy embedding",
|
|
121
|
+
size_mb: nil,
|
|
122
|
+
max_tokens: 8191
|
|
123
|
+
),
|
|
124
|
+
ModelInfo.new(
|
|
125
|
+
name: "voyage-3",
|
|
126
|
+
provider: "api",
|
|
127
|
+
dimensions: 1024,
|
|
128
|
+
description: "Voyage AI general-purpose embedding",
|
|
129
|
+
size_mb: nil,
|
|
130
|
+
max_tokens: 32000
|
|
131
|
+
),
|
|
132
|
+
ModelInfo.new(
|
|
133
|
+
name: "voyage-3-lite",
|
|
134
|
+
provider: "api",
|
|
135
|
+
dimensions: 512,
|
|
136
|
+
description: "Voyage AI lightweight embedding",
|
|
137
|
+
size_mb: nil,
|
|
138
|
+
max_tokens: 32000
|
|
139
|
+
),
|
|
140
|
+
ModelInfo.new(
|
|
141
|
+
name: "voyage-code-3",
|
|
142
|
+
provider: "api",
|
|
143
|
+
dimensions: 1024,
|
|
144
|
+
description: "Voyage AI code-optimized embedding",
|
|
145
|
+
size_mb: nil,
|
|
146
|
+
max_tokens: 32000
|
|
147
|
+
),
|
|
148
|
+
|
|
149
|
+
# --- tfidf: built-in, no dependencies ---
|
|
150
|
+
ModelInfo.new(
|
|
151
|
+
name: "tfidf",
|
|
152
|
+
provider: "tfidf",
|
|
153
|
+
dimensions: 384,
|
|
154
|
+
description: "Built-in TF-IDF embedding (no dependencies)",
|
|
155
|
+
size_mb: 0,
|
|
156
|
+
max_tokens: nil
|
|
157
|
+
)
|
|
158
|
+
].freeze
|
|
159
|
+
|
|
160
|
+
MODELS_BY_NAME = MODELS.each_with_object({}) { |m, h| h[m.name] = m }.freeze
|
|
161
|
+
|
|
162
|
+
DEFAULTS = {
|
|
163
|
+
"fastembed" => "BAAI/bge-small-en-v1.5",
|
|
164
|
+
"api" => "text-embedding-3-small",
|
|
165
|
+
"tfidf" => "tfidf"
|
|
166
|
+
}.freeze
|
|
167
|
+
|
|
168
|
+
# Find a model by name.
|
|
169
|
+
# @param name [String] model name (e.g., "BAAI/bge-small-en-v1.5")
|
|
170
|
+
# @return [ModelInfo, nil]
|
|
171
|
+
def self.find(name)
|
|
172
|
+
MODELS_BY_NAME[name]
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
# List all models for a given provider.
|
|
176
|
+
# @param provider [String] "fastembed", "api", or "tfidf"
|
|
177
|
+
# @return [Array<ModelInfo>]
|
|
178
|
+
def self.models_for_provider(provider)
|
|
179
|
+
MODELS.select { |m| m.provider == provider }
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# All known model names.
|
|
183
|
+
# @return [Array<String>]
|
|
184
|
+
def self.model_names
|
|
185
|
+
MODELS.map(&:name)
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
# All provider names.
|
|
189
|
+
# @return [Array<String>]
|
|
190
|
+
def self.providers
|
|
191
|
+
MODELS.map(&:provider).uniq
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Look up dimensions for a model name. Returns nil if unknown.
|
|
195
|
+
# @param name [String] model name
|
|
196
|
+
# @return [Integer, nil]
|
|
197
|
+
def self.dimensions_for(name)
|
|
198
|
+
find(name)&.dimensions
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
# Return the default ModelInfo for a provider.
|
|
202
|
+
# @param provider [String] "fastembed", "api", or "tfidf"
|
|
203
|
+
# @return [ModelInfo, nil]
|
|
204
|
+
def self.default_for_provider(provider)
|
|
205
|
+
default_name = DEFAULTS[provider]
|
|
206
|
+
find(default_name) if default_name
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
end
|
|
@@ -2,17 +2,43 @@
|
|
|
2
2
|
|
|
3
3
|
module ClaudeMemory
|
|
4
4
|
module Embeddings
|
|
5
|
-
# Resolves an embedding provider by name or ENV.
|
|
6
|
-
#
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
# Resolves an embedding provider by name, model, or ENV.
|
|
6
|
+
#
|
|
7
|
+
# Provider selection (in priority order):
|
|
8
|
+
# 1. Explicit name parameter
|
|
9
|
+
# 2. CLAUDE_MEMORY_EMBEDDING_PROVIDER env var
|
|
10
|
+
# 3. Provider implied by the model name (ModelRegistry lookup), else default: "tfidf"
|
|
11
|
+
#
|
|
12
|
+
# Model selection is forwarded to the provider via CLAUDE_MEMORY_EMBEDDING_MODEL
|
|
13
|
+
# or the model parameter. The model can also imply the provider:
|
|
14
|
+
# - "BAAI/bge-small-en-v1.5" → fastembed
|
|
15
|
+
# - "text-embedding-3-small" → api
|
|
16
|
+
#
|
|
17
|
+
# Examples:
|
|
18
|
+
# Embeddings.resolve # tfidf default
|
|
19
|
+
# Embeddings.resolve("fastembed") # fastembed with default model
|
|
20
|
+
# Embeddings.resolve("fastembed", model: "BAAI/bge-base-en-v1.5")
|
|
21
|
+
# Embeddings.resolve(model: "text-embedding-3-small") # auto-detects api provider
|
|
22
|
+
#
|
|
23
|
+
def self.resolve(name = nil, model: nil, env: ENV)
|
|
24
|
+
model ||= env["CLAUDE_MEMORY_EMBEDDING_MODEL"]
|
|
25
|
+
provider = name || env["CLAUDE_MEMORY_EMBEDDING_PROVIDER"] || infer_provider(model) || "tfidf"
|
|
9
26
|
|
|
10
27
|
case provider
|
|
11
28
|
when "tfidf" then Generator.new
|
|
12
|
-
when "fastembed" then FastembedAdapter.new
|
|
13
|
-
when "api" then ApiAdapter.new(env: env)
|
|
29
|
+
when "fastembed" then FastembedAdapter.new(model_name: model, env: env)
|
|
30
|
+
when "api" then ApiAdapter.new(model: model, env: env)
|
|
14
31
|
else raise ArgumentError, "Unknown embedding provider: #{provider}. Available: tfidf, fastembed, api"
|
|
15
32
|
end
|
|
16
33
|
end
|
|
34
|
+
|
|
35
|
+
# Infer provider from a model name using the registry.
|
|
36
|
+
# Returns nil if the model is unknown.
|
|
37
|
+
def self.infer_provider(model)
|
|
38
|
+
return nil unless model
|
|
39
|
+
|
|
40
|
+
ModelRegistry.find(model)&.provider
|
|
41
|
+
end
|
|
42
|
+
private_class_method :infer_provider
|
|
17
43
|
end
|
|
18
44
|
end
|
|
@@ -4,7 +4,17 @@ require "digest"
|
|
|
4
4
|
|
|
5
5
|
module ClaudeMemory
|
|
6
6
|
module Ingest
|
|
7
|
+
# Delta-based transcript ingestion with cursor tracking.
|
|
8
|
+
# Reads new content from transcripts, extracts metadata and tool calls,
|
|
9
|
+
# sanitizes private tags, and persists to the content_items table with FTS indexing.
|
|
7
10
|
class Ingester
|
|
11
|
+
# @param store [Store::SQLiteStore] database store for persistence
|
|
12
|
+
# @param fts [Index::LexicalFTS, nil] full-text search index (default: new from store)
|
|
13
|
+
# @param env [Hash] environment variables
|
|
14
|
+
# @param metadata_extractor [MetadataExtractor, nil] extracts git branch, cwd, etc.
|
|
15
|
+
# @param tool_extractor [ToolExtractor, nil] extracts tool calls from transcript text
|
|
16
|
+
# @param tool_filter [ToolFilter, nil] filters irrelevant tool calls
|
|
17
|
+
# @param observation_compressor [ObservationCompressor, nil] compresses tool observations
|
|
8
18
|
def initialize(store, fts: nil, env: ENV, metadata_extractor: nil, tool_extractor: nil, tool_filter: nil, observation_compressor: nil)
|
|
9
19
|
@store = store
|
|
10
20
|
@fts = fts || Index::LexicalFTS.new(store)
|
|
@@ -15,6 +25,13 @@ module ClaudeMemory
|
|
|
15
25
|
@observation_compressor = observation_compressor || ObservationCompressor.new
|
|
16
26
|
end
|
|
17
27
|
|
|
28
|
+
# Ingest new content from a transcript file
|
|
29
|
+
# @param source [String] content source identifier (e.g., "hook", "cli")
|
|
30
|
+
# @param session_id [String] Claude session ID
|
|
31
|
+
# @param transcript_path [String] path to the transcript file
|
|
32
|
+
# @param project_path [String, nil] project root (defaults to detected path)
|
|
33
|
+
# @return [Hash] result with :status (:ingested, :skipped, or :no_change),
|
|
34
|
+
# :content_id, :bytes_read, and optional :reason
|
|
18
35
|
def ingest(source:, session_id:, transcript_path:, project_path: nil)
|
|
19
36
|
unless should_ingest?(transcript_path)
|
|
20
37
|
ClaudeMemory.logger.debug("ingest", message: "Skipped unchanged file", transcript_path: transcript_path)
|
|
@@ -66,6 +66,30 @@ module ClaudeMemory
|
|
|
66
66
|
end
|
|
67
67
|
end
|
|
68
68
|
|
|
69
|
+
def reject_fact(args)
|
|
70
|
+
scope = args["scope"] || "project"
|
|
71
|
+
store = get_store_for_scope(scope)
|
|
72
|
+
return {error: "Database not available"} unless store
|
|
73
|
+
|
|
74
|
+
fact_id = args["fact_id"]
|
|
75
|
+
if fact_id.nil? && args["docid"]
|
|
76
|
+
row = store.find_fact_by_docid(args["docid"])
|
|
77
|
+
fact_id = row && row[:id]
|
|
78
|
+
end
|
|
79
|
+
return {error: "fact_id or docid required"} if fact_id.nil?
|
|
80
|
+
|
|
81
|
+
result = store.reject_fact(fact_id, reason: args["reason"])
|
|
82
|
+
return {error: "Fact #{fact_id} not found in #{scope} database"} if result.nil?
|
|
83
|
+
|
|
84
|
+
{
|
|
85
|
+
success: true,
|
|
86
|
+
scope: scope,
|
|
87
|
+
fact_id: fact_id,
|
|
88
|
+
conflicts_resolved: result[:conflicts_resolved],
|
|
89
|
+
message: "Fact rejected"
|
|
90
|
+
}
|
|
91
|
+
end
|
|
92
|
+
|
|
69
93
|
def sweep_now(args)
|
|
70
94
|
scope = args["scope"] || "project"
|
|
71
95
|
store = get_store_for_scope(scope)
|