claude_memory 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.claude/memory.sqlite3 +0 -0
  3. data/.claude/rules/claude_memory.generated.md +94 -2
  4. data/.claude/settings.json +30 -52
  5. data/.claude/settings.local.json +3 -1
  6. data/.claude/skills/release/SKILL.md +168 -0
  7. data/.claude/skills/upgrade-dependencies/SKILL.md +154 -0
  8. data/.claude-plugin/marketplace.json +2 -2
  9. data/.claude-plugin/plugin.json +3 -3
  10. data/.claude-plugin/scripts/hook-runner.sh +14 -0
  11. data/.claude-plugin/scripts/serve-mcp.sh +14 -0
  12. data/.ruby-version +1 -1
  13. data/CHANGELOG.md +47 -0
  14. data/CLAUDE.md +31 -17
  15. data/README.md +35 -0
  16. data/db/migrations/013_add_mcp_tool_calls.rb +26 -0
  17. data/db/migrations/014_canonicalize_predicates.rb +30 -0
  18. data/docs/improvements.md +58 -20
  19. data/docs/influence/claude-mem.md +1 -0
  20. data/docs/influence/claude-supermemory.md +1 -0
  21. data/docs/influence/episodic-memory.md +1 -0
  22. data/docs/influence/grepai.md +1 -0
  23. data/docs/influence/kbs.md +1 -0
  24. data/docs/influence/lossless-claw.md +1 -0
  25. data/docs/influence/qmd.md +1 -0
  26. data/lib/claude_memory/commands/completion_command.rb +1 -31
  27. data/lib/claude_memory/commands/embeddings_command.rb +198 -0
  28. data/lib/claude_memory/commands/help_command.rb +8 -1
  29. data/lib/claude_memory/commands/registry.rb +47 -34
  30. data/lib/claude_memory/commands/reject_command.rb +62 -0
  31. data/lib/claude_memory/commands/restore_command.rb +77 -0
  32. data/lib/claude_memory/commands/skills/distill-transcripts.md +5 -1
  33. data/lib/claude_memory/commands/stats_command.rb +98 -2
  34. data/lib/claude_memory/configuration.rb +14 -1
  35. data/lib/claude_memory/distill/json_schema.md +8 -4
  36. data/lib/claude_memory/distill/null_distiller.rb +2 -0
  37. data/lib/claude_memory/domain/entity.rb +13 -1
  38. data/lib/claude_memory/domain/fact.rb +26 -2
  39. data/lib/claude_memory/embeddings/api_adapter.rb +5 -4
  40. data/lib/claude_memory/embeddings/fastembed_adapter.rb +43 -13
  41. data/lib/claude_memory/embeddings/inspector.rb +91 -0
  42. data/lib/claude_memory/embeddings/model_registry.rb +210 -0
  43. data/lib/claude_memory/embeddings/resolver.rb +32 -6
  44. data/lib/claude_memory/ingest/ingester.rb +17 -0
  45. data/lib/claude_memory/mcp/handlers/management_handlers.rb +24 -0
  46. data/lib/claude_memory/mcp/handlers/stats_handlers.rb +5 -2
  47. data/lib/claude_memory/mcp/instructions_builder.rb +17 -0
  48. data/lib/claude_memory/mcp/server.rb +30 -3
  49. data/lib/claude_memory/mcp/telemetry.rb +86 -0
  50. data/lib/claude_memory/mcp/tool_definitions.rb +86 -3
  51. data/lib/claude_memory/mcp/tools.rb +10 -0
  52. data/lib/claude_memory/publish.rb +40 -5
  53. data/lib/claude_memory/recall.rb +81 -0
  54. data/lib/claude_memory/resolve/predicate_policy.rb +63 -3
  55. data/lib/claude_memory/resolve/resolver.rb +43 -0
  56. data/lib/claude_memory/store/schema_manager.rb +1 -1
  57. data/lib/claude_memory/store/sqlite_store.rb +250 -1
  58. data/lib/claude_memory/store/store_manager.rb +50 -1
  59. data/lib/claude_memory/sweep/maintenance.rb +115 -1
  60. data/lib/claude_memory/sweep/sweeper.rb +3 -0
  61. data/lib/claude_memory/version.rb +1 -1
  62. data/lib/claude_memory.rb +5 -0
  63. metadata +27 -8
  64. data/.claude/memory.sqlite3-shm +0 -0
  65. data/.claude/memory.sqlite3-wal +0 -0
@@ -66,13 +66,17 @@ This document defines the schema for extracted knowledge from transcripts.
66
66
  - **conflict**: `{kind: "conflict", value: true}` - indicates contradictory information detected
67
67
  - **time_boundary**: `{kind: "time_boundary", value: "2024-01-15"}` - temporal boundary marker
68
68
 
69
- ## Predicate Types (MVP)
69
+ ## Predicate Types
70
+
71
+ Canonical vocabulary defined in `lib/claude_memory/resolve/predicate_policy.rb`.
70
72
 
71
73
  | Predicate | Cardinality | Exclusive |
72
74
  |-----------|-------------|-----------|
73
75
  | convention | multi | no |
74
- | decision | multi (by scope) | no |
75
- | auth_method | single | yes |
76
+ | decision | multi | no |
77
+ | architecture | multi | no |
78
+ | uses_framework | multi | no |
79
+ | uses_language | multi | no |
76
80
  | uses_database | single | yes |
77
- | uses_framework | single | yes |
78
81
  | deployment_platform | single | yes |
82
+ | auth_method | single | yes |
@@ -73,6 +73,8 @@ module ClaudeMemory
73
73
  facts << build_fact("uses_framework", entity[:name], text, scope_hint)
74
74
  when "platform"
75
75
  facts << build_fact("deployment_platform", entity[:name], text, scope_hint)
76
+ when "language"
77
+ facts << build_fact("uses_language", entity[:name], text, scope_hint)
76
78
  end
77
79
  end
78
80
 
@@ -2,10 +2,18 @@
2
2
 
3
3
  module ClaudeMemory
4
4
  module Domain
5
- # Domain model representing an entity (database, framework, person, etc.)
5
+ # Domain model representing an entity (database, framework, person, etc.).
6
+ # Instances are immutable (frozen).
6
7
  class Entity
7
8
  attr_reader :id, :type, :canonical_name, :slug, :created_at
8
9
 
10
+ # @param attributes [Hash] entity attributes
11
+ # @option attributes [Integer] :id database primary key
12
+ # @option attributes [String] :type entity category (required, e.g. "database", "framework", "person")
13
+ # @option attributes [String] :canonical_name display name (required)
14
+ # @option attributes [String] :slug URL-safe identifier (required)
15
+ # @option attributes [String] :created_at ISO 8601 creation timestamp
16
+ # @raise [ArgumentError] if type, canonical_name, or slug is blank
9
17
  def initialize(attributes)
10
18
  @id = attributes[:id]
11
19
  @type = attributes[:type]
@@ -17,18 +25,22 @@ module ClaudeMemory
17
25
  freeze
18
26
  end
19
27
 
28
+ # @return [Boolean] true when type is "database"
20
29
  def database?
21
30
  type == "database"
22
31
  end
23
32
 
33
+ # @return [Boolean] true when type is "framework"
24
34
  def framework?
25
35
  type == "framework"
26
36
  end
27
37
 
38
+ # @return [Boolean] true when type is "person"
28
39
  def person?
29
40
  type == "person"
30
41
  end
31
42
 
43
+ # @return [Hash] all attributes as a plain hash
32
44
  def to_h
33
45
  {
34
46
  id: id,
@@ -2,13 +2,27 @@
2
2
 
3
3
  module ClaudeMemory
4
4
  module Domain
5
- # Domain model representing a fact in the memory system
6
- # Encapsulates business logic and validation
5
+ # Domain model representing a fact in the memory system.
6
+ # Encapsulates business logic and validation. Instances are immutable (frozen).
7
7
  class Fact
8
8
  attr_reader :id, :docid, :subject_name, :predicate, :object_literal,
9
9
  :status, :confidence, :scope, :project_path,
10
10
  :valid_from, :valid_to, :created_at
11
11
 
12
+ # @param attributes [Hash] fact attributes
13
+ # @option attributes [Integer] :id database primary key
14
+ # @option attributes [Integer] :docid FTS document id
15
+ # @option attributes [String] :subject_name entity name of the subject
16
+ # @option attributes [String] :predicate relationship type (required)
17
+ # @option attributes [String] :object_literal literal value (required)
18
+ # @option attributes [String] :status one of "active", "superseded", "rejected", "disputed"
19
+ # @option attributes [Float] :confidence score between 0 and 1 (default: 1.0)
20
+ # @option attributes [String] :scope "project" or "global" (default: "project")
21
+ # @option attributes [String] :project_path path for project-scoped facts
22
+ # @option attributes [String] :valid_from ISO 8601 start of validity
23
+ # @option attributes [String] :valid_to ISO 8601 end of validity (nil if current)
24
+ # @option attributes [String] :created_at ISO 8601 creation timestamp
25
+ # @raise [ArgumentError] if predicate, object_literal, or confidence is invalid
12
26
  def initialize(attributes)
13
27
  @id = attributes[:id]
14
28
  @docid = attributes[:docid]
@@ -27,22 +41,32 @@ module ClaudeMemory
27
41
  freeze
28
42
  end
29
43
 
44
+ # @return [Boolean] true when status is "active"
30
45
  def active?
31
46
  status == "active"
32
47
  end
33
48
 
49
+ # @return [Boolean] true when status is "superseded"
34
50
  def superseded?
35
51
  status == "superseded"
36
52
  end
37
53
 
54
+ # @return [Boolean] true when status is "rejected"
55
+ def rejected?
56
+ status == "rejected"
57
+ end
58
+
59
+ # @return [Boolean] true when scope is "global"
38
60
  def global?
39
61
  scope == "global"
40
62
  end
41
63
 
64
+ # @return [Boolean] true when scope is "project"
42
65
  def project?
43
66
  scope == "project"
44
67
  end
45
68
 
69
+ # @return [Hash] all attributes as a plain hash
46
70
  def to_h
47
71
  {
48
72
  id: id,
@@ -22,19 +22,20 @@ module ClaudeMemory
22
22
  DEFAULT_API_URL = "https://api.openai.com/v1/embeddings"
23
23
  DEFAULT_MODEL = "text-embedding-3-small"
24
24
 
25
- def initialize(env: ENV)
25
+ def initialize(model: nil, env: ENV)
26
26
  @api_key = env["CLAUDE_MEMORY_EMBEDDING_API_KEY"] || env["OPENAI_API_KEY"]
27
27
  @api_url = env["CLAUDE_MEMORY_EMBEDDING_API_URL"] || DEFAULT_API_URL
28
- @model = env["CLAUDE_MEMORY_EMBEDDING_MODEL"] || DEFAULT_MODEL
28
+ @model = model || env["CLAUDE_MEMORY_EMBEDDING_MODEL"] || DEFAULT_MODEL
29
+ @known_dimensions = ModelRegistry.dimensions_for(@model)
29
30
 
30
31
  raise ArgumentError, "Set CLAUDE_MEMORY_EMBEDDING_API_KEY or OPENAI_API_KEY" unless @api_key
31
32
  end
32
33
 
33
34
  def name = "api"
34
35
 
35
- # Dimensions are lazy derived from the first API response and cached.
36
+ # Dimensions are resolved from the registry if known, otherwise lazily from the first API response.
36
37
  def dimensions
37
- @dimensions ||= fetch_dimensions
38
+ @dimensions ||= @known_dimensions || fetch_dimensions
38
39
  end
39
40
 
40
41
  # Generate embedding for a query text.
@@ -2,37 +2,50 @@
2
2
 
3
3
  module ClaudeMemory
4
4
  module Embeddings
5
- # Adapter wrapping fastembed-rb for high-quality local embeddings
6
- # Uses BAAI/bge-small-en-v1.5 by default (384-dim, ~67MB ONNX model)
5
+ # Adapter wrapping fastembed-rb for high-quality local embeddings.
6
+ # Supports any model available in fastembed-rb's SUPPORTED_MODELS.
7
7
  #
8
- # Implements the same generate(text) interface as Generator for DI compatibility.
9
- # Supports asymmetric query/passage encoding for better retrieval accuracy.
8
+ # Model selection (in priority order):
9
+ # 1. Explicit model_name parameter
10
+ # 2. CLAUDE_MEMORY_EMBEDDING_MODEL env var
11
+ # 3. Default: BAAI/bge-small-en-v1.5 (384-dim, ~67MB ONNX)
12
+ #
13
+ # Dimensions are resolved from the ModelRegistry for known models,
14
+ # or probed from fastembed's SUPPORTED_MODELS (falling back to a test embedding) for unknown models.
10
15
  #
11
16
  # Usage:
12
17
  # adapter = FastembedAdapter.new
13
18
  # query_vec = adapter.generate("What database?") # query encoding
14
19
  # passage_vec = adapter.generate_passage("Uses PostgreSQL") # passage encoding
15
20
  #
21
+ # # Use a larger model:
22
+ # adapter = FastembedAdapter.new(model_name: "BAAI/bge-base-en-v1.5")
23
+ # adapter.dimensions # => 768
24
+ #
16
25
  class FastembedAdapter
17
- EMBEDDING_DIM = 384
18
26
  DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
19
27
 
28
+ attr_reader :model_name, :dimensions
29
+
20
30
  def name = "fastembed"
21
31
 
22
- def dimensions = EMBEDDING_DIM
32
+ def initialize(model_name: nil, env: ENV)
33
+ @model_name = model_name || env["CLAUDE_MEMORY_EMBEDDING_MODEL"] || DEFAULT_MODEL
34
+ @dimensions = resolve_dimensions(@model_name)
23
35
 
24
- def initialize(model_name: DEFAULT_MODEL)
25
36
  require "fastembed"
26
- @model = Fastembed::TextEmbedding.new(model_name: model_name)
37
+ @model = Fastembed::TextEmbedding.new(model_name: @model_name)
38
+
39
+ # If dimensions weren't known from registry, probe from fastembed
40
+ @dimensions ||= probe_dimensions_from_fastembed
27
41
  rescue LoadError
28
42
  raise LoadError,
29
43
  "fastembed gem is required for FastembedAdapter. Add `gem 'fastembed'` to your Gemfile."
30
44
  end
31
45
 
32
46
  # Generate query embedding (optimized for search queries)
33
- # Compatible with Recall's embedding_generator interface
34
47
  # @param text [String] query text to embed
35
- # @return [Array<Float>] normalized 384-dimensional vector
48
+ # @return [Array<Float>] normalized embedding vector
36
49
  def generate(text)
37
50
  return zero_vector if text.nil? || text.empty?
38
51
 
@@ -40,9 +53,8 @@ module ClaudeMemory
40
53
  end
41
54
 
42
55
  # Generate passage embedding (optimized for document/fact indexing)
43
- # Use this when storing embeddings for facts
44
56
  # @param text [String] passage text to embed
45
- # @return [Array<Float>] normalized 384-dimensional vector
57
+ # @return [Array<Float>] normalized embedding vector
46
58
  def generate_passage(text)
47
59
  return zero_vector if text.nil? || text.empty?
48
60
 
@@ -51,8 +63,26 @@ module ClaudeMemory
51
63
 
52
64
  private
53
65
 
66
+ # Resolve dimensions from the model registry (fast, no I/O).
67
+ # Returns nil if the model isn't in the registry.
68
+ def resolve_dimensions(model)
69
+ ModelRegistry.dimensions_for(model)
70
+ end
71
+
72
+ # Fallback: probe fastembed's SUPPORTED_MODELS for dimension info.
73
+ # This handles models added to fastembed-rb but not yet in our registry.
74
+ def probe_dimensions_from_fastembed
75
+ if defined?(Fastembed::SUPPORTED_MODELS)
76
+ info = Fastembed::SUPPORTED_MODELS[@model_name]
77
+ return info.dim if info
78
+ end
79
+
80
+ # Last resort: generate a test embedding and measure its size
81
+ @model.query_embed("dimension probe").first.size
82
+ end
83
+
54
84
  def zero_vector
55
- Array.new(EMBEDDING_DIM, 0.0)
85
+ Array.new(@dimensions, 0.0)
56
86
  end
57
87
  end
58
88
  end
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClaudeMemory
4
+ module Embeddings
5
+ # Reads embedding metadata from global and project databases.
6
+ # Returns structured data — no I/O formatting or stdout output.
7
+ #
8
+ # Used by EmbeddingsCommand to separate DB concerns from presentation.
9
+ class Inspector
10
+ DatabaseState = Data.define(:label, :provider, :dimensions)
11
+ DimensionResult = Data.define(:label, :status, :stored_dims, :stored_provider, :current_dims)
12
+
13
+ def database_states
14
+ results = []
15
+
16
+ with_each_store do |label, store|
17
+ provider = store.get_meta("embedding_provider")
18
+ dims = store.get_meta("embedding_dimensions")
19
+
20
+ next unless provider || dims
21
+
22
+ results << DatabaseState.new(label: label, provider: provider, dimensions: dims)
23
+ end
24
+
25
+ results
26
+ end
27
+
28
+ def dimension_checks(provider_name, model_name)
29
+ results = []
30
+
31
+ with_each_store do |label, store|
32
+ stored_dims = store.get_meta("embedding_dimensions")&.to_i
33
+ stored_provider = store.get_meta("embedding_provider")
34
+
35
+ if stored_dims
36
+ current_dims = resolve_current_dimensions(provider_name, model_name)
37
+
38
+ status = if current_dims && current_dims != stored_dims
39
+ :mismatch
40
+ else
41
+ :match
42
+ end
43
+
44
+ results << DimensionResult.new(
45
+ label: label,
46
+ status: status,
47
+ stored_dims: stored_dims,
48
+ stored_provider: stored_provider,
49
+ current_dims: current_dims
50
+ )
51
+ else
52
+ results << DimensionResult.new(
53
+ label: label,
54
+ status: :fresh,
55
+ stored_dims: nil,
56
+ stored_provider: nil,
57
+ current_dims: nil
58
+ )
59
+ end
60
+ end
61
+
62
+ results
63
+ end
64
+
65
+ private
66
+
67
+ def resolve_current_dimensions(provider_name, model_name)
68
+ if model_name
69
+ ModelRegistry.dimensions_for(model_name)
70
+ else
71
+ ModelRegistry.default_for_provider(provider_name)&.dimensions
72
+ end
73
+ end
74
+
75
+ def with_each_store
76
+ config = Configuration.new
77
+
78
+ [["global", config.global_db_path], ["project", config.project_db_path]].each do |label, path|
79
+ next unless File.exist?(path)
80
+
81
+ store = Store::SQLiteStore.new(path)
82
+ begin
83
+ yield label, store
84
+ ensure
85
+ store.close
86
+ end
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,210 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClaudeMemory
4
+ module Embeddings
5
+ # Registry of known embedding models with their properties.
6
+ # Enables model validation, dimension lookup, and discoverability.
7
+ #
8
+ # Models are registered by canonical name (e.g., "BAAI/bge-small-en-v1.5")
9
+ # with provider type, dimensions, and description.
10
+ #
11
+ # Usage:
12
+ # ModelRegistry.find("BAAI/bge-small-en-v1.5")
13
+ # # => #<data ModelInfo name="BAAI/bge-small-en-v1.5", provider="fastembed", dimensions=384, ...>
14
+ #
15
+ # ModelRegistry.models_for_provider("fastembed")
16
+ # # => [...]
17
+ #
18
+ class ModelRegistry
19
+ ModelInfo = Data.define(:name, :provider, :dimensions, :description, :size_mb, :max_tokens)
20
+
21
+ # Known models with validated dimensions.
22
+ # Fastembed models sourced from fastembed-rb SUPPORTED_MODELS.
23
+ # API models sourced from provider documentation.
24
+ MODELS = [
25
+ # --- fastembed: local ONNX models (no API key needed) ---
26
+ ModelInfo.new(
27
+ name: "BAAI/bge-small-en-v1.5",
28
+ provider: "fastembed",
29
+ dimensions: 384,
30
+ description: "Fast English embedding (default)",
31
+ size_mb: 67,
32
+ max_tokens: 512
33
+ ),
34
+ ModelInfo.new(
35
+ name: "BAAI/bge-base-en-v1.5",
36
+ provider: "fastembed",
37
+ dimensions: 768,
38
+ description: "Balanced English embedding, higher accuracy",
39
+ size_mb: 210,
40
+ max_tokens: 512
41
+ ),
42
+ ModelInfo.new(
43
+ name: "BAAI/bge-large-en-v1.5",
44
+ provider: "fastembed",
45
+ dimensions: 1024,
46
+ description: "High accuracy English embedding",
47
+ size_mb: 1200,
48
+ max_tokens: 512
49
+ ),
50
+ ModelInfo.new(
51
+ name: "sentence-transformers/all-MiniLM-L6-v2",
52
+ provider: "fastembed",
53
+ dimensions: 384,
54
+ description: "Lightweight general-purpose sentence embedding",
55
+ size_mb: 90,
56
+ max_tokens: 512
57
+ ),
58
+ ModelInfo.new(
59
+ name: "intfloat/multilingual-e5-small",
60
+ provider: "fastembed",
61
+ dimensions: 384,
62
+ description: "Multilingual embedding, 100+ languages",
63
+ size_mb: 450,
64
+ max_tokens: 512
65
+ ),
66
+ ModelInfo.new(
67
+ name: "intfloat/multilingual-e5-base",
68
+ provider: "fastembed",
69
+ dimensions: 768,
70
+ description: "Larger multilingual embedding",
71
+ size_mb: 1110,
72
+ max_tokens: 512
73
+ ),
74
+ ModelInfo.new(
75
+ name: "nomic-ai/nomic-embed-text-v1.5",
76
+ provider: "fastembed",
77
+ dimensions: 768,
78
+ description: "Long context (8192 tokens) with Matryoshka support",
79
+ size_mb: 520,
80
+ max_tokens: 8192
81
+ ),
82
+ ModelInfo.new(
83
+ name: "jinaai/jina-embeddings-v2-small-en",
84
+ provider: "fastembed",
85
+ dimensions: 512,
86
+ description: "Small English embedding, 8192 token context",
87
+ size_mb: 60,
88
+ max_tokens: 8192
89
+ ),
90
+ ModelInfo.new(
91
+ name: "jinaai/jina-embeddings-v2-base-en",
92
+ provider: "fastembed",
93
+ dimensions: 768,
94
+ description: "Base English embedding, 8192 token context",
95
+ size_mb: 520,
96
+ max_tokens: 8192
97
+ ),
98
+
99
+ # --- api: OpenAI-compatible endpoints ---
100
+ ModelInfo.new(
101
+ name: "text-embedding-3-small",
102
+ provider: "api",
103
+ dimensions: 1536,
104
+ description: "OpenAI small embedding (default API model)",
105
+ size_mb: nil,
106
+ max_tokens: 8191
107
+ ),
108
+ ModelInfo.new(
109
+ name: "text-embedding-3-large",
110
+ provider: "api",
111
+ dimensions: 3072,
112
+ description: "OpenAI large embedding, highest accuracy",
113
+ size_mb: nil,
114
+ max_tokens: 8191
115
+ ),
116
+ ModelInfo.new(
117
+ name: "text-embedding-ada-002",
118
+ provider: "api",
119
+ dimensions: 1536,
120
+ description: "OpenAI legacy embedding",
121
+ size_mb: nil,
122
+ max_tokens: 8191
123
+ ),
124
+ ModelInfo.new(
125
+ name: "voyage-3",
126
+ provider: "api",
127
+ dimensions: 1024,
128
+ description: "Voyage AI general-purpose embedding",
129
+ size_mb: nil,
130
+ max_tokens: 32000
131
+ ),
132
+ ModelInfo.new(
133
+ name: "voyage-3-lite",
134
+ provider: "api",
135
+ dimensions: 512,
136
+ description: "Voyage AI lightweight embedding",
137
+ size_mb: nil,
138
+ max_tokens: 32000
139
+ ),
140
+ ModelInfo.new(
141
+ name: "voyage-code-3",
142
+ provider: "api",
143
+ dimensions: 1024,
144
+ description: "Voyage AI code-optimized embedding",
145
+ size_mb: nil,
146
+ max_tokens: 32000
147
+ ),
148
+
149
+ # --- tfidf: built-in, no dependencies ---
150
+ ModelInfo.new(
151
+ name: "tfidf",
152
+ provider: "tfidf",
153
+ dimensions: 384,
154
+ description: "Built-in TF-IDF embedding (no dependencies)",
155
+ size_mb: 0,
156
+ max_tokens: nil
157
+ )
158
+ ].freeze
159
+
160
+ MODELS_BY_NAME = MODELS.each_with_object({}) { |m, h| h[m.name] = m }.freeze
161
+
162
+ DEFAULTS = {
163
+ "fastembed" => "BAAI/bge-small-en-v1.5",
164
+ "api" => "text-embedding-3-small",
165
+ "tfidf" => "tfidf"
166
+ }.freeze
167
+
168
+ # Find a model by name.
169
+ # @param name [String] model name (e.g., "BAAI/bge-small-en-v1.5")
170
+ # @return [ModelInfo, nil]
171
+ def self.find(name)
172
+ MODELS_BY_NAME[name]
173
+ end
174
+
175
+ # List all models for a given provider.
176
+ # @param provider [String] "fastembed", "api", or "tfidf"
177
+ # @return [Array<ModelInfo>]
178
+ def self.models_for_provider(provider)
179
+ MODELS.select { |m| m.provider == provider }
180
+ end
181
+
182
+ # All known model names.
183
+ # @return [Array<String>]
184
+ def self.model_names
185
+ MODELS.map(&:name)
186
+ end
187
+
188
+ # All provider names.
189
+ # @return [Array<String>]
190
+ def self.providers
191
+ MODELS.map(&:provider).uniq
192
+ end
193
+
194
+ # Look up dimensions for a model name. Returns nil if unknown.
195
+ # @param name [String] model name
196
+ # @return [Integer, nil]
197
+ def self.dimensions_for(name)
198
+ find(name)&.dimensions
199
+ end
200
+
201
+ # Return the default ModelInfo for a provider.
202
+ # @param provider [String] "fastembed", "api", or "tfidf"
203
+ # @return [ModelInfo, nil]
204
+ def self.default_for_provider(provider)
205
+ default_name = DEFAULTS[provider]
206
+ find(default_name) if default_name
207
+ end
208
+ end
209
+ end
210
+ end
@@ -2,17 +2,43 @@
2
2
 
3
3
  module ClaudeMemory
4
4
  module Embeddings
5
- # Resolves an embedding provider by name or ENV.
6
- # Three providers: tfidf (default), fastembed, api.
7
- def self.resolve(name = nil, env: ENV)
8
- provider = name || env["CLAUDE_MEMORY_EMBEDDING_PROVIDER"] || "tfidf"
5
+ # Resolves an embedding provider by name, model, or ENV.
6
+ #
7
+ # Provider selection (in priority order):
8
+ # 1. Explicit name parameter
9
+ # 2. CLAUDE_MEMORY_EMBEDDING_PROVIDER env var
10
+ # 3. Inferred from the model name via ModelRegistry, otherwise default: "tfidf"
11
+ #
12
+ # Model selection is forwarded to the provider via CLAUDE_MEMORY_EMBEDDING_MODEL
13
+ # or the model parameter. The model can also imply the provider:
14
+ # - "BAAI/bge-small-en-v1.5" → fastembed
15
+ # - "text-embedding-3-small" → api
16
+ #
17
+ # Examples:
18
+ # Embeddings.resolve # tfidf default
19
+ # Embeddings.resolve("fastembed") # fastembed with default model
20
+ # Embeddings.resolve("fastembed", model: "BAAI/bge-base-en-v1.5")
21
+ # Embeddings.resolve(model: "text-embedding-3-small") # auto-detects api provider
22
+ #
23
+ def self.resolve(name = nil, model: nil, env: ENV)
24
+ model ||= env["CLAUDE_MEMORY_EMBEDDING_MODEL"]
25
+ provider = name || env["CLAUDE_MEMORY_EMBEDDING_PROVIDER"] || infer_provider(model) || "tfidf"
9
26
 
10
27
  case provider
11
28
  when "tfidf" then Generator.new
12
- when "fastembed" then FastembedAdapter.new
13
- when "api" then ApiAdapter.new(env: env)
29
+ when "fastembed" then FastembedAdapter.new(model_name: model, env: env)
30
+ when "api" then ApiAdapter.new(model: model, env: env)
14
31
  else raise ArgumentError, "Unknown embedding provider: #{provider}. Available: tfidf, fastembed, api"
15
32
  end
16
33
  end
34
+
35
+ # Infer provider from a model name using the registry.
36
+ # Returns nil if the model is unknown.
37
+ def self.infer_provider(model)
38
+ return nil unless model
39
+
40
+ ModelRegistry.find(model)&.provider
41
+ end
42
+ private_class_method :infer_provider
17
43
  end
18
44
  end
@@ -4,7 +4,17 @@ require "digest"
4
4
 
5
5
  module ClaudeMemory
6
6
  module Ingest
7
+ # Delta-based transcript ingestion with cursor tracking.
8
+ # Reads new content from transcripts, extracts metadata and tool calls,
9
+ # sanitizes private tags, and persists to the content_items table with FTS indexing.
7
10
  class Ingester
11
+ # @param store [Store::SQLiteStore] database store for persistence
12
+ # @param fts [Index::LexicalFTS, nil] full-text search index (default: new from store)
13
+ # @param env [Hash] environment variables
14
+ # @param metadata_extractor [MetadataExtractor, nil] extracts git branch, cwd, etc.
15
+ # @param tool_extractor [ToolExtractor, nil] extracts tool calls from transcript text
16
+ # @param tool_filter [ToolFilter, nil] filters irrelevant tool calls
17
+ # @param observation_compressor [ObservationCompressor, nil] compresses tool observations
8
18
  def initialize(store, fts: nil, env: ENV, metadata_extractor: nil, tool_extractor: nil, tool_filter: nil, observation_compressor: nil)
9
19
  @store = store
10
20
  @fts = fts || Index::LexicalFTS.new(store)
@@ -15,6 +25,13 @@ module ClaudeMemory
15
25
  @observation_compressor = observation_compressor || ObservationCompressor.new
16
26
  end
17
27
 
28
+ # Ingest new content from a transcript file
29
+ # @param source [String] content source identifier (e.g., "hook", "cli")
30
+ # @param session_id [String] Claude session ID
31
+ # @param transcript_path [String] path to the transcript file
32
+ # @param project_path [String, nil] project root (defaults to detected path)
33
+ # @return [Hash] result with :status (:ingested, :skipped, or :no_change),
34
+ # :content_id, :bytes_read, and optional :reason
18
35
  def ingest(source:, session_id:, transcript_path:, project_path: nil)
19
36
  unless should_ingest?(transcript_path)
20
37
  ClaudeMemory.logger.debug("ingest", message: "Skipped unchanged file", transcript_path: transcript_path)
@@ -66,6 +66,30 @@ module ClaudeMemory
66
66
  end
67
67
  end
68
68
 
69
+ def reject_fact(args)
70
+ scope = args["scope"] || "project"
71
+ store = get_store_for_scope(scope)
72
+ return {error: "Database not available"} unless store
73
+
74
+ fact_id = args["fact_id"]
75
+ if fact_id.nil? && args["docid"]
76
+ row = store.find_fact_by_docid(args["docid"])
77
+ fact_id = row && row[:id]
78
+ end
79
+ return {error: "fact_id or docid required"} if fact_id.nil?
80
+
81
+ result = store.reject_fact(fact_id, reason: args["reason"])
82
+ return {error: "Fact #{fact_id} not found in #{scope} database"} if result.nil?
83
+
84
+ {
85
+ success: true,
86
+ scope: scope,
87
+ fact_id: fact_id,
88
+ conflicts_resolved: result[:conflicts_resolved],
89
+ message: "Fact rejected"
90
+ }
91
+ end
92
+
69
93
  def sweep_now(args)
70
94
  scope = args["scope"] || "project"
71
95
  store = get_store_for_scope(scope)