claude_memory 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/memory.sqlite3 +0 -0
  3. data/.claude/rules/claude_memory.generated.md +32 -2
  4. data/.claude/settings.json +65 -15
  5. data/.claude/settings.local.json +5 -2
  6. data/.claude/skills/improve/SKILL.md +113 -25
  7. data/.claude/skills/upgrade-dependencies/SKILL.md +154 -0
  8. data/.claude-plugin/commands/distill-transcripts.md +98 -0
  9. data/.claude-plugin/commands/memory-recall.md +67 -0
  10. data/.claude-plugin/marketplace.json +2 -2
  11. data/.claude-plugin/plugin.json +3 -3
  12. data/.claude-plugin/scripts/hook-runner.sh +14 -0
  13. data/.claude-plugin/scripts/serve-mcp.sh +14 -0
  14. data/.ruby-version +1 -1
  15. data/CHANGELOG.md +90 -1
  16. data/CLAUDE.md +56 -18
  17. data/README.md +35 -0
  18. data/db/migrations/013_add_mcp_tool_calls.rb +26 -0
  19. data/db/migrations/014_canonicalize_predicates.rb +30 -0
  20. data/docs/improvements.md +74 -74
  21. data/docs/influence/claude-mem.md +1 -0
  22. data/docs/influence/claude-supermemory.md +1 -0
  23. data/docs/influence/episodic-memory.md +1 -0
  24. data/docs/influence/grepai.md +1 -0
  25. data/docs/influence/kbs.md +1 -0
  26. data/docs/influence/lossless-claw.md +1 -0
  27. data/docs/influence/qmd.md +1 -0
  28. data/docs/quality_review.md +119 -224
  29. data/hooks/hooks.json +39 -7
  30. data/lib/claude_memory/commands/checks/distill_check.rb +61 -0
  31. data/lib/claude_memory/commands/checks/hooks_check.rb +2 -2
  32. data/lib/claude_memory/commands/checks/vec_check.rb +2 -1
  33. data/lib/claude_memory/commands/completion_command.rb +149 -0
  34. data/lib/claude_memory/commands/doctor_command.rb +2 -0
  35. data/lib/claude_memory/commands/embeddings_command.rb +198 -0
  36. data/lib/claude_memory/commands/help_command.rb +12 -1
  37. data/lib/claude_memory/commands/hook_command.rb +2 -1
  38. data/lib/claude_memory/commands/index_command.rb +85 -78
  39. data/lib/claude_memory/commands/initializers/database_ensurer.rb +16 -0
  40. data/lib/claude_memory/commands/initializers/global_initializer.rb +2 -1
  41. data/lib/claude_memory/commands/initializers/hooks_configurator.rb +55 -11
  42. data/lib/claude_memory/commands/initializers/project_initializer.rb +2 -1
  43. data/lib/claude_memory/commands/install_skill_command.rb +78 -0
  44. data/lib/claude_memory/commands/registry.rb +47 -32
  45. data/lib/claude_memory/commands/reject_command.rb +62 -0
  46. data/lib/claude_memory/commands/restore_command.rb +77 -0
  47. data/lib/claude_memory/commands/skills/distill-transcripts.md +102 -0
  48. data/lib/claude_memory/commands/skills/memory-recall.md +67 -0
  49. data/lib/claude_memory/commands/stats_command.rb +98 -2
  50. data/lib/claude_memory/configuration.rb +14 -1
  51. data/lib/claude_memory/core/fact_ranker.rb +2 -2
  52. data/lib/claude_memory/core/rr_fusion.rb +23 -6
  53. data/lib/claude_memory/core/snippet_extractor.rb +7 -3
  54. data/lib/claude_memory/core/text_builder.rb +11 -0
  55. data/lib/claude_memory/distill/json_schema.md +8 -4
  56. data/lib/claude_memory/distill/null_distiller.rb +2 -0
  57. data/lib/claude_memory/domain/entity.rb +13 -1
  58. data/lib/claude_memory/domain/fact.rb +26 -2
  59. data/lib/claude_memory/domain/provenance.rb +0 -1
  60. data/lib/claude_memory/embeddings/api_adapter.rb +97 -0
  61. data/lib/claude_memory/embeddings/dimension_check.rb +23 -0
  62. data/lib/claude_memory/embeddings/fastembed_adapter.rb +46 -12
  63. data/lib/claude_memory/embeddings/generator.rb +4 -0
  64. data/lib/claude_memory/embeddings/inspector.rb +91 -0
  65. data/lib/claude_memory/embeddings/model_registry.rb +210 -0
  66. data/lib/claude_memory/embeddings/resolver.rb +44 -0
  67. data/lib/claude_memory/hook/context_injector.rb +58 -2
  68. data/lib/claude_memory/hook/distillation_runner.rb +46 -0
  69. data/lib/claude_memory/hook/handler.rb +11 -2
  70. data/lib/claude_memory/index/vector_index.rb +15 -2
  71. data/lib/claude_memory/infrastructure/schema_validator.rb +3 -3
  72. data/lib/claude_memory/ingest/ingester.rb +17 -0
  73. data/lib/claude_memory/mcp/handlers/context_handlers.rb +38 -0
  74. data/lib/claude_memory/mcp/handlers/management_handlers.rb +169 -0
  75. data/lib/claude_memory/mcp/handlers/query_handlers.rb +115 -0
  76. data/lib/claude_memory/mcp/handlers/setup_handlers.rb +211 -0
  77. data/lib/claude_memory/mcp/handlers/shortcut_handlers.rb +37 -0
  78. data/lib/claude_memory/mcp/handlers/stats_handlers.rb +205 -0
  79. data/lib/claude_memory/mcp/instructions_builder.rb +19 -1
  80. data/lib/claude_memory/mcp/query_guide.rb +10 -0
  81. data/lib/claude_memory/mcp/response_formatter.rb +1 -0
  82. data/lib/claude_memory/mcp/server.rb +22 -1
  83. data/lib/claude_memory/mcp/telemetry.rb +86 -0
  84. data/lib/claude_memory/mcp/text_summary.rb +26 -0
  85. data/lib/claude_memory/mcp/tool_definitions.rb +116 -4
  86. data/lib/claude_memory/mcp/tool_helpers.rb +43 -0
  87. data/lib/claude_memory/mcp/tools.rb +50 -679
  88. data/lib/claude_memory/publish.rb +40 -5
  89. data/lib/claude_memory/recall/dual_engine.rb +105 -0
  90. data/lib/claude_memory/recall/legacy_engine.rb +138 -0
  91. data/lib/claude_memory/recall/query_core.rb +371 -0
  92. data/lib/claude_memory/recall.rb +121 -673
  93. data/lib/claude_memory/resolve/predicate_policy.rb +63 -3
  94. data/lib/claude_memory/resolve/resolver.rb +43 -0
  95. data/lib/claude_memory/shortcuts.rb +4 -4
  96. data/lib/claude_memory/store/retry_handler.rb +61 -0
  97. data/lib/claude_memory/store/schema_manager.rb +68 -0
  98. data/lib/claude_memory/store/sqlite_store.rb +334 -201
  99. data/lib/claude_memory/store/store_manager.rb +50 -1
  100. data/lib/claude_memory/sweep/maintenance.rb +115 -1
  101. data/lib/claude_memory/sweep/sweeper.rb +3 -0
  102. data/lib/claude_memory/templates/hooks.example.json +26 -7
  103. data/lib/claude_memory/version.rb +1 -1
  104. data/lib/claude_memory.rb +16 -0
  105. metadata +48 -8
  106. data/.claude/memory.sqlite3-shm +0 -0
  107. data/.claude/memory.sqlite3-wal +0 -0
@@ -6,14 +6,25 @@ require "digest"
6
6
  require "json"
7
7
  require "extralite"
8
8
  require "sequel/adapters/extralite"
9
+ require_relative "retry_handler"
10
+ require_relative "schema_manager"
9
11
 
10
12
  module ClaudeMemory
11
13
  module Store
14
+ # SQLite-backed fact store for ClaudeMemory.
15
+ # Manages all database tables (content_items, entities, facts, provenance,
16
+ # conflicts, fact_links, etc.) via Sequel with Extralite adapter.
17
+ # Includes RetryHandler for transient lock recovery and SchemaManager
18
+ # for automatic migrations on open.
12
19
  class SQLiteStore
13
- SCHEMA_VERSION = 12
20
+ include RetryHandler
21
+ include SchemaManager
14
22
 
23
+ # @return [Sequel::Database] the underlying Sequel database connection
15
24
  attr_reader :db
16
25
 
26
+ # Open (or create) a SQLite database and migrate to the current schema.
27
+ # @param db_path [String] filesystem path to the SQLite database file
17
28
  def initialize(db_path)
18
29
  @db_path = db_path
19
30
  @db = connect_database(db_path)
@@ -21,140 +32,117 @@ module ClaudeMemory
21
32
  ensure_schema!
22
33
  end
23
34
 
24
- # Retry configuration for database operations
25
- # SQLite's busy_timeout doesn't reliably detect lock release, so we use
26
- # shorter timeouts with application-level retry for better responsiveness
27
- MAX_RETRIES = 5
28
- RETRY_BASE_DELAY = 0.1 # seconds, with exponential backoff
29
-
30
- # Execute a block with retry logic for busy/locked errors
31
- # This handles concurrent access from multiple hook processes
32
- def with_retry(operation_name = "database operation")
33
- retries = 0
34
- begin
35
- yield
36
- rescue Sequel::DatabaseError, Extralite::Error, Extralite::BusyError => e
37
- if retryable_error?(e) && retries < MAX_RETRIES
38
- retries += 1
39
- delay = RETRY_BASE_DELAY * (2**retries) # Exponential backoff
40
- sleep(delay)
41
- retry
42
- end
43
- raise
44
- end
45
- end
46
-
47
- # Execute a transaction with retry logic for concurrent access
48
- # Use this instead of @db.transaction when concurrent writes are expected
49
- def transaction_with_retry(&block)
50
- with_retry("transaction") do
51
- @db.transaction(&block)
52
- end
53
- end
54
-
55
- private
56
-
57
- def retryable_error?(error)
58
- message = error.message.downcase
59
- message.include?("busy") || message.include?("locked")
60
- end
61
-
62
- def connect_database(db_path)
63
- retries = 0
64
- begin
65
- Sequel.connect(
66
- "extralite:#{db_path}",
67
- # Use shorter busy_timeout since we handle retry at app level
68
- # This allows faster detection of lock release between retries
69
- connect_sqls: [
70
- "PRAGMA busy_timeout = 1000",
71
- "PRAGMA journal_mode = WAL",
72
- "PRAGMA synchronous = NORMAL"
73
- ]
74
- )
75
- rescue Sequel::DatabaseConnectionError, Extralite::Error => e
76
- retries += 1
77
- if retries <= MAX_RETRIES && retryable_error?(e)
78
- sleep(RETRY_BASE_DELAY * (2**retries))
79
- retry
80
- end
81
- raise
82
- end
83
- end
84
-
85
- public
86
-
35
+ # Disconnect from the database.
36
+ # @return [void]
87
37
  def close
88
38
  @db.disconnect
89
39
  end
90
40
 
41
+ # Lazily-initialized vector index for semantic search.
42
+ # @return [Index::VectorIndex]
91
43
  def vector_index
92
44
  @vector_index ||= Index::VectorIndex.new(self)
93
45
  end
94
46
 
95
- # Checkpoint the WAL file to prevent unlimited growth
96
- # This truncates the WAL after checkpointing
97
- # Should be called periodically during maintenance/sweep operations
47
+ # Checkpoint the WAL file to prevent unlimited growth.
48
+ # @return [void]
98
49
  def checkpoint_wal
99
50
  @db.run("PRAGMA wal_checkpoint(TRUNCATE)")
100
51
  end
101
52
 
53
+ # Current schema version stored in the meta table.
54
+ # @return [Integer, nil]
102
55
  def schema_version
103
56
  @db[:meta].where(key: "schema_version").get(:value)&.to_i
104
57
  end
105
58
 
106
- def content_items
107
- @db[:content_items]
108
- end
59
+ # --- Table accessors ---
60
+ # Each returns a {Sequel::Dataset} bound to the corresponding table.
109
61
 
110
- def delta_cursors
111
- @db[:delta_cursors]
112
- end
62
+ # @return [Sequel::Dataset]
63
+ def content_items = @db[:content_items]
113
64
 
114
- def entities
115
- @db[:entities]
116
- end
65
+ # @return [Sequel::Dataset]
66
+ def delta_cursors = @db[:delta_cursors]
117
67
 
118
- def entity_aliases
119
- @db[:entity_aliases]
120
- end
68
+ # @return [Sequel::Dataset]
69
+ def entities = @db[:entities]
121
70
 
122
- def facts
123
- @db[:facts]
124
- end
71
+ # @return [Sequel::Dataset]
72
+ def entity_aliases = @db[:entity_aliases]
125
73
 
126
- def provenance
127
- @db[:provenance]
128
- end
74
+ # @return [Sequel::Dataset]
75
+ def facts = @db[:facts]
129
76
 
130
- def fact_links
131
- @db[:fact_links]
132
- end
77
+ # @return [Sequel::Dataset]
78
+ def provenance = @db[:provenance]
133
79
 
134
- def conflicts
135
- @db[:conflicts]
136
- end
80
+ # @return [Sequel::Dataset]
81
+ def fact_links = @db[:fact_links]
137
82
 
138
- def tool_calls
139
- @db[:tool_calls]
140
- end
83
+ # @return [Sequel::Dataset]
84
+ def conflicts = @db[:conflicts]
141
85
 
142
- def operation_progress
143
- @db[:operation_progress]
144
- end
86
+ # @return [Sequel::Dataset]
87
+ def tool_calls = @db[:tool_calls]
145
88
 
146
- def schema_health
147
- @db[:schema_health]
148
- end
89
+ # @return [Sequel::Dataset]
90
+ def operation_progress = @db[:operation_progress]
149
91
 
150
- def ingestion_metrics
151
- @db[:ingestion_metrics]
152
- end
92
+ # @return [Sequel::Dataset]
93
+ def schema_health = @db[:schema_health]
94
+
95
+ # @return [Sequel::Dataset]
96
+ def ingestion_metrics = @db[:ingestion_metrics]
153
97
 
154
- def llm_cache
155
- @db[:llm_cache]
98
+ # @return [Sequel::Dataset]
99
+ def llm_cache = @db[:llm_cache]
100
+
101
+ # @return [Sequel::Dataset]
102
+ def mcp_tool_calls = @db[:mcp_tool_calls]
103
+
104
+ # Record a single MCP tool invocation for telemetry.
105
+ # Inserts synchronously; callers wrap in with_retry at the call site
106
+ # if needed.
107
+ #
108
+ # @param tool_name [String] name of the MCP tool invoked
109
+ # @param duration_ms [Integer] execution time in milliseconds
110
+ # @param result_count [Integer, nil] number of results returned
111
+ # @param scope [String, nil] "global" or "project"
112
+ # @param error_class [String, nil] error class name if the call failed
113
+ # @param called_at [String, nil] ISO 8601 timestamp (defaults to now UTC)
114
+ # @return [Integer] inserted row id
115
+ def insert_mcp_tool_call(tool_name:, duration_ms:, result_count: nil, scope: nil, error_class: nil, called_at: nil)
116
+ mcp_tool_calls.insert(
117
+ tool_name: tool_name,
118
+ called_at: called_at || Time.now.utc.iso8601,
119
+ duration_ms: duration_ms,
120
+ result_count: result_count,
121
+ scope: scope,
122
+ error_class: error_class
123
+ )
156
124
  end
157
125
 
126
+ # --- Content items ---
127
+
128
+ # Insert a content item or return the existing id if a duplicate
129
+ # (same text_hash + session_id) already exists. Wrapped in retry logic.
130
+ #
131
+ # @param source [String] origin type (e.g. "transcript", "hook")
132
+ # @param text_hash [String] SHA-256 hex digest of the raw text
133
+ # @param byte_len [Integer] byte length of the raw text
134
+ # @param session_id [String, nil] Claude Code session identifier
135
+ # @param transcript_path [String, nil] filesystem path to the transcript file
136
+ # @param project_path [String, nil] project directory path
137
+ # @param occurred_at [String, nil] ISO 8601 timestamp (defaults to now UTC)
138
+ # @param raw_text [String, nil] original text content
139
+ # @param metadata [Hash, nil] additional metadata stored as JSON
140
+ # @param git_branch [String, nil] active git branch at ingestion time
141
+ # @param cwd [String, nil] working directory at ingestion time
142
+ # @param claude_version [String, nil] Claude Code version string
143
+ # @param thinking_level [String, nil] thinking level setting
144
+ # @param source_mtime [String, nil] ISO 8601 mtime of the source file
145
+ # @return [Integer] content item row id (existing or newly inserted)
158
146
  def upsert_content_item(source:, text_hash:, byte_len:, session_id: nil, transcript_path: nil,
159
147
  project_path: nil, occurred_at: nil, raw_text: nil, metadata: nil,
160
148
  git_branch: nil, cwd: nil, claude_version: nil, thinking_level: nil, source_mtime: nil)
@@ -183,12 +171,31 @@ module ClaudeMemory
183
171
  end
184
172
  end
185
173
 
174
+ # Fetch a single content item by primary key.
175
+ # @param id [Integer] content item id
176
+ # @return [Hash, nil]
177
+ def get_content_item(id)
178
+ content_items.where(id: id).first
179
+ end
180
+
181
+ # Find a content item by transcript path and source modification time.
182
+ # @param transcript_path [String] filesystem path to the transcript
183
+ # @param mtime_iso8601 [String] ISO 8601 modification timestamp
184
+ # @return [Hash, nil]
186
185
  def content_item_by_transcript_and_mtime(transcript_path, mtime_iso8601)
187
186
  content_items
188
187
  .where(transcript_path: transcript_path, source_mtime: mtime_iso8601)
189
188
  .first
190
189
  end
191
190
 
191
+ # --- Tool calls ---
192
+
193
+ # Bulk-insert tool call records for a content item.
194
+ # @param content_item_id [Integer] owning content item id
195
+ # @param tool_calls_data [Array<Hash>] tool call hashes with keys
196
+ # :tool_name, :tool_input, :tool_result, :compressed_summary,
197
+ # :is_error, :timestamp
198
+ # @return [void]
192
199
  def insert_tool_calls(content_item_id, tool_calls_data)
193
200
  tool_calls_data.each do |tc|
194
201
  tool_calls.insert(
@@ -203,6 +210,9 @@ module ClaudeMemory
203
210
  end
204
211
  end
205
212
 
213
+ # Retrieve tool calls for a content item, ordered by timestamp.
214
+ # @param content_item_id [Integer] content item id
215
+ # @return [Array<Hash>]
206
216
  def tool_calls_for_content_item(content_item_id)
207
217
  tool_calls
208
218
  .where(content_item_id: content_item_id)
@@ -210,10 +220,21 @@ module ClaudeMemory
210
220
  .all
211
221
  end
212
222
 
223
+ # --- Delta cursors ---
224
+
225
+ # Get the last-read byte offset for a session/transcript pair.
226
+ # @param session_id [String] session identifier
227
+ # @param transcript_path [String] transcript file path
228
+ # @return [Integer, nil] byte offset, or nil if no cursor exists
213
229
  def get_delta_cursor(session_id, transcript_path)
214
230
  delta_cursors.where(session_id: session_id, transcript_path: transcript_path).get(:last_byte_offset)
215
231
  end
216
232
 
233
+ # Create or update the byte-offset cursor for a session/transcript pair.
234
+ # @param session_id [String] session identifier
235
+ # @param transcript_path [String] transcript file path
236
+ # @param offset [Integer] new byte offset
237
+ # @return [void]
217
238
  def update_delta_cursor(session_id, transcript_path, offset)
218
239
  now = Time.now.utc.iso8601
219
240
  delta_cursors
@@ -229,6 +250,12 @@ module ClaudeMemory
229
250
  )
230
251
  end
231
252
 
253
+ # --- Entities ---
254
+
255
+ # Find an entity by its slug or create a new one.
256
+ # @param type [String] entity type (e.g. "database", "framework", "person")
257
+ # @param name [String] canonical entity name
258
+ # @return [Integer] entity row id
232
259
  def find_or_create_entity(type:, name:)
233
260
  slug = slugify(type, name)
234
261
  existing = entities.where(slug: slug).get(:id)
@@ -238,6 +265,23 @@ module ClaudeMemory
238
265
  entities.insert(type: type, canonical_name: name, slug: slug, created_at: now)
239
266
  end
240
267
 
268
+ # --- Facts ---
269
+
270
+ # Insert a new fact (subject-predicate-object triple) with an auto-generated docid.
271
+ #
272
+ # @param subject_entity_id [Integer] entity id for the subject
273
+ # @param predicate [String] predicate label (e.g. "uses_database", "depends_on")
274
+ # @param object_entity_id [Integer, nil] entity id for the object (if entity-valued)
275
+ # @param object_literal [String, nil] literal value for the object
276
+ # @param datatype [String, nil] datatype hint for the object literal
277
+ # @param polarity [String] "positive" or "negative"
278
+ # @param valid_from [String, nil] ISO 8601 validity start (defaults to now UTC)
279
+ # @param status [String] fact status ("active", "superseded", "rejected")
280
+ # @param confidence [Float] confidence score 0.0..1.0
281
+ # @param created_from [String, nil] provenance tag (e.g. "promoted:path:id")
282
+ # @param scope [String] "global" or "project"
283
+ # @param project_path [String, nil] project directory for project-scoped facts
284
+ # @return [Integer] inserted fact row id
241
285
  def insert_fact(subject_entity_id:, predicate:, object_entity_id: nil, object_literal: nil,
242
286
  datatype: nil, polarity: "positive", valid_from: nil, status: "active",
243
287
  confidence: 1.0, created_from: nil, scope: "project", project_path: nil)
@@ -261,10 +305,24 @@ module ClaudeMemory
261
305
  )
262
306
  end
263
307
 
308
+ # Look up a fact by its short document identifier.
309
+ # @param docid [String] 8-character hex document id
310
+ # @return [Hash, nil]
264
311
  def find_fact_by_docid(docid)
265
312
  facts.where(docid: docid).first
266
313
  end
267
314
 
315
+ # Selectively update one or more fields on a fact.
316
+ # Only provided (non-nil) keyword arguments are written. Setting scope
317
+ # to "global" automatically clears project_path.
318
+ #
319
+ # @param fact_id [Integer] fact row id
320
+ # @param status [String, nil] new status value
321
+ # @param valid_to [String, nil] ISO 8601 end-of-validity timestamp
322
+ # @param scope [String, nil] "global" or "project"
323
+ # @param project_path [String, nil] project directory (cleared when scope is "global")
324
+ # @param embedding [Array<Float>, nil] embedding vector to store as JSON
325
+ # @return [Boolean] true if any fields were updated, false if all args were nil
268
326
  def update_fact(fact_id, status: nil, valid_to: nil, scope: nil, project_path: nil, embedding: nil)
269
327
  updates = {}
270
328
  updates[:status] = status if status
@@ -285,10 +343,53 @@ module ClaudeMemory
285
343
  true
286
344
  end
287
345
 
346
+ # Overwrite the embedding vector for a fact.
347
+ # @param fact_id [Integer] fact row id
348
+ # @param embedding_vector [Array<Float>] embedding to store as JSON
349
+ # @return [void]
288
350
  def update_fact_embedding(fact_id, embedding_vector)
289
351
  facts.where(id: fact_id).update(embedding_json: embedding_vector.to_json)
290
352
  end
291
353
 
354
+ # Reject a fact as incorrect (e.g. a distiller hallucination).
355
+ # Sets status to "rejected", closes any open conflicts involving
356
+ # the fact, and records the reason in conflict notes when provided.
357
+ # All updates run in a single transaction.
358
+ #
359
+ # @param fact_id [Integer] fact row id to reject
360
+ # @param reason [String, nil] optional rejection reason appended to conflict notes
361
+ # @return [Hash, nil] +{rejected: true, conflicts_resolved: Integer}+
362
+ # or nil if the fact does not exist
363
+ def reject_fact(fact_id, reason: nil)
364
+ row = facts.where(id: fact_id).first
365
+ return nil unless row
366
+
367
+ now = Time.now.utc.iso8601
368
+ resolved = 0
369
+
370
+ @db.transaction do
371
+ facts.where(id: fact_id).update(status: "rejected", valid_to: now)
372
+
373
+ open_conflict_rows = conflicts
374
+ .where(status: "open")
375
+ .where { (fact_a_id =~ fact_id) | (fact_b_id =~ fact_id) }
376
+ .all
377
+
378
+ open_conflict_rows.each do |conflict|
379
+ suffix = reason ? " | resolved: rejected fact #{fact_id} (#{reason})" : " | resolved: rejected fact #{fact_id}"
380
+ notes = "#{conflict[:notes]}#{suffix}"
381
+ conflicts.where(id: conflict[:id]).update(status: "resolved", notes: notes)
382
+ end
383
+ resolved = open_conflict_rows.size
384
+ end
385
+
386
+ {rejected: true, conflicts_resolved: resolved}
387
+ end
388
+
389
+ # Retrieve active facts that have stored embeddings.
390
+ # @param limit [Integer] maximum rows to return
391
+ # @return [Array<Hash>] fact rows with :id, :subject_entity_id,
392
+ # :predicate, :object_literal, :embedding_json, :scope
292
393
  def facts_with_embeddings(limit: 1000)
293
394
  facts
294
395
  .where(Sequel.~(embedding_json: nil))
@@ -298,6 +399,12 @@ module ClaudeMemory
298
399
  .all
299
400
  end
300
401
 
402
+ # Find all facts for a given subject + predicate combination (a "slot").
403
+ # Used by the resolver to detect supersession and conflicts.
404
+ # @param subject_entity_id [Integer] subject entity id
405
+ # @param predicate [String] predicate label
406
+ # @param status [String] filter by status (default: "active")
407
+ # @return [Array<Hash>]
301
408
  def facts_for_slot(subject_entity_id, predicate, status: "active")
302
409
  facts
303
410
  .where(subject_entity_id: subject_entity_id, predicate: predicate, status: status)
@@ -307,6 +414,18 @@ module ClaudeMemory
307
414
  .all
308
415
  end
309
416
 
417
+ # --- Provenance ---
418
+
419
+ # Record a provenance link between a fact and its source evidence.
420
+ #
421
+ # @param fact_id [Integer] fact row id
422
+ # @param content_item_id [Integer, nil] source content item id
423
+ # @param quote [String, nil] verbatim quote from the source
424
+ # @param attribution_entity_id [Integer, nil] entity who stated the fact
425
+ # @param strength [String] evidence strength ("stated", "inferred", "derived")
426
+ # @param line_start [Integer, nil] starting line in source content
427
+ # @param line_end [Integer, nil] ending line in source content
428
+ # @return [Integer] inserted provenance row id
310
429
  def insert_provenance(fact_id:, content_item_id: nil, quote: nil, attribution_entity_id: nil, strength: "stated",
311
430
  line_start: nil, line_end: nil)
312
431
  provenance.insert(
@@ -320,10 +439,21 @@ module ClaudeMemory
320
439
  )
321
440
  end
322
441
 
442
+ # Retrieve all provenance records for a given fact.
443
+ # @param fact_id [Integer] fact row id
444
+ # @return [Array<Hash>]
323
445
  def provenance_for_fact(fact_id)
324
446
  provenance.where(fact_id: fact_id).all
325
447
  end
326
448
 
449
+ # --- Conflicts & fact links ---
450
+
451
+ # Record a conflict between two facts.
452
+ # @param fact_a_id [Integer] first conflicting fact id
453
+ # @param fact_b_id [Integer] second conflicting fact id
454
+ # @param status [String] conflict status ("open" or "resolved")
455
+ # @param notes [String, nil] human-readable notes about the conflict
456
+ # @return [Integer] inserted conflict row id
327
457
  def insert_conflict(fact_a_id:, fact_b_id:, status: "open", notes: nil)
328
458
  now = Time.now.utc.iso8601
329
459
  conflicts.insert(
@@ -335,21 +465,55 @@ module ClaudeMemory
335
465
  )
336
466
  end
337
467
 
468
+ # Retrieve all unresolved conflicts.
469
+ # @return [Array<Hash>]
338
470
  def open_conflicts
339
471
  conflicts.where(status: "open").all
340
472
  end
341
473
 
474
+ # Create a directional link between two facts (e.g. supersession).
475
+ # @param from_fact_id [Integer] source fact id
476
+ # @param to_fact_id [Integer] target fact id
477
+ # @param link_type [String] relationship type (e.g. "supersedes", "conflicts_with")
478
+ # @return [Integer] inserted fact_link row id
342
479
  def insert_fact_link(from_fact_id:, to_fact_id:, link_type:)
343
480
  fact_links.insert(from_fact_id: from_fact_id, to_fact_id: to_fact_id, link_type: link_type)
344
481
  end
345
482
 
346
- # Record token usage metrics for a distillation operation
347
- #
348
- # @param content_item_id [Integer] The content item that was distilled
349
- # @param input_tokens [Integer] Tokens sent to the API
350
- # @param output_tokens [Integer] Tokens returned from the API
351
- # @param facts_extracted [Integer] Number of facts extracted
352
- # @return [Integer] The created metric record ID
483
+ # --- Ingestion metrics ---
484
+
485
+ # Fetch content items that have not yet been distilled, ordered newest first.
486
+ # @param limit [Integer] maximum rows to return
487
+ # @param min_length [Integer] minimum byte_len threshold
488
+ # @return [Array<Hash>]
489
+ def undistilled_content_items(limit: 3, min_length: 200)
490
+ content_items
491
+ .left_join(:ingestion_metrics, content_item_id: :id)
492
+ .where(Sequel[:ingestion_metrics][:id] => nil)
493
+ .where { byte_len >= min_length }
494
+ .order(Sequel.desc(:occurred_at))
495
+ .limit(limit)
496
+ .select_all(:content_items)
497
+ .all
498
+ end
499
+
500
+ # Count content items that have not yet been distilled.
501
+ # @param min_length [Integer] minimum byte_len threshold
502
+ # @return [Integer]
503
+ def count_undistilled(min_length: 200)
504
+ content_items
505
+ .left_join(:ingestion_metrics, content_item_id: :id)
506
+ .where(Sequel[:ingestion_metrics][:id] => nil)
507
+ .where { byte_len >= min_length }
508
+ .count
509
+ end
510
+
511
+ # Record token usage and extraction counts for a distillation run.
512
+ # @param content_item_id [Integer] content item that was distilled
513
+ # @param input_tokens [Integer] LLM input tokens consumed
514
+ # @param output_tokens [Integer] LLM output tokens consumed
515
+ # @param facts_extracted [Integer] number of facts extracted
516
+ # @return [Integer] inserted row id
353
517
  def record_ingestion_metrics(content_item_id:, input_tokens:, output_tokens:, facts_extracted:)
354
518
  ingestion_metrics.insert(
355
519
  content_item_id: content_item_id,
@@ -360,14 +524,8 @@ module ClaudeMemory
360
524
  )
361
525
  end
362
526
 
363
- # Get aggregate metrics across all distillation operations
364
- #
365
- # @return [Hash] Aggregated metrics with keys:
366
- # - total_input_tokens: Total tokens sent to API
367
- # - total_output_tokens: Total tokens returned from API
368
- # - total_facts_extracted: Total facts extracted
369
- # - total_operations: Number of distillation operations
370
- # - avg_facts_per_1k_input_tokens: Average efficiency metric
527
+ # Compute aggregate ingestion metrics across all distillation runs.
528
+ # @return [Hash, nil] totals and efficiency ratio, or nil if no data
371
529
  def aggregate_ingestion_metrics
372
530
  # standard:disable Performance/Detect (Sequel DSL requires .select{}.first)
373
531
  result = ingestion_metrics
@@ -400,23 +558,48 @@ module ClaudeMemory
400
558
  }
401
559
  end
402
560
 
403
- # Look up a cached LLM response by cache key
404
- #
405
- # @param cache_key [String] SHA256 hex digest of operation+model+input
406
- # @return [Hash, nil] Cached result row or nil
561
+ # Mark all undistilled content items as distilled with zero token counts.
562
+ # Used for backfilling legacy content that predates the metrics table.
563
+ # @return [Integer] number of items backfilled
564
+ def backfill_distillation_metrics!
565
+ undistilled_ids = content_items
566
+ .left_join(:ingestion_metrics, content_item_id: :id)
567
+ .where(Sequel[:ingestion_metrics][:id] => nil)
568
+ .select_map(Sequel[:content_items][:id])
569
+
570
+ return 0 if undistilled_ids.empty?
571
+
572
+ now = Time.now.utc.iso8601
573
+ undistilled_ids.each do |cid|
574
+ ingestion_metrics.insert(
575
+ content_item_id: cid,
576
+ input_tokens: 0,
577
+ output_tokens: 0,
578
+ facts_extracted: 0,
579
+ created_at: now
580
+ )
581
+ end
582
+
583
+ undistilled_ids.size
584
+ end
585
+
586
+ # --- LLM cache ---
587
+
588
+ # Look up a cached LLM result by its cache key.
589
+ # @param cache_key [String] SHA-256 hex cache key
590
+ # @return [Hash, nil]
407
591
  def llm_cache_lookup(cache_key)
408
592
  llm_cache.where(cache_key: cache_key).first
409
593
  end
410
594
 
411
- # Store an LLM response in the cache
412
- #
413
- # @param operation [String] Operation type (e.g., "distill", "extract")
414
- # @param model [String] Model identifier
415
- # @param input_hash [String] SHA256 of input content
416
- # @param result_json [String] JSON response to cache
417
- # @param input_tokens [Integer, nil] Tokens in request
418
- # @param output_tokens [Integer, nil] Tokens in response
419
- # @return [Integer] The created cache entry ID
595
+ # Store or update a cached LLM result. Uses upsert on the cache_key.
596
+ # @param operation [String] operation name (e.g. "distill", "embed")
597
+ # @param model [String] model identifier
598
+ # @param input_hash [String] SHA-256 hex digest of the input
599
+ # @param result_json [String] JSON-serialized result
600
+ # @param input_tokens [Integer, nil] input tokens consumed
601
+ # @param output_tokens [Integer, nil] output tokens consumed
602
+ # @return [void]
420
603
  def llm_cache_store(operation:, model:, input_hash:, result_json:, input_tokens: nil, output_tokens: nil)
421
604
  cache_key = Digest::SHA256.hexdigest("#{operation}:#{model}:#{input_hash}")
422
605
 
@@ -439,97 +622,47 @@ module ClaudeMemory
439
622
  )
440
623
  end
441
624
 
442
- # Generate a cache key for LLM response lookup
443
- #
444
- # @param operation [String] Operation type
445
- # @param model [String] Model identifier
446
- # @param input [String] Raw input content
447
- # @return [String] SHA256 hex digest cache key
625
+ # Compute the cache key for an LLM operation.
626
+ # @param operation [String] operation name
627
+ # @param model [String] model identifier
628
+ # @param input [String] raw input text
629
+ # @return [String] SHA-256 hex cache key
448
630
  def llm_cache_key(operation, model, input)
449
631
  input_hash = Digest::SHA256.hexdigest(input)
450
632
  Digest::SHA256.hexdigest("#{operation}:#{model}:#{input_hash}")
451
633
  end
452
634
 
453
- # Prune cache entries older than the given age
454
- #
455
- # @param max_age_seconds [Integer] Maximum age in seconds (default: 7 days)
456
- # @return [Integer] Number of entries pruned
635
+ # Delete LLM cache entries older than the given age.
636
+ # @param max_age_seconds [Integer] maximum age in seconds (default: 7 days)
637
+ # @return [Integer] number of rows deleted
457
638
  def llm_cache_prune(max_age_seconds: 604_800)
458
639
  cutoff = (Time.now - max_age_seconds).utc.iso8601
459
640
  llm_cache.where { created_at < cutoff }.delete
460
641
  end
461
642
 
462
- private
463
-
464
- def ensure_schema!
465
- migrations_path = File.expand_path("../../../db/migrations", __dir__)
466
-
467
- # Handle backward compatibility: databases created with old migration system
468
- sync_legacy_schema_version!
469
-
470
- # Skip migration if the database is already ahead of this gem's version.
471
- # This happens when a newer gem version migrated the DB and an older
472
- # installed gem (e.g. via hooks) tries to open it.
473
- current = current_schema_version
474
- return if current && current > SCHEMA_VERSION
475
-
476
- # Run Sequel migrations to bring database to target version
477
- Sequel::Migrator.run(@db, migrations_path, target: SCHEMA_VERSION)
478
-
479
- # Set created_at timestamp on first initialization
480
- set_meta("created_at", Time.now.utc.iso8601) unless get_meta("created_at")
481
-
482
- # Sync legacy schema_version meta key with Sequel's schema_info
483
- # This maintains backwards compatibility with code that reads schema_version
484
- sequel_version = @db[:schema_info].get(:version) if @db.table_exists?(:schema_info)
485
- set_meta("schema_version", sequel_version.to_s) if sequel_version
486
- end
487
-
488
- # Sync legacy schema_version from meta table to Sequel's schema_info
489
- # Handles two cases:
490
- # 1. No schema_info table exists (old system, pre-Sequel migrations)
491
- # 2. schema_info exists but is out of sync with meta.schema_version
492
- def sync_legacy_schema_version!
493
- return unless @db.table_exists?(:meta)
494
-
495
- meta_version = get_meta("schema_version")&.to_i
496
- return unless meta_version && meta_version >= 2
497
-
498
- # Verify database actually has v2+ schema (defensive check)
499
- columns = @db.schema(:content_items).map(&:first) if @db.table_exists?(:content_items)
500
- return unless columns&.include?(:project_path)
501
-
502
- # Create or update schema_info to match meta.schema_version
503
- @db.create_table?(:schema_info) do
504
- Integer :version, null: false, default: 0
505
- end
506
-
507
- sequel_version = @db[:schema_info].get(:version)
508
- if sequel_version.nil? || sequel_version < meta_version
509
- # Update schema_info to match meta (old system's version)
510
- @db[:schema_info].delete
511
- @db[:schema_info].insert(version: meta_version)
512
- end
513
- end
514
-
515
- def current_schema_version
516
- return nil unless @db.table_exists?(:schema_info)
517
- @db[:schema_info].get(:version)
518
- end
643
+ # --- Meta ---
519
644
 
645
+ # Set a key-value pair in the meta table (upsert).
646
+ # @param key [String] metadata key
647
+ # @param value [String] metadata value
648
+ # @return [void]
520
649
  def set_meta(key, value)
521
650
  @db[:meta].insert_conflict(target: :key, update: {value: value}).insert(key: key, value: value)
522
651
  end
523
652
 
653
+ # Retrieve a value from the meta table.
654
+ # @param key [String] metadata key
655
+ # @return [String, nil]
524
656
  def get_meta(key)
525
657
  @db[:meta].where(key: key).get(:value)
526
658
  end
527
659
 
660
+ private
661
+
528
662
  def generate_docid(subject_entity_id, predicate, object_literal, created_at)
529
663
  input = "#{subject_entity_id}:#{predicate}:#{object_literal}:#{created_at}"
530
664
  docid = Digest::SHA256.hexdigest(input)[0, 8]
531
665
 
532
- # Handle unlikely collisions by rehashing with a counter
533
666
  counter = 0
534
667
  while facts.where(docid: docid).any?
535
668
  counter += 1