claude_memory 0.7.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/memory.sqlite3 +0 -0
- data/.claude/rules/claude_memory.generated.md +32 -2
- data/.claude/settings.json +65 -15
- data/.claude/settings.local.json +5 -2
- data/.claude/skills/improve/SKILL.md +113 -25
- data/.claude/skills/upgrade-dependencies/SKILL.md +154 -0
- data/.claude-plugin/commands/distill-transcripts.md +98 -0
- data/.claude-plugin/commands/memory-recall.md +67 -0
- data/.claude-plugin/marketplace.json +2 -2
- data/.claude-plugin/plugin.json +3 -3
- data/.claude-plugin/scripts/hook-runner.sh +14 -0
- data/.claude-plugin/scripts/serve-mcp.sh +14 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +90 -1
- data/CLAUDE.md +56 -18
- data/README.md +35 -0
- data/db/migrations/013_add_mcp_tool_calls.rb +26 -0
- data/db/migrations/014_canonicalize_predicates.rb +30 -0
- data/docs/improvements.md +74 -74
- data/docs/influence/claude-mem.md +1 -0
- data/docs/influence/claude-supermemory.md +1 -0
- data/docs/influence/episodic-memory.md +1 -0
- data/docs/influence/grepai.md +1 -0
- data/docs/influence/kbs.md +1 -0
- data/docs/influence/lossless-claw.md +1 -0
- data/docs/influence/qmd.md +1 -0
- data/docs/quality_review.md +119 -224
- data/hooks/hooks.json +39 -7
- data/lib/claude_memory/commands/checks/distill_check.rb +61 -0
- data/lib/claude_memory/commands/checks/hooks_check.rb +2 -2
- data/lib/claude_memory/commands/checks/vec_check.rb +2 -1
- data/lib/claude_memory/commands/completion_command.rb +149 -0
- data/lib/claude_memory/commands/doctor_command.rb +2 -0
- data/lib/claude_memory/commands/embeddings_command.rb +198 -0
- data/lib/claude_memory/commands/help_command.rb +12 -1
- data/lib/claude_memory/commands/hook_command.rb +2 -1
- data/lib/claude_memory/commands/index_command.rb +85 -78
- data/lib/claude_memory/commands/initializers/database_ensurer.rb +16 -0
- data/lib/claude_memory/commands/initializers/global_initializer.rb +2 -1
- data/lib/claude_memory/commands/initializers/hooks_configurator.rb +55 -11
- data/lib/claude_memory/commands/initializers/project_initializer.rb +2 -1
- data/lib/claude_memory/commands/install_skill_command.rb +78 -0
- data/lib/claude_memory/commands/registry.rb +47 -32
- data/lib/claude_memory/commands/reject_command.rb +62 -0
- data/lib/claude_memory/commands/restore_command.rb +77 -0
- data/lib/claude_memory/commands/skills/distill-transcripts.md +102 -0
- data/lib/claude_memory/commands/skills/memory-recall.md +67 -0
- data/lib/claude_memory/commands/stats_command.rb +98 -2
- data/lib/claude_memory/configuration.rb +14 -1
- data/lib/claude_memory/core/fact_ranker.rb +2 -2
- data/lib/claude_memory/core/rr_fusion.rb +23 -6
- data/lib/claude_memory/core/snippet_extractor.rb +7 -3
- data/lib/claude_memory/core/text_builder.rb +11 -0
- data/lib/claude_memory/distill/json_schema.md +8 -4
- data/lib/claude_memory/distill/null_distiller.rb +2 -0
- data/lib/claude_memory/domain/entity.rb +13 -1
- data/lib/claude_memory/domain/fact.rb +26 -2
- data/lib/claude_memory/domain/provenance.rb +0 -1
- data/lib/claude_memory/embeddings/api_adapter.rb +97 -0
- data/lib/claude_memory/embeddings/dimension_check.rb +23 -0
- data/lib/claude_memory/embeddings/fastembed_adapter.rb +46 -12
- data/lib/claude_memory/embeddings/generator.rb +4 -0
- data/lib/claude_memory/embeddings/inspector.rb +91 -0
- data/lib/claude_memory/embeddings/model_registry.rb +210 -0
- data/lib/claude_memory/embeddings/resolver.rb +44 -0
- data/lib/claude_memory/hook/context_injector.rb +58 -2
- data/lib/claude_memory/hook/distillation_runner.rb +46 -0
- data/lib/claude_memory/hook/handler.rb +11 -2
- data/lib/claude_memory/index/vector_index.rb +15 -2
- data/lib/claude_memory/infrastructure/schema_validator.rb +3 -3
- data/lib/claude_memory/ingest/ingester.rb +17 -0
- data/lib/claude_memory/mcp/handlers/context_handlers.rb +38 -0
- data/lib/claude_memory/mcp/handlers/management_handlers.rb +169 -0
- data/lib/claude_memory/mcp/handlers/query_handlers.rb +115 -0
- data/lib/claude_memory/mcp/handlers/setup_handlers.rb +211 -0
- data/lib/claude_memory/mcp/handlers/shortcut_handlers.rb +37 -0
- data/lib/claude_memory/mcp/handlers/stats_handlers.rb +205 -0
- data/lib/claude_memory/mcp/instructions_builder.rb +19 -1
- data/lib/claude_memory/mcp/query_guide.rb +10 -0
- data/lib/claude_memory/mcp/response_formatter.rb +1 -0
- data/lib/claude_memory/mcp/server.rb +22 -1
- data/lib/claude_memory/mcp/telemetry.rb +86 -0
- data/lib/claude_memory/mcp/text_summary.rb +26 -0
- data/lib/claude_memory/mcp/tool_definitions.rb +116 -4
- data/lib/claude_memory/mcp/tool_helpers.rb +43 -0
- data/lib/claude_memory/mcp/tools.rb +50 -679
- data/lib/claude_memory/publish.rb +40 -5
- data/lib/claude_memory/recall/dual_engine.rb +105 -0
- data/lib/claude_memory/recall/legacy_engine.rb +138 -0
- data/lib/claude_memory/recall/query_core.rb +371 -0
- data/lib/claude_memory/recall.rb +121 -673
- data/lib/claude_memory/resolve/predicate_policy.rb +63 -3
- data/lib/claude_memory/resolve/resolver.rb +43 -0
- data/lib/claude_memory/shortcuts.rb +4 -4
- data/lib/claude_memory/store/retry_handler.rb +61 -0
- data/lib/claude_memory/store/schema_manager.rb +68 -0
- data/lib/claude_memory/store/sqlite_store.rb +334 -201
- data/lib/claude_memory/store/store_manager.rb +50 -1
- data/lib/claude_memory/sweep/maintenance.rb +115 -1
- data/lib/claude_memory/sweep/sweeper.rb +3 -0
- data/lib/claude_memory/templates/hooks.example.json +26 -7
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +16 -0
- metadata +48 -8
- data/.claude/memory.sqlite3-shm +0 -0
- data/.claude/memory.sqlite3-wal +0 -0
|
@@ -6,14 +6,25 @@ require "digest"
|
|
|
6
6
|
require "json"
|
|
7
7
|
require "extralite"
|
|
8
8
|
require "sequel/adapters/extralite"
|
|
9
|
+
require_relative "retry_handler"
|
|
10
|
+
require_relative "schema_manager"
|
|
9
11
|
|
|
10
12
|
module ClaudeMemory
|
|
11
13
|
module Store
|
|
14
|
+
# SQLite-backed fact store for ClaudeMemory.
|
|
15
|
+
# Manages all database tables (content_items, entities, facts, provenance,
|
|
16
|
+
# conflicts, fact_links, etc.) via Sequel with Extralite adapter.
|
|
17
|
+
# Includes RetryHandler for transient lock recovery and SchemaManager
|
|
18
|
+
# for automatic migrations on open.
|
|
12
19
|
class SQLiteStore
|
|
13
|
-
|
|
20
|
+
include RetryHandler
|
|
21
|
+
include SchemaManager
|
|
14
22
|
|
|
23
|
+
# @return [Sequel::Database] the underlying Sequel database connection
|
|
15
24
|
attr_reader :db
|
|
16
25
|
|
|
26
|
+
# Open (or create) a SQLite database and migrate to the current schema.
|
|
27
|
+
# @param db_path [String] filesystem path to the SQLite database file
|
|
17
28
|
def initialize(db_path)
|
|
18
29
|
@db_path = db_path
|
|
19
30
|
@db = connect_database(db_path)
|
|
@@ -21,140 +32,117 @@ module ClaudeMemory
|
|
|
21
32
|
ensure_schema!
|
|
22
33
|
end
|
|
23
34
|
|
|
24
|
-
#
|
|
25
|
-
#
|
|
26
|
-
# shorter timeouts with application-level retry for better responsiveness
|
|
27
|
-
MAX_RETRIES = 5
|
|
28
|
-
RETRY_BASE_DELAY = 0.1 # seconds, with exponential backoff
|
|
29
|
-
|
|
30
|
-
# Execute a block with retry logic for busy/locked errors
|
|
31
|
-
# This handles concurrent access from multiple hook processes
|
|
32
|
-
def with_retry(operation_name = "database operation")
|
|
33
|
-
retries = 0
|
|
34
|
-
begin
|
|
35
|
-
yield
|
|
36
|
-
rescue Sequel::DatabaseError, Extralite::Error, Extralite::BusyError => e
|
|
37
|
-
if retryable_error?(e) && retries < MAX_RETRIES
|
|
38
|
-
retries += 1
|
|
39
|
-
delay = RETRY_BASE_DELAY * (2**retries) # Exponential backoff
|
|
40
|
-
sleep(delay)
|
|
41
|
-
retry
|
|
42
|
-
end
|
|
43
|
-
raise
|
|
44
|
-
end
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# Execute a transaction with retry logic for concurrent access
|
|
48
|
-
# Use this instead of @db.transaction when concurrent writes are expected
|
|
49
|
-
def transaction_with_retry(&block)
|
|
50
|
-
with_retry("transaction") do
|
|
51
|
-
@db.transaction(&block)
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
private
|
|
56
|
-
|
|
57
|
-
def retryable_error?(error)
|
|
58
|
-
message = error.message.downcase
|
|
59
|
-
message.include?("busy") || message.include?("locked")
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
def connect_database(db_path)
|
|
63
|
-
retries = 0
|
|
64
|
-
begin
|
|
65
|
-
Sequel.connect(
|
|
66
|
-
"extralite:#{db_path}",
|
|
67
|
-
# Use shorter busy_timeout since we handle retry at app level
|
|
68
|
-
# This allows faster detection of lock release between retries
|
|
69
|
-
connect_sqls: [
|
|
70
|
-
"PRAGMA busy_timeout = 1000",
|
|
71
|
-
"PRAGMA journal_mode = WAL",
|
|
72
|
-
"PRAGMA synchronous = NORMAL"
|
|
73
|
-
]
|
|
74
|
-
)
|
|
75
|
-
rescue Sequel::DatabaseConnectionError, Extralite::Error => e
|
|
76
|
-
retries += 1
|
|
77
|
-
if retries <= MAX_RETRIES && retryable_error?(e)
|
|
78
|
-
sleep(RETRY_BASE_DELAY * (2**retries))
|
|
79
|
-
retry
|
|
80
|
-
end
|
|
81
|
-
raise
|
|
82
|
-
end
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
public
|
|
86
|
-
|
|
35
|
+
# Disconnect from the database.
|
|
36
|
+
# @return [void]
|
|
87
37
|
def close
|
|
88
38
|
@db.disconnect
|
|
89
39
|
end
|
|
90
40
|
|
|
41
|
+
# Lazily-initialized vector index for semantic search.
|
|
42
|
+
# @return [Index::VectorIndex]
|
|
91
43
|
def vector_index
|
|
92
44
|
@vector_index ||= Index::VectorIndex.new(self)
|
|
93
45
|
end
|
|
94
46
|
|
|
95
|
-
# Checkpoint the WAL file to prevent unlimited growth
|
|
96
|
-
#
|
|
97
|
-
# Should be called periodically during maintenance/sweep operations
|
|
47
|
+
# Checkpoint the WAL file to prevent unlimited growth.
|
|
48
|
+
# @return [void]
|
|
98
49
|
def checkpoint_wal
|
|
99
50
|
@db.run("PRAGMA wal_checkpoint(TRUNCATE)")
|
|
100
51
|
end
|
|
101
52
|
|
|
53
|
+
# Current schema version stored in the meta table.
|
|
54
|
+
# @return [Integer, nil]
|
|
102
55
|
def schema_version
|
|
103
56
|
@db[:meta].where(key: "schema_version").get(:value)&.to_i
|
|
104
57
|
end
|
|
105
58
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
end
|
|
59
|
+
# --- Table accessors ---
|
|
60
|
+
# Each returns a {Sequel::Dataset} bound to the corresponding table.
|
|
109
61
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
end
|
|
62
|
+
# @return [Sequel::Dataset]
|
|
63
|
+
def content_items = @db[:content_items]
|
|
113
64
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
end
|
|
65
|
+
# @return [Sequel::Dataset]
|
|
66
|
+
def delta_cursors = @db[:delta_cursors]
|
|
117
67
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
end
|
|
68
|
+
# @return [Sequel::Dataset]
|
|
69
|
+
def entities = @db[:entities]
|
|
121
70
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
end
|
|
71
|
+
# @return [Sequel::Dataset]
|
|
72
|
+
def entity_aliases = @db[:entity_aliases]
|
|
125
73
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
end
|
|
74
|
+
# @return [Sequel::Dataset]
|
|
75
|
+
def facts = @db[:facts]
|
|
129
76
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
end
|
|
77
|
+
# @return [Sequel::Dataset]
|
|
78
|
+
def provenance = @db[:provenance]
|
|
133
79
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
end
|
|
80
|
+
# @return [Sequel::Dataset]
|
|
81
|
+
def fact_links = @db[:fact_links]
|
|
137
82
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
end
|
|
83
|
+
# @return [Sequel::Dataset]
|
|
84
|
+
def conflicts = @db[:conflicts]
|
|
141
85
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
end
|
|
86
|
+
# @return [Sequel::Dataset]
|
|
87
|
+
def tool_calls = @db[:tool_calls]
|
|
145
88
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
end
|
|
89
|
+
# @return [Sequel::Dataset]
|
|
90
|
+
def operation_progress = @db[:operation_progress]
|
|
149
91
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
92
|
+
# @return [Sequel::Dataset]
|
|
93
|
+
def schema_health = @db[:schema_health]
|
|
94
|
+
|
|
95
|
+
# @return [Sequel::Dataset]
|
|
96
|
+
def ingestion_metrics = @db[:ingestion_metrics]
|
|
153
97
|
|
|
154
|
-
|
|
155
|
-
|
|
98
|
+
# @return [Sequel::Dataset]
|
|
99
|
+
def llm_cache = @db[:llm_cache]
|
|
100
|
+
|
|
101
|
+
# @return [Sequel::Dataset]
|
|
102
|
+
def mcp_tool_calls = @db[:mcp_tool_calls]
|
|
103
|
+
|
|
104
|
+
# Record a single MCP tool invocation for telemetry.
|
|
105
|
+
# Inserts synchronously; callers wrap in with_retry at the call site
|
|
106
|
+
# if needed.
|
|
107
|
+
#
|
|
108
|
+
# @param tool_name [String] name of the MCP tool invoked
|
|
109
|
+
# @param duration_ms [Integer] execution time in milliseconds
|
|
110
|
+
# @param result_count [Integer, nil] number of results returned
|
|
111
|
+
# @param scope [String, nil] "global" or "project"
|
|
112
|
+
# @param error_class [String, nil] error class name if the call failed
|
|
113
|
+
# @param called_at [String, nil] ISO 8601 timestamp (defaults to now UTC)
|
|
114
|
+
# @return [Integer] inserted row id
|
|
115
|
+
def insert_mcp_tool_call(tool_name:, duration_ms:, result_count: nil, scope: nil, error_class: nil, called_at: nil)
|
|
116
|
+
mcp_tool_calls.insert(
|
|
117
|
+
tool_name: tool_name,
|
|
118
|
+
called_at: called_at || Time.now.utc.iso8601,
|
|
119
|
+
duration_ms: duration_ms,
|
|
120
|
+
result_count: result_count,
|
|
121
|
+
scope: scope,
|
|
122
|
+
error_class: error_class
|
|
123
|
+
)
|
|
156
124
|
end
|
|
157
125
|
|
|
126
|
+
# --- Content items ---
|
|
127
|
+
|
|
128
|
+
# Insert a content item or return the existing id if a duplicate
|
|
129
|
+
# (same text_hash + session_id) already exists. Wrapped in retry logic.
|
|
130
|
+
#
|
|
131
|
+
# @param source [String] origin type (e.g. "transcript", "hook")
|
|
132
|
+
# @param text_hash [String] SHA-256 hex digest of the raw text
|
|
133
|
+
# @param byte_len [Integer] byte length of the raw text
|
|
134
|
+
# @param session_id [String, nil] Claude Code session identifier
|
|
135
|
+
# @param transcript_path [String, nil] filesystem path to the transcript file
|
|
136
|
+
# @param project_path [String, nil] project directory path
|
|
137
|
+
# @param occurred_at [String, nil] ISO 8601 timestamp (defaults to now UTC)
|
|
138
|
+
# @param raw_text [String, nil] original text content
|
|
139
|
+
# @param metadata [Hash, nil] additional metadata stored as JSON
|
|
140
|
+
# @param git_branch [String, nil] active git branch at ingestion time
|
|
141
|
+
# @param cwd [String, nil] working directory at ingestion time
|
|
142
|
+
# @param claude_version [String, nil] Claude Code version string
|
|
143
|
+
# @param thinking_level [String, nil] thinking level setting
|
|
144
|
+
# @param source_mtime [String, nil] ISO 8601 mtime of the source file
|
|
145
|
+
# @return [Integer] content item row id (existing or newly inserted)
|
|
158
146
|
def upsert_content_item(source:, text_hash:, byte_len:, session_id: nil, transcript_path: nil,
|
|
159
147
|
project_path: nil, occurred_at: nil, raw_text: nil, metadata: nil,
|
|
160
148
|
git_branch: nil, cwd: nil, claude_version: nil, thinking_level: nil, source_mtime: nil)
|
|
@@ -183,12 +171,31 @@ module ClaudeMemory
|
|
|
183
171
|
end
|
|
184
172
|
end
|
|
185
173
|
|
|
174
|
+
# Fetch a single content item by primary key.
|
|
175
|
+
# @param id [Integer] content item id
|
|
176
|
+
# @return [Hash, nil]
|
|
177
|
+
def get_content_item(id)
|
|
178
|
+
content_items.where(id: id).first
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
# Find a content item by transcript path and source modification time.
|
|
182
|
+
# @param transcript_path [String] filesystem path to the transcript
|
|
183
|
+
# @param mtime_iso8601 [String] ISO 8601 modification timestamp
|
|
184
|
+
# @return [Hash, nil]
|
|
186
185
|
def content_item_by_transcript_and_mtime(transcript_path, mtime_iso8601)
|
|
187
186
|
content_items
|
|
188
187
|
.where(transcript_path: transcript_path, source_mtime: mtime_iso8601)
|
|
189
188
|
.first
|
|
190
189
|
end
|
|
191
190
|
|
|
191
|
+
# --- Tool calls ---
|
|
192
|
+
|
|
193
|
+
# Bulk-insert tool call records for a content item.
|
|
194
|
+
# @param content_item_id [Integer] owning content item id
|
|
195
|
+
# @param tool_calls_data [Array<Hash>] tool call hashes with keys
|
|
196
|
+
# :tool_name, :tool_input, :tool_result, :compressed_summary,
|
|
197
|
+
# :is_error, :timestamp
|
|
198
|
+
# @return [void]
|
|
192
199
|
def insert_tool_calls(content_item_id, tool_calls_data)
|
|
193
200
|
tool_calls_data.each do |tc|
|
|
194
201
|
tool_calls.insert(
|
|
@@ -203,6 +210,9 @@ module ClaudeMemory
|
|
|
203
210
|
end
|
|
204
211
|
end
|
|
205
212
|
|
|
213
|
+
# Retrieve tool calls for a content item, ordered by timestamp.
|
|
214
|
+
# @param content_item_id [Integer] content item id
|
|
215
|
+
# @return [Array<Hash>]
|
|
206
216
|
def tool_calls_for_content_item(content_item_id)
|
|
207
217
|
tool_calls
|
|
208
218
|
.where(content_item_id: content_item_id)
|
|
@@ -210,10 +220,21 @@ module ClaudeMemory
|
|
|
210
220
|
.all
|
|
211
221
|
end
|
|
212
222
|
|
|
223
|
+
# --- Delta cursors ---
|
|
224
|
+
|
|
225
|
+
# Get the last-read byte offset for a session/transcript pair.
|
|
226
|
+
# @param session_id [String] session identifier
|
|
227
|
+
# @param transcript_path [String] transcript file path
|
|
228
|
+
# @return [Integer, nil] byte offset, or nil if no cursor exists
|
|
213
229
|
def get_delta_cursor(session_id, transcript_path)
|
|
214
230
|
delta_cursors.where(session_id: session_id, transcript_path: transcript_path).get(:last_byte_offset)
|
|
215
231
|
end
|
|
216
232
|
|
|
233
|
+
# Create or update the byte-offset cursor for a session/transcript pair.
|
|
234
|
+
# @param session_id [String] session identifier
|
|
235
|
+
# @param transcript_path [String] transcript file path
|
|
236
|
+
# @param offset [Integer] new byte offset
|
|
237
|
+
# @return [void]
|
|
217
238
|
def update_delta_cursor(session_id, transcript_path, offset)
|
|
218
239
|
now = Time.now.utc.iso8601
|
|
219
240
|
delta_cursors
|
|
@@ -229,6 +250,12 @@ module ClaudeMemory
|
|
|
229
250
|
)
|
|
230
251
|
end
|
|
231
252
|
|
|
253
|
+
# --- Entities ---
|
|
254
|
+
|
|
255
|
+
# Find an entity by its slug or create a new one.
|
|
256
|
+
# @param type [String] entity type (e.g. "database", "framework", "person")
|
|
257
|
+
# @param name [String] canonical entity name
|
|
258
|
+
# @return [Integer] entity row id
|
|
232
259
|
def find_or_create_entity(type:, name:)
|
|
233
260
|
slug = slugify(type, name)
|
|
234
261
|
existing = entities.where(slug: slug).get(:id)
|
|
@@ -238,6 +265,23 @@ module ClaudeMemory
|
|
|
238
265
|
entities.insert(type: type, canonical_name: name, slug: slug, created_at: now)
|
|
239
266
|
end
|
|
240
267
|
|
|
268
|
+
# --- Facts ---
|
|
269
|
+
|
|
270
|
+
# Insert a new fact (subject-predicate-object triple) with an auto-generated docid.
|
|
271
|
+
#
|
|
272
|
+
# @param subject_entity_id [Integer] entity id for the subject
|
|
273
|
+
# @param predicate [String] predicate label (e.g. "uses_database", "depends_on")
|
|
274
|
+
# @param object_entity_id [Integer, nil] entity id for the object (if entity-valued)
|
|
275
|
+
# @param object_literal [String, nil] literal value for the object
|
|
276
|
+
# @param datatype [String, nil] datatype hint for the object literal
|
|
277
|
+
# @param polarity [String] "positive" or "negative"
|
|
278
|
+
# @param valid_from [String, nil] ISO 8601 validity start (defaults to now UTC)
|
|
279
|
+
# @param status [String] fact status ("active", "superseded", "rejected")
|
|
280
|
+
# @param confidence [Float] confidence score 0.0..1.0
|
|
281
|
+
# @param created_from [String, nil] provenance tag (e.g. "promoted:path:id")
|
|
282
|
+
# @param scope [String] "global" or "project"
|
|
283
|
+
# @param project_path [String, nil] project directory for project-scoped facts
|
|
284
|
+
# @return [Integer] inserted fact row id
|
|
241
285
|
def insert_fact(subject_entity_id:, predicate:, object_entity_id: nil, object_literal: nil,
|
|
242
286
|
datatype: nil, polarity: "positive", valid_from: nil, status: "active",
|
|
243
287
|
confidence: 1.0, created_from: nil, scope: "project", project_path: nil)
|
|
@@ -261,10 +305,24 @@ module ClaudeMemory
|
|
|
261
305
|
)
|
|
262
306
|
end
|
|
263
307
|
|
|
308
|
+
# Look up a fact by its short document identifier.
|
|
309
|
+
# @param docid [String] 8-character hex document id
|
|
310
|
+
# @return [Hash, nil]
|
|
264
311
|
def find_fact_by_docid(docid)
|
|
265
312
|
facts.where(docid: docid).first
|
|
266
313
|
end
|
|
267
314
|
|
|
315
|
+
# Selectively update one or more fields on a fact.
|
|
316
|
+
# Only provided (non-nil) keyword arguments are written. Setting scope
|
|
317
|
+
# to "global" automatically clears project_path.
|
|
318
|
+
#
|
|
319
|
+
# @param fact_id [Integer] fact row id
|
|
320
|
+
# @param status [String, nil] new status value
|
|
321
|
+
# @param valid_to [String, nil] ISO 8601 end-of-validity timestamp
|
|
322
|
+
# @param scope [String, nil] "global" or "project"
|
|
323
|
+
# @param project_path [String, nil] project directory (cleared when scope is "global")
|
|
324
|
+
# @param embedding [Array<Float>, nil] embedding vector to store as JSON
|
|
325
|
+
# @return [Boolean] true if any fields were updated, false if all args were nil
|
|
268
326
|
def update_fact(fact_id, status: nil, valid_to: nil, scope: nil, project_path: nil, embedding: nil)
|
|
269
327
|
updates = {}
|
|
270
328
|
updates[:status] = status if status
|
|
@@ -285,10 +343,53 @@ module ClaudeMemory
|
|
|
285
343
|
true
|
|
286
344
|
end
|
|
287
345
|
|
|
346
|
+
# Overwrite the embedding vector for a fact.
|
|
347
|
+
# @param fact_id [Integer] fact row id
|
|
348
|
+
# @param embedding_vector [Array<Float>] embedding to store as JSON
|
|
349
|
+
# @return [void]
|
|
288
350
|
def update_fact_embedding(fact_id, embedding_vector)
|
|
289
351
|
facts.where(id: fact_id).update(embedding_json: embedding_vector.to_json)
|
|
290
352
|
end
|
|
291
353
|
|
|
354
|
+
# Reject a fact as incorrect (e.g. a distiller hallucination).
|
|
355
|
+
# Sets status to "rejected", closes any open conflicts involving
|
|
356
|
+
# the fact, and records the reason in conflict notes when provided.
|
|
357
|
+
# All updates run in a single transaction.
|
|
358
|
+
#
|
|
359
|
+
# @param fact_id [Integer] fact row id to reject
|
|
360
|
+
# @param reason [String, nil] optional rejection reason appended to conflict notes
|
|
361
|
+
# @return [Hash, nil] +{rejected: true, conflicts_resolved: Integer}+
|
|
362
|
+
# or nil if the fact does not exist
|
|
363
|
+
def reject_fact(fact_id, reason: nil)
|
|
364
|
+
row = facts.where(id: fact_id).first
|
|
365
|
+
return nil unless row
|
|
366
|
+
|
|
367
|
+
now = Time.now.utc.iso8601
|
|
368
|
+
resolved = 0
|
|
369
|
+
|
|
370
|
+
@db.transaction do
|
|
371
|
+
facts.where(id: fact_id).update(status: "rejected", valid_to: now)
|
|
372
|
+
|
|
373
|
+
open_conflict_rows = conflicts
|
|
374
|
+
.where(status: "open")
|
|
375
|
+
.where { (fact_a_id =~ fact_id) | (fact_b_id =~ fact_id) }
|
|
376
|
+
.all
|
|
377
|
+
|
|
378
|
+
open_conflict_rows.each do |conflict|
|
|
379
|
+
suffix = reason ? " | resolved: rejected fact #{fact_id} (#{reason})" : " | resolved: rejected fact #{fact_id}"
|
|
380
|
+
notes = "#{conflict[:notes]}#{suffix}"
|
|
381
|
+
conflicts.where(id: conflict[:id]).update(status: "resolved", notes: notes)
|
|
382
|
+
end
|
|
383
|
+
resolved = open_conflict_rows.size
|
|
384
|
+
end
|
|
385
|
+
|
|
386
|
+
{rejected: true, conflicts_resolved: resolved}
|
|
387
|
+
end
|
|
388
|
+
|
|
389
|
+
# Retrieve active facts that have stored embeddings.
|
|
390
|
+
# @param limit [Integer] maximum rows to return
|
|
391
|
+
# @return [Array<Hash>] fact rows with :id, :subject_entity_id,
|
|
392
|
+
# :predicate, :object_literal, :embedding_json, :scope
|
|
292
393
|
def facts_with_embeddings(limit: 1000)
|
|
293
394
|
facts
|
|
294
395
|
.where(Sequel.~(embedding_json: nil))
|
|
@@ -298,6 +399,12 @@ module ClaudeMemory
|
|
|
298
399
|
.all
|
|
299
400
|
end
|
|
300
401
|
|
|
402
|
+
# Find all facts for a given subject + predicate combination (a "slot").
|
|
403
|
+
# Used by the resolver to detect supersession and conflicts.
|
|
404
|
+
# @param subject_entity_id [Integer] subject entity id
|
|
405
|
+
# @param predicate [String] predicate label
|
|
406
|
+
# @param status [String] filter by status (default: "active")
|
|
407
|
+
# @return [Array<Hash>]
|
|
301
408
|
def facts_for_slot(subject_entity_id, predicate, status: "active")
|
|
302
409
|
facts
|
|
303
410
|
.where(subject_entity_id: subject_entity_id, predicate: predicate, status: status)
|
|
@@ -307,6 +414,18 @@ module ClaudeMemory
|
|
|
307
414
|
.all
|
|
308
415
|
end
|
|
309
416
|
|
|
417
|
+
# --- Provenance ---
|
|
418
|
+
|
|
419
|
+
# Record a provenance link between a fact and its source evidence.
|
|
420
|
+
#
|
|
421
|
+
# @param fact_id [Integer] fact row id
|
|
422
|
+
# @param content_item_id [Integer, nil] source content item id
|
|
423
|
+
# @param quote [String, nil] verbatim quote from the source
|
|
424
|
+
# @param attribution_entity_id [Integer, nil] entity who stated the fact
|
|
425
|
+
# @param strength [String] evidence strength ("stated", "inferred", "derived")
|
|
426
|
+
# @param line_start [Integer, nil] starting line in source content
|
|
427
|
+
# @param line_end [Integer, nil] ending line in source content
|
|
428
|
+
# @return [Integer] inserted provenance row id
|
|
310
429
|
def insert_provenance(fact_id:, content_item_id: nil, quote: nil, attribution_entity_id: nil, strength: "stated",
|
|
311
430
|
line_start: nil, line_end: nil)
|
|
312
431
|
provenance.insert(
|
|
@@ -320,10 +439,21 @@ module ClaudeMemory
|
|
|
320
439
|
)
|
|
321
440
|
end
|
|
322
441
|
|
|
442
|
+
# Retrieve all provenance records for a given fact.
|
|
443
|
+
# @param fact_id [Integer] fact row id
|
|
444
|
+
# @return [Array<Hash>]
|
|
323
445
|
def provenance_for_fact(fact_id)
|
|
324
446
|
provenance.where(fact_id: fact_id).all
|
|
325
447
|
end
|
|
326
448
|
|
|
449
|
+
# --- Conflicts & fact links ---
|
|
450
|
+
|
|
451
|
+
# Record a conflict between two facts.
|
|
452
|
+
# @param fact_a_id [Integer] first conflicting fact id
|
|
453
|
+
# @param fact_b_id [Integer] second conflicting fact id
|
|
454
|
+
# @param status [String] conflict status ("open" or "resolved")
|
|
455
|
+
# @param notes [String, nil] human-readable notes about the conflict
|
|
456
|
+
# @return [Integer] inserted conflict row id
|
|
327
457
|
def insert_conflict(fact_a_id:, fact_b_id:, status: "open", notes: nil)
|
|
328
458
|
now = Time.now.utc.iso8601
|
|
329
459
|
conflicts.insert(
|
|
@@ -335,21 +465,55 @@ module ClaudeMemory
|
|
|
335
465
|
)
|
|
336
466
|
end
|
|
337
467
|
|
|
468
|
+
# Retrieve all unresolved conflicts.
|
|
469
|
+
# @return [Array<Hash>]
|
|
338
470
|
def open_conflicts
|
|
339
471
|
conflicts.where(status: "open").all
|
|
340
472
|
end
|
|
341
473
|
|
|
474
|
+
# Create a directional link between two facts (e.g. supersession).
|
|
475
|
+
# @param from_fact_id [Integer] source fact id
|
|
476
|
+
# @param to_fact_id [Integer] target fact id
|
|
477
|
+
# @param link_type [String] relationship type (e.g. "supersedes", "conflicts_with")
|
|
478
|
+
# @return [Integer] inserted fact_link row id
|
|
342
479
|
def insert_fact_link(from_fact_id:, to_fact_id:, link_type:)
|
|
343
480
|
fact_links.insert(from_fact_id: from_fact_id, to_fact_id: to_fact_id, link_type: link_type)
|
|
344
481
|
end
|
|
345
482
|
|
|
346
|
-
#
|
|
347
|
-
|
|
348
|
-
#
|
|
349
|
-
# @param
|
|
350
|
-
# @param
|
|
351
|
-
# @
|
|
352
|
-
|
|
483
|
+
# --- Ingestion metrics ---
|
|
484
|
+
|
|
485
|
+
# Fetch content items that have not yet been distilled, ordered newest first.
|
|
486
|
+
# @param limit [Integer] maximum rows to return
|
|
487
|
+
# @param min_length [Integer] minimum byte_len threshold
|
|
488
|
+
# @return [Array<Hash>]
|
|
489
|
+
def undistilled_content_items(limit: 3, min_length: 200)
|
|
490
|
+
content_items
|
|
491
|
+
.left_join(:ingestion_metrics, content_item_id: :id)
|
|
492
|
+
.where(Sequel[:ingestion_metrics][:id] => nil)
|
|
493
|
+
.where { byte_len >= min_length }
|
|
494
|
+
.order(Sequel.desc(:occurred_at))
|
|
495
|
+
.limit(limit)
|
|
496
|
+
.select_all(:content_items)
|
|
497
|
+
.all
|
|
498
|
+
end
|
|
499
|
+
|
|
500
|
+
# Count content items that have not yet been distilled.
|
|
501
|
+
# @param min_length [Integer] minimum byte_len threshold
|
|
502
|
+
# @return [Integer]
|
|
503
|
+
def count_undistilled(min_length: 200)
|
|
504
|
+
content_items
|
|
505
|
+
.left_join(:ingestion_metrics, content_item_id: :id)
|
|
506
|
+
.where(Sequel[:ingestion_metrics][:id] => nil)
|
|
507
|
+
.where { byte_len >= min_length }
|
|
508
|
+
.count
|
|
509
|
+
end
|
|
510
|
+
|
|
511
|
+
# Record token usage and extraction counts for a distillation run.
|
|
512
|
+
# @param content_item_id [Integer] content item that was distilled
|
|
513
|
+
# @param input_tokens [Integer] LLM input tokens consumed
|
|
514
|
+
# @param output_tokens [Integer] LLM output tokens consumed
|
|
515
|
+
# @param facts_extracted [Integer] number of facts extracted
|
|
516
|
+
# @return [Integer] inserted row id
|
|
353
517
|
def record_ingestion_metrics(content_item_id:, input_tokens:, output_tokens:, facts_extracted:)
|
|
354
518
|
ingestion_metrics.insert(
|
|
355
519
|
content_item_id: content_item_id,
|
|
@@ -360,14 +524,8 @@ module ClaudeMemory
|
|
|
360
524
|
)
|
|
361
525
|
end
|
|
362
526
|
|
|
363
|
-
#
|
|
364
|
-
#
|
|
365
|
-
# @return [Hash] Aggregated metrics with keys:
|
|
366
|
-
# - total_input_tokens: Total tokens sent to API
|
|
367
|
-
# - total_output_tokens: Total tokens returned from API
|
|
368
|
-
# - total_facts_extracted: Total facts extracted
|
|
369
|
-
# - total_operations: Number of distillation operations
|
|
370
|
-
# - avg_facts_per_1k_input_tokens: Average efficiency metric
|
|
527
|
+
# Compute aggregate ingestion metrics across all distillation runs.
|
|
528
|
+
# @return [Hash, nil] totals and efficiency ratio, or nil if no data
|
|
371
529
|
def aggregate_ingestion_metrics
|
|
372
530
|
# standard:disable Performance/Detect (Sequel DSL requires .select{}.first)
|
|
373
531
|
result = ingestion_metrics
|
|
@@ -400,23 +558,48 @@ module ClaudeMemory
|
|
|
400
558
|
}
|
|
401
559
|
end
|
|
402
560
|
|
|
403
|
-
#
|
|
404
|
-
#
|
|
405
|
-
# @
|
|
406
|
-
|
|
561
|
+
# Mark all undistilled content items as distilled with zero token counts.
|
|
562
|
+
# Used for backfilling legacy content that predates the metrics table.
|
|
563
|
+
# @return [Integer] number of items backfilled
|
|
564
|
+
def backfill_distillation_metrics!
|
|
565
|
+
undistilled_ids = content_items
|
|
566
|
+
.left_join(:ingestion_metrics, content_item_id: :id)
|
|
567
|
+
.where(Sequel[:ingestion_metrics][:id] => nil)
|
|
568
|
+
.select_map(Sequel[:content_items][:id])
|
|
569
|
+
|
|
570
|
+
return 0 if undistilled_ids.empty?
|
|
571
|
+
|
|
572
|
+
now = Time.now.utc.iso8601
|
|
573
|
+
undistilled_ids.each do |cid|
|
|
574
|
+
ingestion_metrics.insert(
|
|
575
|
+
content_item_id: cid,
|
|
576
|
+
input_tokens: 0,
|
|
577
|
+
output_tokens: 0,
|
|
578
|
+
facts_extracted: 0,
|
|
579
|
+
created_at: now
|
|
580
|
+
)
|
|
581
|
+
end
|
|
582
|
+
|
|
583
|
+
undistilled_ids.size
|
|
584
|
+
end
|
|
585
|
+
|
|
586
|
+
# --- LLM cache ---
|
|
587
|
+
|
|
588
|
+
# Look up a cached LLM result by its cache key.
|
|
589
|
+
# @param cache_key [String] SHA-256 hex cache key
|
|
590
|
+
# @return [Hash, nil]
|
|
407
591
|
def llm_cache_lookup(cache_key)
|
|
408
592
|
llm_cache.where(cache_key: cache_key).first
|
|
409
593
|
end
|
|
410
594
|
|
|
411
|
-
# Store
|
|
412
|
-
#
|
|
413
|
-
# @param
|
|
414
|
-
# @param
|
|
415
|
-
# @param
|
|
416
|
-
# @param
|
|
417
|
-
# @param
|
|
418
|
-
# @
|
|
419
|
-
# @return [Integer] The created cache entry ID
|
|
595
|
+
# Store or update a cached LLM result. Uses upsert on the cache_key.
|
|
596
|
+
# @param operation [String] operation name (e.g. "distill", "embed")
|
|
597
|
+
# @param model [String] model identifier
|
|
598
|
+
# @param input_hash [String] SHA-256 hex digest of the input
|
|
599
|
+
# @param result_json [String] JSON-serialized result
|
|
600
|
+
# @param input_tokens [Integer, nil] input tokens consumed
|
|
601
|
+
# @param output_tokens [Integer, nil] output tokens consumed
|
|
602
|
+
# @return [void]
|
|
420
603
|
def llm_cache_store(operation:, model:, input_hash:, result_json:, input_tokens: nil, output_tokens: nil)
|
|
421
604
|
cache_key = Digest::SHA256.hexdigest("#{operation}:#{model}:#{input_hash}")
|
|
422
605
|
|
|
@@ -439,97 +622,47 @@ module ClaudeMemory
|
|
|
439
622
|
)
|
|
440
623
|
end
|
|
441
624
|
|
|
442
|
-
#
|
|
443
|
-
#
|
|
444
|
-
# @param
|
|
445
|
-
# @param
|
|
446
|
-
# @
|
|
447
|
-
# @return [String] SHA256 hex digest cache key
|
|
625
|
+
# Compute the cache key for an LLM operation.
|
|
626
|
+
# @param operation [String] operation name
|
|
627
|
+
# @param model [String] model identifier
|
|
628
|
+
# @param input [String] raw input text
|
|
629
|
+
# @return [String] SHA-256 hex cache key
|
|
448
630
|
def llm_cache_key(operation, model, input)
|
|
449
631
|
input_hash = Digest::SHA256.hexdigest(input)
|
|
450
632
|
Digest::SHA256.hexdigest("#{operation}:#{model}:#{input_hash}")
|
|
451
633
|
end
|
|
452
634
|
|
|
453
|
-
#
|
|
454
|
-
#
|
|
455
|
-
# @
|
|
456
|
-
# @return [Integer] Number of entries pruned
|
|
635
|
+
# Delete LLM cache entries older than the given age.
|
|
636
|
+
# @param max_age_seconds [Integer] maximum age in seconds (default: 7 days)
|
|
637
|
+
# @return [Integer] number of rows deleted
|
|
457
638
|
def llm_cache_prune(max_age_seconds: 604_800)
|
|
458
639
|
cutoff = (Time.now - max_age_seconds).utc.iso8601
|
|
459
640
|
llm_cache.where { created_at < cutoff }.delete
|
|
460
641
|
end
|
|
461
642
|
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
def ensure_schema!
|
|
465
|
-
migrations_path = File.expand_path("../../../db/migrations", __dir__)
|
|
466
|
-
|
|
467
|
-
# Handle backward compatibility: databases created with old migration system
|
|
468
|
-
sync_legacy_schema_version!
|
|
469
|
-
|
|
470
|
-
# Skip migration if the database is already ahead of this gem's version.
|
|
471
|
-
# This happens when a newer gem version migrated the DB and an older
|
|
472
|
-
# installed gem (e.g. via hooks) tries to open it.
|
|
473
|
-
current = current_schema_version
|
|
474
|
-
return if current && current > SCHEMA_VERSION
|
|
475
|
-
|
|
476
|
-
# Run Sequel migrations to bring database to target version
|
|
477
|
-
Sequel::Migrator.run(@db, migrations_path, target: SCHEMA_VERSION)
|
|
478
|
-
|
|
479
|
-
# Set created_at timestamp on first initialization
|
|
480
|
-
set_meta("created_at", Time.now.utc.iso8601) unless get_meta("created_at")
|
|
481
|
-
|
|
482
|
-
# Sync legacy schema_version meta key with Sequel's schema_info
|
|
483
|
-
# This maintains backwards compatibility with code that reads schema_version
|
|
484
|
-
sequel_version = @db[:schema_info].get(:version) if @db.table_exists?(:schema_info)
|
|
485
|
-
set_meta("schema_version", sequel_version.to_s) if sequel_version
|
|
486
|
-
end
|
|
487
|
-
|
|
488
|
-
# Sync legacy schema_version from meta table to Sequel's schema_info
|
|
489
|
-
# Handles two cases:
|
|
490
|
-
# 1. No schema_info table exists (old system, pre-Sequel migrations)
|
|
491
|
-
# 2. schema_info exists but is out of sync with meta.schema_version
|
|
492
|
-
def sync_legacy_schema_version!
|
|
493
|
-
return unless @db.table_exists?(:meta)
|
|
494
|
-
|
|
495
|
-
meta_version = get_meta("schema_version")&.to_i
|
|
496
|
-
return unless meta_version && meta_version >= 2
|
|
497
|
-
|
|
498
|
-
# Verify database actually has v2+ schema (defensive check)
|
|
499
|
-
columns = @db.schema(:content_items).map(&:first) if @db.table_exists?(:content_items)
|
|
500
|
-
return unless columns&.include?(:project_path)
|
|
501
|
-
|
|
502
|
-
# Create or update schema_info to match meta.schema_version
|
|
503
|
-
@db.create_table?(:schema_info) do
|
|
504
|
-
Integer :version, null: false, default: 0
|
|
505
|
-
end
|
|
506
|
-
|
|
507
|
-
sequel_version = @db[:schema_info].get(:version)
|
|
508
|
-
if sequel_version.nil? || sequel_version < meta_version
|
|
509
|
-
# Update schema_info to match meta (old system's version)
|
|
510
|
-
@db[:schema_info].delete
|
|
511
|
-
@db[:schema_info].insert(version: meta_version)
|
|
512
|
-
end
|
|
513
|
-
end
|
|
514
|
-
|
|
515
|
-
def current_schema_version
|
|
516
|
-
return nil unless @db.table_exists?(:schema_info)
|
|
517
|
-
@db[:schema_info].get(:version)
|
|
518
|
-
end
|
|
643
|
+
# --- Meta ---
|
|
519
644
|
|
|
645
|
+
# Set a key-value pair in the meta table (upsert).
|
|
646
|
+
# @param key [String] metadata key
|
|
647
|
+
# @param value [String] metadata value
|
|
648
|
+
# @return [void]
|
|
520
649
|
def set_meta(key, value)
|
|
521
650
|
@db[:meta].insert_conflict(target: :key, update: {value: value}).insert(key: key, value: value)
|
|
522
651
|
end
|
|
523
652
|
|
|
653
|
+
# Retrieve a value from the meta table.
|
|
654
|
+
# @param key [String] metadata key
|
|
655
|
+
# @return [String, nil]
|
|
524
656
|
def get_meta(key)
|
|
525
657
|
@db[:meta].where(key: key).get(:value)
|
|
526
658
|
end
|
|
527
659
|
|
|
660
|
+
private
|
|
661
|
+
|
|
528
662
|
def generate_docid(subject_entity_id, predicate, object_literal, created_at)
|
|
529
663
|
input = "#{subject_entity_id}:#{predicate}:#{object_literal}:#{created_at}"
|
|
530
664
|
docid = Digest::SHA256.hexdigest(input)[0, 8]
|
|
531
665
|
|
|
532
|
-
# Handle unlikely collisions by rehashing with a counter
|
|
533
666
|
counter = 0
|
|
534
667
|
while facts.where(docid: docid).any?
|
|
535
668
|
counter += 1
|