claude_memory 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/CLAUDE.md +1 -1
- data/.claude/rules/claude_memory.generated.md +1 -1
- data/.claude/settings.json +5 -0
- data/.claude/settings.local.json +9 -1
- data/.claude-plugin/marketplace.json +5 -2
- data/.claude-plugin/plugin.json +16 -3
- data/CHANGELOG.md +55 -0
- data/CLAUDE.md +27 -13
- data/README.md +6 -2
- data/Rakefile +22 -0
- data/db/migrations/011_add_tool_call_summaries.rb +18 -0
- data/db/migrations/012_add_vec_indexing_support.rb +19 -0
- data/docs/improvements.md +86 -66
- data/docs/influence/claude-mem.md +253 -0
- data/docs/influence/claude-supermemory.md +158 -430
- data/docs/influence/episodic-memory.md +217 -0
- data/docs/influence/grepai.md +163 -839
- data/docs/influence/kbs.md +437 -0
- data/docs/influence/qmd.md +139 -481
- data/hooks/hooks.json +19 -15
- data/lefthook.yml +4 -0
- data/lib/claude_memory/commands/checks/vec_check.rb +73 -0
- data/lib/claude_memory/commands/compact_command.rb +94 -0
- data/lib/claude_memory/commands/doctor_command.rb +1 -0
- data/lib/claude_memory/commands/export_command.rb +108 -0
- data/lib/claude_memory/commands/help_command.rb +2 -0
- data/lib/claude_memory/commands/hook_command.rb +110 -9
- data/lib/claude_memory/commands/index_command.rb +63 -8
- data/lib/claude_memory/commands/initializers/global_initializer.rb +26 -7
- data/lib/claude_memory/commands/initializers/project_initializer.rb +35 -12
- data/lib/claude_memory/commands/registry.rb +3 -1
- data/lib/claude_memory/hook/context_injector.rb +75 -0
- data/lib/claude_memory/hook/error_classifier.rb +67 -0
- data/lib/claude_memory/hook/handler.rb +21 -1
- data/lib/claude_memory/index/vector_index.rb +171 -0
- data/lib/claude_memory/infrastructure/schema_validator.rb +5 -1
- data/lib/claude_memory/ingest/ingester.rb +26 -1
- data/lib/claude_memory/ingest/observation_compressor.rb +177 -0
- data/lib/claude_memory/mcp/instructions_builder.rb +76 -0
- data/lib/claude_memory/mcp/server.rb +3 -1
- data/lib/claude_memory/mcp/tool_definitions.rb +15 -7
- data/lib/claude_memory/mcp/tools.rb +125 -2
- data/lib/claude_memory/publish.rb +28 -27
- data/lib/claude_memory/recall/dual_query_template.rb +1 -12
- data/lib/claude_memory/recall.rb +71 -17
- data/lib/claude_memory/store/sqlite_store.rb +17 -1
- data/lib/claude_memory/sweep/sweeper.rb +30 -0
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +8 -0
- data/scripts/hook-runner.sh +14 -0
- data/scripts/serve-mcp.sh +14 -0
- data/skills/setup-memory/SKILL.md +6 -0
- metadata +31 -2
|
@@ -14,17 +14,27 @@ module ClaudeMemory
|
|
|
14
14
|
def initialize_memory
|
|
15
15
|
@stdout.puts "Initializing ClaudeMemory (global only)...\n\n"
|
|
16
16
|
|
|
17
|
+
if plugin_mode?
|
|
18
|
+
@stdout.puts "(Plugin mode detected — hooks and MCP managed by plugin)\n\n"
|
|
19
|
+
end
|
|
20
|
+
|
|
17
21
|
# Check for existing hooks in global settings
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
22
|
+
unless plugin_mode?
|
|
23
|
+
hooks_config = HooksConfigurator.new(@stdout)
|
|
24
|
+
global_settings = File.join(Dir.home, ".claude", "settings.json")
|
|
25
|
+
if hooks_config.has_claude_memory_hooks?(global_settings)
|
|
26
|
+
handle_existing_hooks(hooks_config, global_settings)
|
|
27
|
+
return 0 if @skip_initialization
|
|
28
|
+
end
|
|
23
29
|
end
|
|
24
30
|
|
|
25
31
|
ensure_database
|
|
26
|
-
|
|
27
|
-
|
|
32
|
+
|
|
33
|
+
unless plugin_mode?
|
|
34
|
+
configure_hooks unless @skip_hooks
|
|
35
|
+
configure_mcp
|
|
36
|
+
end
|
|
37
|
+
|
|
28
38
|
configure_memory_instructions
|
|
29
39
|
|
|
30
40
|
print_completion_message
|
|
@@ -74,9 +84,18 @@ module ClaudeMemory
|
|
|
74
84
|
MemoryInstructionsWriter.new(@stdout).write_global_instructions
|
|
75
85
|
end
|
|
76
86
|
|
|
87
|
+
# Reports plugin mode: true whenever the CLAUDE_PLUGIN_ROOT environment
# variable is present, whatever its value (an empty string still counts).
def plugin_mode?
  !ENV["CLAUDE_PLUGIN_ROOT"].nil?
end
|
|
90
|
+
|
|
77
91
|
# Writes the closing summary for global setup to @stdout.
# A plugin-mode notice is inserted before the final note when plugin_mode?
# reports true.
def print_completion_message
  out = @stdout
  out.puts "\n=== Global Setup Complete ===\n"
  out.puts "ClaudeMemory is now configured globally."
  out.puts "\nPlugin mode: hooks and MCP are managed by the plugin." if plugin_mode?
  out.puts "\nNote: Run 'claude-memory init' in each project for project-specific memory."
end
|
|
82
101
|
end
|
|
@@ -16,19 +16,29 @@ module ClaudeMemory
|
|
|
16
16
|
def initialize_memory
|
|
17
17
|
@stdout.puts "Initializing ClaudeMemory (project-local)...\n\n"
|
|
18
18
|
|
|
19
|
+
if plugin_mode?
|
|
20
|
+
@stdout.puts "(Plugin mode detected — hooks, MCP, and output style managed by plugin)\n\n"
|
|
21
|
+
end
|
|
22
|
+
|
|
19
23
|
# Check for existing hooks and offer options
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
+
unless plugin_mode?
|
|
25
|
+
hooks_config = HooksConfigurator.new(@stdout)
|
|
26
|
+
if hooks_config.has_claude_memory_hooks?(".claude/settings.json")
|
|
27
|
+
handle_existing_hooks(hooks_config)
|
|
28
|
+
return 0 if @skip_initialization
|
|
29
|
+
end
|
|
24
30
|
end
|
|
25
31
|
|
|
26
32
|
ensure_databases
|
|
27
33
|
ensure_directories
|
|
28
|
-
|
|
29
|
-
|
|
34
|
+
|
|
35
|
+
unless plugin_mode?
|
|
36
|
+
configure_hooks unless @skip_hooks
|
|
37
|
+
configure_mcp
|
|
38
|
+
install_output_style
|
|
39
|
+
end
|
|
40
|
+
|
|
30
41
|
configure_memory_instructions
|
|
31
|
-
install_output_style
|
|
32
42
|
|
|
33
43
|
print_completion_message
|
|
34
44
|
0
|
|
@@ -93,17 +103,30 @@ module ClaudeMemory
|
|
|
93
103
|
@stdout.puts "✓ Installed output style at #{style_dest}"
|
|
94
104
|
end
|
|
95
105
|
|
|
106
|
+
# Reports plugin mode: true whenever the CLAUDE_PLUGIN_ROOT environment
# variable is present, whatever its value (an empty string still counts).
def plugin_mode?
  ENV.include?("CLAUDE_PLUGIN_ROOT")
end
|
|
109
|
+
|
|
96
110
|
# Writes the closing summary for project setup to @stdout: the database
# locations followed by a numbered "Next steps" list.
#
# In plugin mode a notice is printed and the "Restart Claude Code" step is
# left out; otherwise that step comes first. The remaining steps are the
# same in both modes, so they are kept in one list and numbered on output.
def print_completion_message
  out = @stdout
  out.puts "\n=== Setup Complete ===\n"
  out.puts "ClaudeMemory is now configured for this project."
  out.puts "\nDatabases:"
  out.puts " Global: ~/.claude/memory.sqlite3 (user-wide knowledge)"
  out.puts " Project: .claude/memory.sqlite3 (project-specific)"

  steps = [
    "Use Claude Code normally - transcripts will be ingested automatically",
    "Run 'claude-memory promote <fact_id>' to move facts to global",
    "Run 'claude-memory doctor' to verify setup"
  ]

  if plugin_mode?
    out.puts "\nPlugin mode: hooks, MCP, and output style are managed by the plugin."
  else
    steps.unshift("Restart Claude Code to load the new configuration")
  end

  out.puts "\nNext steps:"
  out.puts(steps.each_with_index.map { |step, index| " #{index + 1}. #{step}" })
end
|
|
108
131
|
end
|
|
109
132
|
end
|
|
@@ -27,7 +27,9 @@ module ClaudeMemory
|
|
|
27
27
|
"serve-mcp" => "ServeMcpCommand",
|
|
28
28
|
"hook" => "HookCommand",
|
|
29
29
|
"index" => "IndexCommand",
|
|
30
|
-
"recover" => "RecoverCommand"
|
|
30
|
+
"recover" => "RecoverCommand",
|
|
31
|
+
"compact" => "CompactCommand",
|
|
32
|
+
"export" => "ExportCommand"
|
|
31
33
|
}.freeze
|
|
32
34
|
|
|
33
35
|
# Find a command class by name
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Hook
    # Generates context for SessionStart hook injection.
    #
    # Runs one recall query per category (decisions, conventions,
    # architecture), each with scope "all", and renders the matching facts
    # as short Markdown sections for Claude.
    class ContextInjector
      MAX_DECISIONS = 5
      MAX_CONVENTIONS = 5
      MAX_ARCHITECTURE = 5

      QUERIES = {
        decisions: {query: "decision constraint rule requirement", scope: "all"},
        conventions: {query: "convention style format pattern prefer", scope: "all"},
        architecture: {query: "uses framework implements architecture pattern", scope: "all"}
      }.freeze

      # @param manager [Object] store manager handed to Recall
      def initialize(manager)
        @manager = manager
        @recall = Recall.new(manager)
      end

      # Builds the context text.
      #
      # @return [String, nil] the non-empty sections joined by a newline, in
      #   the order Decisions, Conventions, Architecture; nil when every
      #   category came back empty
      def generate_context
        plan = [
          ["Decisions", :decisions, MAX_DECISIONS],
          ["Conventions", :conventions, MAX_CONVENTIONS],
          ["Architecture", :architecture, MAX_ARCHITECTURE]
        ]

        sections = plan.each_with_object([]) do |(title, category, limit), rendered|
          facts = fetch(category, limit)
          rendered << format_section(title, facts) if facts.any?
        end

        sections.empty? ? nil : sections.join("\n")
      end

      private

      # Runs the recall query configured for +category+ and formats each hit.
      # Best effort: any StandardError raised along the way yields an empty
      # list, so a failing query only drops its own section.
      #
      # @return [Array<String, nil>] formatted facts (nil for unformattable hits)
      def fetch(category, limit)
        settings = QUERIES.fetch(category)
        hits = @recall.query(settings[:query], limit: limit, scope: settings[:scope])
        hits.map { |hit| format_fact(hit[:fact]) }
      rescue StandardError
        []
      end

      # Renders one fact hash as text.
      #
      # @return [String, nil] "subject.predicate = object" when all three
      #   parts are present, the bare object literal when only that is
      #   present, nil when the fact or its object literal is missing
      def format_fact(fact)
        return nil unless fact

        literal = fact[:object_literal]
        return nil unless literal

        subject = fact[:subject_name] || fact[:subject_entity_id]
        predicate = fact[:predicate]
        return literal.to_s unless subject && predicate

        "#{subject}.#{predicate} = #{literal}"
      end

      # Renders a "## Title" heading followed by one bullet per distinct,
      # non-nil item.
      #
      # @return [String, nil] nil when no items remain after cleaning
      def format_section(title, items)
        bullets = items.compact.uniq
        return nil if bullets.empty?

        ["## #{title}", *bullets.map { |entry| "- #{entry}" }].join("\n")
      end
    end
  end
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Hook
    # Classifies hook errors into transport/infrastructure failures
    # vs client/programming bugs to determine exit code behavior.
    #
    # Transport errors map to ExitCodes::SUCCESS: infrastructure failures
    # shouldn't block Claude Code sessions. Memory is optional, so the hook
    # degrades gracefully.
    #
    # Client errors map to ExitCodes::ERROR: programming bugs or invalid
    # input that developers should see and fix.
    #
    # Source: claude-mem hook-command.ts:26-66
    module ErrorClassifier
      # Infrastructure/transport error class names. They are matched by name
      # against the error's ancestors at call time, so this file does not
      # need Sequel to be loaded first.
      TRANSPORT_ERROR_NAMES = %w[
        Sequel::DatabaseConnectionError
        Sequel::DatabaseError
      ].freeze

      # Infrastructure/transport error classes that are always available.
      TRANSPORT_ERROR_CLASSES = [
        Errno::EACCES,
        Errno::ENOSPC,
        Errno::EISDIR,
        Errno::EROFS,
        IOError
      ].freeze

      # Client/programming error classes — surface to developer
      CLIENT_ERROR_CLASSES = [
        Handler::PayloadError,
        JSON::ParserError,
        TypeError,
        NoMethodError,
        ArgumentError
      ].freeze

      module_function

      # Returns true for infrastructure failures that should not block sessions.
      # An error qualifies when it is an instance of one of
      # TRANSPORT_ERROR_CLASSES, or when any ancestor of its class is named in
      # TRANSPORT_ERROR_NAMES.
      def transport_error?(error)
        return true if TRANSPORT_ERROR_CLASSES.any? { |transport_class| error.is_a?(transport_class) }

        ancestor_names = error.class.ancestors.map(&:name)
        TRANSPORT_ERROR_NAMES.any? { |class_name| ancestor_names.include?(class_name) }
      end

      # Returns true for programming bugs that developers should fix
      # (instances of any class in CLIENT_ERROR_CLASSES).
      def client_error?(error)
        CLIENT_ERROR_CLASSES.any? { |client_class| error.is_a?(client_class) }
      end

      # Returns the appropriate exit code for an error.
      # The transport check runs first, so an error matching both lists is
      # treated as a transport failure.
      def exit_code_for(error)
        return ExitCodes::SUCCESS if transport_error?(error)
        return ExitCodes::ERROR if client_error?(error)

        # Unknown errors default to graceful degradation —
        # memory should never block Claude Code sessions
        ExitCodes::SUCCESS
      end
    end
  end
end
|
|
@@ -7,8 +7,9 @@ module ClaudeMemory
|
|
|
7
7
|
|
|
8
8
|
DEFAULT_SWEEP_BUDGET = 5
|
|
9
9
|
|
|
10
|
-
def initialize(store, env: ENV)
|
|
10
|
+
# Stores the handler's collaborators.
#
# @param store [Object] store kept in @store
# @param env [Hash, ENV] environment kept in @env and used to build the
#   Configuration kept in @config
# @param manager [Object, nil] optional store manager; when nil, #context
#   builds one from the payload instead
def initialize(store, env: ENV, manager: nil)
  @store, @manager = store, manager
  @config = Configuration.new(env)
  @env = env
end
|
|
@@ -50,6 +51,25 @@ module ClaudeMemory
|
|
|
50
51
|
publisher = Publish.new(@store)
|
|
51
52
|
publisher.publish!(mode: mode, since: since, rules_dir: rules_dir)
|
|
52
53
|
end
|
|
54
|
+
|
|
55
|
+
# Generates the injected context for a hook payload.
#
# Uses the manager supplied at construction when there is one, otherwise
# builds one from the payload, and calls ensure_both! on it before
# generating context through ContextInjector.
#
# @param payload [Hash] hook payload (only consulted when a manager has to be built)
# @return [Hash] {status: :ok, context: String or nil} on success;
#   {status: :error, context: nil, message: String} when any StandardError
#   is raised along the way
def context(payload)
  store_manager = @manager || build_manager(payload)
  store_manager.ensure_both!

  {status: :ok, context: ContextInjector.new(store_manager).generate_context}
rescue => error
  {status: :error, context: nil, message: error.message}
end
|
|
66
|
+
|
|
67
|
+
private
|
|
68
|
+
|
|
69
|
+
# Builds a Store::StoreManager for the payload's project.
# The project path comes from payload["project_path"], falling back to the
# configured project directory when that entry is nil or false.
def build_manager(payload)
  Store::StoreManager.new(
    project_path: payload["project_path"] || @config.project_dir,
    env: @env
  )
end
|
|
53
73
|
end
|
|
54
74
|
end
|
|
55
75
|
end
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Index
    # Native sqlite-vec KNN search wrapper.
    # Follows the same lazy-init pattern as LexicalFTS:
    # the extension and virtual table are created on first use.
    #
    # Embeddings live in the `facts_vec` vec0 virtual table (cosine distance,
    # EMBEDDING_DIMENSIONS floats per row). Each fact row carries a
    # `vec_indexed_at` marker recording when its embedding was written there.
    class VectorIndex
      EMBEDDING_DIMENSIONS = 384

      # @param store [Object] store exposing #db and #facts
      def initialize(store)
        @store = store
        @db = store.db
        @available = nil
        @vec_table_ensured = false
      end

      # Is the sqlite-vec extension loadable?
      # Caches the result after the first probe.
      # @return [Boolean]
      def available?
        return @available unless @available.nil?

        @available = load_extension!
      end

      # Insert (or replace) a fact's embedding into the vec0 virtual table.
      # Also sets vec_indexed_at on the fact row.
      # @param fact_id [Integer]
      # @param vector [Array<Float>] 384-dimensional embedding
      # @return [Boolean] false when the extension is unavailable, true otherwise
      def insert_embedding(fact_id, vector)
        return false unless available?

        ensure_vec_table!
        replace_vec_row(fact_id, vector.pack("f*"))
        @store.facts.where(id: fact_id).update(vec_indexed_at: Time.now.utc.iso8601)
        true
      end

      # Remove a fact's embedding from the vec0 virtual table.
      # Also clears vec_indexed_at on the fact row.
      # @param fact_id [Integer]
      # @return [Boolean] false when the extension is unavailable, true otherwise
      def remove_embedding(fact_id)
        return false unless available?

        ensure_vec_table!
        @db[:facts_vec].where(fact_id: fact_id).delete
        @store.facts.where(id: fact_id).update(vec_indexed_at: nil)
        true
      end

      # KNN search: returns fact_ids + distances, caller hydrates facts.
      # Two-step query pattern (no JOINs with vec0).
      # @param query_vector [Array<Float>]
      # @param k [Integer] number of nearest neighbors
      # @return [Array<Hash>] [{fact_id:, distance:, similarity:}, ...]
      #   where similarity is 1 - distance clamped to 0.0..1.0
      def search(query_vector, k: 10)
        return [] unless available?

        ensure_vec_table!
        blob = query_vector.pack("f*")

        rows = @db.synchronize do |conn|
          conn.query(
            "SELECT fact_id, distance FROM facts_vec WHERE embedding MATCH ? AND k = ? ORDER BY distance",
            [blob, k]
          )
        end

        rows.map do |row|
          {
            fact_id: row[:fact_id],
            distance: row[:distance],
            similarity: (1.0 - row[:distance]).clamp(0.0, 1.0)
          }
        end
      end

      # Backfill active facts that have embedding_json but no vec_indexed_at.
      #
      # Rows whose embedding_json is not valid JSON, is not an array of
      # numbers, or does not have EMBEDDING_DIMENSIONS entries are skipped
      # instead of aborting the batch.
      #
      # Each row is deleted from vec0 before it is inserted, so a batch that
      # was interrupted after inserting (leaving rows in vec0 without a
      # timestamp) can be retried without a primary-key conflict. Timestamps
      # for rows inserted so far are written even when a later row raises.
      #
      # NOTE(review): skipped rows keep vec_indexed_at nil, so they are
      # selected again on every call; a run of `limit` unusable rows at the
      # lowest ids would starve the rows behind them.
      #
      # @param limit [Integer] max facts to process per call
      # @return [Integer] number of facts backfilled
      def backfill_batch!(limit: 100)
        return 0 unless available?

        ensure_vec_table!
        rows = @store.facts
          .where(vec_indexed_at: nil)
          .where(Sequel.~(embedding_json: nil))
          .where(status: "active")
          .select(:id, :embedding_json)
          .order(:id)
          .limit(limit)
          .all

        return 0 if rows.empty?

        now = Time.now.utc.iso8601
        indexed_ids = []

        begin
          rows.each do |row|
            blob = embedding_blob(row[:embedding_json])
            next unless blob

            replace_vec_row(row[:id], blob)
            indexed_ids << row[:id]
          end
        ensure
          # Batch-update timestamps, including after a mid-batch failure, so
          # the marker always covers the rows that made it into vec0.
          @store.facts.where(id: indexed_ids).update(vec_indexed_at: now) if indexed_ids.any?
        end

        indexed_ids.size
      end

      # Number of entries in the vec0 virtual table
      # @return [Integer] 0 when the extension is unavailable
      def count
        return 0 unless available?

        ensure_vec_table!
        @db[:facts_vec].count
      end

      # Coverage statistics for vec indexing over active facts
      # @return [Hash] {with_embedding:, vec_indexed:, coverage_pct:}
      def coverage_stats
        with_embedding = @store.facts.where(Sequel.~(embedding_json: nil)).where(status: "active").count
        vec_indexed = @store.facts.where(Sequel.~(vec_indexed_at: nil)).where(status: "active").count
        coverage_pct = (with_embedding > 0) ? (vec_indexed * 100.0 / with_embedding).round(1) : 0

        {with_embedding: with_embedding, vec_indexed: vec_indexed, coverage_pct: coverage_pct}
      end

      private

      # Execute parameterized SQL via the raw Extralite connection
      # Sequel's db.run doesn't support bind params with Extralite
      def execute_with_params(sql, *params)
        @db.synchronize { |conn| conn.execute(sql, params) }
      end

      # Writes one embedding row.
      # vec0 doesn't support INSERT OR REPLACE; delete first
      def replace_vec_row(fact_id, blob)
        execute_with_params("DELETE FROM facts_vec WHERE fact_id = ?", fact_id)
        execute_with_params(
          "INSERT INTO facts_vec(fact_id, embedding) VALUES (?, ?)",
          fact_id, blob
        )
      end

      # Decodes a stored embedding into the packed float blob vec0 expects.
      # @return [String, nil] nil when the JSON is malformed or does not
      #   describe an array of EMBEDDING_DIMENSIONS numbers
      def embedding_blob(embedding_json)
        vector = JSON.parse(embedding_json)
        return nil unless vector.is_a?(Array)
        return nil unless vector.size == EMBEDDING_DIMENSIONS && vector.all?(Numeric)

        vector.pack("f*")
      rescue JSON::ParserError, TypeError
        nil
      end

      # Loads the sqlite-vec extension into the connection.
      # @return [Boolean] false when the gem is missing or loading fails
      def load_extension!
        require "sqlite_vec"
        @db.synchronize do |conn|
          SqliteVec.load(conn)
        end
        true
      rescue LoadError, StandardError
        false
      end

      # Creates the facts_vec virtual table once per instance.
      def ensure_vec_table!
        return if @vec_table_ensured

        @db.run(<<~SQL)
          CREATE VIRTUAL TABLE IF NOT EXISTS facts_vec
          USING vec0(fact_id INTEGER PRIMARY KEY, embedding float[#{EMBEDDING_DIMENSIONS}] distance_metric=cosine)
        SQL
        @vec_table_ensured = true
      end
    end
  end
end
|
|
@@ -21,6 +21,9 @@ module ClaudeMemory
|
|
|
21
21
|
operation_progress: %i[id operation_type scope status started_at]
|
|
22
22
|
}.freeze
|
|
23
23
|
|
|
24
|
+
# sqlite-vec virtual table prefix; all internal tables (chunks, rowids, etc.) share this
|
|
25
|
+
VEC_TABLE_PREFIX = "facts_vec"
|
|
26
|
+
|
|
24
27
|
CRITICAL_INDEXES = %i[
|
|
25
28
|
idx_facts_predicate idx_facts_subject idx_facts_status idx_facts_scope
|
|
26
29
|
idx_facts_project idx_provenance_fact idx_content_items_session
|
|
@@ -184,9 +187,10 @@ module ClaudeMemory
|
|
|
184
187
|
now = Time.now.utc.iso8601
|
|
185
188
|
version = @store.schema_version
|
|
186
189
|
|
|
187
|
-
# Get table counts for snapshot
|
|
190
|
+
# Get table counts for snapshot (skip virtual tables that need extensions)
|
|
188
191
|
table_counts = {}
|
|
189
192
|
@store.db.tables.each do |table|
|
|
193
|
+
next if table.to_s.start_with?(VEC_TABLE_PREFIX)
|
|
190
194
|
table_counts[table.to_s] = @store.db[table].count
|
|
191
195
|
end
|
|
192
196
|
|
|
@@ -5,13 +5,14 @@ require "digest"
|
|
|
5
5
|
module ClaudeMemory
|
|
6
6
|
module Ingest
|
|
7
7
|
class Ingester
|
|
8
|
-
def initialize(store, fts: nil, env: ENV, metadata_extractor: nil, tool_extractor: nil, tool_filter: nil)
|
|
8
|
+
# Wires up the ingester's collaborators. Every keyword collaborator is
# optional; a default instance is created for each one left nil.
#
# @param store [Object] store kept in @store and handed to the default FTS index
# @param fts [Object, nil] lexical index; defaults to Index::LexicalFTS.new(store)
# @param env [Hash, ENV] environment used to build the Configuration
# @param metadata_extractor [Object, nil] defaults to MetadataExtractor.new
# @param tool_extractor [Object, nil] defaults to ToolExtractor.new
# @param tool_filter [Object, nil] defaults to ToolFilter.new
# @param observation_compressor [Object, nil] defaults to ObservationCompressor.new
def initialize(
  store,
  fts: nil,
  env: ENV,
  metadata_extractor: nil,
  tool_extractor: nil,
  tool_filter: nil,
  observation_compressor: nil
)
  @store = store
  @fts = fts || Index::LexicalFTS.new(store)
  @config = Configuration.new(env)

  metadata_extractor ||= MetadataExtractor.new
  @metadata_extractor = metadata_extractor
  tool_extractor ||= ToolExtractor.new
  @tool_extractor = tool_extractor
  tool_filter ||= ToolFilter.new
  @tool_filter = tool_filter
  observation_compressor ||= ObservationCompressor.new
  @observation_compressor = observation_compressor
end
|
|
16
17
|
|
|
17
18
|
def ingest(source:, session_id:, transcript_path:, project_path: nil)
|
|
@@ -22,6 +23,7 @@ module ClaudeMemory
|
|
|
22
23
|
|
|
23
24
|
prepared = prepare_delta(session_id, transcript_path, project_path)
|
|
24
25
|
return {status: :no_change, bytes_read: 0} if prepared.nil?
|
|
26
|
+
return {status: :skipped, bytes_read: 0, reason: "session_excluded"} if prepared == :excluded
|
|
25
27
|
|
|
26
28
|
content_id = persist_content(source, session_id, transcript_path, prepared)
|
|
27
29
|
|
|
@@ -31,13 +33,25 @@ module ClaudeMemory
|
|
|
31
33
|
|
|
32
34
|
private
|
|
33
35
|
|
|
36
|
+
# Tags that cause the entire delta to be skipped when present.
|
|
37
|
+
# Different from ContentSanitizer which strips tag content but keeps the rest.
|
|
38
|
+
EXCLUSION_TAGS = %w[no-memory private].freeze
|
|
39
|
+
|
|
34
40
|
def prepare_delta(session_id, transcript_path, project_path)
|
|
35
41
|
current_offset = @store.get_delta_cursor(session_id, transcript_path) || 0
|
|
36
42
|
delta, new_offset = TranscriptReader.read_delta(transcript_path, current_offset)
|
|
37
43
|
return nil if delta.nil?
|
|
38
44
|
|
|
45
|
+
# Skip entire delta if session exclusion markers are present
|
|
46
|
+
if session_excluded?(delta)
|
|
47
|
+
# Advance cursor so we don't re-check this content
|
|
48
|
+
@store.update_delta_cursor(session_id, transcript_path, new_offset)
|
|
49
|
+
return :excluded
|
|
50
|
+
end
|
|
51
|
+
|
|
39
52
|
metadata = @metadata_extractor.extract(delta)
|
|
40
53
|
tool_calls = @tool_filter.filter(@tool_extractor.extract(delta))
|
|
54
|
+
tool_calls = compress_tool_calls(tool_calls)
|
|
41
55
|
delta = ContentSanitizer.strip_tags(delta)
|
|
42
56
|
|
|
43
57
|
{
|
|
@@ -128,6 +142,13 @@ module ClaudeMemory
|
|
|
128
142
|
end
|
|
129
143
|
end
|
|
130
144
|
|
|
145
|
+
# Adds a :compressed_summary entry to every tool call.
# The summary comes from the observation compressor, which is given the
# call's :tool_name and :tool_input. The input hashes are not modified;
# merged copies are returned.
#
# @param tool_calls [Array<Hash>]
# @return [Array<Hash>] one merged hash per input call, in the same order
def compress_tool_calls(tool_calls)
  compressor = @observation_compressor

  tool_calls.map do |call|
    call.merge(compressed_summary: compressor.compress(call[:tool_name], call[:tool_input]))
  end
end
|
|
151
|
+
|
|
131
152
|
def should_ingest?(transcript_path)
|
|
132
153
|
return true unless File.exist?(transcript_path)
|
|
133
154
|
|
|
@@ -140,6 +161,10 @@ module ClaudeMemory
|
|
|
140
161
|
existing.nil?
|
|
141
162
|
end
|
|
142
163
|
|
|
164
|
+
# True when the text contains the opening marker of any exclusion tag,
# i.e. "<tag>" for a tag listed in EXCLUSION_TAGS. The match is a literal,
# case-sensitive substring check.
def session_excluded?(text)
  markers = EXCLUSION_TAGS.map { |tag_name| "<#{tag_name}>" }
  markers.any? { |marker| text.include?(marker) }
end
|
|
167
|
+
|
|
143
168
|
# Returns the project directory reported by the ingester's configuration
# (@config.project_dir).
def detect_project_path
  @config.project_dir
end
|