claude_memory 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/CLAUDE.md +1 -1
- data/.claude/rules/claude_memory.generated.md +14 -1
- data/.claude/skills/check-memory/SKILL.md +10 -0
- data/.claude/skills/improve/SKILL.md +12 -1
- data/.claude-plugin/plugin.json +1 -1
- data/CHANGELOG.md +70 -0
- data/db/migrations/008_add_provenance_line_range.rb +21 -0
- data/db/migrations/009_add_docid.rb +39 -0
- data/db/migrations/010_add_llm_cache.rb +30 -0
- data/docs/improvements.md +72 -1084
- data/docs/influence/claude-supermemory.md +498 -0
- data/docs/influence/qmd.md +424 -2022
- data/docs/quality_review.md +64 -705
- data/lib/claude_memory/commands/doctor_command.rb +45 -4
- data/lib/claude_memory/commands/explain_command.rb +11 -6
- data/lib/claude_memory/commands/stats_command.rb +1 -1
- data/lib/claude_memory/core/fact_graph.rb +122 -0
- data/lib/claude_memory/core/fact_query_builder.rb +34 -14
- data/lib/claude_memory/core/fact_ranker.rb +3 -20
- data/lib/claude_memory/core/relative_time.rb +45 -0
- data/lib/claude_memory/core/result_sorter.rb +2 -2
- data/lib/claude_memory/core/rr_fusion.rb +57 -0
- data/lib/claude_memory/core/snippet_extractor.rb +97 -0
- data/lib/claude_memory/domain/fact.rb +3 -1
- data/lib/claude_memory/index/index_query.rb +2 -0
- data/lib/claude_memory/index/lexical_fts.rb +18 -0
- data/lib/claude_memory/infrastructure/operation_tracker.rb +7 -21
- data/lib/claude_memory/infrastructure/schema_validator.rb +30 -25
- data/lib/claude_memory/ingest/content_sanitizer.rb +8 -1
- data/lib/claude_memory/ingest/ingester.rb +67 -56
- data/lib/claude_memory/ingest/tool_extractor.rb +1 -1
- data/lib/claude_memory/ingest/tool_filter.rb +55 -0
- data/lib/claude_memory/logging/logger.rb +112 -0
- data/lib/claude_memory/mcp/query_guide.rb +96 -0
- data/lib/claude_memory/mcp/response_formatter.rb +86 -23
- data/lib/claude_memory/mcp/server.rb +34 -4
- data/lib/claude_memory/mcp/text_summary.rb +257 -0
- data/lib/claude_memory/mcp/tool_definitions.rb +20 -4
- data/lib/claude_memory/mcp/tools.rb +133 -120
- data/lib/claude_memory/publish.rb +12 -2
- data/lib/claude_memory/recall/expansion_detector.rb +44 -0
- data/lib/claude_memory/recall.rb +93 -41
- data/lib/claude_memory/resolve/resolver.rb +72 -40
- data/lib/claude_memory/store/sqlite_store.rb +99 -24
- data/lib/claude_memory/sweep/sweeper.rb +6 -0
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +21 -0
- metadata +14 -2
- data/docs/remaining_improvements.md +0 -330
|
@@ -1,11 +1,20 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "optparse"
|
|
4
|
+
|
|
3
5
|
module ClaudeMemory
|
|
4
6
|
module Commands
|
|
5
7
|
# Performs system health checks for ClaudeMemory
|
|
6
8
|
# Delegates to specialized check classes for actual validation
|
|
7
9
|
class DoctorCommand < BaseCommand
|
|
8
|
-
def call(
|
|
10
|
+
def call(args)
|
|
11
|
+
opts = parse_options(args, {brief: false}) do |o|
|
|
12
|
+
OptionParser.new do |parser|
|
|
13
|
+
parser.on("--brief", "Output single-line status summary") { o[:brief] = true }
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
return 1 if opts.nil?
|
|
17
|
+
|
|
9
18
|
manager = ClaudeMemory::Store::StoreManager.new
|
|
10
19
|
|
|
11
20
|
checks = [
|
|
@@ -20,10 +29,42 @@ module ClaudeMemory
|
|
|
20
29
|
|
|
21
30
|
manager.close
|
|
22
31
|
|
|
23
|
-
|
|
24
|
-
|
|
32
|
+
if opts[:brief]
|
|
33
|
+
report_brief(results)
|
|
34
|
+
else
|
|
35
|
+
reporter = Checks::Reporter.new(stdout, stderr)
|
|
36
|
+
success = reporter.report(results)
|
|
37
|
+
success ? 0 : 1
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
# Print a single-line health summary to stdout and return an exit code.
#
# Errors take priority: when any result has status :error, their messages
# are printed and 1 is returned. Otherwise the line lists fact counts for
# results whose label mentions "global" or "project", with warning
# messages appended in brackets, and 0 is returned.
#
# @param results [Array<Hash>] check results with :status, :message,
#   :label and optional :details => {fact_count: Integer}
# @return [Integer] 1 when any check reported an error, otherwise 0
def report_brief(results)
  errors = results.select { |r| r[:status] == :error }
  if errors.any?
    stdout.puts "Memory ERROR: #{errors.map { |e| e[:message] }.join(", ")}"
    return 1
  end

  # Regexp#match? is nil-safe on its argument and, unlike =~, neither
  # allocates MatchData nor touches the $~ globals.
  fact_parts = results
    .select { |r| /global|project/.match?(r[:label]) && r.dig(:details, :fact_count) }
    .map { |r| "#{r.dig(:details, :fact_count)} facts (#{r[:label]})" }
  summary = fact_parts.any? ? fact_parts.join(", ") : "no databases"

  warnings = results.select { |r| r[:status] == :warning }
  if warnings.any?
    warning_msgs = warnings.map { |w| w[:message] }.join("; ")
    stdout.puts "Memory WARNING: #{summary} [#{warning_msgs}]"
  else
    stdout.puts "Memory OK: #{summary}"
  end

  0
end
|
|
28
69
|
end
|
|
29
70
|
end
|
|
@@ -5,12 +5,15 @@ module ClaudeMemory
|
|
|
5
5
|
# Explains a fact with provenance and relationships
|
|
6
6
|
class ExplainCommand < BaseCommand
|
|
7
7
|
def call(args)
|
|
8
|
-
|
|
9
|
-
unless
|
|
10
|
-
stderr.puts "Usage: claude-memory explain <fact_id> [--scope project|global]"
|
|
8
|
+
identifier = args.first
|
|
9
|
+
unless identifier && !identifier.empty?
|
|
10
|
+
stderr.puts "Usage: claude-memory explain <fact_id|docid> [--scope project|global]"
|
|
11
11
|
return 1
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
+
# Accept integer IDs or 8-char docid strings
|
|
15
|
+
fact_ref = identifier.match?(/\A\d+\z/) ? identifier.to_i : identifier
|
|
16
|
+
|
|
14
17
|
opts = parse_options(args[1..] || [], {scope: "project"}) do |o|
|
|
15
18
|
OptionParser.new do |parser|
|
|
16
19
|
parser.on("--scope SCOPE", "Scope: project or global") { |v| o[:scope] = v }
|
|
@@ -21,14 +24,16 @@ module ClaudeMemory
|
|
|
21
24
|
manager = ClaudeMemory::Store::StoreManager.new
|
|
22
25
|
recall = ClaudeMemory::Recall.new(manager)
|
|
23
26
|
|
|
24
|
-
explanation = recall.explain(
|
|
27
|
+
explanation = recall.explain(fact_ref, scope: opts[:scope])
|
|
25
28
|
if explanation.is_a?(ClaudeMemory::Core::NullExplanation)
|
|
26
|
-
stderr.puts "Fact #{
|
|
29
|
+
stderr.puts "Fact #{identifier} not found in #{opts[:scope]} database."
|
|
27
30
|
manager.close
|
|
28
31
|
return 1
|
|
29
32
|
end
|
|
30
33
|
|
|
31
|
-
|
|
34
|
+
docid = explanation[:fact][:docid]
|
|
35
|
+
label = docid ? "##{docid}" : "##{explanation[:fact][:id]}"
|
|
36
|
+
stdout.puts "Fact #{label} (#{opts[:scope]}):"
|
|
32
37
|
print_fact(explanation[:fact])
|
|
33
38
|
print_receipts(explanation[:receipts])
|
|
34
39
|
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set" # Set is only autoloaded on Ruby >= 3.2; load it explicitly

module ClaudeMemory
  module Core
    # Builds a dependency graph of facts using BFS traversal.
    # Queries the fact_links and conflicts tables to collect related facts
    # and the relationships ("supersedes" / "conflicts") between them.
    # Follows Functional Core pattern - pure query + transformation.
    class FactGraph
      MAX_DEPTH = 5

      # Build a fact dependency graph starting from a root fact
      # @param store [SQLiteStore] Database store
      # @param root_fact_id [Integer] Starting fact ID
      # @param depth [Integer] Maximum BFS depth; clamped into 1..MAX_DEPTH
      # @return [Hash] :root_fact_id, :depth (after clamping), :node_count,
      #   :edge_count, :nodes and :edges
      def self.build(store, root_fact_id, depth: 2)
        depth = depth.clamp(1, MAX_DEPTH)

        visited = Set.new
        queue = [[root_fact_id, 0]]
        nodes = {}
        edges = []

        until queue.empty?
          fact_id, current_depth = queue.shift
          # Set#add? returns nil when the id was already visited.
          next unless visited.add?(fact_id)

          # Ids whose fact row cannot be found produce no node; edges that
          # were already recorded towards them are kept as-is.
          fact = FactQueryBuilder.find_fact(store, fact_id)
          next unless fact

          nodes[fact_id] = build_node(fact)

          # Facts sitting at the depth limit are reported but not expanded.
          next if current_depth >= depth

          discover_links(store, fact_id, current_depth, visited, queue, edges)
        end

        deduped = dedupe_edges(edges)

        {
          root_fact_id: root_fact_id,
          depth: depth,
          node_count: nodes.size,
          edge_count: deduped.size,
          nodes: nodes.values,
          edges: deduped
        }
      end

      # Record every edge touching +fact_id+ and enqueue unvisited neighbours.
      # Appends to +queue+ and +edges+ in place.
      def self.discover_links(store, fact_id, current_depth, visited, queue, edges)
        discover_supersedes(store, fact_id, current_depth, visited, queue, edges)
        discover_superseded_by(store, fact_id, current_depth, visited, queue, edges)
        discover_conflicts(store, fact_id, current_depth, visited, queue, edges)
      end

      # Outgoing "supersedes" links: fact_id -> target.
      def self.discover_supersedes(store, fact_id, current_depth, visited, queue, edges)
        store.fact_links
          .where(from_fact_id: fact_id, link_type: "supersedes")
          .select_map(:to_fact_id)
          .each do |target_id|
            edges << {from: fact_id, to: target_id, type: "supersedes"}
            queue << [target_id, current_depth + 1] unless visited.include?(target_id)
          end
      end

      # Incoming "supersedes" links: source -> fact_id.
      def self.discover_superseded_by(store, fact_id, current_depth, visited, queue, edges)
        store.fact_links
          .where(to_fact_id: fact_id, link_type: "supersedes")
          .select_map(:from_fact_id)
          .each do |source_id|
            edges << {from: source_id, to: fact_id, type: "supersedes"}
            queue << [source_id, current_depth + 1] unless visited.include?(source_id)
          end
      end

      # Conflict rows where +fact_id+ appears on either side. Edges always
      # point from fact_a to fact_b, so the same conflict reached from both
      # ends collapses to one edge in dedupe_edges.
      def self.discover_conflicts(store, fact_id, current_depth, visited, queue, edges)
        store.conflicts
          .where(fact_a_id: fact_id)
          .select(:fact_b_id, :status)
          .all
          .each do |conflict|
            edges << {from: fact_id, to: conflict[:fact_b_id], type: "conflicts", status: conflict[:status]}
            queue << [conflict[:fact_b_id], current_depth + 1] unless visited.include?(conflict[:fact_b_id])
          end

        store.conflicts
          .where(fact_b_id: fact_id)
          .select(:fact_a_id, :status)
          .all
          .each do |conflict|
            edges << {from: conflict[:fact_a_id], to: fact_id, type: "conflicts", status: conflict[:status]}
            queue << [conflict[:fact_a_id], current_depth + 1] unless visited.include?(conflict[:fact_a_id])
          end
      end

      # Build a minimal node representation of a fact
      # @param fact [Hash] Fact row from database
      # @return [Hash] Node with :id, :docid, :subject, :predicate, :object,
      #   :status and :scope
      def self.build_node(fact)
        {
          id: fact[:id],
          docid: fact[:docid],
          subject: fact[:subject_name],
          predicate: fact[:predicate],
          object: fact[:object_literal],
          status: fact[:status],
          scope: fact[:scope]
        }
      end

      # Remove duplicate edges (same from/to/type), keeping the first seen
      # @param edges [Array<Hash>] Edges to deduplicate
      # @return [Array<Hash>] Deduplicated edges
      def self.dedupe_edges(edges)
        edges.uniq { |e| [e[:from], e[:to], e[:type]] }
      end
    end
  end
end
|
|
@@ -23,11 +23,12 @@ module ClaudeMemory
|
|
|
23
23
|
# Build dataset for batch finding receipts (provenance) with content_items join
|
|
24
24
|
# @param store [SQLiteStore] Database store
|
|
25
25
|
# @param fact_ids [Array<Integer>] Fact IDs to find receipts for
|
|
26
|
+
# @param include_raw_text [Boolean] Include raw_text for snippet extraction
|
|
26
27
|
# @return [Hash] Hash of fact_id => [receipt_rows]
|
|
27
|
-
def self.batch_find_receipts(store, fact_ids)
|
|
28
|
+
def self.batch_find_receipts(store, fact_ids, include_raw_text: false)
|
|
28
29
|
return {} if fact_ids.empty?
|
|
29
30
|
|
|
30
|
-
results = build_receipts_dataset(store)
|
|
31
|
+
results = build_receipts_dataset(store, include_raw_text: include_raw_text)
|
|
31
32
|
.where(Sequel[:provenance][:fact_id] => fact_ids)
|
|
32
33
|
.all
|
|
33
34
|
|
|
@@ -47,12 +48,23 @@ module ClaudeMemory
|
|
|
47
48
|
.first
|
|
48
49
|
end
|
|
49
50
|
|
|
51
|
+
# Find single fact by docid with entity join
|
|
52
|
+
# @param store [SQLiteStore] Database store
|
|
53
|
+
# @param docid [String] 8-character docid
|
|
54
|
+
# @return [Hash, nil] Fact row or nil
|
|
55
|
+
def self.find_fact_by_docid(store, docid)
  # Filter the dataset from build_facts_dataset on facts.docid and take the first row.
  docid_column = Sequel[:facts][:docid]
  build_facts_dataset(store).where(docid_column => docid).first
end
|
|
60
|
+
|
|
50
61
|
# Find receipts for a single fact
|
|
51
62
|
# @param store [SQLiteStore] Database store
|
|
52
63
|
# @param fact_id [Integer] Fact ID
|
|
64
|
+
# @param include_raw_text [Boolean] Include raw_text for snippet extraction
|
|
53
65
|
# @return [Array<Hash>] Receipt rows
|
|
54
|
-
def self.find_receipts(store, fact_id)
|
|
55
|
-
build_receipts_dataset(store)
|
|
66
|
+
def self.find_receipts(store, fact_id, include_raw_text: false)
  # Reuse the shared receipts dataset, narrowed to this one fact's provenance rows.
  receipts = build_receipts_dataset(store, include_raw_text: include_raw_text)
  receipts.where(Sequel[:provenance][:fact_id] => fact_id).all
end
|
|
@@ -95,7 +107,7 @@ module ClaudeMemory
|
|
|
95
107
|
# @return [Array<Hash>] Fact rows
|
|
96
108
|
def self.fetch_changes(store, since, limit)
|
|
97
109
|
store.facts
|
|
98
|
-
.select(:id, :subject_entity_id, :predicate, :object_literal, :status, :created_at, :scope, :project_path)
|
|
110
|
+
.select(:id, :docid, :subject_entity_id, :predicate, :object_literal, :status, :created_at, :scope, :project_path)
|
|
99
111
|
.where { created_at >= since }
|
|
100
112
|
.order(Sequel.desc(:created_at))
|
|
101
113
|
.limit(limit)
|
|
@@ -121,6 +133,7 @@ module ClaudeMemory
|
|
|
121
133
|
.left_join(:entities, id: :subject_entity_id)
|
|
122
134
|
.select(
|
|
123
135
|
Sequel[:facts][:id],
|
|
136
|
+
Sequel[:facts][:docid],
|
|
124
137
|
Sequel[:facts][:predicate],
|
|
125
138
|
Sequel[:facts][:object_literal],
|
|
126
139
|
Sequel[:facts][:status],
|
|
@@ -136,18 +149,25 @@ module ClaudeMemory
|
|
|
136
149
|
|
|
137
150
|
# Build standard receipts dataset with content_items join
|
|
138
151
|
# @param store [SQLiteStore] Database store
|
|
152
|
+
# @param include_raw_text [Boolean] Include raw_text for snippet extraction
|
|
139
153
|
# @return [Sequel::Dataset] Configured dataset
|
|
140
|
-
def self.build_receipts_dataset(store)
|
|
154
|
+
def self.build_receipts_dataset(store, include_raw_text: false)
  # Column order is kept: provenance columns first, then content_items
  # columns, with raw_text appended last and only on request.
  provenance_columns = %i[id fact_id quote strength line_start line_end]
    .map { |name| Sequel[:provenance][name] }

  content_names = %i[session_id occurred_at]
  content_names << :raw_text if include_raw_text
  content_columns = content_names.map { |name| Sequel[:content_items][name] }

  store.provenance
    .left_join(:content_items, id: :content_item_id)
    .select(*provenance_columns, *content_columns)
end
|
|
152
172
|
end
|
|
153
173
|
end
|
|
@@ -83,30 +83,13 @@ module ClaudeMemory
|
|
|
83
83
|
seen.values.sort_by { |r| -r[:similarity] }.take(limit)
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
-
# Merge vector and text search results
|
|
86
|
+
# Merge vector and text search results using Reciprocal Rank Fusion
|
|
87
87
|
# @param vector_results [Array<Hash>] Results from vector search with :fact and :similarity
|
|
88
88
|
# @param text_results [Array<Hash>] Results from text search with :fact and :similarity
|
|
89
89
|
# @param limit [Integer] Maximum results to return
|
|
90
|
-
# @return [Array<Hash>] Merged results sorted by
|
|
90
|
+
# @return [Array<Hash>] Merged results sorted by RRF score descending
|
|
91
91
|
def self.merge_search_results(vector_results, text_results, limit)
  # Thin delegate: all merging is done by RRFusion.fuse.
  fused = RRFusion.fuse(vector_results, text_results, limit)
  fused
end
|
|
111
94
|
end
|
|
112
95
|
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time" # Time.parse lives in the "time" stdlib extension, not core

module ClaudeMemory
  module Core
    # Formats timestamps as human-readable relative time strings.
    # Progressive granularity: just now → Xm ago → Xh ago → Xd ago → date
    module RelativeTime
      MINUTE = 60
      HOUR = 3600
      DAY = 86400

      # Format a timestamp relative to +now+.
      # @param timestamp [Time, String, Integer, Float, nil] moment to describe
      # @param now [Time] reference point (injectable for tests)
      # @return [String, nil] relative string, a YYYY-MM-DD date for
      #   timestamps in the future or a week or more old, or nil when the
      #   timestamp is nil, unparseable, or of an unsupported type
      def self.format(timestamp, now: Time.now)
        return nil if timestamp.nil?

        time = parse_time(timestamp)
        return nil unless time

        diff = now - time
        # "ago" is meaningless for future timestamps; show the date instead.
        return format_absolute(time) if diff.negative?

        case diff
        when 0...MINUTE then "just now"
        when MINUTE...HOUR then "#{(diff / MINUTE).to_i}m ago"
        when HOUR...DAY then "#{(diff / HOUR).to_i}h ago"
        when DAY...(7 * DAY) then "#{(diff / DAY).to_i}d ago"
        else format_absolute(time)
        end
      end

      # Coerce a supported value into a Time.
      # @param value [Time, String, Integer, Float, Object]
      # @return [Time, nil] nil for unsupported types or when Time.parse
      #   rejects the string with ArgumentError
      def self.parse_time(value)
        case value
        when Time then value
        when String then Time.parse(value)
        when Integer, Float then Time.at(value)
        end
      rescue ArgumentError
        nil
      end

      # @param time [Time]
      # @return [String] date formatted as YYYY-MM-DD
      def self.format_absolute(time)
        time.strftime("%Y-%m-%d")
      end
    end
  end
end
|
|
@@ -16,9 +16,9 @@ module ClaudeMemory
|
|
|
16
16
|
# Add source annotation to each result in collection
|
|
17
17
|
# @param results [Array<Hash>] Results to annotate
|
|
18
18
|
# @param source [Symbol] Source identifier (:project, :global, :legacy)
|
|
19
|
-
# @return [Array<Hash>]
|
|
19
|
+
# @return [Array<Hash>] New array of results with :source key added
|
|
20
20
|
def self.annotate_source(results, source)
  # Hash#merge returns a copy, so the caller's result hashes are left untouched.
  tag = {source: source}
  results.map { |result| result.merge(tag) }
end
|
|
23
23
|
end
|
|
24
24
|
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Core
    # Reciprocal Rank Fusion (RRF) for merging ranked result lists
    # Follows Functional Core pattern - no I/O, just transformations
    #
    # RRF combines multiple ranked lists using position-based scoring:
    #   score(d) = Σ(weight_r / (k + rank_r(d)))
    #
    # On top of that, every appearance in a list's first three positions
    # adds the flat TOP_BONUS for that rank.
    class RRFusion
      K = 60 # Standard RRF constant - controls rank pressure
      TOP_BONUS = {1 => 0.05, 2 => 0.02, 3 => 0.02}.freeze

      # Fuse ranked lists from vector and text search
      # @param vector_results [Array<Hash>] Results from vector search (ordered by similarity)
      # @param text_results [Array<Hash>] Results from text search (ordered by FTS rank)
      # @param limit [Integer] Maximum results to return
      # @param vector_weight [Numeric] Weight multiplier for vector rankings (default 1.0)
      # @param text_weight [Numeric] Weight multiplier for text rankings (default 1.0)
      # @return [Array<Hash>] Copies of the input results sorted by RRF score
      #   descending, with :similarity replaced by the RRF score
      def self.fuse(vector_results, text_results, limit, vector_weight: 1.0, text_weight: 1.0)
        scores = Hash.new(0.0)
        # Vector list is scored first so score insertion order stays vector-then-text.
        add_rank_scores(scores, vector_results, vector_weight)
        add_rank_scores(scores, text_results, text_weight)

        # Text data is only a fallback: fill it in first, then let vector
        # results overwrite it for facts present in both lists.
        fact_data = {}
        text_results.each { |result| fact_data[result[:fact][:id]] ||= result }
        vector_results.each { |result| fact_data[result[:fact][:id]] = result }

        scores
          .sort_by { |_id, score| -score }
          .take(limit)
          .map { |fact_id, score| fact_data[fact_id].merge(similarity: score) }
      end

      # Add each result's weighted reciprocal-rank score and top-rank bonus
      # into +scores+, keyed by fact id.
      def self.add_rank_scores(scores, results, weight)
        results.each_with_index do |result, idx|
          fact_id = result[:fact][:id]
          rank = idx + 1 # 1-based rank
          # to_f guards against Integer weights: 2 / 61 would otherwise be
          # integer division and silently contribute 0.
          scores[fact_id] += weight.to_f / (K + rank)
          scores[fact_id] += TOP_BONUS.fetch(rank, 0.0)
        end
      end
      private_class_method :add_rank_scores
    end
  end
end
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Core
    # Extracts relevant snippets from raw content based on query terms.
    # Finds the line with the most query term matches and returns
    # surrounding context (1 line before + 2 lines after).
    # Follows Functional Core pattern - pure transformations, no I/O.
    class SnippetExtractor
      CONTEXT_BEFORE = 1
      CONTEXT_AFTER = 2
      MAX_SNIPPET_LENGTH = 500

      # Extract the best snippet from content matching the query
      # @param content [String] Raw text content
      # @param query [String] Search query
      # @return [String, nil] Best matching snippet, or nil when content or
      #   query is blank or no line contains any query term
      def self.extract(content, query)
        # Single code path: same selection logic as extract_with_lines.
        result = extract_with_lines(content, query)
        result && result[:snippet]
      end

      # Extract snippet and return line range information
      # @param content [String] Raw text content
      # @param query [String] Search query
      # @return [Hash, nil] Hash with :snippet, :line_start, :line_end
      #   (1-indexed, inclusive, computed before truncation) or nil
      def self.extract_with_lines(content, query)
        return nil if content.nil? || content.empty? || query.nil? || query.empty?

        # Invalid byte sequences make String#downcase and regex splitting
        # raise ArgumentError; replace them before doing any text work.
        lines = scrub_invalid(content).lines.map(&:chomp)
        return nil if lines.empty?

        terms = tokenize_query(scrub_invalid(query))
        return nil if terms.empty?

        best_line_idx = find_best_line(lines, terms)
        return nil unless best_line_idx

        start_idx, end_idx = context_bounds(lines, best_line_idx)

        {
          snippet: build_snippet(lines, best_line_idx),
          line_start: start_idx + 1, # 1-indexed
          line_end: end_idx + 1 # 1-indexed
        }
      end

      # Lowercased whitespace-separated terms; one-character terms are dropped.
      # @api private
      def self.tokenize_query(query)
        query.downcase.split(/\s+/).reject { |t| t.length < 2 }
      end

      # Index of the line containing the most terms (substring match,
      # case-insensitive); the earliest line wins ties. nil when no line
      # contains any term.
      # @api private
      def self.find_best_line(lines, terms)
        best_idx = nil
        best_score = 0

        lines.each_with_index do |line, idx|
          downcased = line.downcase
          score = terms.count { |term| downcased.include?(term) }
          if score > best_score
            best_score = score
            best_idx = idx
          end
        end

        best_idx
      end

      # Join the context window around +center_idx+ and cap its length.
      # @api private
      def self.build_snippet(lines, center_idx)
        start_idx, end_idx = context_bounds(lines, center_idx)
        truncate(lines[start_idx..end_idx].join("\n"))
      end

      # Cap text at MAX_SNIPPET_LENGTH characters, ending in "..." when cut.
      # @api private
      def self.truncate(text)
        return text if text.length <= MAX_SNIPPET_LENGTH
        text[0, MAX_SNIPPET_LENGTH - 3] + "..."
      end

      # 0-based [start, end] indices of the context window, clamped to the
      # bounds of +lines+.
      # @api private
      def self.context_bounds(lines, center_idx)
        [
          [center_idx - CONTEXT_BEFORE, 0].max,
          [center_idx + CONTEXT_AFTER, lines.size - 1].min
        ]
      end

      # Return +text+ unchanged when its encoding is valid, otherwise a copy
      # with invalid bytes replaced via String#scrub.
      # @api private
      def self.scrub_invalid(text)
        text.valid_encoding? ? text : text.scrub
      end
    end
  end
end
|
|
@@ -5,12 +5,13 @@ module ClaudeMemory
|
|
|
5
5
|
# Domain model representing a fact in the memory system
|
|
6
6
|
# Encapsulates business logic and validation
|
|
7
7
|
class Fact
|
|
8
|
-
attr_reader :id, :subject_name, :predicate, :object_literal,
|
|
8
|
+
attr_reader :id, :docid, :subject_name, :predicate, :object_literal,
|
|
9
9
|
:status, :confidence, :scope, :project_path,
|
|
10
10
|
:valid_from, :valid_to, :created_at
|
|
11
11
|
|
|
12
12
|
def initialize(attributes)
|
|
13
13
|
@id = attributes[:id]
|
|
14
|
+
@docid = attributes[:docid]
|
|
14
15
|
@subject_name = attributes[:subject_name]
|
|
15
16
|
@predicate = attributes[:predicate]
|
|
16
17
|
@object_literal = attributes[:object_literal]
|
|
@@ -45,6 +46,7 @@ module ClaudeMemory
|
|
|
45
46
|
def to_h
|
|
46
47
|
{
|
|
47
48
|
id: id,
|
|
49
|
+
docid: docid,
|
|
48
50
|
subject_name: subject_name,
|
|
49
51
|
predicate: predicate,
|
|
50
52
|
object_literal: object_literal,
|
|
@@ -55,6 +55,7 @@ module ClaudeMemory
|
|
|
55
55
|
.left_join(:entities, id: :subject_entity_id)
|
|
56
56
|
.select(
|
|
57
57
|
Sequel[:facts][:id],
|
|
58
|
+
Sequel[:facts][:docid],
|
|
58
59
|
Sequel[:facts][:predicate],
|
|
59
60
|
Sequel[:facts][:object_literal],
|
|
60
61
|
Sequel[:facts][:status],
|
|
@@ -67,6 +68,7 @@ module ClaudeMemory
|
|
|
67
68
|
.map do |fact|
|
|
68
69
|
{
|
|
69
70
|
id: fact[:id],
|
|
71
|
+
docid: fact[:docid],
|
|
70
72
|
subject: fact[:subject_name],
|
|
71
73
|
predicate: fact[:predicate],
|
|
72
74
|
object_preview: truncate_preview(fact[:object_literal]),
|
|
@@ -36,6 +36,24 @@ module ClaudeMemory
|
|
|
36
36
|
.select_map(:content_item_id)
|
|
37
37
|
end
|
|
38
38
|
|
|
39
|
+
# Search returning content IDs with FTS5 BM25 rank values
|
|
40
|
+
# @param query [String] Search query
|
|
41
|
+
# @param limit [Integer] Maximum results
|
|
42
|
+
# @return [Array<Hash>] Results with :content_item_id and :rank
|
|
43
|
+
def search_with_ranks(query, limit: 20)
  # The FTS table is ensured before any early return, even for blank queries.
  ensure_fts_table!
  return [] if query.nil?

  trimmed = query.strip
  # Blank queries and a bare "*" return no rows without querying the index.
  return [] if trimmed.empty? || trimmed == "*"

  match_clause = Sequel.lit("text MATCH ?", escape_fts_query(query))
  @db[:content_fts]
    .where(match_clause)
    .order(:rank)
    .limit(limit)
    .select(Sequel.lit("content_item_id, rank"))
    .all
end
|
|
56
|
+
|
|
39
57
|
def escape_fts_query(query)
|
|
40
58
|
words = query.split(/\s+/).map do |word|
|
|
41
59
|
next word if word == "*"
|