claude_memory 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/CLAUDE.md +1 -1
  3. data/.claude/rules/claude_memory.generated.md +14 -1
  4. data/.claude/skills/check-memory/SKILL.md +10 -0
  5. data/.claude/skills/improve/SKILL.md +12 -1
  6. data/.claude-plugin/plugin.json +1 -1
  7. data/CHANGELOG.md +70 -0
  8. data/db/migrations/008_add_provenance_line_range.rb +21 -0
  9. data/db/migrations/009_add_docid.rb +39 -0
  10. data/db/migrations/010_add_llm_cache.rb +30 -0
  11. data/docs/improvements.md +72 -1084
  12. data/docs/influence/claude-supermemory.md +498 -0
  13. data/docs/influence/qmd.md +424 -2022
  14. data/docs/quality_review.md +64 -705
  15. data/lib/claude_memory/commands/doctor_command.rb +45 -4
  16. data/lib/claude_memory/commands/explain_command.rb +11 -6
  17. data/lib/claude_memory/commands/stats_command.rb +1 -1
  18. data/lib/claude_memory/core/fact_graph.rb +122 -0
  19. data/lib/claude_memory/core/fact_query_builder.rb +34 -14
  20. data/lib/claude_memory/core/fact_ranker.rb +3 -20
  21. data/lib/claude_memory/core/relative_time.rb +45 -0
  22. data/lib/claude_memory/core/result_sorter.rb +2 -2
  23. data/lib/claude_memory/core/rr_fusion.rb +57 -0
  24. data/lib/claude_memory/core/snippet_extractor.rb +97 -0
  25. data/lib/claude_memory/domain/fact.rb +3 -1
  26. data/lib/claude_memory/index/index_query.rb +2 -0
  27. data/lib/claude_memory/index/lexical_fts.rb +18 -0
  28. data/lib/claude_memory/infrastructure/operation_tracker.rb +7 -21
  29. data/lib/claude_memory/infrastructure/schema_validator.rb +30 -25
  30. data/lib/claude_memory/ingest/content_sanitizer.rb +8 -1
  31. data/lib/claude_memory/ingest/ingester.rb +67 -56
  32. data/lib/claude_memory/ingest/tool_extractor.rb +1 -1
  33. data/lib/claude_memory/ingest/tool_filter.rb +55 -0
  34. data/lib/claude_memory/logging/logger.rb +112 -0
  35. data/lib/claude_memory/mcp/query_guide.rb +96 -0
  36. data/lib/claude_memory/mcp/response_formatter.rb +86 -23
  37. data/lib/claude_memory/mcp/server.rb +34 -4
  38. data/lib/claude_memory/mcp/text_summary.rb +257 -0
  39. data/lib/claude_memory/mcp/tool_definitions.rb +20 -4
  40. data/lib/claude_memory/mcp/tools.rb +133 -120
  41. data/lib/claude_memory/publish.rb +12 -2
  42. data/lib/claude_memory/recall/expansion_detector.rb +44 -0
  43. data/lib/claude_memory/recall.rb +93 -41
  44. data/lib/claude_memory/resolve/resolver.rb +72 -40
  45. data/lib/claude_memory/store/sqlite_store.rb +99 -24
  46. data/lib/claude_memory/sweep/sweeper.rb +6 -0
  47. data/lib/claude_memory/version.rb +1 -1
  48. data/lib/claude_memory.rb +21 -0
  49. metadata +14 -2
  50. data/docs/remaining_improvements.md +0 -330
@@ -1,11 +1,20 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "optparse"
4
+
3
5
  module ClaudeMemory
4
6
  module Commands
5
7
  # Performs system health checks for ClaudeMemory
6
8
  # Delegates to specialized check classes for actual validation
7
9
  class DoctorCommand < BaseCommand
8
- def call(_args)
10
+ def call(args)
11
+ opts = parse_options(args, {brief: false}) do |o|
12
+ OptionParser.new do |parser|
13
+ parser.on("--brief", "Output single-line status summary") { o[:brief] = true }
14
+ end
15
+ end
16
+ return 1 if opts.nil?
17
+
9
18
  manager = ClaudeMemory::Store::StoreManager.new
10
19
 
11
20
  checks = [
@@ -20,10 +29,42 @@ module ClaudeMemory
20
29
 
21
30
  manager.close
22
31
 
23
- reporter = Checks::Reporter.new(stdout, stderr)
24
- success = reporter.report(results)
32
+ if opts[:brief]
33
+ report_brief(results)
34
+ else
35
+ reporter = Checks::Reporter.new(stdout, stderr)
36
+ success = reporter.report(results)
37
+ success ? 0 : 1
38
+ end
39
+ end
40
+
41
+ private
42
+
43
+ def report_brief(results)
44
+ errors = results.select { |r| r[:status] == :error }
45
+ warnings = results.select { |r| r[:status] == :warning }
46
+
47
+ if errors.any?
48
+ messages = errors.map { |e| e[:message] }
49
+ stdout.puts "Memory ERROR: #{messages.join(", ")}"
50
+ return 1
51
+ end
52
+
53
+ fact_parts = results
54
+ .select { |r| r[:label] =~ /global|project/ && r.dig(:details, :fact_count) }
55
+ .map { |r| "#{r.dig(:details, :fact_count)} facts (#{r[:label]})" }
56
+
57
+ status = warnings.any? ? "WARNING" : "OK"
58
+ summary = fact_parts.any? ? fact_parts.join(", ") : "no databases"
59
+
60
+ if warnings.any?
61
+ warning_msgs = warnings.map { |w| w[:message] }.join("; ")
62
+ stdout.puts "Memory #{status}: #{summary} [#{warning_msgs}]"
63
+ else
64
+ stdout.puts "Memory #{status}: #{summary}"
65
+ end
25
66
 
26
- success ? 0 : 1
67
+ 0
27
68
  end
28
69
  end
29
70
  end
@@ -5,12 +5,15 @@ module ClaudeMemory
5
5
  # Explains a fact with provenance and relationships
6
6
  class ExplainCommand < BaseCommand
7
7
  def call(args)
8
- fact_id = args.first&.to_i
9
- unless fact_id && fact_id > 0
10
- stderr.puts "Usage: claude-memory explain <fact_id> [--scope project|global]"
8
+ identifier = args.first
9
+ unless identifier && !identifier.empty?
10
+ stderr.puts "Usage: claude-memory explain <fact_id|docid> [--scope project|global]"
11
11
  return 1
12
12
  end
13
13
 
14
+ # Accept integer IDs or 8-char docid strings
15
+ fact_ref = identifier.match?(/\A\d+\z/) ? identifier.to_i : identifier
16
+
14
17
  opts = parse_options(args[1..] || [], {scope: "project"}) do |o|
15
18
  OptionParser.new do |parser|
16
19
  parser.on("--scope SCOPE", "Scope: project or global") { |v| o[:scope] = v }
@@ -21,14 +24,16 @@ module ClaudeMemory
21
24
  manager = ClaudeMemory::Store::StoreManager.new
22
25
  recall = ClaudeMemory::Recall.new(manager)
23
26
 
24
- explanation = recall.explain(fact_id, scope: opts[:scope])
27
+ explanation = recall.explain(fact_ref, scope: opts[:scope])
25
28
  if explanation.is_a?(ClaudeMemory::Core::NullExplanation)
26
- stderr.puts "Fact #{fact_id} not found in #{opts[:scope]} database."
29
+ stderr.puts "Fact #{identifier} not found in #{opts[:scope]} database."
27
30
  manager.close
28
31
  return 1
29
32
  end
30
33
 
31
- stdout.puts "Fact ##{fact_id} (#{opts[:scope]}):"
34
+ docid = explanation[:fact][:docid]
35
+ label = docid ? "##{docid}" : "##{explanation[:fact][:id]}"
36
+ stdout.puts "Fact #{label} (#{opts[:scope]}):"
32
37
  print_fact(explanation[:fact])
33
38
  print_receipts(explanation[:receipts])
34
39
 
@@ -84,7 +84,7 @@ module ClaudeMemory
84
84
  stdout.puts
85
85
 
86
86
  db.disconnect
87
- rescue => e
87
+ rescue Sequel::DatabaseError, Extralite::Error => e
88
88
  stderr.puts "Error reading database: #{e.message}"
89
89
  end
90
90
  end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
require "set"

module ClaudeMemory
  module Core
    # Builds a dependency graph of facts using breadth-first traversal.
    #
    # Starting from a root fact, follows "supersedes" rows in fact_links (in
    # both directions) and rows in conflicts (on either side), collecting the
    # facts reached as nodes and the relationships as edges.
    class FactGraph
      MAX_DEPTH = 5

      # Build a fact dependency graph starting from a root fact.
      #
      # Edges are recorded before the fact on the other end is loaded, so an
      # edge may reference an ID that has no entry in :nodes when
      # FactQueryBuilder.find_fact returns nil for it.
      #
      # @param store [SQLiteStore] Database store
      # @param root_fact_id [Integer] Starting fact ID
      # @param depth [Integer] Maximum BFS depth; clamped to 1..MAX_DEPTH
      # @return [Hash] :root_fact_id, :depth (after clamping), :node_count,
      #   :edge_count, :nodes (Array<Hash>) and :edges (Array<Hash>)
      def self.build(store, root_fact_id, depth: 2)
        depth = depth.clamp(1, MAX_DEPTH)

        visited = Set.new
        queue = [[root_fact_id, 0]]
        nodes = {}
        edges = []

        until queue.empty?
          fact_id, current_depth = queue.shift
          next if visited.include?(fact_id)

          # Mark before loading so a missing fact is not looked up twice.
          visited.add(fact_id)

          fact = FactQueryBuilder.find_fact(store, fact_id)
          next unless fact

          nodes[fact_id] = build_node(fact)

          # Facts at the depth limit become nodes but are not expanded.
          next if current_depth >= depth

          discover_links(store, fact_id, current_depth, visited, queue, edges)
        end

        deduped = dedupe_edges(edges)

        {
          root_fact_id: root_fact_id,
          depth: depth,
          node_count: nodes.size,
          edge_count: deduped.size,
          nodes: nodes.values,
          edges: deduped
        }
      end

      # Record every relationship touching fact_id and enqueue its neighbours.
      # Appends to queue and edges in place.
      # @param store [SQLiteStore] Database store
      # @param fact_id [Integer] Fact being expanded
      # @param current_depth [Integer] BFS depth of fact_id
      # @param visited [#include?] IDs already dequeued
      # @param queue [Array<Array>] Pending [fact_id, depth] pairs
      # @param edges [Array<Hash>] Accumulated edges
      def self.discover_links(store, fact_id, current_depth, visited, queue, edges)
        discover_supersedes(store, fact_id, current_depth, visited, queue, edges)
        discover_superseded_by(store, fact_id, current_depth, visited, queue, edges)
        discover_conflicts(store, fact_id, current_depth, visited, queue, edges)
      end

      # Follow "supersedes" links where fact_id is the source.
      def self.discover_supersedes(store, fact_id, current_depth, visited, queue, edges)
        store.fact_links
          .where(from_fact_id: fact_id, link_type: "supersedes")
          .select_map(:to_fact_id)
          .each do |target_id|
            edge = {from: fact_id, to: target_id, type: "supersedes"}
            record_edge(edge, target_id, current_depth, visited, queue, edges)
          end
      end

      # Follow "supersedes" links where fact_id is the target.
      def self.discover_superseded_by(store, fact_id, current_depth, visited, queue, edges)
        store.fact_links
          .where(to_fact_id: fact_id, link_type: "supersedes")
          .select_map(:from_fact_id)
          .each do |source_id|
            edge = {from: source_id, to: fact_id, type: "supersedes"}
            record_edge(edge, source_id, current_depth, visited, queue, edges)
          end
      end

      # Follow conflict rows with fact_id on either side. The edge direction
      # always runs fact_a_id -> fact_b_id, so the same conflict seen from
      # both ends collapses to one edge in dedupe_edges.
      def self.discover_conflicts(store, fact_id, current_depth, visited, queue, edges)
        store.conflicts
          .where(fact_a_id: fact_id)
          .select(:fact_b_id, :status)
          .all
          .each do |conflict|
            other_id = conflict[:fact_b_id]
            edge = {from: fact_id, to: other_id, type: "conflicts", status: conflict[:status]}
            record_edge(edge, other_id, current_depth, visited, queue, edges)
          end

        store.conflicts
          .where(fact_b_id: fact_id)
          .select(:fact_a_id, :status)
          .all
          .each do |conflict|
            other_id = conflict[:fact_a_id]
            edge = {from: other_id, to: fact_id, type: "conflicts", status: conflict[:status]}
            record_edge(edge, other_id, current_depth, visited, queue, edges)
          end
      end

      # Build a minimal node representation of a fact
      # @param fact [Hash] Fact row from database
      # @return [Hash] Node representation
      def self.build_node(fact)
        {
          id: fact[:id],
          docid: fact[:docid],
          subject: fact[:subject_name],
          predicate: fact[:predicate],
          object: fact[:object_literal],
          status: fact[:status],
          scope: fact[:scope]
        }
      end

      # Remove duplicate edges (same from/to/type); the first occurrence wins
      # @param edges [Array<Hash>] Edges to deduplicate
      # @return [Array<Hash>] Deduplicated edges
      def self.dedupe_edges(edges)
        edges.uniq { |e| [e[:from], e[:to], e[:type]] }
      end

      # Append an edge and schedule the neighbouring fact one level deeper,
      # unless that fact has already been dequeued.
      def self.record_edge(edge, neighbor_id, current_depth, visited, queue, edges)
        edges << edge
        queue << [neighbor_id, current_depth + 1] unless visited.include?(neighbor_id)
      end
      private_class_method :record_edge
    end
  end
end
@@ -23,11 +23,12 @@ module ClaudeMemory
23
23
  # Build dataset for batch finding receipts (provenance) with content_items join
24
24
  # @param store [SQLiteStore] Database store
25
25
  # @param fact_ids [Array<Integer>] Fact IDs to find receipts for
26
+ # @param include_raw_text [Boolean] Include raw_text for snippet extraction
26
27
  # @return [Hash] Hash of fact_id => [receipt_rows]
27
- def self.batch_find_receipts(store, fact_ids)
28
+ def self.batch_find_receipts(store, fact_ids, include_raw_text: false)
28
29
  return {} if fact_ids.empty?
29
30
 
30
- results = build_receipts_dataset(store)
31
+ results = build_receipts_dataset(store, include_raw_text: include_raw_text)
31
32
  .where(Sequel[:provenance][:fact_id] => fact_ids)
32
33
  .all
33
34
 
@@ -47,12 +48,23 @@ module ClaudeMemory
47
48
  .first
48
49
  end
49
50
 
51
+ # Find single fact by docid with entity join
52
+ # @param store [SQLiteStore] Database store
53
+ # @param docid [String] 8-character docid
54
+ # @return [Hash, nil] Fact row or nil
55
+ def self.find_fact_by_docid(store, docid)
56
+ build_facts_dataset(store)
57
+ .where(Sequel[:facts][:docid] => docid)
58
+ .first
59
+ end
60
+
50
61
  # Find receipts for a single fact
51
62
  # @param store [SQLiteStore] Database store
52
63
  # @param fact_id [Integer] Fact ID
64
+ # @param include_raw_text [Boolean] Include raw_text for snippet extraction
53
65
  # @return [Array<Hash>] Receipt rows
54
- def self.find_receipts(store, fact_id)
55
- build_receipts_dataset(store)
66
+ def self.find_receipts(store, fact_id, include_raw_text: false)
67
+ build_receipts_dataset(store, include_raw_text: include_raw_text)
56
68
  .where(Sequel[:provenance][:fact_id] => fact_id)
57
69
  .all
58
70
  end
@@ -95,7 +107,7 @@ module ClaudeMemory
95
107
  # @return [Array<Hash>] Fact rows
96
108
  def self.fetch_changes(store, since, limit)
97
109
  store.facts
98
- .select(:id, :subject_entity_id, :predicate, :object_literal, :status, :created_at, :scope, :project_path)
110
+ .select(:id, :docid, :subject_entity_id, :predicate, :object_literal, :status, :created_at, :scope, :project_path)
99
111
  .where { created_at >= since }
100
112
  .order(Sequel.desc(:created_at))
101
113
  .limit(limit)
@@ -121,6 +133,7 @@ module ClaudeMemory
121
133
  .left_join(:entities, id: :subject_entity_id)
122
134
  .select(
123
135
  Sequel[:facts][:id],
136
+ Sequel[:facts][:docid],
124
137
  Sequel[:facts][:predicate],
125
138
  Sequel[:facts][:object_literal],
126
139
  Sequel[:facts][:status],
@@ -136,18 +149,25 @@ module ClaudeMemory
136
149
 
137
150
  # Build standard receipts dataset with content_items join
138
151
  # @param store [SQLiteStore] Database store
152
+ # @param include_raw_text [Boolean] Include raw_text for snippet extraction
139
153
  # @return [Sequel::Dataset] Configured dataset
140
- def self.build_receipts_dataset(store)
154
+ def self.build_receipts_dataset(store, include_raw_text: false)
155
+ columns = [
156
+ Sequel[:provenance][:id],
157
+ Sequel[:provenance][:fact_id],
158
+ Sequel[:provenance][:quote],
159
+ Sequel[:provenance][:strength],
160
+ Sequel[:provenance][:line_start],
161
+ Sequel[:provenance][:line_end],
162
+ Sequel[:content_items][:session_id],
163
+ Sequel[:content_items][:occurred_at]
164
+ ]
165
+
166
+ columns << Sequel[:content_items][:raw_text] if include_raw_text
167
+
141
168
  store.provenance
142
169
  .left_join(:content_items, id: :content_item_id)
143
- .select(
144
- Sequel[:provenance][:id],
145
- Sequel[:provenance][:fact_id],
146
- Sequel[:provenance][:quote],
147
- Sequel[:provenance][:strength],
148
- Sequel[:content_items][:session_id],
149
- Sequel[:content_items][:occurred_at]
150
- )
170
+ .select(*columns)
151
171
  end
152
172
  end
153
173
  end
@@ -83,30 +83,13 @@ module ClaudeMemory
83
83
  seen.values.sort_by { |r| -r[:similarity] }.take(limit)
84
84
  end
85
85
 
86
- # Merge vector and text search results, preferring vector similarity scores
86
+ # Merge vector and text search results using Reciprocal Rank Fusion
87
87
  # @param vector_results [Array<Hash>] Results from vector search with :fact and :similarity
88
88
  # @param text_results [Array<Hash>] Results from text search with :fact and :similarity
89
89
  # @param limit [Integer] Maximum results to return
90
- # @return [Array<Hash>] Merged results sorted by similarity descending
90
+ # @return [Array<Hash>] Merged results sorted by RRF score descending
91
91
  def self.merge_search_results(vector_results, text_results, limit)
92
- # Combine results, preferring vector similarity scores
93
- combined = {}
94
-
95
- vector_results.each do |result|
96
- fact_id = result[:fact][:id]
97
- combined[fact_id] = result
98
- end
99
-
100
- text_results.each do |result|
101
- fact_id = result[:fact][:id]
102
- # Only add if not already present from vector search
103
- combined[fact_id] ||= result
104
- end
105
-
106
- # Sort by similarity score (highest first)
107
- combined.values
108
- .sort_by { |r| -(r[:similarity] || 0) }
109
- .take(limit)
92
+ RRFusion.fuse(vector_results, text_results, limit)
110
93
  end
111
94
  end
112
95
  end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
# Time.parse lives in the "time" standard library, not in core Time.
require "time"

module ClaudeMemory
  module Core
    # Formats timestamps as human-readable relative time strings.
    # Progressive granularity: just now → Xm ago → Xh ago → Xd ago → date
    module RelativeTime
      MINUTE = 60
      HOUR = 3600
      DAY = 86400
      WEEK = 7 * DAY

      # Describe how long ago a timestamp occurred.
      #
      # Timestamps in the future relative to +now+, or a week or more in the
      # past, are rendered as an absolute YYYY-MM-DD date.
      #
      # @param timestamp [Time, String, Integer, Float, nil] Moment to describe
      # @param now [Time] Reference point for the comparison
      # @return [String, nil] Relative or absolute label; nil when the
      #   timestamp is nil, of an unsupported type, or cannot be parsed
      def self.format(timestamp, now: Time.now)
        return nil if timestamp.nil?

        time = parse_time(timestamp)
        return nil unless time

        elapsed = now - time
        return format_absolute(time) if elapsed.negative?

        case elapsed
        when 0...MINUTE then "just now"
        when MINUTE...HOUR then "#{(elapsed / MINUTE).to_i}m ago"
        when HOUR...DAY then "#{(elapsed / HOUR).to_i}h ago"
        when DAY...WEEK then "#{(elapsed / DAY).to_i}d ago"
        else format_absolute(time)
        end
      end

      # Coerce a supported value into a Time.
      # @param value [Time, String, Integer, Float, Object] Raw timestamp
      # @return [Time, nil] Parsed time; nil for unsupported types or values
      #   that cannot be converted
      def self.parse_time(value)
        case value
        when Time then value
        when String then Time.parse(value)
        when Integer, Float then Time.at(value)
        end
      rescue ArgumentError, RangeError
        # ArgumentError: unparseable string. RangeError covers
        # FloatDomainError raised by Time.at for NaN/Infinity.
        nil
      end

      # @param time [Time] Moment to render
      # @return [String] Date in YYYY-MM-DD form
      def self.format_absolute(time)
        time.strftime("%Y-%m-%d")
      end
    end
  end
end
@@ -16,9 +16,9 @@ module ClaudeMemory
16
16
  # Add source annotation to each result in collection
17
17
  # @param results [Array<Hash>] Results to annotate
18
18
  # @param source [Symbol] Source identifier (:project, :global, :legacy)
19
- # @return [Array<Hash>] Results with :source key added (mutates in place)
19
+ # @return [Array<Hash>] New array of results with :source key added
20
20
  def self.annotate_source(results, source)
21
- results.each { |r| r[:source] = source }
21
+ results.map { |r| r.merge(source: source) }
22
22
  end
23
23
  end
24
24
  end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
module ClaudeMemory
  module Core
    # Reciprocal Rank Fusion (RRF) for merging ranked result lists
    # Follows Functional Core pattern - no I/O, just transformations
    #
    # RRF combines multiple ranked lists using position-based scoring:
    #   score(d) = Σ(weight_r / (k + rank_r(d)))
    #
    # On top of that, each list adds a fixed TOP_BONUS to its first three
    # positions, so a result near the top of both lists outranks a result
    # that appears in only one.
    class RRFusion
      K = 60 # Standard RRF constant - controls rank pressure
      TOP_BONUS = {1 => 0.05, 2 => 0.02, 3 => 0.02}.freeze

      # Fuse ranked lists from vector and text search
      #
      # Ties are broken by first appearance (vector list first, then text
      # list) so the output order is deterministic. Input hashes are not
      # mutated; each returned hash is a copy with :similarity replaced.
      #
      # @param vector_results [Array<Hash>] Results from vector search (ordered by similarity)
      # @param text_results [Array<Hash>] Results from text search (ordered by FTS rank)
      # @param limit [Integer] Maximum results to return
      # @param vector_weight [Numeric] Weight multiplier for vector rankings (default 1.0)
      # @param text_weight [Numeric] Weight multiplier for text rankings (default 1.0)
      # @return [Array<Hash>] Fused results sorted by RRF score, with :similarity set to RRF score
      def self.fuse(vector_results, text_results, limit, vector_weight: 1.0, text_weight: 1.0)
        scores = Hash.new(0.0)
        fact_data = {}

        # Prefer vector result data (has real similarity score)
        accumulate(scores, vector_results, vector_weight) do |fact_id, result|
          fact_data[fact_id] = result
        end

        # Only use text data if not already present from vector
        accumulate(scores, text_results, text_weight) do |fact_id, result|
          fact_data[fact_id] ||= result
        end

        # Hash order is insertion order, so the index is the first-seen
        # position and serves as a stable tiebreaker.
        scores
          .each_with_index
          .sort_by { |(_fact_id, score), seen_at| [-score, seen_at] }
          .take(limit)
          .map { |(fact_id, score), _seen_at| fact_data[fact_id].merge(similarity: score) }
      end

      # Add one ranked list's contribution to the running scores and yield
      # each (fact_id, result) pair so the caller decides which payload wins.
      def self.accumulate(scores, results, weight)
        results.each_with_index do |result, idx|
          fact_id = result[:fact][:id]
          rank = idx + 1 # 1-based rank
          # to_f guards against integer division when weight is an Integer.
          scores[fact_id] += weight.to_f / (K + rank)
          scores[fact_id] += TOP_BONUS.fetch(rank, 0.0)
          yield fact_id, result
        end
      end
      private_class_method :accumulate
    end
  end
end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
module ClaudeMemory
  module Core
    # Extracts relevant snippets from raw content based on query terms.
    # Finds the line with the most query term matches and returns
    # surrounding context (1 line before + 2 lines after).
    # Follows Functional Core pattern - pure transformations, no I/O.
    class SnippetExtractor
      CONTEXT_BEFORE = 1
      CONTEXT_AFTER = 2
      MAX_SNIPPET_LENGTH = 500

      # Extract the best snippet from content matching the query
      # @param content [String] Raw text content
      # @param query [String] Search query
      # @return [String, nil] Best matching snippet, or nil when content or
      #   query is blank or no line contains any query term
      def self.extract(content, query)
        result = extract_with_lines(content, query)
        result && result[:snippet]
      end

      # Extract snippet and return line range information
      #
      # The line range describes the full context window; the snippet text
      # itself may be shorter because it is capped at MAX_SNIPPET_LENGTH.
      #
      # @param content [String] Raw text content
      # @param query [String] Search query
      # @return [Hash, nil] Hash with :snippet, :line_start, :line_end
      #   (1-indexed, inclusive) or nil
      def self.extract_with_lines(content, query)
        return nil if content.nil? || content.empty? || query.nil? || query.empty?

        # scrub replaces invalid byte sequences so downcase cannot raise on
        # malformed input; valid strings come back unchanged.
        lines = content.scrub.lines.map(&:chomp)
        return nil if lines.empty?

        terms = tokenize_query(query)
        return nil if terms.empty?

        best_line_idx = find_best_line(lines, terms)
        return nil unless best_line_idx

        start_idx, end_idx = context_window(lines, best_line_idx)

        {
          snippet: truncate(lines[start_idx..end_idx].join("\n")),
          line_start: start_idx + 1, # 1-indexed
          line_end: end_idx + 1 # 1-indexed
        }
      end

      # Split a query into lowercase whitespace-separated terms, dropping
      # terms shorter than two characters.
      # @api private
      def self.tokenize_query(query)
        query.scrub.downcase.split(/\s+/).reject { |t| t.length < 2 }
      end

      # Index of the line containing the most terms (case-insensitive
      # substring match). The earliest line wins a tie; nil when no line
      # contains any term.
      # @api private
      def self.find_best_line(lines, terms)
        best_idx = nil
        best_score = 0

        lines.each_with_index do |line, idx|
          downcased = line.downcase
          score = terms.count { |term| downcased.include?(term) }
          next unless score > best_score

          best_score = score
          best_idx = idx
        end

        best_idx
      end

      # Join the context window around center_idx and cap its length.
      # @api private
      def self.build_snippet(lines, center_idx)
        start_idx, end_idx = context_window(lines, center_idx)
        truncate(lines[start_idx..end_idx].join("\n"))
      end

      # Cap text at MAX_SNIPPET_LENGTH characters, ending in "..." when cut.
      # @api private
      def self.truncate(text)
        return text if text.length <= MAX_SNIPPET_LENGTH
        text[0, MAX_SNIPPET_LENGTH - 3] + "..."
      end

      # Zero-based [start, end] indices of the context window, clipped to
      # the bounds of lines.
      # @api private
      def self.context_window(lines, center_idx)
        [
          [center_idx - CONTEXT_BEFORE, 0].max,
          [center_idx + CONTEXT_AFTER, lines.size - 1].min
        ]
      end
    end
  end
end
@@ -5,12 +5,13 @@ module ClaudeMemory
5
5
  # Domain model representing a fact in the memory system
6
6
  # Encapsulates business logic and validation
7
7
  class Fact
8
- attr_reader :id, :subject_name, :predicate, :object_literal,
8
+ attr_reader :id, :docid, :subject_name, :predicate, :object_literal,
9
9
  :status, :confidence, :scope, :project_path,
10
10
  :valid_from, :valid_to, :created_at
11
11
 
12
12
  def initialize(attributes)
13
13
  @id = attributes[:id]
14
+ @docid = attributes[:docid]
14
15
  @subject_name = attributes[:subject_name]
15
16
  @predicate = attributes[:predicate]
16
17
  @object_literal = attributes[:object_literal]
@@ -45,6 +46,7 @@ module ClaudeMemory
45
46
  def to_h
46
47
  {
47
48
  id: id,
49
+ docid: docid,
48
50
  subject_name: subject_name,
49
51
  predicate: predicate,
50
52
  object_literal: object_literal,
@@ -55,6 +55,7 @@ module ClaudeMemory
55
55
  .left_join(:entities, id: :subject_entity_id)
56
56
  .select(
57
57
  Sequel[:facts][:id],
58
+ Sequel[:facts][:docid],
58
59
  Sequel[:facts][:predicate],
59
60
  Sequel[:facts][:object_literal],
60
61
  Sequel[:facts][:status],
@@ -67,6 +68,7 @@ module ClaudeMemory
67
68
  .map do |fact|
68
69
  {
69
70
  id: fact[:id],
71
+ docid: fact[:docid],
70
72
  subject: fact[:subject_name],
71
73
  predicate: fact[:predicate],
72
74
  object_preview: truncate_preview(fact[:object_literal]),
@@ -36,6 +36,24 @@ module ClaudeMemory
36
36
  .select_map(:content_item_id)
37
37
  end
38
38
 
39
+ # Search returning content IDs with FTS5 BM25 rank values
40
+ # @param query [String] Search query
41
+ # @param limit [Integer] Maximum results
42
+ # @return [Array<Hash>] Results with :content_item_id and :rank
43
+ def search_with_ranks(query, limit: 20)
44
+ ensure_fts_table!
45
+ return [] if query.nil? || query.strip.empty?
46
+ return [] if query.strip == "*"
47
+
48
+ escaped_query = escape_fts_query(query)
49
+ @db[:content_fts]
50
+ .where(Sequel.lit("text MATCH ?", escaped_query))
51
+ .order(:rank)
52
+ .limit(limit)
53
+ .select(Sequel.lit("content_item_id, rank"))
54
+ .all
55
+ end
56
+
39
57
  def escape_fts_query(query)
40
58
  words = query.split(/\s+/).map do |word|
41
59
  next word if word == "*"