claude_memory 0.7.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/memory.sqlite3 +0 -0
- data/.claude/rules/claude_memory.generated.md +32 -2
- data/.claude/settings.json +65 -15
- data/.claude/settings.local.json +5 -2
- data/.claude/skills/improve/SKILL.md +113 -25
- data/.claude/skills/upgrade-dependencies/SKILL.md +154 -0
- data/.claude-plugin/commands/distill-transcripts.md +98 -0
- data/.claude-plugin/commands/memory-recall.md +67 -0
- data/.claude-plugin/marketplace.json +2 -2
- data/.claude-plugin/plugin.json +3 -3
- data/.claude-plugin/scripts/hook-runner.sh +14 -0
- data/.claude-plugin/scripts/serve-mcp.sh +14 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +90 -1
- data/CLAUDE.md +56 -18
- data/README.md +35 -0
- data/db/migrations/013_add_mcp_tool_calls.rb +26 -0
- data/db/migrations/014_canonicalize_predicates.rb +30 -0
- data/docs/improvements.md +74 -74
- data/docs/influence/claude-mem.md +1 -0
- data/docs/influence/claude-supermemory.md +1 -0
- data/docs/influence/episodic-memory.md +1 -0
- data/docs/influence/grepai.md +1 -0
- data/docs/influence/kbs.md +1 -0
- data/docs/influence/lossless-claw.md +1 -0
- data/docs/influence/qmd.md +1 -0
- data/docs/quality_review.md +119 -224
- data/hooks/hooks.json +39 -7
- data/lib/claude_memory/commands/checks/distill_check.rb +61 -0
- data/lib/claude_memory/commands/checks/hooks_check.rb +2 -2
- data/lib/claude_memory/commands/checks/vec_check.rb +2 -1
- data/lib/claude_memory/commands/completion_command.rb +149 -0
- data/lib/claude_memory/commands/doctor_command.rb +2 -0
- data/lib/claude_memory/commands/embeddings_command.rb +198 -0
- data/lib/claude_memory/commands/help_command.rb +12 -1
- data/lib/claude_memory/commands/hook_command.rb +2 -1
- data/lib/claude_memory/commands/index_command.rb +85 -78
- data/lib/claude_memory/commands/initializers/database_ensurer.rb +16 -0
- data/lib/claude_memory/commands/initializers/global_initializer.rb +2 -1
- data/lib/claude_memory/commands/initializers/hooks_configurator.rb +55 -11
- data/lib/claude_memory/commands/initializers/project_initializer.rb +2 -1
- data/lib/claude_memory/commands/install_skill_command.rb +78 -0
- data/lib/claude_memory/commands/registry.rb +47 -32
- data/lib/claude_memory/commands/reject_command.rb +62 -0
- data/lib/claude_memory/commands/restore_command.rb +77 -0
- data/lib/claude_memory/commands/skills/distill-transcripts.md +102 -0
- data/lib/claude_memory/commands/skills/memory-recall.md +67 -0
- data/lib/claude_memory/commands/stats_command.rb +98 -2
- data/lib/claude_memory/configuration.rb +14 -1
- data/lib/claude_memory/core/fact_ranker.rb +2 -2
- data/lib/claude_memory/core/rr_fusion.rb +23 -6
- data/lib/claude_memory/core/snippet_extractor.rb +7 -3
- data/lib/claude_memory/core/text_builder.rb +11 -0
- data/lib/claude_memory/distill/json_schema.md +8 -4
- data/lib/claude_memory/distill/null_distiller.rb +2 -0
- data/lib/claude_memory/domain/entity.rb +13 -1
- data/lib/claude_memory/domain/fact.rb +26 -2
- data/lib/claude_memory/domain/provenance.rb +0 -1
- data/lib/claude_memory/embeddings/api_adapter.rb +97 -0
- data/lib/claude_memory/embeddings/dimension_check.rb +23 -0
- data/lib/claude_memory/embeddings/fastembed_adapter.rb +46 -12
- data/lib/claude_memory/embeddings/generator.rb +4 -0
- data/lib/claude_memory/embeddings/inspector.rb +91 -0
- data/lib/claude_memory/embeddings/model_registry.rb +210 -0
- data/lib/claude_memory/embeddings/resolver.rb +44 -0
- data/lib/claude_memory/hook/context_injector.rb +58 -2
- data/lib/claude_memory/hook/distillation_runner.rb +46 -0
- data/lib/claude_memory/hook/handler.rb +11 -2
- data/lib/claude_memory/index/vector_index.rb +15 -2
- data/lib/claude_memory/infrastructure/schema_validator.rb +3 -3
- data/lib/claude_memory/ingest/ingester.rb +17 -0
- data/lib/claude_memory/mcp/handlers/context_handlers.rb +38 -0
- data/lib/claude_memory/mcp/handlers/management_handlers.rb +169 -0
- data/lib/claude_memory/mcp/handlers/query_handlers.rb +115 -0
- data/lib/claude_memory/mcp/handlers/setup_handlers.rb +211 -0
- data/lib/claude_memory/mcp/handlers/shortcut_handlers.rb +37 -0
- data/lib/claude_memory/mcp/handlers/stats_handlers.rb +205 -0
- data/lib/claude_memory/mcp/instructions_builder.rb +19 -1
- data/lib/claude_memory/mcp/query_guide.rb +10 -0
- data/lib/claude_memory/mcp/response_formatter.rb +1 -0
- data/lib/claude_memory/mcp/server.rb +22 -1
- data/lib/claude_memory/mcp/telemetry.rb +86 -0
- data/lib/claude_memory/mcp/text_summary.rb +26 -0
- data/lib/claude_memory/mcp/tool_definitions.rb +116 -4
- data/lib/claude_memory/mcp/tool_helpers.rb +43 -0
- data/lib/claude_memory/mcp/tools.rb +50 -679
- data/lib/claude_memory/publish.rb +40 -5
- data/lib/claude_memory/recall/dual_engine.rb +105 -0
- data/lib/claude_memory/recall/legacy_engine.rb +138 -0
- data/lib/claude_memory/recall/query_core.rb +371 -0
- data/lib/claude_memory/recall.rb +121 -673
- data/lib/claude_memory/resolve/predicate_policy.rb +63 -3
- data/lib/claude_memory/resolve/resolver.rb +43 -0
- data/lib/claude_memory/shortcuts.rb +4 -4
- data/lib/claude_memory/store/retry_handler.rb +61 -0
- data/lib/claude_memory/store/schema_manager.rb +68 -0
- data/lib/claude_memory/store/sqlite_store.rb +334 -201
- data/lib/claude_memory/store/store_manager.rb +50 -1
- data/lib/claude_memory/sweep/maintenance.rb +115 -1
- data/lib/claude_memory/sweep/sweeper.rb +3 -0
- data/lib/claude_memory/templates/hooks.example.json +26 -7
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +16 -0
- metadata +48 -8
- data/.claude/memory.sqlite3-shm +0 -0
- data/.claude/memory.sqlite3-wal +0 -0
data/lib/claude_memory/recall.rb
CHANGED
|
@@ -1,727 +1,175 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module ClaudeMemory
|
|
4
|
+
# Query interface for facts across dual databases (global + project).
|
|
5
|
+
# Delegates to DualEngine or LegacyEngine depending on the store type.
|
|
4
6
|
class Recall
|
|
7
|
+
# @return [String] query only project-scoped facts
|
|
5
8
|
SCOPE_PROJECT = "project"
|
|
9
|
+
# @return [String] query only global-scoped facts
|
|
6
10
|
SCOPE_GLOBAL = "global"
|
|
11
|
+
# @return [String] query both project and global facts (default)
|
|
7
12
|
SCOPE_ALL = "all"
|
|
8
13
|
|
|
9
14
|
class << self
|
|
15
|
+
# @param manager [Store::StoreManager] dual-database manager
|
|
16
|
+
# @param limit [Integer] max results
|
|
17
|
+
# @return [Array<Hash>] recent decision facts
|
|
10
18
|
def recent_decisions(manager, limit: 10)
|
|
11
19
|
Shortcuts.for(:decisions, manager, limit: limit)
|
|
12
20
|
end
|
|
13
21
|
|
|
22
|
+
# @param manager [Store::StoreManager] dual-database manager
|
|
23
|
+
# @param limit [Integer] max results
|
|
24
|
+
# @return [Array<Hash>] architecture-related facts
|
|
14
25
|
def architecture_choices(manager, limit: 10)
|
|
15
26
|
Shortcuts.for(:architecture, manager, limit: limit)
|
|
16
27
|
end
|
|
17
28
|
|
|
29
|
+
# @param manager [Store::StoreManager] dual-database manager
|
|
30
|
+
# @param limit [Integer] max results
|
|
31
|
+
# @return [Array<Hash>] convention facts
|
|
18
32
|
def conventions(manager, limit: 20)
|
|
19
33
|
Shortcuts.for(:conventions, manager, limit: limit)
|
|
20
34
|
end
|
|
21
35
|
|
|
36
|
+
# @param manager [Store::StoreManager] dual-database manager
|
|
37
|
+
# @param limit [Integer] max results
|
|
38
|
+
# @return [Array<Hash>] project configuration facts
|
|
22
39
|
def project_config(manager, limit: 10)
|
|
23
40
|
Shortcuts.for(:project_config, manager, limit: limit)
|
|
24
41
|
end
|
|
25
42
|
end
|
|
26
43
|
|
|
44
|
+
# @param store_or_manager [Store::SQLiteStore, Store::StoreManager] database store or dual-database manager
|
|
45
|
+
# @param fts [Index::LexicalFTS, nil] full-text search index (used only with legacy single-store)
|
|
46
|
+
# @param project_path [String, nil] project root path (defaults to Configuration#project_dir)
|
|
47
|
+
# @param env [Hash] environment variables
|
|
48
|
+
# @param embedding_generator [Object, nil] vector embedding generator for semantic search
|
|
27
49
|
def initialize(store_or_manager, fts: nil, project_path: nil, env: ENV, embedding_generator: nil)
|
|
28
50
|
config = Configuration.new(env)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
if store_or_manager.is_a?(Store::StoreManager)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
@legacy_mode = true
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
def query(query_text, limit: 10, scope: SCOPE_ALL, include_raw_text: false)
|
|
43
|
-
if @legacy_mode
|
|
44
|
-
query_legacy(query_text, limit: limit, scope: scope)
|
|
45
|
-
else
|
|
46
|
-
query_dual(query_text, limit: limit, scope: scope, include_raw_text: include_raw_text)
|
|
47
|
-
end
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
def query_index(query_text, limit: 20, scope: SCOPE_ALL)
|
|
51
|
-
if @legacy_mode
|
|
52
|
-
query_index_legacy(query_text, limit: limit, scope: scope)
|
|
51
|
+
resolved_project_path = project_path || config.project_dir
|
|
52
|
+
resolved_generator = embedding_generator || Embeddings.resolve(env: env)
|
|
53
|
+
|
|
54
|
+
@engine = if store_or_manager.is_a?(Store::StoreManager)
|
|
55
|
+
DualEngine.new(
|
|
56
|
+
store_or_manager,
|
|
57
|
+
embedding_generator: resolved_generator,
|
|
58
|
+
project_path: resolved_project_path
|
|
59
|
+
)
|
|
53
60
|
else
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
61
|
+
LegacyEngine.new(
|
|
62
|
+
store_or_manager,
|
|
63
|
+
fts: fts || Index::LexicalFTS.new(store_or_manager),
|
|
64
|
+
embedding_generator: resolved_generator,
|
|
65
|
+
project_path: resolved_project_path
|
|
66
|
+
)
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Search facts by text query using FTS5
|
|
71
|
+
# @param query_text [String] search terms
|
|
72
|
+
# @param limit [Integer] max results
|
|
73
|
+
# @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
|
|
74
|
+
# @param include_raw_text [Boolean] include source content text in results
|
|
75
|
+
# @param intent [String, nil] query intent hint for ranking
|
|
76
|
+
# @return [Array<Hash>] matching facts with provenance
|
|
77
|
+
def query(query_text, limit: 10, scope: SCOPE_ALL, include_raw_text: false, intent: nil)
|
|
78
|
+
@engine.query(query_text, limit: limit, scope: scope, include_raw_text: include_raw_text, intent: intent)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Search content items (not facts) via FTS5 index
|
|
82
|
+
# @param query_text [String] search terms
|
|
83
|
+
# @param limit [Integer] max results
|
|
84
|
+
# @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
|
|
85
|
+
# @param intent [String, nil] query intent hint for ranking
|
|
86
|
+
# @return [Array<Hash>] matching content items
|
|
87
|
+
def query_index(query_text, limit: 20, scope: SCOPE_ALL, intent: nil)
|
|
88
|
+
@engine.query_index(query_text, limit: limit, scope: scope, intent: intent)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Traverse fact relationships (supersessions, conflicts) as a graph
|
|
92
|
+
# @param fact_id [Integer] starting fact ID
|
|
93
|
+
# @param depth [Integer] traversal depth
|
|
94
|
+
# @param scope [String, nil] optional scope filter
|
|
95
|
+
# @return [Hash] graph with nodes and edges
|
|
58
96
|
def fact_graph(fact_id, depth: 2, scope: nil)
|
|
59
|
-
|
|
60
|
-
Core::FactGraph.build(@legacy_store, fact_id, depth: depth)
|
|
61
|
-
else
|
|
62
|
-
scope ||= SCOPE_PROJECT
|
|
63
|
-
store = @manager.store_for_scope(scope)
|
|
64
|
-
Core::FactGraph.build(store, fact_id, depth: depth)
|
|
65
|
-
end
|
|
97
|
+
@engine.fact_graph(fact_id, depth: depth, scope: scope)
|
|
66
98
|
end
|
|
67
99
|
|
|
100
|
+
# Show provenance chain for a fact
|
|
101
|
+
# @param fact_id_or_docid [Integer, String] fact ID or document ID
|
|
102
|
+
# @param scope [String, nil] optional scope filter
|
|
103
|
+
# @return [Hash] provenance details including source content
|
|
68
104
|
def explain(fact_id_or_docid, scope: nil)
|
|
69
|
-
|
|
70
|
-
fact_id = resolve_fact_identifier(@legacy_store, fact_id_or_docid)
|
|
71
|
-
explain_from_store(@legacy_store, fact_id)
|
|
72
|
-
else
|
|
73
|
-
scope ||= SCOPE_PROJECT
|
|
74
|
-
store = @manager.store_for_scope(scope)
|
|
75
|
-
fact_id = resolve_fact_identifier(store, fact_id_or_docid)
|
|
76
|
-
explain_from_store(store, fact_id)
|
|
77
|
-
end
|
|
105
|
+
@engine.explain(fact_id_or_docid, scope: scope)
|
|
78
106
|
end
|
|
79
107
|
|
|
108
|
+
# List facts created or modified since a given time
|
|
109
|
+
# @param since [String] ISO 8601 timestamp
|
|
110
|
+
# @param limit [Integer] max results
|
|
111
|
+
# @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
|
|
112
|
+
# @return [Array<Hash>] recently changed facts
|
|
80
113
|
def changes(since:, limit: 50, scope: SCOPE_ALL)
|
|
81
|
-
|
|
82
|
-
changes_legacy(since: since, limit: limit, scope: scope)
|
|
83
|
-
else
|
|
84
|
-
changes_dual(since: since, limit: limit, scope: scope)
|
|
85
|
-
end
|
|
114
|
+
@engine.changes(since: since, limit: limit, scope: scope)
|
|
86
115
|
end
|
|
87
116
|
|
|
117
|
+
# List open fact conflicts
|
|
118
|
+
# @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
|
|
119
|
+
# @return [Array<Hash>] unresolved conflicts
|
|
88
120
|
def conflicts(scope: SCOPE_ALL)
|
|
89
|
-
|
|
90
|
-
conflicts_legacy(scope: scope)
|
|
91
|
-
else
|
|
92
|
-
conflicts_dual(scope: scope)
|
|
93
|
-
end
|
|
121
|
+
@engine.conflicts(scope: scope)
|
|
94
122
|
end
|
|
95
123
|
|
|
124
|
+
# Find facts associated with a git branch
|
|
125
|
+
# @param branch_name [String] git branch name
|
|
126
|
+
# @param limit [Integer] max results
|
|
127
|
+
# @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
|
|
128
|
+
# @return [Array<Hash>] facts from the given branch
|
|
96
129
|
def facts_by_branch(branch_name, limit: 20, scope: SCOPE_ALL)
|
|
97
|
-
|
|
98
|
-
facts_by_context_legacy(:git_branch, branch_name, limit: limit, scope: scope)
|
|
99
|
-
else
|
|
100
|
-
facts_by_context_dual(:git_branch, branch_name, limit: limit, scope: scope)
|
|
101
|
-
end
|
|
130
|
+
@engine.facts_by_branch(branch_name, limit: limit, scope: scope)
|
|
102
131
|
end
|
|
103
132
|
|
|
133
|
+
# Find facts associated with a working directory
|
|
134
|
+
# @param cwd [String] directory path
|
|
135
|
+
# @param limit [Integer] max results
|
|
136
|
+
# @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
|
|
137
|
+
# @return [Array<Hash>] facts from the given directory
|
|
104
138
|
def facts_by_directory(cwd, limit: 20, scope: SCOPE_ALL)
|
|
105
|
-
|
|
106
|
-
facts_by_context_legacy(:cwd, cwd, limit: limit, scope: scope)
|
|
107
|
-
else
|
|
108
|
-
facts_by_context_dual(:cwd, cwd, limit: limit, scope: scope)
|
|
109
|
-
end
|
|
139
|
+
@engine.facts_by_directory(cwd, limit: limit, scope: scope)
|
|
110
140
|
end
|
|
111
141
|
|
|
142
|
+
# Find facts associated with a specific tool
|
|
143
|
+
# @param tool_name [String] tool name (e.g., "Read", "Bash")
|
|
144
|
+
# @param limit [Integer] max results
|
|
145
|
+
# @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
|
|
146
|
+
# @return [Array<Hash>] facts from sessions using the given tool
|
|
112
147
|
def facts_by_tool(tool_name, limit: 20, scope: SCOPE_ALL)
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
end
|
|
127
|
-
|
|
148
|
+
@engine.facts_by_tool(tool_name, limit: limit, scope: scope)
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
# Search facts using vector embeddings (semantic similarity)
|
|
152
|
+
# @param text [String] natural language query
|
|
153
|
+
# @param limit [Integer] max results
|
|
154
|
+
# @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
|
|
155
|
+
# @param mode [Symbol] :vector, :lexical, or :both (hybrid RRF)
|
|
156
|
+
# @param explain [Boolean] include scoring breakdown in results
|
|
157
|
+
# @param intent [String, nil] query intent hint for ranking
|
|
158
|
+
# @return [Array<Hash>] semantically similar facts
|
|
159
|
+
def query_semantic(text, limit: 10, scope: SCOPE_ALL, mode: :both, explain: false, intent: nil)
|
|
160
|
+
@engine.query_semantic(text, limit: limit, scope: scope, mode: mode, explain: explain, intent: intent)
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
# Find facts at the intersection of multiple concepts
|
|
164
|
+
# @param concepts [Array<String>] 2-5 concept terms to intersect
|
|
165
|
+
# @param limit [Integer] max results
|
|
166
|
+
# @param scope [String] one of SCOPE_ALL, SCOPE_PROJECT, SCOPE_GLOBAL
|
|
167
|
+
# @return [Array<Hash>] facts matching all given concepts
|
|
168
|
+
# @raise [ArgumentError] if concepts count is not 2-5
|
|
128
169
|
def query_concepts(concepts, limit: 10, scope: SCOPE_ALL)
|
|
129
170
|
raise ArgumentError, "Must provide 2-5 concepts" unless (2..5).cover?(concepts.size)
|
|
130
171
|
|
|
131
|
-
|
|
132
|
-
query_concepts_legacy(concepts, limit: limit, scope: scope)
|
|
133
|
-
else
|
|
134
|
-
query_concepts_dual(concepts, limit: limit, scope: scope)
|
|
135
|
-
end
|
|
136
|
-
end
|
|
137
|
-
|
|
138
|
-
private
|
|
139
|
-
|
|
140
|
-
def query_dual(query_text, limit:, scope:, include_raw_text: false)
|
|
141
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
142
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
143
|
-
query_single_store(store, query_text, limit: limit, source: source, include_raw_text: include_raw_text)
|
|
144
|
-
end
|
|
145
|
-
dedupe_and_sort(results, limit)
|
|
146
|
-
end
|
|
147
|
-
|
|
148
|
-
def query_index_dual(query_text, limit:, scope:)
|
|
149
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
150
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
151
|
-
query_index_single_store(store, query_text, limit: limit, source: source)
|
|
152
|
-
end
|
|
153
|
-
dedupe_and_sort_index(results, limit)
|
|
154
|
-
end
|
|
155
|
-
|
|
156
|
-
def query_index_single_store(store, query_text, limit:, source:)
|
|
157
|
-
options = Index::QueryOptions.new(
|
|
158
|
-
query_text: query_text,
|
|
159
|
-
limit: limit,
|
|
160
|
-
scope: :all,
|
|
161
|
-
source: source
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
query = Index::IndexQuery.new(store, options)
|
|
165
|
-
query.execute
|
|
166
|
-
end
|
|
167
|
-
|
|
168
|
-
def dedupe_and_sort_index(results, limit)
|
|
169
|
-
Core::FactRanker.dedupe_and_sort_index(results, limit)
|
|
170
|
-
end
|
|
171
|
-
|
|
172
|
-
def query_single_store(store, query_text, limit:, source:, include_raw_text: false)
|
|
173
|
-
fts = Index::LexicalFTS.new(store)
|
|
174
|
-
content_ids = fts.search(query_text, limit: limit * 3)
|
|
175
|
-
return [] if content_ids.empty?
|
|
176
|
-
|
|
177
|
-
# Batch query: fetch ALL provenance records at once using WHERE IN
|
|
178
|
-
provenance_by_content = store.provenance
|
|
179
|
-
.select(:fact_id, :content_item_id)
|
|
180
|
-
.where(content_item_id: content_ids)
|
|
181
|
-
.all
|
|
182
|
-
.group_by { |p| p[:content_item_id] }
|
|
183
|
-
|
|
184
|
-
# Collect fact IDs in content order, deduplicated
|
|
185
|
-
ordered_fact_ids = Core::FactCollector.collect_ordered_fact_ids(
|
|
186
|
-
provenance_by_content,
|
|
187
|
-
content_ids,
|
|
188
|
-
limit
|
|
189
|
-
)
|
|
190
|
-
|
|
191
|
-
return [] if ordered_fact_ids.empty?
|
|
192
|
-
|
|
193
|
-
# Batch query all facts at once
|
|
194
|
-
facts_by_id = batch_find_facts(store, ordered_fact_ids)
|
|
195
|
-
|
|
196
|
-
# Batch query all receipts at once
|
|
197
|
-
receipts_by_fact_id = batch_find_receipts(store, ordered_fact_ids, include_raw_text: include_raw_text)
|
|
198
|
-
|
|
199
|
-
# Build results maintaining order
|
|
200
|
-
Core::ResultBuilder.build_results(
|
|
201
|
-
ordered_fact_ids,
|
|
202
|
-
facts_by_id: facts_by_id,
|
|
203
|
-
receipts_by_fact_id: receipts_by_fact_id,
|
|
204
|
-
source: source
|
|
205
|
-
)
|
|
206
|
-
end
|
|
207
|
-
|
|
208
|
-
def batch_find_facts(store, fact_ids)
|
|
209
|
-
Core::FactQueryBuilder.batch_find_facts(store, fact_ids)
|
|
210
|
-
end
|
|
211
|
-
|
|
212
|
-
def batch_find_receipts(store, fact_ids, include_raw_text: false)
|
|
213
|
-
Core::FactQueryBuilder.batch_find_receipts(store, fact_ids, include_raw_text: include_raw_text)
|
|
214
|
-
end
|
|
215
|
-
|
|
216
|
-
def dedupe_and_sort(results, limit)
|
|
217
|
-
Core::FactRanker.dedupe_and_sort(results, limit)
|
|
218
|
-
end
|
|
219
|
-
|
|
220
|
-
def changes_dual(since:, limit:, scope:)
|
|
221
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
222
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
223
|
-
changes = fetch_changes(store, since, limit)
|
|
224
|
-
Core::ResultSorter.annotate_source(changes, source)
|
|
225
|
-
end
|
|
226
|
-
Core::ResultSorter.sort_by_timestamp(results, limit)
|
|
227
|
-
end
|
|
228
|
-
|
|
229
|
-
def fetch_changes(store, since, limit)
|
|
230
|
-
Core::FactQueryBuilder.fetch_changes(store, since, limit)
|
|
231
|
-
end
|
|
232
|
-
|
|
233
|
-
def conflicts_dual(scope:)
|
|
234
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
235
|
-
template.execute(scope: scope) do |store, source|
|
|
236
|
-
conflicts = store.open_conflicts
|
|
237
|
-
Core::ResultSorter.annotate_source(conflicts, source)
|
|
238
|
-
end
|
|
239
|
-
end
|
|
240
|
-
|
|
241
|
-
# Resolve a fact identifier to an integer ID
|
|
242
|
-
# Accepts either an integer ID or an 8-char docid string
|
|
243
|
-
def resolve_fact_identifier(store, identifier)
|
|
244
|
-
return identifier if identifier.is_a?(Integer)
|
|
245
|
-
|
|
246
|
-
str = identifier.to_s
|
|
247
|
-
# If it looks like a pure integer, use as ID
|
|
248
|
-
return str.to_i if str.match?(/\A\d+\z/)
|
|
249
|
-
|
|
250
|
-
# Otherwise treat as docid
|
|
251
|
-
fact = Core::FactQueryBuilder.find_fact_by_docid(store, str)
|
|
252
|
-
fact ? fact[:id] : nil
|
|
253
|
-
end
|
|
254
|
-
|
|
255
|
-
def explain_from_store(store, fact_id)
|
|
256
|
-
fact = find_fact_from_store(store, fact_id)
|
|
257
|
-
return Core::NullExplanation.new unless fact
|
|
258
|
-
|
|
259
|
-
{
|
|
260
|
-
fact: fact,
|
|
261
|
-
receipts: find_receipts_from_store(store, fact_id),
|
|
262
|
-
superseded_by: find_superseded_by_from_store(store, fact_id),
|
|
263
|
-
supersedes: find_supersedes_from_store(store, fact_id),
|
|
264
|
-
conflicts: find_conflicts_from_store(store, fact_id)
|
|
265
|
-
}
|
|
266
|
-
end
|
|
267
|
-
|
|
268
|
-
def find_fact_from_store(store, fact_id)
|
|
269
|
-
Core::FactQueryBuilder.find_fact(store, fact_id)
|
|
270
|
-
end
|
|
271
|
-
|
|
272
|
-
def find_receipts_from_store(store, fact_id)
|
|
273
|
-
Core::FactQueryBuilder.find_receipts(store, fact_id)
|
|
274
|
-
end
|
|
275
|
-
|
|
276
|
-
def find_superseded_by_from_store(store, fact_id)
|
|
277
|
-
Core::FactQueryBuilder.find_superseded_by(store, fact_id)
|
|
278
|
-
end
|
|
279
|
-
|
|
280
|
-
def find_supersedes_from_store(store, fact_id)
|
|
281
|
-
Core::FactQueryBuilder.find_supersedes(store, fact_id)
|
|
282
|
-
end
|
|
283
|
-
|
|
284
|
-
def find_conflicts_from_store(store, fact_id)
|
|
285
|
-
Core::FactQueryBuilder.find_conflicts(store, fact_id)
|
|
286
|
-
end
|
|
287
|
-
|
|
288
|
-
def query_legacy(query_text, limit:, scope:)
|
|
289
|
-
content_ids = @legacy_fts.search(query_text, limit: limit * 3)
|
|
290
|
-
return [] if content_ids.empty?
|
|
291
|
-
|
|
292
|
-
# Batch query: fetch ALL provenance records at once using WHERE IN
|
|
293
|
-
provenance_by_content = @legacy_store.provenance
|
|
294
|
-
.select(:fact_id, :content_item_id)
|
|
295
|
-
.where(content_item_id: content_ids)
|
|
296
|
-
.all
|
|
297
|
-
.group_by { |p| p[:content_item_id] }
|
|
298
|
-
|
|
299
|
-
# Collect ordered unique fact IDs from provenance
|
|
300
|
-
all_fact_ids = []
|
|
301
|
-
seen_fact_ids = Set.new
|
|
302
|
-
content_ids.each do |content_id|
|
|
303
|
-
(provenance_by_content[content_id] || []).each do |prov|
|
|
304
|
-
next if seen_fact_ids.include?(prov[:fact_id])
|
|
305
|
-
seen_fact_ids.add(prov[:fact_id])
|
|
306
|
-
all_fact_ids << prov[:fact_id]
|
|
307
|
-
end
|
|
308
|
-
end
|
|
309
|
-
|
|
310
|
-
return [] if all_fact_ids.empty?
|
|
311
|
-
|
|
312
|
-
# Batch query: fetch ALL facts at once
|
|
313
|
-
facts_by_id = batch_find_facts(@legacy_store, all_fact_ids)
|
|
314
|
-
|
|
315
|
-
# Filter by scope and apply limit
|
|
316
|
-
selected_fact_ids = []
|
|
317
|
-
all_fact_ids.each do |fact_id|
|
|
318
|
-
fact = facts_by_id[fact_id]
|
|
319
|
-
next unless fact
|
|
320
|
-
next unless fact_matches_scope?(fact, scope)
|
|
321
|
-
selected_fact_ids << fact_id
|
|
322
|
-
break if selected_fact_ids.size >= limit
|
|
323
|
-
end
|
|
324
|
-
|
|
325
|
-
return [] if selected_fact_ids.empty?
|
|
326
|
-
|
|
327
|
-
# Batch query: fetch ALL receipts at once
|
|
328
|
-
receipts_by_fact_id = batch_find_receipts(@legacy_store, selected_fact_ids)
|
|
329
|
-
|
|
330
|
-
facts_with_provenance = selected_fact_ids.map do |fact_id|
|
|
331
|
-
{
|
|
332
|
-
fact: facts_by_id[fact_id],
|
|
333
|
-
receipts: receipts_by_fact_id[fact_id] || []
|
|
334
|
-
}
|
|
335
|
-
end
|
|
336
|
-
|
|
337
|
-
sort_by_scope_priority(facts_with_provenance)
|
|
338
|
-
end
|
|
339
|
-
|
|
340
|
-
def query_index_legacy(query_text, limit:, scope:)
|
|
341
|
-
options = Index::QueryOptions.new(
|
|
342
|
-
query_text: query_text,
|
|
343
|
-
limit: limit,
|
|
344
|
-
scope: :all,
|
|
345
|
-
source: :legacy
|
|
346
|
-
)
|
|
347
|
-
|
|
348
|
-
query = Index::IndexQuery.new(@legacy_store, options)
|
|
349
|
-
results = query.execute
|
|
350
|
-
|
|
351
|
-
# Filter by scope in legacy mode
|
|
352
|
-
results.select do |result|
|
|
353
|
-
# Need to get full fact to check scope
|
|
354
|
-
fact = find_fact(result[:id])
|
|
355
|
-
fact && fact_matches_scope?(fact, scope)
|
|
356
|
-
end
|
|
357
|
-
end
|
|
358
|
-
|
|
359
|
-
def changes_legacy(since:, limit:, scope:)
|
|
360
|
-
ds = @legacy_store.facts
|
|
361
|
-
.select(:id, :docid, :subject_entity_id, :predicate, :object_literal, :status, :created_at, :scope, :project_path)
|
|
362
|
-
.where { created_at >= since }
|
|
363
|
-
.order(Sequel.desc(:created_at))
|
|
364
|
-
.limit(limit)
|
|
365
|
-
|
|
366
|
-
ds = apply_scope_filter(ds, scope)
|
|
367
|
-
ds.all
|
|
368
|
-
end
|
|
369
|
-
|
|
370
|
-
def conflicts_legacy(scope:)
|
|
371
|
-
all_conflicts = @legacy_store.open_conflicts
|
|
372
|
-
return all_conflicts if scope == SCOPE_ALL
|
|
373
|
-
|
|
374
|
-
all_conflicts.select do |conflict|
|
|
375
|
-
fact_a = find_fact(conflict[:fact_a_id])
|
|
376
|
-
fact_b = find_fact(conflict[:fact_b_id])
|
|
377
|
-
|
|
378
|
-
fact_matches_scope?(fact_a, scope) || fact_matches_scope?(fact_b, scope)
|
|
379
|
-
end
|
|
380
|
-
end
|
|
381
|
-
|
|
382
|
-
def fact_matches_scope?(fact, scope)
|
|
383
|
-
Core::ScopeFilter.matches?(fact, scope, @project_path)
|
|
384
|
-
end
|
|
385
|
-
|
|
386
|
-
def apply_scope_filter(dataset, scope)
|
|
387
|
-
Core::ScopeFilter.apply_to_dataset(dataset, scope, @project_path)
|
|
388
|
-
end
|
|
389
|
-
|
|
390
|
-
def sort_by_scope_priority(facts_with_provenance)
|
|
391
|
-
Core::FactRanker.sort_by_scope_priority(facts_with_provenance, @project_path)
|
|
392
|
-
end
|
|
393
|
-
|
|
394
|
-
def find_provenance_by_content(content_id)
|
|
395
|
-
Core::FactQueryBuilder.find_provenance_by_content(@legacy_store, content_id)
|
|
396
|
-
end
|
|
397
|
-
|
|
398
|
-
def find_fact(fact_id)
|
|
399
|
-
find_fact_from_store(@legacy_store, fact_id)
|
|
400
|
-
end
|
|
401
|
-
|
|
402
|
-
def find_receipts(fact_id)
|
|
403
|
-
find_receipts_from_store(@legacy_store, fact_id)
|
|
404
|
-
end
|
|
405
|
-
|
|
406
|
-
# Context-aware query helpers
|
|
407
|
-
|
|
408
|
-
def facts_by_context_dual(column, value, limit:, scope:)
|
|
409
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
410
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
411
|
-
facts_by_context_single(store, column, value, limit: limit, source: source)
|
|
412
|
-
end
|
|
413
|
-
dedupe_and_sort(results, limit)
|
|
414
|
-
end
|
|
415
|
-
|
|
416
|
-
def facts_by_context_legacy(column, value, limit:, scope:)
|
|
417
|
-
facts_by_context_single(@legacy_store, column, value, limit: limit, source: :legacy)
|
|
418
|
-
end
|
|
419
|
-
|
|
420
|
-
def facts_by_context_single(store, column, value, limit:, source:)
|
|
421
|
-
# Find content items matching the context
|
|
422
|
-
content_ids = store.content_items
|
|
423
|
-
.where(column => value)
|
|
424
|
-
.select(:id)
|
|
425
|
-
.map { |row| row[:id] }
|
|
426
|
-
|
|
427
|
-
return [] if content_ids.empty?
|
|
428
|
-
|
|
429
|
-
# Find facts linked to those content items via provenance
|
|
430
|
-
fact_ids = store.provenance
|
|
431
|
-
.where(content_item_id: content_ids)
|
|
432
|
-
.select(:fact_id)
|
|
433
|
-
.distinct
|
|
434
|
-
.map { |row| row[:fact_id] }
|
|
435
|
-
|
|
436
|
-
return [] if fact_ids.empty?
|
|
437
|
-
|
|
438
|
-
# Batch fetch facts and their provenance
|
|
439
|
-
facts_by_id = batch_find_facts(store, fact_ids)
|
|
440
|
-
receipts_by_fact_id = batch_find_receipts(store, fact_ids)
|
|
441
|
-
|
|
442
|
-
results = Core::ResultBuilder.build_results(
|
|
443
|
-
fact_ids,
|
|
444
|
-
facts_by_id: facts_by_id,
|
|
445
|
-
receipts_by_fact_id: receipts_by_fact_id,
|
|
446
|
-
source: source
|
|
447
|
-
)
|
|
448
|
-
results.take(limit)
|
|
449
|
-
end
|
|
450
|
-
|
|
451
|
-
def facts_by_tool_dual(tool_name, limit:, scope:)
|
|
452
|
-
template = Recall::DualQueryTemplate.new(@manager)
|
|
453
|
-
results = template.execute(scope: scope, limit: limit) do |store, source|
|
|
454
|
-
facts_by_tool_single(store, tool_name, limit: limit, source: source)
|
|
455
|
-
end
|
|
456
|
-
dedupe_and_sort(results, limit)
|
|
457
|
-
end
|
|
458
|
-
|
|
459
|
-
def facts_by_tool_legacy(tool_name, limit:, scope:)
|
|
460
|
-
facts_by_tool_single(@legacy_store, tool_name, limit: limit, source: :legacy)
|
|
461
|
-
end
|
|
462
|
-
|
|
463
|
-
def facts_by_tool_single(store, tool_name, limit:, source:)
|
|
464
|
-
# Find content items where the tool was used
|
|
465
|
-
content_ids = store.tool_calls
|
|
466
|
-
.where(tool_name: tool_name)
|
|
467
|
-
.select(:content_item_id)
|
|
468
|
-
.distinct
|
|
469
|
-
.map { |row| row[:content_item_id] }
|
|
470
|
-
|
|
471
|
-
return [] if content_ids.empty?
|
|
472
|
-
|
|
473
|
-
# Find facts linked to those content items via provenance
|
|
474
|
-
fact_ids = store.provenance
|
|
475
|
-
.where(content_item_id: content_ids)
|
|
476
|
-
.select(:fact_id)
|
|
477
|
-
.distinct
|
|
478
|
-
.map { |row| row[:fact_id] }
|
|
479
|
-
|
|
480
|
-
return [] if fact_ids.empty?
|
|
481
|
-
|
|
482
|
-
# Batch fetch facts and their provenance
|
|
483
|
-
facts_by_id = batch_find_facts(store, fact_ids)
|
|
484
|
-
receipts_by_fact_id = batch_find_receipts(store, fact_ids)
|
|
485
|
-
|
|
486
|
-
results = Core::ResultBuilder.build_results(
|
|
487
|
-
fact_ids,
|
|
488
|
-
facts_by_id: facts_by_id,
|
|
489
|
-
receipts_by_fact_id: receipts_by_fact_id,
|
|
490
|
-
source: source
|
|
491
|
-
)
|
|
492
|
-
results.take(limit)
|
|
493
|
-
end
|
|
494
|
-
|
|
495
|
-
# Semantic search helpers
|
|
496
|
-
|
|
497
|
-
# Runs a semantic query across the stores selected by `scope`.
#
# Each store is queried through #query_semantic_single with three times the
# requested limit, so that the final dedupe still has enough candidates.
#
# text   - query text
# limit: - maximum number of results returned
# scope: - forwarded unchanged to DualQueryTemplate#execute
# mode:  - :text, :vector or :both (interpreted by #query_semantic_single)
def query_semantic_dual(text, limit:, scope:, mode:)
  per_store_limit = limit * 3
  dual_query = Recall::DualQueryTemplate.new(@manager)

  combined = dual_query.execute(scope: scope, limit: limit) do |store, source|
    query_semantic_single(store, text, source: source, mode: mode, limit: per_store_limit)
  end

  # Similarity-preserving dedupe (rather than a source/time sort) so the
  # RRF ordering is kept.
  Core::FactRanker.dedupe_by_fact_id(combined, limit)
end
|
|
505
|
-
|
|
506
|
-
# Legacy (single-store) variant of the semantic query.
#
# Queries @legacy_store only and tags results with the :legacy source.
# `scope` is accepted but not consulted in this method.
def query_semantic_legacy(text, limit:, scope:, mode:)
  legacy = @legacy_store
  query_semantic_single(legacy, text, source: :legacy, mode: mode, limit: limit)
end
|
|
509
|
-
|
|
510
|
-
# Runs a semantic query against a single store.
#
# mode: selects which searches run:
#   :text   - FTS only
#   :vector - vector search only
#   :both   - FTS first; vector search is skipped when #strong_fts_signal?
#             reports a strong match for the same text
# Any other mode runs neither search and merges two empty lists.
#
# Returns whatever #merge_search_results produces for the two result lists.
def query_semantic_single(store, text, limit:, mode:, source:)
  wants_text = %i[text both].include?(mode)
  wants_vector = %i[vector both].include?(mode)

  # FTS runs first so the strong-signal check below happens after it.
  text_results = wants_text ? search_by_fts(store, text, limit, source) : []

  vector_results =
    if wants_vector && !(mode == :both && strong_fts_signal?(store, text))
      search_by_vector(store, text, limit, source)
    else
      []
    end

  merge_search_results(vector_results, text_results, limit)
end
|
|
528
|
-
|
|
529
|
-
# Embeds `query_text` and runs a vector search against `store`.
#
# Uses the store's vector index (sqlite-vec KNN, per the original comment)
# when it reports itself available; otherwise falls back to the JSON + Ruby
# cosine-similarity path in #search_by_vector_fallback.
def search_by_vector(store, query_text, limit, source)
  embedding = @embedding_generator.generate(query_text)
  index = store.vector_index

  if index.available?
    search_by_vector_native(store, index, embedding, limit, source)
  else
    search_by_vector_fallback(store, embedding, limit, source)
  end
end
|
|
542
|
-
|
|
543
|
-
# Vector search through the store's native vector index.
#
# Asks `vec_index` for the `limit` nearest matches, then batch-loads the
# matched facts and their receipts and builds scored results.
# Each match is read as a hash with a :fact_id key.
#
# Returns [] when the index returns no matches.
def search_by_vector_native(store, vec_index, query_embedding, limit, source)
  knn_hits = vec_index.search(query_embedding, k: limit)
  return [] if knn_hits.empty?

  hit_fact_ids = knn_hits.map { |hit| hit[:fact_id] }

  Core::ResultBuilder.build_results_with_scores(
    knn_hits,
    facts_by_id: batch_find_facts(store, hit_fact_ids),
    receipts_by_fact_id: batch_find_receipts(store, hit_fact_ids),
    source: source
  )
end
|
|
558
|
-
|
|
559
|
-
# Vector search fallback used when the native vector index is unavailable.
#
# Loads up to 5000 facts with stored embeddings, scores only the distinct
# embeddings against `query_embedding`, then expands each winning embedding
# back to every fact that shares it before building scored results.
#
# Returns [] when there are no embedded facts or no parseable candidates.
def search_by_vector_fallback(store, query_embedding, limit, source)
  embedded_rows = store.facts_with_embeddings(limit: 5000)
  return [] if embedded_rows.empty?

  # Identical embeddings are scored once and fanned out afterwards.
  distinct_candidates, ids_by_embedding = dedup_candidates(embedded_rows)
  return [] if distinct_candidates.empty?

  best_distinct = Embeddings::Similarity.top_k(query_embedding, distinct_candidates, limit)
  expanded = fan_out_matches(best_distinct, ids_by_embedding, limit)

  expanded_ids = expanded.map { |match| match[:candidate][:fact_id] }

  Core::ResultBuilder.build_results_with_scores(
    expanded,
    facts_by_id: batch_find_facts(store, expanded_ids),
    receipts_by_fact_id: batch_find_receipts(store, expanded_ids),
    source: source
  )
end
|
|
587
|
-
|
|
588
|
-
# Group facts by embedding_json, return unique candidates + mapping
|
|
589
|
-
# Groups fact rows by their :embedding_json value.
#
# Returns a two-element array:
#   [0] one parsed candidate per distinct embedding (first row that parses)
#   [1] hash of embedding_json => array of row ids sharing that embedding
#
# Rows whose candidate cannot be parsed (parse_candidate returns nil/false)
# are skipped and do not open a group.
def dedup_candidates(facts_data)
  candidate_by_embedding = {}
  ids_by_embedding = {}

  facts_data.each do |row|
    embedding_key = row[:embedding_json]

    unless candidate_by_embedding.key?(embedding_key)
      parsed = Core::EmbeddingCandidateBuilder.parse_candidate(row)
      next unless parsed

      candidate_by_embedding[embedding_key] = parsed
      ids_by_embedding[embedding_key] = []
    end

    ids_by_embedding[embedding_key] << row[:id]
  end

  [candidate_by_embedding.values, ids_by_embedding]
end
|
|
607
|
-
|
|
608
|
-
# Expand unique matches back to all fact_ids sharing the same embedding
|
|
609
|
-
# Expands matches on unique embeddings back to every fact sharing that
# embedding.
#
# top_unique  - array of {candidate: Hash, similarity: Numeric}; each
#               candidate hash carries a :fact_id
# fact_groups - hash of group key => array of fact ids in that group
# limit       - maximum number of expanded matches to return
#
# Returns an array of {candidate:, similarity:} hashes, in `top_unique`
# order and then group order, where each candidate is a copy of the matched
# candidate with :fact_id replaced by one member of its group. Matches whose
# fact id belongs to no group are skipped. Never returns more than `limit`
# entries (including none when `limit` is zero or negative).
def fan_out_matches(top_unique, fact_groups, limit)
  # Reverse index built once, instead of scanning every group per match.
  # The first group containing an id wins, matching a first-hit scan.
  group_key_by_fact_id = {}
  fact_groups.each do |key, ids|
    ids.each do |id|
      group_key_by_fact_id[id] = key unless group_key_by_fact_id.key?(id)
    end
  end

  results = []
  top_unique.each do |match|
    break if results.size >= limit

    candidate = match[:candidate]
    group_key = group_key_by_fact_id[candidate[:fact_id]]
    # A nil/false key (or an unknown fact id) means there is no usable group.
    next unless group_key

    fact_groups[group_key].each do |fact_id|
      break if results.size >= limit

      results << {
        candidate: candidate.merge(fact_id: fact_id),
        similarity: match[:similarity]
      }
    end
  end

  results
end
|
|
631
|
-
|
|
632
|
-
# Full-text search against one store, returning scored fact results.
#
# Fetches up to `limit * 2` ranked content hits, maps each hit's content
# item to its facts through provenance, and assigns every fact the score of
# the first (best-ordered) hit that references it.
#
# Scores are derived from the FTS rank: with a non-zero spread between the
# lowest and highest rank, the lowest rank maps to 1.0 and the highest to
# 0.1 (FTS5 ranks are negative, more negative = better, per the original
# comments). When all ranks are equal, every hit gets 0.8.
#
# Returns at most `limit` built results, or [] when nothing matches.
def search_by_fts(store, query_text, limit, source)
  hits = Index::LexicalFTS.new(store).search_with_ranks(query_text, limit: limit * 2)
  return [] if hits.empty?

  hit_content_ids = hits.map { |hit| hit[:content_item_id] }
  links = store.provenance
    .where(content_item_id: hit_content_ids)
    .select(:fact_id, :content_item_id)
    .all
  links_by_content = links.group_by { |link| link[:content_item_id] }

  best_rank, worst_rank = hits.map { |hit| hit[:rank] }.minmax
  spread = (worst_rank - best_rank).abs

  # Walk hits in FTS order; uniq keeps the first entry seen for each fact id.
  scored_matches = hits.flat_map do |hit|
    score =
      if spread > 0
        0.1 + 0.9 * ((worst_rank - hit[:rank]).abs / spread)
      else
        0.8
      end

    (links_by_content[hit[:content_item_id]] || []).map do |link|
      {fact_id: link[:fact_id], similarity: score}
    end
  end.uniq { |scored| scored[:fact_id] }
  return [] if scored_matches.empty?

  matched_fact_ids = scored_matches.map { |scored| scored[:fact_id] }

  Core::ResultBuilder.build_results_with_scores(
    scored_matches,
    facts_by_id: batch_find_facts(store, matched_fact_ids),
    receipts_by_fact_id: batch_find_receipts(store, matched_fact_ids),
    source: source
  ).take(limit)
end
|
|
687
|
-
|
|
688
|
-
# Merges vector and text search results into one list of at most `limit`
# entries. Pure delegation to Core::FactRanker.merge_search_results.
def merge_search_results(vector_results, text_results, limit)
  ranker = Core::FactRanker
  ranker.merge_search_results(vector_results, text_results, limit)
end
|
|
691
|
-
|
|
692
|
-
# Reports whether the top FTS hits for `query_text` count as a strong match.
#
# Runs its own FTS query (top 5 ranked hits) against `store` and hands the
# hits to Recall::ExpansionDetector.strong_fts_signal? for the decision.
def strong_fts_signal?(store, query_text)
  top_hits = Index::LexicalFTS.new(store).search_with_ranks(query_text, limit: 5)
  Recall::ExpansionDetector.strong_fts_signal?(top_hits)
end
|
|
697
|
-
|
|
698
|
-
# Multi-concept search helpers
|
|
699
|
-
|
|
700
|
-
# Runs a multi-concept query across the stores selected by `scope`.
#
# Each store is queried through #query_concepts_single with twice the
# requested limit; the combined results are then deduplicated by fact id
# and capped at `limit` via #dedupe_by_fact_id.
def query_concepts_dual(concepts, limit:, scope:)
  per_store_limit = limit * 2
  dual_query = Recall::DualQueryTemplate.new(@manager)

  combined = dual_query.execute(scope: scope, limit: limit) do |store, source|
    query_concepts_single(store, concepts, source: source, limit: per_store_limit)
  end

  dedupe_by_fact_id(combined, limit)
end
|
|
708
|
-
|
|
709
|
-
# Legacy (single-store) variant of the multi-concept query.
#
# Queries @legacy_store only and tags results with the :legacy source.
# `scope` is accepted but not consulted in this method.
def query_concepts_legacy(concepts, limit:, scope:)
  legacy = @legacy_store
  query_concepts_single(legacy, concepts, source: :legacy, limit: limit)
end
|
|
712
|
-
|
|
713
|
-
# Multi-concept query against a single store.
#
# Every concept gets its own vector search with a widened limit
# (`limit * 5`) so the result sets have room to overlap; ranking the
# per-concept result lists is delegated to
# Core::ConceptRanker.rank_by_concepts, which receives the final `limit`.
def query_concepts_single(store, concepts, limit:, source:)
  widened_limit = limit * 5
  per_concept = concepts.map { |concept| search_by_vector(store, concept, widened_limit, source) }

  Core::ConceptRanker.rank_by_concepts(per_concept, limit)
end
|
|
722
|
-
|
|
723
|
-
def dedupe_by_fact_id(results, limit)
|
|
724
|
-
Core::FactRanker.dedupe_by_fact_id(results, limit)
|
|
172
|
+
@engine.query_concepts(concepts, limit: limit, scope: scope)
|
|
725
173
|
end
|
|
726
174
|
end
|
|
727
175
|
end
|