vemora 0.1.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vemora might be problematic. Click here for more details.
- package/README.md +759 -0
- package/dist/cli.d.ts +16 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +589 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/ask.d.ts +14 -0
- package/dist/commands/ask.d.ts.map +1 -0
- package/dist/commands/ask.js +137 -0
- package/dist/commands/ask.js.map +1 -0
- package/dist/commands/audit.d.ts +17 -0
- package/dist/commands/audit.d.ts.map +1 -0
- package/dist/commands/audit.js +408 -0
- package/dist/commands/audit.js.map +1 -0
- package/dist/commands/brief.d.ts +14 -0
- package/dist/commands/brief.d.ts.map +1 -0
- package/dist/commands/brief.js +73 -0
- package/dist/commands/brief.js.map +1 -0
- package/dist/commands/chat.d.ts +7 -0
- package/dist/commands/chat.d.ts.map +1 -0
- package/dist/commands/chat.js +155 -0
- package/dist/commands/chat.js.map +1 -0
- package/dist/commands/context.d.ts +61 -0
- package/dist/commands/context.d.ts.map +1 -0
- package/dist/commands/context.js +778 -0
- package/dist/commands/context.js.map +1 -0
- package/dist/commands/deps.d.ts +20 -0
- package/dist/commands/deps.d.ts.map +1 -0
- package/dist/commands/deps.js +138 -0
- package/dist/commands/deps.js.map +1 -0
- package/dist/commands/focus.d.ts +6 -0
- package/dist/commands/focus.d.ts.map +1 -0
- package/dist/commands/focus.js +302 -0
- package/dist/commands/focus.js.map +1 -0
- package/dist/commands/index.d.ts +10 -0
- package/dist/commands/index.d.ts.map +1 -0
- package/dist/commands/index.js +366 -0
- package/dist/commands/index.js.map +1 -0
- package/dist/commands/init-agent.d.ts +23 -0
- package/dist/commands/init-agent.d.ts.map +1 -0
- package/dist/commands/init-agent.js +447 -0
- package/dist/commands/init-agent.js.map +1 -0
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +122 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/knowledge.d.ts +8 -0
- package/dist/commands/knowledge.d.ts.map +1 -0
- package/dist/commands/knowledge.js +98 -0
- package/dist/commands/knowledge.js.map +1 -0
- package/dist/commands/plan.d.ts +16 -0
- package/dist/commands/plan.d.ts.map +1 -0
- package/dist/commands/plan.js +535 -0
- package/dist/commands/plan.js.map +1 -0
- package/dist/commands/query.d.ts +39 -0
- package/dist/commands/query.d.ts.map +1 -0
- package/dist/commands/query.js +389 -0
- package/dist/commands/query.js.map +1 -0
- package/dist/commands/remember.d.ts +11 -0
- package/dist/commands/remember.d.ts.map +1 -0
- package/dist/commands/remember.js +174 -0
- package/dist/commands/remember.js.map +1 -0
- package/dist/commands/report.d.ts +10 -0
- package/dist/commands/report.d.ts.map +1 -0
- package/dist/commands/report.js +180 -0
- package/dist/commands/report.js.map +1 -0
- package/dist/commands/status.d.ts +2 -0
- package/dist/commands/status.d.ts.map +1 -0
- package/dist/commands/status.js +127 -0
- package/dist/commands/status.js.map +1 -0
- package/dist/commands/summarize.d.ts +14 -0
- package/dist/commands/summarize.d.ts.map +1 -0
- package/dist/commands/summarize.js +181 -0
- package/dist/commands/summarize.js.map +1 -0
- package/dist/commands/triage.d.ts +33 -0
- package/dist/commands/triage.d.ts.map +1 -0
- package/dist/commands/triage.js +419 -0
- package/dist/commands/triage.js.map +1 -0
- package/dist/commands/usages.d.ts +14 -0
- package/dist/commands/usages.d.ts.map +1 -0
- package/dist/commands/usages.js +236 -0
- package/dist/commands/usages.js.map +1 -0
- package/dist/core/config.d.ts +35 -0
- package/dist/core/config.d.ts.map +1 -0
- package/dist/core/config.js +141 -0
- package/dist/core/config.js.map +1 -0
- package/dist/core/types.d.ts +274 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +4 -0
- package/dist/core/types.js.map +1 -0
- package/dist/embeddings/factory.d.ts +9 -0
- package/dist/embeddings/factory.d.ts.map +1 -0
- package/dist/embeddings/factory.js +26 -0
- package/dist/embeddings/factory.js.map +1 -0
- package/dist/embeddings/noop.d.ts +17 -0
- package/dist/embeddings/noop.d.ts.map +1 -0
- package/dist/embeddings/noop.js +22 -0
- package/dist/embeddings/noop.js.map +1 -0
- package/dist/embeddings/ollama.d.ts +11 -0
- package/dist/embeddings/ollama.d.ts.map +1 -0
- package/dist/embeddings/ollama.js +49 -0
- package/dist/embeddings/ollama.js.map +1 -0
- package/dist/embeddings/openai.d.ts +10 -0
- package/dist/embeddings/openai.d.ts.map +1 -0
- package/dist/embeddings/openai.js +67 -0
- package/dist/embeddings/openai.js.map +1 -0
- package/dist/embeddings/provider.d.ts +19 -0
- package/dist/embeddings/provider.d.ts.map +1 -0
- package/dist/embeddings/provider.js +3 -0
- package/dist/embeddings/provider.js.map +1 -0
- package/dist/indexer/callgraph.d.ts +16 -0
- package/dist/indexer/callgraph.d.ts.map +1 -0
- package/dist/indexer/callgraph.js +154 -0
- package/dist/indexer/callgraph.js.map +1 -0
- package/dist/indexer/chunkBySlidingWindow.d.ts +6 -0
- package/dist/indexer/chunkBySlidingWindow.d.ts.map +1 -0
- package/dist/indexer/chunkBySlidingWindow.js +30 -0
- package/dist/indexer/chunkBySlidingWindow.js.map +1 -0
- package/dist/indexer/chunkBySymbols.d.ts +7 -0
- package/dist/indexer/chunkBySymbols.d.ts.map +1 -0
- package/dist/indexer/chunkBySymbols.js +57 -0
- package/dist/indexer/chunkBySymbols.js.map +1 -0
- package/dist/indexer/chunker.d.ts +15 -0
- package/dist/indexer/chunker.d.ts.map +1 -0
- package/dist/indexer/chunker.js +26 -0
- package/dist/indexer/chunker.js.map +1 -0
- package/dist/indexer/classHeader.d.ts +7 -0
- package/dist/indexer/classHeader.d.ts.map +1 -0
- package/dist/indexer/classHeader.js +37 -0
- package/dist/indexer/classHeader.js.map +1 -0
- package/dist/indexer/deps.d.ts +66 -0
- package/dist/indexer/deps.d.ts.map +1 -0
- package/dist/indexer/deps.js +409 -0
- package/dist/indexer/deps.js.map +1 -0
- package/dist/indexer/hasher.d.ts +17 -0
- package/dist/indexer/hasher.d.ts.map +1 -0
- package/dist/indexer/hasher.js +38 -0
- package/dist/indexer/hasher.js.map +1 -0
- package/dist/indexer/parser.d.ts +18 -0
- package/dist/indexer/parser.d.ts.map +1 -0
- package/dist/indexer/parser.js +355 -0
- package/dist/indexer/parser.js.map +1 -0
- package/dist/indexer/scanner.d.ts +18 -0
- package/dist/indexer/scanner.d.ts.map +1 -0
- package/dist/indexer/scanner.js +37 -0
- package/dist/indexer/scanner.js.map +1 -0
- package/dist/indexer/strategy.d.ts +11 -0
- package/dist/indexer/strategy.d.ts.map +1 -0
- package/dist/indexer/strategy.js +15 -0
- package/dist/indexer/strategy.js.map +1 -0
- package/dist/indexer/tests.d.ts +15 -0
- package/dist/indexer/tests.d.ts.map +1 -0
- package/dist/indexer/tests.js +68 -0
- package/dist/indexer/tests.js.map +1 -0
- package/dist/indexer/todos.d.ts +9 -0
- package/dist/indexer/todos.d.ts.map +1 -0
- package/dist/indexer/todos.js +29 -0
- package/dist/indexer/todos.js.map +1 -0
- package/dist/llm/anthropic.d.ts +8 -0
- package/dist/llm/anthropic.d.ts.map +1 -0
- package/dist/llm/anthropic.js +76 -0
- package/dist/llm/anthropic.js.map +1 -0
- package/dist/llm/factory.d.ts +7 -0
- package/dist/llm/factory.d.ts.map +1 -0
- package/dist/llm/factory.js +39 -0
- package/dist/llm/factory.js.map +1 -0
- package/dist/llm/ollama.d.ts +8 -0
- package/dist/llm/ollama.d.ts.map +1 -0
- package/dist/llm/ollama.js +83 -0
- package/dist/llm/ollama.js.map +1 -0
- package/dist/llm/openai.d.ts +8 -0
- package/dist/llm/openai.d.ts.map +1 -0
- package/dist/llm/openai.js +68 -0
- package/dist/llm/openai.js.map +1 -0
- package/dist/llm/provider.d.ts +29 -0
- package/dist/llm/provider.d.ts.map +1 -0
- package/dist/llm/provider.js +3 -0
- package/dist/llm/provider.js.map +1 -0
- package/dist/search/bm25.d.ts +3 -0
- package/dist/search/bm25.d.ts.map +1 -0
- package/dist/search/bm25.js +102 -0
- package/dist/search/bm25.js.map +1 -0
- package/dist/search/formatter.d.ts +43 -0
- package/dist/search/formatter.d.ts.map +1 -0
- package/dist/search/formatter.js +208 -0
- package/dist/search/formatter.js.map +1 -0
- package/dist/search/hybrid.d.ts +10 -0
- package/dist/search/hybrid.d.ts.map +1 -0
- package/dist/search/hybrid.js +53 -0
- package/dist/search/hybrid.js.map +1 -0
- package/dist/search/merge.d.ts +33 -0
- package/dist/search/merge.d.ts.map +1 -0
- package/dist/search/merge.js +158 -0
- package/dist/search/merge.js.map +1 -0
- package/dist/search/mmr.d.ts +23 -0
- package/dist/search/mmr.d.ts.map +1 -0
- package/dist/search/mmr.js +95 -0
- package/dist/search/mmr.js.map +1 -0
- package/dist/search/rerank.d.ts +12 -0
- package/dist/search/rerank.d.ts.map +1 -0
- package/dist/search/rerank.js +113 -0
- package/dist/search/rerank.js.map +1 -0
- package/dist/search/signature.d.ts +42 -0
- package/dist/search/signature.d.ts.map +1 -0
- package/dist/search/signature.js +112 -0
- package/dist/search/signature.js.map +1 -0
- package/dist/search/vector.d.ts +41 -0
- package/dist/search/vector.d.ts.map +1 -0
- package/dist/search/vector.js +185 -0
- package/dist/search/vector.js.map +1 -0
- package/dist/storage/cache.d.ts +30 -0
- package/dist/storage/cache.d.ts.map +1 -0
- package/dist/storage/cache.js +160 -0
- package/dist/storage/cache.js.map +1 -0
- package/dist/storage/knowledge.d.ts +17 -0
- package/dist/storage/knowledge.d.ts.map +1 -0
- package/dist/storage/knowledge.js +58 -0
- package/dist/storage/knowledge.js.map +1 -0
- package/dist/storage/repository.d.ts +27 -0
- package/dist/storage/repository.d.ts.map +1 -0
- package/dist/storage/repository.js +95 -0
- package/dist/storage/repository.js.map +1 -0
- package/dist/storage/session.d.ts +38 -0
- package/dist/storage/session.d.ts.map +1 -0
- package/dist/storage/session.js +100 -0
- package/dist/storage/session.js.map +1 -0
- package/dist/storage/summaries.d.ts +19 -0
- package/dist/storage/summaries.d.ts.map +1 -0
- package/dist/storage/summaries.js +66 -0
- package/dist/storage/summaries.js.map +1 -0
- package/dist/storage/usage.d.ts +35 -0
- package/dist/storage/usage.d.ts.map +1 -0
- package/dist/storage/usage.js +55 -0
- package/dist/storage/usage.js.map +1 -0
- package/dist/utils/git.d.ts +15 -0
- package/dist/utils/git.d.ts.map +1 -0
- package/dist/utils/git.js +38 -0
- package/dist/utils/git.js.map +1 -0
- package/dist/utils/tokenizer.d.ts +24 -0
- package/dist/utils/tokenizer.d.ts.map +1 -0
- package/dist/utils/tokenizer.js +52 -0
- package/dist/utils/tokenizer.js.map +1 -0
- package/package.json +71 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.formatTerse = formatTerse;
|
|
4
|
+
exports.formatJson = formatJson;
|
|
5
|
+
exports.formatMarkdown = formatMarkdown;
|
|
6
|
+
const deps_1 = require("../indexer/deps");
|
|
7
|
+
const signature_1 = require("./signature");
|
|
8
|
+
// ─── Terse formatter ──────────────────────────────────────────────────────────
|
|
9
|
+
/**
|
|
10
|
+
* Ultra-compact one-liner per result for small/local models with limited context windows.
|
|
11
|
+
* Format: file:startLine | symbol (type) | score | first-line-of-signature
|
|
12
|
+
*
|
|
13
|
+
* ~70-80% fewer tokens than the markdown format. Recommended for models with
|
|
14
|
+
* context windows ≤ 32K, or whenever token budget is the primary constraint.
|
|
15
|
+
*/
|
|
16
|
+
function formatTerse(results, options) {
|
|
17
|
+
const topK = options.topK ?? 10;
|
|
18
|
+
const seen = new Set();
|
|
19
|
+
const lines = [];
|
|
20
|
+
for (const { chunk, score, symbol } of results) {
|
|
21
|
+
if (seen.has(chunk.id))
|
|
22
|
+
continue;
|
|
23
|
+
seen.add(chunk.id);
|
|
24
|
+
const symbolPart = chunk.symbol
|
|
25
|
+
? `${chunk.symbol} (${symbol?.type ?? "symbol"})`
|
|
26
|
+
: "(no symbol)";
|
|
27
|
+
const sig = (0, signature_1.extractSignature)(chunk.content).split("\n")[0].trim();
|
|
28
|
+
lines.push(`${chunk.file}:${chunk.start} | ${symbolPart} | ${score.toFixed(3)} | ${sig}`);
|
|
29
|
+
if (lines.length >= topK)
|
|
30
|
+
break;
|
|
31
|
+
}
|
|
32
|
+
return lines.join("\n");
|
|
33
|
+
}
|
|
34
|
+
function formatJson(query, results, depGraph, fileSummaries, options) {
|
|
35
|
+
const importedByMap = (0, deps_1.computeImportedBy)(depGraph);
|
|
36
|
+
const showCode = options.showCode ?? false;
|
|
37
|
+
const topK = options.topK ?? 10;
|
|
38
|
+
const seen = new Set();
|
|
39
|
+
const jsonResults = [];
|
|
40
|
+
for (const { chunk, score, symbol } of results) {
|
|
41
|
+
if (seen.has(chunk.id))
|
|
42
|
+
continue;
|
|
43
|
+
seen.add(chunk.id);
|
|
44
|
+
const rank = jsonResults.length + 1;
|
|
45
|
+
const tier = showCode ? "high" : (0, signature_1.getDisplayTier)(rank);
|
|
46
|
+
const fileDeps = depGraph[chunk.file];
|
|
47
|
+
const usedBy = importedByMap.get(chunk.file) ?? [];
|
|
48
|
+
let code = null;
|
|
49
|
+
let signature = null;
|
|
50
|
+
if (tier === "high") {
|
|
51
|
+
const codeLines = chunk.content.split("\n");
|
|
52
|
+
const limit = showCode ? codeLines.length : signature_1.HIGH_CODE_LINES;
|
|
53
|
+
code = codeLines.slice(0, limit).join("\n");
|
|
54
|
+
}
|
|
55
|
+
else if (tier === "med") {
|
|
56
|
+
signature = (0, signature_1.extractSignature)(chunk.content);
|
|
57
|
+
}
|
|
58
|
+
const fileSummary = fileSummaries[chunk.file];
|
|
59
|
+
jsonResults.push({
|
|
60
|
+
rank,
|
|
61
|
+
tier,
|
|
62
|
+
file: chunk.file,
|
|
63
|
+
symbol: chunk.symbol ?? null,
|
|
64
|
+
symbolType: symbol?.type ?? null,
|
|
65
|
+
lines: { start: chunk.start, end: chunk.end },
|
|
66
|
+
score: parseFloat(score.toFixed(4)),
|
|
67
|
+
code,
|
|
68
|
+
signature,
|
|
69
|
+
imports: fileDeps?.imports ?? [],
|
|
70
|
+
usedBy,
|
|
71
|
+
summary: fileSummary?.summary ?? null,
|
|
72
|
+
});
|
|
73
|
+
if (jsonResults.length >= topK)
|
|
74
|
+
break;
|
|
75
|
+
}
|
|
76
|
+
const output = {
|
|
77
|
+
query,
|
|
78
|
+
totalResults: jsonResults.length,
|
|
79
|
+
results: jsonResults,
|
|
80
|
+
};
|
|
81
|
+
return JSON.stringify(output, null, 2);
|
|
82
|
+
}
|
|
83
|
+
// ─── Markdown formatter ───────────────────────────────────────────────────────
|
|
84
|
+
function formatMarkdown(query, results, depGraph, fileSummaries, callGraph, options) {
|
|
85
|
+
const importedByMap = (0, deps_1.computeImportedBy)(depGraph);
|
|
86
|
+
const showCode = options.showCode ?? false;
|
|
87
|
+
const topK = options.topK ?? 10;
|
|
88
|
+
const lines = [];
|
|
89
|
+
lines.push(`## Relevant code for: \`${query}\``);
|
|
90
|
+
lines.push("");
|
|
91
|
+
const seen = new Set();
|
|
92
|
+
let displayed = 0;
|
|
93
|
+
for (const { chunk, score, symbol } of results) {
|
|
94
|
+
if (seen.has(chunk.id))
|
|
95
|
+
continue;
|
|
96
|
+
seen.add(chunk.id);
|
|
97
|
+
const rank = displayed + 1;
|
|
98
|
+
const tier = showCode ? "high" : (0, signature_1.getDisplayTier)(rank);
|
|
99
|
+
// Header
|
|
100
|
+
lines.push(`### ${rank}. \`${chunk.file}\``);
|
|
101
|
+
if (chunk.symbol) {
|
|
102
|
+
const symType = symbol?.type ?? "symbol";
|
|
103
|
+
lines.push(`**${symType}** \`${chunk.symbol}\` `);
|
|
104
|
+
}
|
|
105
|
+
lines.push(`Lines ${chunk.start}–${chunk.end} · Score: ${score.toFixed(4)} · Tier: ${tier}`);
|
|
106
|
+
lines.push("");
|
|
107
|
+
// Dependencies
|
|
108
|
+
if (tier !== "low") {
|
|
109
|
+
const fileDeps = depGraph[chunk.file];
|
|
110
|
+
const usedBy = importedByMap.get(chunk.file) ?? [];
|
|
111
|
+
if (fileDeps?.imports.length) {
|
|
112
|
+
const maxDeps = tier === "high" ? 6 : 3;
|
|
113
|
+
const shown = fileDeps.imports.slice(0, maxDeps);
|
|
114
|
+
const hidden = fileDeps.imports.length - shown.length;
|
|
115
|
+
lines.push("**Imports:**");
|
|
116
|
+
for (const imp of shown) {
|
|
117
|
+
const syms = imp.symbols.length > 0
|
|
118
|
+
? ` — \`${imp.symbols.slice(0, 4).join(", ")}\``
|
|
119
|
+
: "";
|
|
120
|
+
lines.push(`- \`${imp.file}\`${syms}`);
|
|
121
|
+
}
|
|
122
|
+
if (hidden > 0)
|
|
123
|
+
lines.push(`- _…and ${hidden} more_`);
|
|
124
|
+
lines.push("");
|
|
125
|
+
}
|
|
126
|
+
if (usedBy.length > 0) {
|
|
127
|
+
const maxUsed = tier === "high" ? 4 : 2;
|
|
128
|
+
const shown = usedBy.slice(0, maxUsed);
|
|
129
|
+
const hidden = usedBy.length - shown.length;
|
|
130
|
+
lines.push("**Used by (files):**");
|
|
131
|
+
for (const caller of shown) {
|
|
132
|
+
lines.push(`- \`${caller}\``);
|
|
133
|
+
}
|
|
134
|
+
if (hidden > 0)
|
|
135
|
+
lines.push(`- _…and ${hidden} more_`);
|
|
136
|
+
lines.push("");
|
|
137
|
+
}
|
|
138
|
+
// Call Graph context
|
|
139
|
+
const symbolId = chunk.symbol ? `${chunk.file}:${chunk.symbol}` : null;
|
|
140
|
+
const callInfo = symbolId ? callGraph[symbolId] : null;
|
|
141
|
+
if (callInfo) {
|
|
142
|
+
if (callInfo.calls.length > 0) {
|
|
143
|
+
const maxCalls = tier === "high" ? 6 : 3;
|
|
144
|
+
const shown = callInfo.calls.slice(0, maxCalls);
|
|
145
|
+
const hidden = callInfo.calls.length - shown.length;
|
|
146
|
+
lines.push("**Calls:**");
|
|
147
|
+
for (const call of shown) {
|
|
148
|
+
const loc = call.file ? ` (in \`${call.file}\`)` : "";
|
|
149
|
+
lines.push(`- \`${call.name}\`${loc}`);
|
|
150
|
+
}
|
|
151
|
+
if (hidden > 0)
|
|
152
|
+
lines.push(`- _…and ${hidden} more_`);
|
|
153
|
+
lines.push("");
|
|
154
|
+
}
|
|
155
|
+
if (callInfo.calledBy.length > 0) {
|
|
156
|
+
const maxCallers = tier === "high" ? 4 : 2;
|
|
157
|
+
const shown = callInfo.calledBy.slice(0, maxCallers);
|
|
158
|
+
const hidden = callInfo.calledBy.length - shown.length;
|
|
159
|
+
lines.push("**Called by:**");
|
|
160
|
+
for (const callerId of shown) {
|
|
161
|
+
lines.push(`- \`${callerId}\``);
|
|
162
|
+
}
|
|
163
|
+
if (hidden > 0)
|
|
164
|
+
lines.push(`- _…and ${hidden} more_`);
|
|
165
|
+
lines.push("");
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
// Code / signature / summary
|
|
170
|
+
if (tier === "high") {
|
|
171
|
+
const codeLines = chunk.content.split("\n");
|
|
172
|
+
const limit = showCode ? codeLines.length : signature_1.HIGH_CODE_LINES;
|
|
173
|
+
const preview = codeLines.slice(0, limit).join("\n");
|
|
174
|
+
const ext = chunk.file.split(".").pop() ?? "";
|
|
175
|
+
lines.push(`\`\`\`${ext}`);
|
|
176
|
+
lines.push(preview);
|
|
177
|
+
if (codeLines.length > limit) {
|
|
178
|
+
lines.push(`// … (${codeLines.length - limit} more lines — use --show-code to expand)`);
|
|
179
|
+
}
|
|
180
|
+
lines.push("```");
|
|
181
|
+
}
|
|
182
|
+
else if (tier === "med") {
|
|
183
|
+
const sig = (0, signature_1.extractSignature)(chunk.content);
|
|
184
|
+
const ext = chunk.file.split(".").pop() ?? "";
|
|
185
|
+
lines.push(`\`\`\`${ext}`);
|
|
186
|
+
lines.push(sig);
|
|
187
|
+
lines.push("```");
|
|
188
|
+
}
|
|
189
|
+
else {
|
|
190
|
+
// LOW — summary if available
|
|
191
|
+
const fileSummary = fileSummaries[chunk.file];
|
|
192
|
+
if (fileSummary) {
|
|
193
|
+
lines.push(`> ${fileSummary.summary}`);
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
lines.push("");
|
|
197
|
+
lines.push("---");
|
|
198
|
+
lines.push("");
|
|
199
|
+
displayed++;
|
|
200
|
+
if (displayed >= topK)
|
|
201
|
+
break;
|
|
202
|
+
}
|
|
203
|
+
if (displayed === 0) {
|
|
204
|
+
lines.push("_No results found._");
|
|
205
|
+
}
|
|
206
|
+
return lines.join("\n");
|
|
207
|
+
}
|
|
208
|
+
//# sourceMappingURL=formatter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"formatter.js","sourceRoot":"","sources":["../../src/search/formatter.ts"],"names":[],"mappings":";;AA0BA,kCAyBC;AAyBD,gCA8DC;AAID,wCAmJC;AA3RD,0CAAoD;AACpD,2CAAgF;AAUhF,iFAAiF;AAEjF;;;;;;GAMG;AACH,SAAgB,WAAW,CACzB,OAAuB,EACvB,OAAoC;IAEpC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;IAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAAE,SAAS;QACjC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAEnB,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM;YAC7B,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,KAAK,MAAM,EAAE,IAAI,IAAI,QAAQ,GAAG;YACjD,CAAC,CAAC,aAAa,CAAC;QAClB,MAAM,GAAG,GAAG,IAAA,4BAAgB,EAAC,KAAK,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAElE,KAAK,CAAC,IAAI,CACR,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,KAAK,MAAM,UAAU,MAAM,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,EAAE,CAC9E,CAAC;QAEF,IAAI,KAAK,CAAC,MAAM,IAAI,IAAI;YAAE,MAAM;IAClC,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAyBD,SAAgB,UAAU,CACxB,KAAa,EACb,OAAuB,EACvB,QAAyB,EACzB,aAA+B,EAC/B,OAAsB;IAEtB,MAAM,aAAa,GAAG,IAAA,wBAAiB,EAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,KAAK,CAAC;IAC3C,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;IAEhC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,WAAW,GAAiB,EAAE,CAAC;IAErC,KAAK,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAAE,SAAS;QACjC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAEnB,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;QACpC,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAA,0BAAc,EAAC,IAAI,CAAC,CAAC;QAEtD,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAEnD,IAAI,IAAI,GAAkB,IAAI,CAAC;QAC/B,IAAI,SAAS,GAAkB,IAAI,CAAC;QAEpC,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC5C,MAAM,KAA
K,GAAG,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,2BAAe,CAAC;YAC5D,IAAI,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9C,CAAC;aAAM,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;YAC1B,SAAS,GAAG,IAAA,4BAAgB,EAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,MAAM,WAAW,GAAG,aAAa,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE9C,WAAW,CAAC,IAAI,CAAC;YACf,IAAI;YACJ,IAAI;YACJ,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,IAAI;YAC5B,UAAU,EAAE,MAAM,EAAE,IAAI,IAAI,IAAI;YAChC,KAAK,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE;YAC7C,KAAK,EAAE,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YACnC,IAAI;YACJ,SAAS;YACT,OAAO,EAAE,QAAQ,EAAE,OAAO,IAAI,EAAE;YAChC,MAAM;YACN,OAAO,EAAE,WAAW,EAAE,OAAO,IAAI,IAAI;SACtC,CAAC,CAAC;QAEH,IAAI,WAAW,CAAC,MAAM,IAAI,IAAI;YAAE,MAAM;IACxC,CAAC;IAED,MAAM,MAAM,GAAe;QACzB,KAAK;QACL,YAAY,EAAE,WAAW,CAAC,MAAM;QAChC,OAAO,EAAE,WAAW;KACrB,CAAC;IAEF,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AACzC,CAAC;AAED,iFAAiF;AAEjF,SAAgB,cAAc,CAC5B,KAAa,EACb,OAAuB,EACvB,QAAyB,EACzB,aAA+B,EAC/B,SAAoB,EACpB,OAAsB;IAEtB,MAAM,aAAa,GAAG,IAAA,wBAAiB,EAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,KAAK,CAAC;IAC3C,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;IAEhC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,2BAA2B,KAAK,IAAI,CAAC,CAAC;IACjD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAAE,SAAS;QACjC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAEnB,MAAM,IAAI,GAAG,SAAS,GAAG,CAAC,CAAC;QAC3B,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAA,0BAAc,EAAC,IAAI,CAAC,CAAC;QAEtD,SAAS;QACT,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,OAAO,KAAK,CAAC,IAAI,IAAI,CAAC,CAAC;QAC7C,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,OAAO,GAAG,MAAM,EAAE,IAAI,IAAI,QAAQ,CAAC;YACzC,KAAK,CAAC,IAAI,CAAC,KAAK,OAAO,QAAQ,KAAK,CAAC,MAAM,MAAM,CAAC,CAAC;QACrD,CAAC;QACD,KA
AK,CAAC,IAAI,CACR,SAAS,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,GAAG,aAAa,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,IAAI,EAAE,CACjF,CAAC;QACF,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,eAAe;QACf,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;YACnB,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACtC,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAEnD,IAAI,QAAQ,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC;gBAC7B,MAAM,OAAO,GAAG,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxC,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBACjD,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;gBAEtD,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC3B,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;oBACxB,MAAM,IAAI,GACR,GAAG,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC;wBACpB,CAAC,CAAC,QAAQ,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI;wBAChD,CAAC,CAAC,EAAE,CAAC;oBACT,KAAK,CAAC,IAAI,CAAC,OAAO,GAAG,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC,CAAC;gBACzC,CAAC;gBACD,IAAI,MAAM,GAAG,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,QAAQ,CAAC,CAAC;gBACtD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACjB,CAAC;YAED,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBACvC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;gBAE5C,KAAK,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;gBACnC,KAAK,MAAM,MAAM,IAAI,KAAK,EAAE,CAAC;oBAC3B,KAAK,CAAC,IAAI,CAAC,OAAO,MAAM,IAAI,CAAC,CAAC;gBAChC,CAAC;gBACD,IAAI,MAAM,GAAG,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,QAAQ,CAAC,CAAC;gBACtD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACjB,CAAC;YAED,qBAAqB;YACrB,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;YACvE,MAAM,QAAQ,GAAG,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAEvD,IAAI,QAAQ,EAAE,CAAC;gBACb,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC9B,MAAM,QAAQ,GAAG,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,
CAAC,CAAC,CAAC;oBACzC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;oBAChD,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;oBAEpD,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBACzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;wBACzB,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;wBACtD,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,IAAI,KAAK,GAAG,EAAE,CAAC,CAAC;oBACzC,CAAC;oBACD,IAAI,MAAM,GAAG,CAAC;wBAAE,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,QAAQ,CAAC,CAAC;oBACtD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACjB,CAAC;gBAED,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACjC,MAAM,UAAU,GAAG,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC3C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;oBACrD,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;oBAEvD,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;oBAC7B,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;wBAC7B,KAAK,CAAC,IAAI,CAAC,OAAO,QAAQ,IAAI,CAAC,CAAC;oBAClC,CAAC;oBACD,IAAI,MAAM,GAAG,CAAC;wBAAE,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,QAAQ,CAAC,CAAC;oBACtD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACjB,CAAC;YACH,CAAC;QACH,CAAC;QAED,6BAA6B;QAC7B,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,2BAAe,CAAC;YAC5D,MAAM,OAAO,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrD,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;YAC9C,KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC;YAC3B,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACpB,IAAI,SAAS,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;gBAC7B,KAAK,CAAC,IAAI,CACR,SAAS,SAAS,CAAC,MAAM,GAAG,KAAK,0CAA0C,CAC5E,CAAC;YACJ,CAAC;YACD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;aAAM,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;YAC1B,MAAM,GAAG,GAAG,IAAA,4BAAgB,EAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC5C,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;YAC9C,
KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC;YAC3B,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAChB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;aAAM,CAAC;YACN,6BAA6B;YAC7B,MAAM,WAAW,GAAG,aAAa,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC9C,IAAI,WAAW,EAAE,CAAC;gBAChB,KAAK,CAAC,IAAI,CAAC,KAAK,WAAW,CAAC,OAAO,EAAE,CAAC,CAAC;YACzC,CAAC;QACH,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,SAAS,EAAE,CAAC;QACZ,IAAI,SAAS,IAAI,IAAI;YAAE,MAAM;IAC/B,CAAC;IAED,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;QACpB,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IACpC,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { Chunk, EmbeddingCache, SearchResult, SymbolIndex } from "../core/types";
|
|
2
|
+
/** Tuning knobs accepted by `hybridSearch`. Both fields are optional. */
export interface HybridOptions {
    /**
     * Weight given to the vector-search score when the two scores are blended;
     * the BM25 score is weighted by `1 - alpha`. The implementation falls back
     * to 0.7 when this is omitted.
     */
    alpha?: number;
    /**
     * Maximum number of results to return. The implementation falls back to 10
     * when this is omitted.
     */
    topK?: number;
}
|
|
6
|
+
/**
 * Combines Vector Search (semantic) and BM25 Search (keyword) for higher accuracy.
 *
 * @param query Raw query text, used for the keyword (BM25) side.
 * @param queryEmbedding Embedding of the query, or `null` when none is available.
 * @param chunks Candidate chunks to search over.
 * @param cache Embedding cache handed to the vector search.
 * @param symbols Symbol index handed to both searches.
 * @param options Optional blending weight and result limit; see `HybridOptions`.
 * @returns A promise resolving to the combined search results.
 */
export declare function hybridSearch(
    query: string,
    queryEmbedding: number[] | null,
    chunks: Chunk[],
    cache: EmbeddingCache,
    symbols: SymbolIndex,
    options?: HybridOptions
): Promise<SearchResult[]>;
|
|
10
|
+
//# sourceMappingURL=hybrid.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../src/search/hybrid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,KAAK,EACL,cAAc,EACd,YAAY,EAEZ,WAAW,EACZ,MAAM,eAAe,CAAC;AAIvB,MAAM,WAAW,aAAa;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,wBAAsB,YAAY,CAChC,KAAK,EAAE,MAAM,EACb,cAAc,EAAE,MAAM,EAAE,GAAG,IAAI,EAC/B,MAAM,EAAE,KAAK,EAAE,EACf,KAAK,EAAE,cAAc,EACrB,OAAO,EAAE,WAAW,EACpB,OAAO,GAAE,aAAkB,GAC1B,OAAO,CAAC,YAAY,EAAE,CAAC,CAoEzB"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.hybridSearch = hybridSearch;
|
|
4
|
+
const bm25_1 = require("./bm25");
|
|
5
|
+
const vector_1 = require("./vector");
|
|
6
|
+
/**
 * Hybrid retrieval: blends vector-similarity (semantic) scores with BM25
 * (keyword) scores into a single ranking.
 *
 * Both searches are asked for three times `topK` candidates. BM25 scores are
 * divided by the highest BM25 score returned; vector scores are used as-is.
 * A candidate's final score is `alpha * vector + (1 - alpha) * bm25`, where a
 * side that did not return the candidate contributes 0. The vector search is
 * skipped entirely when `queryEmbedding` is null or empty.
 *
 * @param query Raw query text for the BM25 side.
 * @param queryEmbedding Query embedding, or null/empty to run keyword-only.
 * @param chunks Chunks to search.
 * @param cache Embedding cache forwarded to the vector search.
 * @param symbols Symbol index forwarded to both searches.
 * @param options `alpha` (default 0.7) and `topK` (default 10).
 * @returns Promise of up to `topK` `{ chunk, score, symbol }` entries, best first.
 */
async function hybridSearch(query, queryEmbedding, chunks, cache, symbols, options = {}) {
    const vectorWeight = options.alpha ?? 0.7;
    const limit = options.topK ?? 10;
    // Over-fetch from each engine so the blend has more candidates than it returns.
    const candidateCount = limit * 3;
    // Semantic side: only runs when a non-empty embedding was supplied.
    const semanticHits = queryEmbedding && queryEmbedding.length > 0
        ? (0, vector_1.vectorSearch)(queryEmbedding, chunks, cache, symbols, candidateCount)
        : [];
    // Keyword side.
    const keywordHits = (0, bm25_1.computeBM25Scores)(query, chunks, symbols, candidateCount);
    // Index both hit lists by chunk id for constant-time lookup while blending.
    const semanticById = new Map();
    for (const hit of semanticHits) {
        semanticById.set(hit.chunk.id, hit);
    }
    const keywordById = new Map();
    for (const hit of keywordHits) {
        keywordById.set(hit.chunk.id, hit);
    }
    // BM25 scores are scaled relative to the best keyword hit.
    let topKeywordScore = 1;
    if (keywordHits.length > 0) {
        topKeywordScore = Math.max(...keywordHits.map((hit) => hit.score));
    }
    // Walk every id seen by either engine: semantic ids first, then keyword-only ids.
    const blended = [];
    for (const id of new Set([...semanticById.keys(), ...keywordById.keys()])) {
        const semantic = semanticById.get(id);
        const keyword = keywordById.get(id);
        // `|| 0` also maps a NaN score (e.g. 0 / 0 when every BM25 score is 0) to 0.
        const semanticScore = semantic?.score || 0;
        const keywordScore = (keyword ? keyword.score / topKeywordScore : 0) || 0;
        // Chunk and symbol come from the semantic hit when both engines returned the id.
        const source = semantic || keyword;
        blended.push({
            chunk: source.chunk,
            score: vectorWeight * semanticScore + (1 - vectorWeight) * keywordScore,
            symbol: source.symbol,
        });
    }
    blended.sort((left, right) => right.score - left.score);
    return blended.slice(0, limit);
}
|
|
53
|
+
//# sourceMappingURL=hybrid.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid.js","sourceRoot":"","sources":["../../src/search/hybrid.ts"],"names":[],"mappings":";;AAkBA,oCA2EC;AAtFD,iCAA2C;AAC3C,qCAAwC;AAOxC;;GAEG;AACI,KAAK,UAAU,YAAY,CAChC,KAAa,EACb,cAA+B,EAC/B,MAAe,EACf,KAAqB,EACrB,OAAoB,EACpB,UAAyB,EAAE;IAE3B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,GAAG,CAAC;IACnC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;IAEhC,uBAAuB;IACvB,IAAI,aAAa,GAAmB,EAAE,CAAC;IACvC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,CAAC;IAC5B,IAAI,cAAc,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChD,aAAa,GAAG,IAAA,qBAAY,EAC1B,cAAc,EACd,MAAM,EACN,KAAK,EACL,OAAO,EACP,UAAU,CACX,CAAC;IACJ,CAAC;IAED,qBAAqB;IACrB,MAAM,WAAW,GAAG,IAAA,wBAAiB,EAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;IAE1E,2BAA2B;IAC3B,MAAM,WAAW,GAAG,IAAI,GAAG,EAGxB,CAAC;IAEJ,yCAAyC;IACzC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAmD,CAAC;IACxE,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAC1B,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CACpD,CAAC;IAEF,MAAM,IAAI,GAAG,IAAI,GAAG,EAAmD,CAAC;IACxE,uEAAuE;IACvE,MAAM,OAAO,GACX,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5E,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CACxB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,OAAO,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAC9D,CAAC;IAEF,yDAAyD;IACzD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,EAAE,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAEzD,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;QACxB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAE3B,MAAM,MAAM,GAAG,KAAK,EAAE,KAAK,IAAI,CAAC,CAAC;QACjC,MAAM,MAAM,GAAG,KAAK,EAAE,KAAK,IAAI,CAAC,CAAC;QAEjC,uBAAuB;QACvB,MAAM,UAAU,GAAG,KAAK,GAAG,MAAM,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC,GAAG,MAAM,CAAC;QAEzD,oCAAoC;QACpC,MAAM,MAAM,GAAG,KAAK,EAAE,MAAM,IAAI,KAAK,EAAE,MAAM,CAAC;QAC9C,IAAI,MAAM,EAAE,CAAC;YACX,WAAW,CAAC,GAAG,CAAC,EAAE,EAA
E;gBAClB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,KAAK,EAAE,UAAU;gBACjB,MAAM,EAAE,MAAM,CAAC,MAAM;aACtB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,OAAO,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC;SACpC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;SACjC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AACpB,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { EmbeddingCache, SearchResult } from "../core/types";
|
|
2
|
+
/**
|
|
3
|
+
* Merges adjacent or overlapping chunks from the same file into a single chunk.
|
|
4
|
+
*
|
|
5
|
+
* Two chunks are merged when the next chunk's start line is within `gapThreshold`
|
|
6
|
+
* lines of the current chunk's end line. Overlapping lines (from sliding-window
|
|
7
|
+
* chunks) are deduplicated by skipping the already-covered lines when appending.
|
|
8
|
+
*
|
|
9
|
+
* Properties of the merged result:
|
|
10
|
+
* - `start` / `end` span the full combined line range
|
|
11
|
+
* - `content` is reconstructed without duplication
|
|
12
|
+
* - `score` is the maximum score among merged chunks
|
|
13
|
+
* - `symbol` is preserved only when all merged chunks share the same symbol name
|
|
14
|
+
*
|
|
15
|
+
* @param results Search results (any order).
|
|
16
|
+
* @param gapThreshold Max line gap between chunks to still merge them (default: 3).
|
|
17
|
+
*/
|
|
18
|
+
export declare function mergeAdjacentChunks(results: SearchResult[], gapThreshold?: number): SearchResult[];
|
|
19
|
+
/**
|
|
20
|
+
* Removes near-duplicate chunks before token budget is applied.
|
|
21
|
+
*
|
|
22
|
+
* Two chunks are considered near-duplicates when:
|
|
23
|
+
* - Embeddings available: cosineSimilarity(a, b) > threshold (default 0.92)
|
|
24
|
+
* - Embedding missing for either chunk: Jaccard similarity of word-token sets > 0.65
|
|
25
|
+
*
|
|
26
|
+
* Processes results in score order (highest first); earlier chunks win.
|
|
27
|
+
* The first result is always kept regardless of similarity.
|
|
28
|
+
*
|
|
29
|
+
* @param cache Embedding cache (may be null — falls back to Jaccard)
|
|
30
|
+
* @param threshold Cosine similarity above which two chunks are near-duplicates
|
|
31
|
+
*/
|
|
32
|
+
export declare function deduplicateBySimilarity(results: SearchResult[], cache: EmbeddingCache | null, threshold?: number): SearchResult[];
|
|
33
|
+
//# sourceMappingURL=merge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"merge.d.ts","sourceRoot":"","sources":["../../src/search/merge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAGlE;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,mBAAmB,CACjC,OAAO,EAAE,YAAY,EAAE,EACvB,YAAY,SAAI,GACf,YAAY,EAAE,CAwEhB;AAID;;;;;;;;;;;;GAYG;AACH,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,YAAY,EAAE,EACvB,KAAK,EAAE,cAAc,GAAG,IAAI,EAC5B,SAAS,SAAO,GACf,YAAY,EAAE,CAgDhB"}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.mergeAdjacentChunks = mergeAdjacentChunks;
|
|
4
|
+
exports.deduplicateBySimilarity = deduplicateBySimilarity;
|
|
5
|
+
const vector_1 = require("./vector");
|
|
6
|
+
/**
|
|
7
|
+
* Merges adjacent or overlapping chunks from the same file into a single chunk.
|
|
8
|
+
*
|
|
9
|
+
* Two chunks are merged when the next chunk's start line is within `gapThreshold`
|
|
10
|
+
* lines of the current chunk's end line. Overlapping lines (from sliding-window
|
|
11
|
+
* chunks) are deduplicated by skipping the already-covered lines when appending.
|
|
12
|
+
*
|
|
13
|
+
* Properties of the merged result:
|
|
14
|
+
* - `start` / `end` span the full combined line range
|
|
15
|
+
* - `content` is reconstructed without duplication
|
|
16
|
+
* - `score` is the maximum score among merged chunks
|
|
17
|
+
* - `symbol` is preserved only when all merged chunks share the same symbol name
|
|
18
|
+
*
|
|
19
|
+
* @param results Search results (any order).
|
|
20
|
+
* @param gapThreshold Max line gap between chunks to still merge them (default: 3).
|
|
21
|
+
*/
|
|
22
|
+
function mergeAdjacentChunks(results, gapThreshold = 3) {
|
|
23
|
+
if (results.length <= 1)
|
|
24
|
+
return results;
|
|
25
|
+
// Group results by file
|
|
26
|
+
const byFile = new Map();
|
|
27
|
+
for (const result of results) {
|
|
28
|
+
const list = byFile.get(result.chunk.file) ?? [];
|
|
29
|
+
list.push(result);
|
|
30
|
+
byFile.set(result.chunk.file, list);
|
|
31
|
+
}
|
|
32
|
+
const merged = [];
|
|
33
|
+
for (const fileResults of byFile.values()) {
|
|
34
|
+
// Sort ascending by start line so we can merge left-to-right
|
|
35
|
+
const sorted = [...fileResults].sort((a, b) => a.chunk.start - b.chunk.start);
|
|
36
|
+
let cur = sorted[0];
|
|
37
|
+
for (let i = 1; i < sorted.length; i++) {
|
|
38
|
+
const next = sorted[i];
|
|
39
|
+
if (next.chunk.start <= cur.chunk.end + gapThreshold) {
|
|
40
|
+
// Deduplicate overlapping lines: compute how many lines of `next` are
|
|
41
|
+
// already covered by `cur` based on line numbers, but cap at the actual
|
|
42
|
+
// content length to handle reconstructed (previously merged) chunks.
|
|
43
|
+
const overlapByLineNum = Math.max(0, cur.chunk.end - next.chunk.start + 1);
|
|
44
|
+
const nextLines = next.chunk.content.split("\n");
|
|
45
|
+
const overlapLines = Math.min(overlapByLineNum, nextLines.length);
|
|
46
|
+
const appendLines = nextLines.slice(overlapLines);
|
|
47
|
+
const mergedContent = appendLines.length > 0
|
|
48
|
+
? cur.chunk.content + "\n" + appendLines.join("\n")
|
|
49
|
+
: cur.chunk.content;
|
|
50
|
+
const mergedSymbol = cur.chunk.symbol !== undefined &&
|
|
51
|
+
cur.chunk.symbol === next.chunk.symbol
|
|
52
|
+
? cur.chunk.symbol
|
|
53
|
+
: undefined;
|
|
54
|
+
cur = {
|
|
55
|
+
chunk: {
|
|
56
|
+
id: cur.chunk.id + "+" + next.chunk.id,
|
|
57
|
+
file: cur.chunk.file,
|
|
58
|
+
start: cur.chunk.start,
|
|
59
|
+
end: Math.max(cur.chunk.end, next.chunk.end),
|
|
60
|
+
symbol: mergedSymbol,
|
|
61
|
+
content: mergedContent,
|
|
62
|
+
},
|
|
63
|
+
score: Math.max(cur.score, next.score),
|
|
64
|
+
symbol: mergedSymbol !== undefined
|
|
65
|
+
? (cur.symbol ?? next.symbol)
|
|
66
|
+
: undefined,
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
else {
|
|
70
|
+
merged.push(cur);
|
|
71
|
+
cur = next;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
merged.push(cur);
|
|
75
|
+
}
|
|
76
|
+
// Restore score-descending order
|
|
77
|
+
return merged.sort((a, b) => b.score - a.score);
|
|
78
|
+
}
|
|
79
|
+
// ─── Semantic deduplication ───────────────────────────────────────────────────
|
|
80
|
+
/**
|
|
81
|
+
* Removes near-duplicate chunks before token budget is applied.
|
|
82
|
+
*
|
|
83
|
+
* Two chunks are considered near-duplicates when:
|
|
84
|
+
* - Embeddings available: cosineSimilarity(a, b) > threshold (default 0.92)
|
|
85
|
+
* - No embeddings: Jaccard similarity of word-token sets > 0.80
|
|
86
|
+
*
|
|
87
|
+
* Processes results in score order (highest first); earlier chunks win.
|
|
88
|
+
* The first result is always kept regardless of similarity.
|
|
89
|
+
*
|
|
90
|
+
* @param cache Embedding cache (may be null — falls back to Jaccard)
|
|
91
|
+
* @param threshold Cosine similarity above which two chunks are near-duplicates
|
|
92
|
+
*/
|
|
93
|
+
function deduplicateBySimilarity(results, cache, threshold = 0.92) {
|
|
94
|
+
if (results.length <= 1)
|
|
95
|
+
return results;
|
|
96
|
+
// Precompute a mapping from chunk ID to embedding for O(1) retrieval during deduplication.
|
|
97
|
+
let idToIndex;
|
|
98
|
+
if (cache && cache.chunkIds) {
|
|
99
|
+
idToIndex = new Map(cache.chunkIds.map((id, i) => [id, i]));
|
|
100
|
+
}
|
|
101
|
+
const embeddingOf = (id) => {
|
|
102
|
+
if (!cache)
|
|
103
|
+
return null;
|
|
104
|
+
// Dense binary format (chunkIds + vectors buffer)
|
|
105
|
+
if (cache.chunkIds && cache.vectors && idToIndex) {
|
|
106
|
+
const idx = idToIndex.get(id);
|
|
107
|
+
if (idx !== undefined) {
|
|
108
|
+
const dims = cache.dimensions;
|
|
109
|
+
return Array.from(cache.vectors.subarray(idx * dims, idx * dims + dims));
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
// Legacy map format
|
|
113
|
+
return cache.embeddings?.[id] ?? null;
|
|
114
|
+
};
|
|
115
|
+
const selected = [];
|
|
116
|
+
const selectedEmbeddings = [];
|
|
117
|
+
for (const candidate of results) {
|
|
118
|
+
const candidateEmb = embeddingOf(candidate.chunk.id);
|
|
119
|
+
let tooSimilar = false;
|
|
120
|
+
for (let i = 0; i < selected.length; i++) {
|
|
121
|
+
const selEmb = selectedEmbeddings[i];
|
|
122
|
+
if (candidateEmb && selEmb) {
|
|
123
|
+
tooSimilar = (0, vector_1.cosineSimilarity)(candidateEmb, selEmb) > threshold;
|
|
124
|
+
}
|
|
125
|
+
else {
|
|
126
|
+
tooSimilar =
|
|
127
|
+
jaccardSimilarity(candidate.chunk.content, selected[i].chunk.content) >
|
|
128
|
+
0.65;
|
|
129
|
+
}
|
|
130
|
+
if (tooSimilar)
|
|
131
|
+
break;
|
|
132
|
+
}
|
|
133
|
+
if (!tooSimilar) {
|
|
134
|
+
selected.push(candidate);
|
|
135
|
+
selectedEmbeddings.push(candidateEmb);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
return selected;
|
|
139
|
+
}
|
|
140
|
+
function jaccardSimilarity(a, b) {
|
|
141
|
+
const tokA = wordTokens(a);
|
|
142
|
+
const tokB = wordTokens(b);
|
|
143
|
+
if (tokA.size === 0 && tokB.size === 0)
|
|
144
|
+
return 1;
|
|
145
|
+
let intersection = 0;
|
|
146
|
+
for (const t of tokA)
|
|
147
|
+
if (tokB.has(t))
|
|
148
|
+
intersection++;
|
|
149
|
+
const union = tokA.size + tokB.size - intersection;
|
|
150
|
+
return union === 0 ? 0 : intersection / union;
|
|
151
|
+
}
|
|
152
|
+
/**
 * Splits text into a set of lower-cased word tokens for lexical comparison.
 *
 * A token is a run of letters, combining marks, digits or underscores; every
 * other character is a separator. Tokens shorter than two characters are
 * discarded. The split is Unicode-aware so that non-ASCII words stay intact:
 * an ASCII-only `\W` split would break "größe" into "gr" / "e" and reduce
 * text in non-Latin scripts to no tokens at all.
 *
 * @param text Text to tokenize.
 * @returns The distinct tokens found in `text`.
 */
function wordTokens(text) {
    return new Set(text
        .toLowerCase()
        .split(/[^\p{L}\p{M}\p{N}_]+/u)
        .filter((t) => t.length >= 2));
}
|
|
158
|
+
//# sourceMappingURL=merge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"merge.js","sourceRoot":"","sources":["../../src/search/merge.ts"],"names":[],"mappings":";;AAmBA,kDA2EC;AAiBD,0DAoDC;AAlKD,qCAA4C;AAE5C;;;;;;;;;;;;;;;GAeG;AACH,SAAgB,mBAAmB,CACjC,OAAuB,EACvB,YAAY,GAAG,CAAC;IAEhB,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IAExC,wBAAwB;IACxB,MAAM,MAAM,GAAG,IAAI,GAAG,EAA0B,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACjD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClB,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACtC,CAAC;IAED,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,KAAK,MAAM,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;QAC1C,6DAA6D;QAC7D,MAAM,MAAM,GAAG,CAAC,GAAG,WAAW,CAAC,CAAC,IAAI,CAClC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,CACxC,CAAC;QACF,IAAI,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAEpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YAEvB,IAAI,IAAI,CAAC,KAAK,CAAC,KAAK,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,YAAY,EAAE,CAAC;gBACrD,sEAAsE;gBACtE,wEAAwE;gBACxE,qEAAqE;gBACrE,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAC/B,CAAC,EACD,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CACrC,CAAC;gBACF,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACjD,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;gBAClE,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;gBAElD,MAAM,aAAa,GACjB,WAAW,CAAC,MAAM,GAAG,CAAC;oBACpB,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC;oBACnD,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;gBAExB,MAAM,YAAY,GAChB,GAAG,CAAC,KAAK,CAAC,MAAM,KAAK,SAAS;oBAC9B,GAAG,CAAC,KAAK,CAAC,MAAM,KAAK,IAAI,CAAC,KAAK,CAAC,MAAM;oBACpC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM;oBAClB,CAAC,CAAC,SAAS,CAAC;gBAEhB,GAAG,GAAG;oBACJ,KAAK,EAAE;wBACL,EAAE,EAAE,GAAG,CAAC,KAAK,CAAC,EAAE,GAAG,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE;wBACtC,IAAI,EAAE,GAAG
,CAAC,KAAK,CAAC,IAAI;wBACpB,KAAK,EAAE,GAAG,CAAC,KAAK,CAAC,KAAK;wBACtB,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC;wBAC5C,MAAM,EAAE,YAAY;wBACpB,OAAO,EAAE,aAAa;qBACvB;oBACD,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC;oBACtC,MAAM,EACJ,YAAY,KAAK,SAAS;wBACxB,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC;wBAC7B,CAAC,CAAC,SAAS;iBAChB,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACjB,GAAG,GAAG,IAAI,CAAC;YACb,CAAC;QACH,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;IAED,iCAAiC;IACjC,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;AAClD,CAAC;AAED,iFAAiF;AAEjF;;;;;;;;;;;;GAYG;AACH,SAAgB,uBAAuB,CACrC,OAAuB,EACvB,KAA4B,EAC5B,SAAS,GAAG,IAAI;IAEhB,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IAExC,2FAA2F;IAC3F,IAAI,SAA0C,CAAC;IAC/C,IAAI,KAAK,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC5B,SAAS,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC9D,CAAC;IACD,MAAM,WAAW,GAAG,CAAC,EAAU,EAAmB,EAAE;QAClD,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QACxB,kDAAkD;QAClD,IAAI,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,OAAO,IAAI,SAAS,EAAE,CAAC;YACjD,MAAM,GAAG,GAAG,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC9B,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;gBACtB,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC;gBAC9B,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,GAAG,IAAI,EAAE,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC;YAC3E,CAAC;QACH,CAAC;QACD,oBAAoB;QACpB,OAAO,KAAK,CAAC,UAAU,EAAE,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC;IACxC,CAAC,CAAC;IAEF,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,MAAM,kBAAkB,GAA2B,EAAE,CAAC;IAEtD,KAAK,MAAM,SAAS,IAAI,OAAO,EAAE,CAAC;QAChC,MAAM,YAAY,GAAG,WAAW,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACrD,IAAI,UAAU,GAAG,KAAK,CAAC;QAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,MAAM,MAAM,GAAG,kBAAkB,CAAC,CAAC,CAAC,CAAC;YACrC,IAAI,YAAY,IAAI,MAAM,EAAE,CAAC;gBAC3B,UAA
U,GAAG,IAAA,yBAAgB,EAAC,YAAY,EAAE,MAAM,CAAC,GAAG,SAAS,CAAC;YAClE,CAAC;iBAAM,CAAC;gBACN,UAAU;oBACR,iBAAiB,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;wBACrE,IAAI,CAAC;YACT,CAAC;YACD,IAAI,UAAU;gBAAE,MAAM;QACxB,CAAC;QAED,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACzB,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,iBAAiB,CAAC,CAAS,EAAE,CAAS;IAC7C,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAC3B,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAC3B,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEjD,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,MAAM,CAAC,IAAI,IAAI;QAAE,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,YAAY,EAAE,CAAC;IACtD,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,YAAY,CAAC;IACnD,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,KAAK,CAAC;AAChD,CAAC;AAED,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI,GAAG,CACZ,IAAI;SACD,WAAW,EAAE;SACb,KAAK,CAAC,SAAS,CAAC;SAChB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAChC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { EmbeddingCache, SearchResult } from "../core/types";
|
|
2
|
+
/**
|
|
3
|
+
* Maximal Marginal Relevance (MMR) reranking.
|
|
4
|
+
*
|
|
5
|
+
* Selects up to `topK` results from `candidates` by iteratively picking
|
|
6
|
+
* the candidate that maximises:
|
|
7
|
+
*
|
|
8
|
+
* MMR(d) = lambda * relevance(d) - (1 - lambda) * max_sim(d, selected)
|
|
9
|
+
*
|
|
10
|
+
* where:
|
|
11
|
+
* - relevance(d) is the original retrieval score (normalised to [0, 1])
|
|
12
|
+
* - max_sim(d, selected) is the maximum cosine similarity between d and
|
|
13
|
+
* any already-selected result (requires embeddings in the cache)
|
|
14
|
+
*
|
|
15
|
+
* lambda=1.0 → pure relevance ordering (identical to original ranking)
|
|
16
|
+
* lambda=0.5 → balanced relevance / diversity (default)
|
|
17
|
+
* lambda=0.0 → maximum diversity, ignores relevance
|
|
18
|
+
*
|
|
19
|
+
* Falls back to returning `candidates.slice(0, topK)` when embeddings
|
|
20
|
+
* are unavailable (keyword-only mode).
|
|
21
|
+
*/
|
|
22
|
+
export declare function applyMMR(candidates: SearchResult[], cache: EmbeddingCache | null, topK: number, lambda?: number): SearchResult[];
|
|
23
|
+
//# sourceMappingURL=mmr.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mmr.d.ts","sourceRoot":"","sources":["../../src/search/mmr.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAGlE;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,QAAQ,CACtB,UAAU,EAAE,YAAY,EAAE,EAC1B,KAAK,EAAE,cAAc,GAAG,IAAI,EAC5B,IAAI,EAAE,MAAM,EACZ,MAAM,SAAM,GACX,YAAY,EAAE,CAmFhB"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.applyMMR = applyMMR;
|
|
4
|
+
const vector_1 = require("./vector");
|
|
5
|
+
/**
|
|
6
|
+
* Maximal Marginal Relevance (MMR) reranking.
|
|
7
|
+
*
|
|
8
|
+
* Selects up to `topK` results from `candidates` by iteratively picking
|
|
9
|
+
* the candidate that maximises:
|
|
10
|
+
*
|
|
11
|
+
* MMR(d) = lambda * relevance(d) - (1 - lambda) * max_sim(d, selected)
|
|
12
|
+
*
|
|
13
|
+
* where:
|
|
14
|
+
* - relevance(d) is the original retrieval score (normalised to [0, 1])
|
|
15
|
+
* - max_sim(d, selected) is the maximum cosine similarity between d and
|
|
16
|
+
* any already-selected result (requires embeddings in the cache)
|
|
17
|
+
*
|
|
18
|
+
* lambda=1.0 → pure relevance ordering (identical to original ranking)
|
|
19
|
+
* lambda=0.5 → balanced relevance / diversity (default)
|
|
20
|
+
* lambda=0.0 → maximum diversity, ignores relevance
|
|
21
|
+
*
|
|
22
|
+
* Falls back to returning `candidates.slice(0, topK)` when embeddings
|
|
23
|
+
* are unavailable (keyword-only mode).
|
|
24
|
+
*/
|
|
25
|
+
function applyMMR(candidates, cache, topK, lambda = 0.5) {
|
|
26
|
+
if (candidates.length <= 1)
|
|
27
|
+
return candidates.slice(0, topK);
|
|
28
|
+
// Without vector data MMR cannot compute inter-result similarity.
|
|
29
|
+
if (!cache?.vectors || !cache.chunkIds || !cache.dimensions) {
|
|
30
|
+
return candidates.slice(0, topK);
|
|
31
|
+
}
|
|
32
|
+
const { vectors, chunkIds, dimensions } = cache;
|
|
33
|
+
// Build a fast chunk-id → flat-buffer-index map.
|
|
34
|
+
const idToIdx = new Map();
|
|
35
|
+
chunkIds.forEach((id, i) => idToIdx.set(id, i));
|
|
36
|
+
// Normalise relevance scores to [0, 1] so lambda is meaningful regardless
|
|
37
|
+
// of whether scores come from cosine similarity (already ~[0,1]) or BM25.
|
|
38
|
+
// Use reduce instead of Math.max(...spread) to avoid call-stack overflow on large arrays.
|
|
39
|
+
let maxScore = -Infinity;
|
|
40
|
+
let minScore = Infinity;
|
|
41
|
+
for (const r of candidates) {
|
|
42
|
+
if (r.score > maxScore)
|
|
43
|
+
maxScore = r.score;
|
|
44
|
+
if (r.score < minScore)
|
|
45
|
+
minScore = r.score;
|
|
46
|
+
}
|
|
47
|
+
const range = maxScore - minScore || 1;
|
|
48
|
+
const relNorm = candidates.map((r) => (r.score - minScore) / range);
|
|
49
|
+
// Pre-extract each candidate's vector as number[] so the inner loop avoids
|
|
50
|
+
// repeated Array.from() allocations (O(n² * d) → O(n * d) allocations).
|
|
51
|
+
const candidateVecs = candidates.map((r) => {
|
|
52
|
+
const idx = idToIdx.get(r.chunk.id);
|
|
53
|
+
if (idx === undefined)
|
|
54
|
+
return null;
|
|
55
|
+
const offset = idx * dimensions;
|
|
56
|
+
return Array.from(vectors.subarray(offset, offset + dimensions));
|
|
57
|
+
});
|
|
58
|
+
const selected = [];
|
|
59
|
+
// Flat-buffer indices of already-selected results (for similarity queries).
|
|
60
|
+
const selectedVecIdx = [];
|
|
61
|
+
// Track remaining candidates as index positions into `candidates`.
|
|
62
|
+
const remaining = candidates.map((_, i) => i);
|
|
63
|
+
while (selected.length < topK && remaining.length > 0) {
|
|
64
|
+
let bestPos = -1; // position in `remaining`
|
|
65
|
+
let bestMMR = -Infinity;
|
|
66
|
+
for (let pos = 0; pos < remaining.length; pos++) {
|
|
67
|
+
const candIdx = remaining[pos];
|
|
68
|
+
const relevance = relNorm[candIdx];
|
|
69
|
+
let maxSim = 0;
|
|
70
|
+
if (selectedVecIdx.length > 0) {
|
|
71
|
+
const vecA = candidateVecs[candIdx];
|
|
72
|
+
if (vecA !== null) {
|
|
73
|
+
for (const selIdx of selectedVecIdx) {
|
|
74
|
+
const sim = (0, vector_1.cosineSimilarityBinary)(vecA, vectors, selIdx * dimensions, dimensions);
|
|
75
|
+
if (sim > maxSim)
|
|
76
|
+
maxSim = sim;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
const mmrScore = lambda * relevance - (1 - lambda) * maxSim;
|
|
81
|
+
if (mmrScore > bestMMR) {
|
|
82
|
+
bestMMR = mmrScore;
|
|
83
|
+
bestPos = pos;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
const chosenIdx = remaining[bestPos];
|
|
87
|
+
selected.push(candidates[chosenIdx]);
|
|
88
|
+
const vecIdx = idToIdx.get(candidates[chosenIdx].chunk.id);
|
|
89
|
+
if (vecIdx !== undefined)
|
|
90
|
+
selectedVecIdx.push(vecIdx);
|
|
91
|
+
remaining.splice(bestPos, 1);
|
|
92
|
+
}
|
|
93
|
+
return selected;
|
|
94
|
+
}
|
|
95
|
+
//# sourceMappingURL=mmr.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mmr.js","sourceRoot":"","sources":["../../src/search/mmr.ts"],"names":[],"mappings":";;AAuBA,4BAwFC;AA9GD,qCAAkD;AAElD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,SAAgB,QAAQ,CACtB,UAA0B,EAC1B,KAA4B,EAC5B,IAAY,EACZ,MAAM,GAAG,GAAG;IAEZ,IAAI,UAAU,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAE7D,kEAAkE;IAClE,IAAI,CAAC,KAAK,EAAE,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;QAC5D,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,KAAK,CAAC;IAEhD,iDAAiD;IACjD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC1C,QAAQ,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAEhD,0EAA0E;IAC1E,0EAA0E;IAC1E,0FAA0F;IAC1F,IAAI,QAAQ,GAAG,CAAC,QAAQ,CAAC;IACzB,IAAI,QAAQ,GAAG,QAAQ,CAAC;IACxB,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,IAAI,CAAC,CAAC,KAAK,GAAG,QAAQ;YAAE,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC;QAC3C,IAAI,CAAC,CAAC,KAAK,GAAG,QAAQ;YAAE,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC;IAC7C,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,GAAG,QAAQ,IAAI,CAAC,CAAC;IACvC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,QAAQ,CAAC,GAAG,KAAK,CAAC,CAAC;IAEpE,2EAA2E;IAC3E,wEAAwE;IACxE,MAAM,aAAa,GAA2B,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACjE,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACpC,IAAI,GAAG,KAAK,SAAS;YAAE,OAAO,IAAI,CAAC;QACnC,MAAM,MAAM,GAAG,GAAG,GAAG,UAAU,CAAC;QAChC,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,4EAA4E;IAC5E,MAAM,cAAc,GAAa,EAAE,CAAC;IAEpC,mEAAmE;IACnE,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAE9C,OAAO,QAAQ,CAAC,MAAM,GAAG,IAAI,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,IAAI,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,0BAA0B;QAC5C,IAAI,OAAO,GAAG,CAAC,QAAQ,CAAC;QAExB,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,SAAS,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,GAA
G,SAAS,CAAC,GAAG,CAAC,CAAC;YAC/B,MAAM,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;YAEnC,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9B,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;gBACpC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;oBAClB,KAAK,MAAM,MAAM,IAAI,cAAc,EAAE,CAAC;wBACpC,MAAM,GAAG,GAAG,IAAA,+BAAsB,EAChC,IAAI,EACJ,OAAO,EACP,MAAM,GAAG,UAAU,EACnB,UAAU,CACX,CAAC;wBACF,IAAI,GAAG,GAAG,MAAM;4BAAE,MAAM,GAAG,GAAG,CAAC;oBACjC,CAAC;gBACH,CAAC;YACH,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,MAAM,CAAC;YAC5D,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;gBACvB,OAAO,GAAG,QAAQ,CAAC;gBACnB,OAAO,GAAG,GAAG,CAAC;YAChB,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACrC,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC;QAErC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC3D,IAAI,MAAM,KAAK,SAAS;YAAE,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEtD,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { RerankConfig, SearchResult } from "../core/types";
|
|
2
|
+
/**
|
|
3
|
+
* Re-scores search results using the configured reranker.
|
|
4
|
+
*
|
|
5
|
+
* @param query The user's natural language query
|
|
6
|
+
* @param results Initial search results (from vector or keyword search)
|
|
7
|
+
* @param topK Number of results to return after reranking
|
|
8
|
+
* @param config Reranker config (default: xenova cross-encoder)
|
|
9
|
+
* @param fallbackModel Model name to use when config.model is not set (ollama only)
|
|
10
|
+
*/
|
|
11
|
+
export declare function rerankResults(query: string, results: SearchResult[], topK?: number, config?: RerankConfig, fallbackModel?: string): Promise<SearchResult[]>;
|
|
12
|
+
//# sourceMappingURL=rerank.d.ts.map
|