vemora 0.1.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vemora might be problematic. Click here for more details.

Files changed (242) hide show
  1. package/README.md +759 -0
  2. package/dist/cli.d.ts +16 -0
  3. package/dist/cli.d.ts.map +1 -0
  4. package/dist/cli.js +589 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/commands/ask.d.ts +14 -0
  7. package/dist/commands/ask.d.ts.map +1 -0
  8. package/dist/commands/ask.js +137 -0
  9. package/dist/commands/ask.js.map +1 -0
  10. package/dist/commands/audit.d.ts +17 -0
  11. package/dist/commands/audit.d.ts.map +1 -0
  12. package/dist/commands/audit.js +408 -0
  13. package/dist/commands/audit.js.map +1 -0
  14. package/dist/commands/brief.d.ts +14 -0
  15. package/dist/commands/brief.d.ts.map +1 -0
  16. package/dist/commands/brief.js +73 -0
  17. package/dist/commands/brief.js.map +1 -0
  18. package/dist/commands/chat.d.ts +7 -0
  19. package/dist/commands/chat.d.ts.map +1 -0
  20. package/dist/commands/chat.js +155 -0
  21. package/dist/commands/chat.js.map +1 -0
  22. package/dist/commands/context.d.ts +61 -0
  23. package/dist/commands/context.d.ts.map +1 -0
  24. package/dist/commands/context.js +778 -0
  25. package/dist/commands/context.js.map +1 -0
  26. package/dist/commands/deps.d.ts +20 -0
  27. package/dist/commands/deps.d.ts.map +1 -0
  28. package/dist/commands/deps.js +138 -0
  29. package/dist/commands/deps.js.map +1 -0
  30. package/dist/commands/focus.d.ts +6 -0
  31. package/dist/commands/focus.d.ts.map +1 -0
  32. package/dist/commands/focus.js +302 -0
  33. package/dist/commands/focus.js.map +1 -0
  34. package/dist/commands/index.d.ts +10 -0
  35. package/dist/commands/index.d.ts.map +1 -0
  36. package/dist/commands/index.js +366 -0
  37. package/dist/commands/index.js.map +1 -0
  38. package/dist/commands/init-agent.d.ts +23 -0
  39. package/dist/commands/init-agent.d.ts.map +1 -0
  40. package/dist/commands/init-agent.js +447 -0
  41. package/dist/commands/init-agent.js.map +1 -0
  42. package/dist/commands/init.d.ts +2 -0
  43. package/dist/commands/init.d.ts.map +1 -0
  44. package/dist/commands/init.js +122 -0
  45. package/dist/commands/init.js.map +1 -0
  46. package/dist/commands/knowledge.d.ts +8 -0
  47. package/dist/commands/knowledge.d.ts.map +1 -0
  48. package/dist/commands/knowledge.js +98 -0
  49. package/dist/commands/knowledge.js.map +1 -0
  50. package/dist/commands/plan.d.ts +16 -0
  51. package/dist/commands/plan.d.ts.map +1 -0
  52. package/dist/commands/plan.js +535 -0
  53. package/dist/commands/plan.js.map +1 -0
  54. package/dist/commands/query.d.ts +39 -0
  55. package/dist/commands/query.d.ts.map +1 -0
  56. package/dist/commands/query.js +389 -0
  57. package/dist/commands/query.js.map +1 -0
  58. package/dist/commands/remember.d.ts +11 -0
  59. package/dist/commands/remember.d.ts.map +1 -0
  60. package/dist/commands/remember.js +174 -0
  61. package/dist/commands/remember.js.map +1 -0
  62. package/dist/commands/report.d.ts +10 -0
  63. package/dist/commands/report.d.ts.map +1 -0
  64. package/dist/commands/report.js +180 -0
  65. package/dist/commands/report.js.map +1 -0
  66. package/dist/commands/status.d.ts +2 -0
  67. package/dist/commands/status.d.ts.map +1 -0
  68. package/dist/commands/status.js +127 -0
  69. package/dist/commands/status.js.map +1 -0
  70. package/dist/commands/summarize.d.ts +14 -0
  71. package/dist/commands/summarize.d.ts.map +1 -0
  72. package/dist/commands/summarize.js +181 -0
  73. package/dist/commands/summarize.js.map +1 -0
  74. package/dist/commands/triage.d.ts +33 -0
  75. package/dist/commands/triage.d.ts.map +1 -0
  76. package/dist/commands/triage.js +419 -0
  77. package/dist/commands/triage.js.map +1 -0
  78. package/dist/commands/usages.d.ts +14 -0
  79. package/dist/commands/usages.d.ts.map +1 -0
  80. package/dist/commands/usages.js +236 -0
  81. package/dist/commands/usages.js.map +1 -0
  82. package/dist/core/config.d.ts +35 -0
  83. package/dist/core/config.d.ts.map +1 -0
  84. package/dist/core/config.js +141 -0
  85. package/dist/core/config.js.map +1 -0
  86. package/dist/core/types.d.ts +274 -0
  87. package/dist/core/types.d.ts.map +1 -0
  88. package/dist/core/types.js +4 -0
  89. package/dist/core/types.js.map +1 -0
  90. package/dist/embeddings/factory.d.ts +9 -0
  91. package/dist/embeddings/factory.d.ts.map +1 -0
  92. package/dist/embeddings/factory.js +26 -0
  93. package/dist/embeddings/factory.js.map +1 -0
  94. package/dist/embeddings/noop.d.ts +17 -0
  95. package/dist/embeddings/noop.d.ts.map +1 -0
  96. package/dist/embeddings/noop.js +22 -0
  97. package/dist/embeddings/noop.js.map +1 -0
  98. package/dist/embeddings/ollama.d.ts +11 -0
  99. package/dist/embeddings/ollama.d.ts.map +1 -0
  100. package/dist/embeddings/ollama.js +49 -0
  101. package/dist/embeddings/ollama.js.map +1 -0
  102. package/dist/embeddings/openai.d.ts +10 -0
  103. package/dist/embeddings/openai.d.ts.map +1 -0
  104. package/dist/embeddings/openai.js +67 -0
  105. package/dist/embeddings/openai.js.map +1 -0
  106. package/dist/embeddings/provider.d.ts +19 -0
  107. package/dist/embeddings/provider.d.ts.map +1 -0
  108. package/dist/embeddings/provider.js +3 -0
  109. package/dist/embeddings/provider.js.map +1 -0
  110. package/dist/indexer/callgraph.d.ts +16 -0
  111. package/dist/indexer/callgraph.d.ts.map +1 -0
  112. package/dist/indexer/callgraph.js +154 -0
  113. package/dist/indexer/callgraph.js.map +1 -0
  114. package/dist/indexer/chunkBySlidingWindow.d.ts +6 -0
  115. package/dist/indexer/chunkBySlidingWindow.d.ts.map +1 -0
  116. package/dist/indexer/chunkBySlidingWindow.js +30 -0
  117. package/dist/indexer/chunkBySlidingWindow.js.map +1 -0
  118. package/dist/indexer/chunkBySymbols.d.ts +7 -0
  119. package/dist/indexer/chunkBySymbols.d.ts.map +1 -0
  120. package/dist/indexer/chunkBySymbols.js +57 -0
  121. package/dist/indexer/chunkBySymbols.js.map +1 -0
  122. package/dist/indexer/chunker.d.ts +15 -0
  123. package/dist/indexer/chunker.d.ts.map +1 -0
  124. package/dist/indexer/chunker.js +26 -0
  125. package/dist/indexer/chunker.js.map +1 -0
  126. package/dist/indexer/classHeader.d.ts +7 -0
  127. package/dist/indexer/classHeader.d.ts.map +1 -0
  128. package/dist/indexer/classHeader.js +37 -0
  129. package/dist/indexer/classHeader.js.map +1 -0
  130. package/dist/indexer/deps.d.ts +66 -0
  131. package/dist/indexer/deps.d.ts.map +1 -0
  132. package/dist/indexer/deps.js +409 -0
  133. package/dist/indexer/deps.js.map +1 -0
  134. package/dist/indexer/hasher.d.ts +17 -0
  135. package/dist/indexer/hasher.d.ts.map +1 -0
  136. package/dist/indexer/hasher.js +38 -0
  137. package/dist/indexer/hasher.js.map +1 -0
  138. package/dist/indexer/parser.d.ts +18 -0
  139. package/dist/indexer/parser.d.ts.map +1 -0
  140. package/dist/indexer/parser.js +355 -0
  141. package/dist/indexer/parser.js.map +1 -0
  142. package/dist/indexer/scanner.d.ts +18 -0
  143. package/dist/indexer/scanner.d.ts.map +1 -0
  144. package/dist/indexer/scanner.js +37 -0
  145. package/dist/indexer/scanner.js.map +1 -0
  146. package/dist/indexer/strategy.d.ts +11 -0
  147. package/dist/indexer/strategy.d.ts.map +1 -0
  148. package/dist/indexer/strategy.js +15 -0
  149. package/dist/indexer/strategy.js.map +1 -0
  150. package/dist/indexer/tests.d.ts +15 -0
  151. package/dist/indexer/tests.d.ts.map +1 -0
  152. package/dist/indexer/tests.js +68 -0
  153. package/dist/indexer/tests.js.map +1 -0
  154. package/dist/indexer/todos.d.ts +9 -0
  155. package/dist/indexer/todos.d.ts.map +1 -0
  156. package/dist/indexer/todos.js +29 -0
  157. package/dist/indexer/todos.js.map +1 -0
  158. package/dist/llm/anthropic.d.ts +8 -0
  159. package/dist/llm/anthropic.d.ts.map +1 -0
  160. package/dist/llm/anthropic.js +76 -0
  161. package/dist/llm/anthropic.js.map +1 -0
  162. package/dist/llm/factory.d.ts +7 -0
  163. package/dist/llm/factory.d.ts.map +1 -0
  164. package/dist/llm/factory.js +39 -0
  165. package/dist/llm/factory.js.map +1 -0
  166. package/dist/llm/ollama.d.ts +8 -0
  167. package/dist/llm/ollama.d.ts.map +1 -0
  168. package/dist/llm/ollama.js +83 -0
  169. package/dist/llm/ollama.js.map +1 -0
  170. package/dist/llm/openai.d.ts +8 -0
  171. package/dist/llm/openai.d.ts.map +1 -0
  172. package/dist/llm/openai.js +68 -0
  173. package/dist/llm/openai.js.map +1 -0
  174. package/dist/llm/provider.d.ts +29 -0
  175. package/dist/llm/provider.d.ts.map +1 -0
  176. package/dist/llm/provider.js +3 -0
  177. package/dist/llm/provider.js.map +1 -0
  178. package/dist/search/bm25.d.ts +3 -0
  179. package/dist/search/bm25.d.ts.map +1 -0
  180. package/dist/search/bm25.js +102 -0
  181. package/dist/search/bm25.js.map +1 -0
  182. package/dist/search/formatter.d.ts +43 -0
  183. package/dist/search/formatter.d.ts.map +1 -0
  184. package/dist/search/formatter.js +208 -0
  185. package/dist/search/formatter.js.map +1 -0
  186. package/dist/search/hybrid.d.ts +10 -0
  187. package/dist/search/hybrid.d.ts.map +1 -0
  188. package/dist/search/hybrid.js +53 -0
  189. package/dist/search/hybrid.js.map +1 -0
  190. package/dist/search/merge.d.ts +33 -0
  191. package/dist/search/merge.d.ts.map +1 -0
  192. package/dist/search/merge.js +158 -0
  193. package/dist/search/merge.js.map +1 -0
  194. package/dist/search/mmr.d.ts +23 -0
  195. package/dist/search/mmr.d.ts.map +1 -0
  196. package/dist/search/mmr.js +95 -0
  197. package/dist/search/mmr.js.map +1 -0
  198. package/dist/search/rerank.d.ts +12 -0
  199. package/dist/search/rerank.d.ts.map +1 -0
  200. package/dist/search/rerank.js +113 -0
  201. package/dist/search/rerank.js.map +1 -0
  202. package/dist/search/signature.d.ts +42 -0
  203. package/dist/search/signature.d.ts.map +1 -0
  204. package/dist/search/signature.js +112 -0
  205. package/dist/search/signature.js.map +1 -0
  206. package/dist/search/vector.d.ts +41 -0
  207. package/dist/search/vector.d.ts.map +1 -0
  208. package/dist/search/vector.js +185 -0
  209. package/dist/search/vector.js.map +1 -0
  210. package/dist/storage/cache.d.ts +30 -0
  211. package/dist/storage/cache.d.ts.map +1 -0
  212. package/dist/storage/cache.js +160 -0
  213. package/dist/storage/cache.js.map +1 -0
  214. package/dist/storage/knowledge.d.ts +17 -0
  215. package/dist/storage/knowledge.d.ts.map +1 -0
  216. package/dist/storage/knowledge.js +58 -0
  217. package/dist/storage/knowledge.js.map +1 -0
  218. package/dist/storage/repository.d.ts +27 -0
  219. package/dist/storage/repository.d.ts.map +1 -0
  220. package/dist/storage/repository.js +95 -0
  221. package/dist/storage/repository.js.map +1 -0
  222. package/dist/storage/session.d.ts +38 -0
  223. package/dist/storage/session.d.ts.map +1 -0
  224. package/dist/storage/session.js +100 -0
  225. package/dist/storage/session.js.map +1 -0
  226. package/dist/storage/summaries.d.ts +19 -0
  227. package/dist/storage/summaries.d.ts.map +1 -0
  228. package/dist/storage/summaries.js +66 -0
  229. package/dist/storage/summaries.js.map +1 -0
  230. package/dist/storage/usage.d.ts +35 -0
  231. package/dist/storage/usage.d.ts.map +1 -0
  232. package/dist/storage/usage.js +55 -0
  233. package/dist/storage/usage.js.map +1 -0
  234. package/dist/utils/git.d.ts +15 -0
  235. package/dist/utils/git.d.ts.map +1 -0
  236. package/dist/utils/git.js +38 -0
  237. package/dist/utils/git.js.map +1 -0
  238. package/dist/utils/tokenizer.d.ts +24 -0
  239. package/dist/utils/tokenizer.d.ts.map +1 -0
  240. package/dist/utils/tokenizer.js +52 -0
  241. package/dist/utils/tokenizer.js.map +1 -0
  242. package/package.json +71 -0
@@ -0,0 +1,208 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.formatTerse = formatTerse;
4
+ exports.formatJson = formatJson;
5
+ exports.formatMarkdown = formatMarkdown;
6
+ const deps_1 = require("../indexer/deps");
7
+ const signature_1 = require("./signature");
8
+ // ─── Terse formatter ──────────────────────────────────────────────────────────
9
/**
 * Ultra-compact one-liner per result for small/local models with limited context windows.
 * Format: file:startLine | symbol (type) | score | first-line-of-signature
 *
 * ~70-80% fewer tokens than the markdown format. Recommended for models with
 * context windows ≤ 32K, or whenever token budget is the primary constraint.
 *
 * Results that share a `chunk.id` with an earlier result are skipped, so each
 * chunk is listed at most once.
 *
 * @param results Search results; each entry provides `chunk`, `score` and an optional `symbol`.
 * @param options Formatting options. `topK` caps the number of emitted lines (default 10).
 *                May be omitted entirely.
 * @returns The emitted lines joined with "\n" (an empty string when nothing is emitted).
 */
function formatTerse(results, options = {}) {
    const topK = options.topK ?? 10;
    const seen = new Set();
    const lines = [];
    for (const { chunk, score, symbol } of results) {
        // Enforce the cap *before* emitting: checking only after the push would
        // still output one line for topK <= 0.
        if (lines.length >= topK)
            break;
        if (seen.has(chunk.id))
            continue;
        seen.add(chunk.id);
        const symbolPart = chunk.symbol
            ? `${chunk.symbol} (${symbol?.type ?? "symbol"})`
            : "(no symbol)";
        // Only the first line of the extracted signature is kept so that every
        // result stays on a single output line.
        const sig = (0, signature_1.extractSignature)(chunk.content).split("\n")[0].trim();
        lines.push(`${chunk.file}:${chunk.start} | ${symbolPart} | ${score.toFixed(3)} | ${sig}`);
    }
    return lines.join("\n");
}
34
/**
 * Serialises search results as a pretty-printed JSON document.
 *
 * Each unique chunk (by `chunk.id`) becomes one entry holding its rank, display
 * tier, location, score, the imports of its file, the files that import it and
 * the file summary when one exists. What source text is attached depends on the tier:
 *   - "high": `code` holds the first `HIGH_CODE_LINES` lines (or everything with `showCode`)
 *   - "med":  `signature` holds the extracted signature
 *   - other:  both `code` and `signature` stay null
 *
 * @param query Query string, echoed back in the output.
 * @param results Search results; each entry provides `chunk`, `score` and an optional `symbol`.
 * @param depGraph Dependency info keyed by file path; also fed to `computeImportedBy`.
 * @param fileSummaries File summaries keyed by file path.
 * @param options `showCode` (default false) forces the "high" tier with untruncated code;
 *                `topK` (default 10) caps the number of entries. May be omitted entirely.
 * @returns JSON text (2-space indent) of `{ query, totalResults, results }`.
 */
function formatJson(query, results, depGraph, fileSummaries, options = {}) {
    const importedByMap = (0, deps_1.computeImportedBy)(depGraph);
    const showCode = options.showCode ?? false;
    const topK = options.topK ?? 10;
    const seen = new Set();
    const jsonResults = [];
    for (const { chunk, score, symbol } of results) {
        // Enforce the cap *before* emitting: checking only after the push would
        // still output one entry for topK <= 0.
        if (jsonResults.length >= topK)
            break;
        if (seen.has(chunk.id))
            continue;
        seen.add(chunk.id);
        // Rank counts emitted entries, not input positions, so skipped
        // duplicates do not leave gaps.
        const rank = jsonResults.length + 1;
        const tier = showCode ? "high" : (0, signature_1.getDisplayTier)(rank);
        let code = null;
        let signature = null;
        if (tier === "high") {
            const codeLines = chunk.content.split("\n");
            const limit = showCode ? codeLines.length : signature_1.HIGH_CODE_LINES;
            code = codeLines.slice(0, limit).join("\n");
        }
        else if (tier === "med") {
            signature = (0, signature_1.extractSignature)(chunk.content);
        }
        jsonResults.push({
            rank,
            tier,
            file: chunk.file,
            symbol: chunk.symbol ?? null,
            symbolType: symbol?.type ?? null,
            lines: { start: chunk.start, end: chunk.end },
            // Round to 4 decimals but keep the value numeric in the JSON output.
            score: parseFloat(score.toFixed(4)),
            code,
            signature,
            imports: depGraph[chunk.file]?.imports ?? [],
            usedBy: importedByMap.get(chunk.file) ?? [],
            summary: fileSummaries[chunk.file]?.summary ?? null,
        });
    }
    const output = {
        query,
        totalResults: jsonResults.length,
        results: jsonResults,
    };
    return JSON.stringify(output, null, 2);
}
83
+ // ─── Markdown formatter ───────────────────────────────────────────────────────
84
/**
 * Appends one capped bullet list to `lines`: the heading, at most `max` items
 * rendered as "- <text>", an "…and N more" bullet when items were cut, and a
 * trailing blank line.
 */
function pushCappedList(lines, heading, items, max, render) {
    const shown = items.slice(0, max);
    const hidden = items.length - shown.length;
    lines.push(heading);
    for (const item of shown) {
        lines.push(`- ${render(item)}`);
    }
    if (hidden > 0)
        lines.push(`- _…and ${hidden} more_`);
    lines.push("");
}
/**
 * Returns the code-fence info string for a file: the text after the last dot
 * of its base name (the whole base name when it contains no dot).
 * Working on the base name keeps a dotted directory from leaking a path
 * fragment such as "config/settings" into the fence.
 */
function fenceLanguage(file) {
    const baseName = file.split(/[\\/]/).pop() ?? "";
    return baseName.split(".").pop() ?? "";
}
/**
 * Renders search results as a Markdown report.
 *
 * Each unique chunk (by `chunk.id`) gets a numbered section with its file,
 * symbol, line range, score and display tier. Unless the tier is "low" the
 * section also lists file imports, importing files and call-graph neighbours
 * (with larger caps for the "high" tier). The section body is:
 *   - "high": a fenced preview of the first `HIGH_CODE_LINES` lines (everything with `showCode`)
 *   - "med":  the extracted signature in a fence
 *   - other:  the file summary as a block quote, when one exists
 *
 * @param query Query string shown in the report heading.
 * @param results Search results; each entry provides `chunk`, `score` and an optional `symbol`.
 * @param depGraph Dependency info keyed by file path; also fed to `computeImportedBy`.
 * @param fileSummaries File summaries keyed by file path.
 * @param callGraph Call info keyed by "<file>:<symbol>", each with `calls` and `calledBy`.
 * @param options `showCode` (default false) forces the "high" tier with untruncated code;
 *                `topK` (default 10) caps the number of sections. May be omitted entirely.
 * @returns The report as a single "\n"-joined string; contains "_No results found._"
 *          when no section was emitted.
 */
function formatMarkdown(query, results, depGraph, fileSummaries, callGraph, options = {}) {
    const importedByMap = (0, deps_1.computeImportedBy)(depGraph);
    const showCode = options.showCode ?? false;
    const topK = options.topK ?? 10;
    const lines = [`## Relevant code for: \`${query}\``, ""];
    const seen = new Set();
    let displayed = 0;
    for (const { chunk, score, symbol } of results) {
        // Enforce the cap *before* emitting: checking only after a section was
        // written would still output one section for topK <= 0.
        if (displayed >= topK)
            break;
        if (seen.has(chunk.id))
            continue;
        seen.add(chunk.id);
        const rank = displayed + 1;
        const tier = showCode ? "high" : (0, signature_1.getDisplayTier)(rank);
        const isHigh = tier === "high";
        // Header
        lines.push(`### ${rank}. \`${chunk.file}\``);
        if (chunk.symbol) {
            // Two trailing spaces form a Markdown hard line break, keeping the
            // symbol on its own line above the "Lines …" row.
            lines.push(`**${symbol?.type ?? "symbol"}** \`${chunk.symbol}\`  `);
        }
        lines.push(`Lines ${chunk.start}–${chunk.end} · Score: ${score.toFixed(4)} · Tier: ${tier}`);
        lines.push("");
        // Dependencies and call-graph context are omitted for the "low" tier.
        if (tier !== "low") {
            const imports = depGraph[chunk.file]?.imports;
            if (imports?.length) {
                pushCappedList(lines, "**Imports:**", imports, isHigh ? 6 : 3, (imp) => {
                    // At most four imported symbol names are shown per import.
                    const syms = imp.symbols.length > 0
                        ? ` — \`${imp.symbols.slice(0, 4).join(", ")}\``
                        : "";
                    return `\`${imp.file}\`${syms}`;
                });
            }
            const usedBy = importedByMap.get(chunk.file) ?? [];
            if (usedBy.length > 0) {
                pushCappedList(lines, "**Used by (files):**", usedBy, isHigh ? 4 : 2, (caller) => `\`${caller}\``);
            }
            // Call-graph entries are keyed by "<file>:<symbol>", so chunks
            // without a symbol have no call info.
            const callInfo = chunk.symbol
                ? callGraph[`${chunk.file}:${chunk.symbol}`]
                : null;
            if (callInfo) {
                if (callInfo.calls.length > 0) {
                    pushCappedList(lines, "**Calls:**", callInfo.calls, isHigh ? 6 : 3, (call) => {
                        const loc = call.file ? ` (in \`${call.file}\`)` : "";
                        return `\`${call.name}\`${loc}`;
                    });
                }
                if (callInfo.calledBy.length > 0) {
                    pushCappedList(lines, "**Called by:**", callInfo.calledBy, isHigh ? 4 : 2, (callerId) => `\`${callerId}\``);
                }
            }
        }
        // Code / signature / summary
        if (isHigh) {
            const codeLines = chunk.content.split("\n");
            const limit = showCode ? codeLines.length : signature_1.HIGH_CODE_LINES;
            lines.push(`\`\`\`${fenceLanguage(chunk.file)}`);
            lines.push(codeLines.slice(0, limit).join("\n"));
            if (codeLines.length > limit) {
                lines.push(`// … (${codeLines.length - limit} more lines — use --show-code to expand)`);
            }
            lines.push("```");
        }
        else if (tier === "med") {
            lines.push(`\`\`\`${fenceLanguage(chunk.file)}`);
            lines.push((0, signature_1.extractSignature)(chunk.content));
            lines.push("```");
        }
        else {
            // LOW — summary if available
            const fileSummary = fileSummaries[chunk.file];
            if (fileSummary) {
                lines.push(`> ${fileSummary.summary}`);
            }
        }
        lines.push("");
        lines.push("---");
        lines.push("");
        displayed++;
    }
    if (displayed === 0) {
        lines.push("_No results found._");
    }
    return lines.join("\n");
}
208
+ //# sourceMappingURL=formatter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"formatter.js","sourceRoot":"","sources":["../../src/search/formatter.ts"],"names":[],"mappings":";;AA0BA,kCAyBC;AAyBD,gCA8DC;AAID,wCAmJC;AA3RD,0CAAoD;AACpD,2CAAgF;AAUhF,iFAAiF;AAEjF;;;;;;GAMG;AACH,SAAgB,WAAW,CACzB,OAAuB,EACvB,OAAoC;IAEpC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;IAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAAE,SAAS;QACjC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAEnB,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM;YAC7B,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,KAAK,MAAM,EAAE,IAAI,IAAI,QAAQ,GAAG;YACjD,CAAC,CAAC,aAAa,CAAC;QAClB,MAAM,GAAG,GAAG,IAAA,4BAAgB,EAAC,KAAK,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAElE,KAAK,CAAC,IAAI,CACR,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,KAAK,MAAM,UAAU,MAAM,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,EAAE,CAC9E,CAAC;QAEF,IAAI,KAAK,CAAC,MAAM,IAAI,IAAI;YAAE,MAAM;IAClC,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAyBD,SAAgB,UAAU,CACxB,KAAa,EACb,OAAuB,EACvB,QAAyB,EACzB,aAA+B,EAC/B,OAAsB;IAEtB,MAAM,aAAa,GAAG,IAAA,wBAAiB,EAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,KAAK,CAAC;IAC3C,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;IAEhC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,WAAW,GAAiB,EAAE,CAAC;IAErC,KAAK,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAAE,SAAS;QACjC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAEnB,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;QACpC,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAA,0BAAc,EAAC,IAAI,CAAC,CAAC;QAEtD,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAEnD,IAAI,IAAI,GAAkB,IAAI,CAAC;QAC/B,IAAI,SAAS,GAAkB,IAAI,CAAC;QAEpC,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC5C,MAAM,K
AAK,GAAG,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,2BAAe,CAAC;YAC5D,IAAI,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9C,CAAC;aAAM,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;YAC1B,SAAS,GAAG,IAAA,4BAAgB,EAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,MAAM,WAAW,GAAG,aAAa,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE9C,WAAW,CAAC,IAAI,CAAC;YACf,IAAI;YACJ,IAAI;YACJ,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,IAAI;YAC5B,UAAU,EAAE,MAAM,EAAE,IAAI,IAAI,IAAI;YAChC,KAAK,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE;YAC7C,KAAK,EAAE,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YACnC,IAAI;YACJ,SAAS;YACT,OAAO,EAAE,QAAQ,EAAE,OAAO,IAAI,EAAE;YAChC,MAAM;YACN,OAAO,EAAE,WAAW,EAAE,OAAO,IAAI,IAAI;SACtC,CAAC,CAAC;QAEH,IAAI,WAAW,CAAC,MAAM,IAAI,IAAI;YAAE,MAAM;IACxC,CAAC;IAED,MAAM,MAAM,GAAe;QACzB,KAAK;QACL,YAAY,EAAE,WAAW,CAAC,MAAM;QAChC,OAAO,EAAE,WAAW;KACrB,CAAC;IAEF,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AACzC,CAAC;AAED,iFAAiF;AAEjF,SAAgB,cAAc,CAC5B,KAAa,EACb,OAAuB,EACvB,QAAyB,EACzB,aAA+B,EAC/B,SAAoB,EACpB,OAAsB;IAEtB,MAAM,aAAa,GAAG,IAAA,wBAAiB,EAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,KAAK,CAAC;IAC3C,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;IAEhC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,2BAA2B,KAAK,IAAI,CAAC,CAAC;IACjD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAAE,SAAS;QACjC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAEnB,MAAM,IAAI,GAAG,SAAS,GAAG,CAAC,CAAC;QAC3B,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAA,0BAAc,EAAC,IAAI,CAAC,CAAC;QAEtD,SAAS;QACT,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,OAAO,KAAK,CAAC,IAAI,IAAI,CAAC,CAAC;QAC7C,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,OAAO,GAAG,MAAM,EAAE,IAAI,IAAI,QAAQ,CAAC;YACzC,KAAK,CAAC,IAAI,CAAC,KAAK,OAAO,QAAQ,KAAK,CAAC,MAAM,MAAM,CAAC,CAAC;QACrD,CAAC;QACD,
KAAK,CAAC,IAAI,CACR,SAAS,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,GAAG,aAAa,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,IAAI,EAAE,CACjF,CAAC;QACF,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,eAAe;QACf,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;YACnB,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACtC,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAEnD,IAAI,QAAQ,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC;gBAC7B,MAAM,OAAO,GAAG,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxC,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBACjD,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;gBAEtD,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC3B,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;oBACxB,MAAM,IAAI,GACR,GAAG,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC;wBACpB,CAAC,CAAC,QAAQ,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI;wBAChD,CAAC,CAAC,EAAE,CAAC;oBACT,KAAK,CAAC,IAAI,CAAC,OAAO,GAAG,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC,CAAC;gBACzC,CAAC;gBACD,IAAI,MAAM,GAAG,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,QAAQ,CAAC,CAAC;gBACtD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACjB,CAAC;YAED,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBACvC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;gBAE5C,KAAK,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;gBACnC,KAAK,MAAM,MAAM,IAAI,KAAK,EAAE,CAAC;oBAC3B,KAAK,CAAC,IAAI,CAAC,OAAO,MAAM,IAAI,CAAC,CAAC;gBAChC,CAAC;gBACD,IAAI,MAAM,GAAG,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,QAAQ,CAAC,CAAC;gBACtD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACjB,CAAC;YAED,qBAAqB;YACrB,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;YACvE,MAAM,QAAQ,GAAG,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAEvD,IAAI,QAAQ,EAAE,CAAC;gBACb,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC9B,MAAM,QAAQ,GAAG,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAA
C,CAAC,CAAC,CAAC;oBACzC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;oBAChD,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;oBAEpD,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBACzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;wBACzB,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;wBACtD,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,IAAI,KAAK,GAAG,EAAE,CAAC,CAAC;oBACzC,CAAC;oBACD,IAAI,MAAM,GAAG,CAAC;wBAAE,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,QAAQ,CAAC,CAAC;oBACtD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACjB,CAAC;gBAED,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACjC,MAAM,UAAU,GAAG,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC3C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;oBACrD,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;oBAEvD,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;oBAC7B,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;wBAC7B,KAAK,CAAC,IAAI,CAAC,OAAO,QAAQ,IAAI,CAAC,CAAC;oBAClC,CAAC;oBACD,IAAI,MAAM,GAAG,CAAC;wBAAE,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,QAAQ,CAAC,CAAC;oBACtD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACjB,CAAC;YACH,CAAC;QACH,CAAC;QAED,6BAA6B;QAC7B,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,2BAAe,CAAC;YAC5D,MAAM,OAAO,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrD,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;YAC9C,KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC;YAC3B,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACpB,IAAI,SAAS,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;gBAC7B,KAAK,CAAC,IAAI,CACR,SAAS,SAAS,CAAC,MAAM,GAAG,KAAK,0CAA0C,CAC5E,CAAC;YACJ,CAAC;YACD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;aAAM,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;YAC1B,MAAM,GAAG,GAAG,IAAA,4BAAgB,EAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC5C,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;YAC9
C,KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC;YAC3B,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAChB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;aAAM,CAAC;YACN,6BAA6B;YAC7B,MAAM,WAAW,GAAG,aAAa,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC9C,IAAI,WAAW,EAAE,CAAC;gBAChB,KAAK,CAAC,IAAI,CAAC,KAAK,WAAW,CAAC,OAAO,EAAE,CAAC,CAAC;YACzC,CAAC;QACH,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,SAAS,EAAE,CAAC;QACZ,IAAI,SAAS,IAAI,IAAI;YAAE,MAAM;IAC/B,CAAC;IAED,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;QACpB,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IACpC,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
@@ -0,0 +1,10 @@
1
+ import type { Chunk, EmbeddingCache, SearchResult, SymbolIndex } from "../core/types";
2
+ /** Optional tuning parameters accepted by `hybridSearch`. */ export interface HybridOptions {
3
+ /** Weight of the vector-search score in the blended score; the normalized BM25 score is weighted `1 - alpha`. The implementation falls back to 0.7 when omitted. */ alpha?: number;
4
+ /** Maximum number of results returned. The implementation falls back to 10 when omitted and asks each underlying search for `topK * 3` candidates. */ topK?: number;
5
+ }
6
+ /**
7
+ * Combines Vector Search (semantic) and BM25 Search (keyword) for higher accuracy.
+ *
+ * @param query Query text passed to the BM25 keyword scoring.
+ * @param queryEmbedding Embedding of the query; when null or empty the vector search is skipped and only BM25 contributes.
+ * @param chunks Candidate chunks handed to both searches.
+ * @param cache Embedding cache handed to the vector search.
+ * @param symbols Symbol index handed to both searches.
+ * @param options Blend weight (`alpha`) and result cap (`topK`); see `HybridOptions`.
+ * @returns Promise of the results sorted by descending blended score and truncated to `topK`.
8
+ */
9
+ export declare function hybridSearch(query: string, queryEmbedding: number[] | null, chunks: Chunk[], cache: EmbeddingCache, symbols: SymbolIndex, options?: HybridOptions): Promise<SearchResult[]>;
10
+ //# sourceMappingURL=hybrid.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../src/search/hybrid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,KAAK,EACL,cAAc,EACd,YAAY,EAEZ,WAAW,EACZ,MAAM,eAAe,CAAC;AAIvB,MAAM,WAAW,aAAa;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,wBAAsB,YAAY,CAChC,KAAK,EAAE,MAAM,EACb,cAAc,EAAE,MAAM,EAAE,GAAG,IAAI,EAC/B,MAAM,EAAE,KAAK,EAAE,EACf,KAAK,EAAE,cAAc,EACrB,OAAO,EAAE,WAAW,EACpB,OAAO,GAAE,aAAkB,GAC1B,OAAO,CAAC,YAAY,EAAE,CAAC,CAoEzB"}
@@ -0,0 +1,53 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.hybridSearch = hybridSearch;
4
+ const bm25_1 = require("./bm25");
5
+ const vector_1 = require("./vector");
6
/**
 * Combines Vector Search (semantic) and BM25 Search (keyword) for higher accuracy.
 *
 * Both searches are asked for `topK * 3` candidates. BM25 scores are scaled
 * relative to the best BM25 score, then every chunk that appears in either
 * candidate list is scored as `alpha * vector + (1 - alpha) * bm25`, with a
 * missing side counting as 0.
 *
 * @param query Query text passed to the BM25 scoring.
 * @param queryEmbedding Embedding of the query; when null or empty the vector
 *                       search is skipped and only BM25 contributes.
 * @param chunks Candidate chunks handed to both searches.
 * @param cache Embedding cache handed to the vector search.
 * @param symbols Symbol index handed to both searches.
 * @param options `alpha` (default 0.7) is the weight of the vector score;
 *                `topK` (default 10) caps the number of returned results.
 * @returns Results sorted by descending blended score, truncated to `topK`.
 */
async function hybridSearch(query, queryEmbedding, chunks, cache, symbols, options = {}) {
    const alpha = options.alpha ?? 0.7;
    const topK = options.topK ?? 10;
    // Over-fetch so that chunks ranked low by one search but high by the other
    // still have a chance to reach the final top K.
    const oversample = topK * 3;
    // Non-finite scores (NaN, ±Infinity) would poison the blend and make the
    // sort comparator inconsistent, so they are treated as "no signal".
    const finiteOrZero = (value) => (Number.isFinite(value) ? value : 0);
    // 1. Get Vector Scores
    const vectorResults = queryEmbedding && queryEmbedding.length > 0
        ? (0, vector_1.vectorSearch)(queryEmbedding, chunks, cache, symbols, oversample)
        : [];
    // 2. Get BM25 Scores
    const bm25Results = (0, bm25_1.computeBM25Scores)(query, chunks, symbols, oversample);
    // 3. Normalize and Combine
    const vMap = new Map();
    for (const r of vectorResults) {
        vMap.set(r.chunk.id, { score: finiteOrZero(r.score), result: r });
    }
    // Normalize BM25 scores relative to the best BM25 score. The maximum is
    // found with a loop (no argument spreading) and normalization is applied
    // only when it is positive: dividing by 0 or by a negative maximum would
    // yield NaN / ±Infinity or invert the ranking.
    let maxBM25 = 0;
    for (const r of bm25Results) {
        if (r.score > maxBM25)
            maxBM25 = r.score;
    }
    const bMap = new Map();
    for (const r of bm25Results) {
        const normalized = maxBM25 > 0 ? finiteOrZero(r.score / maxBM25) : 0;
        bMap.set(r.chunk.id, { score: normalized, result: r });
    }
    // Every chunk id that appeared in either result list, vector hits first.
    const allIds = new Set([...vMap.keys(), ...bMap.keys()]);
    const combined = [];
    for (const id of allIds) {
        const vData = vMap.get(id);
        const bData = bMap.get(id);
        // Prefer the vector result as the source of chunk/symbol when both exist.
        const result = vData?.result ?? bData?.result;
        if (!result)
            continue;
        combined.push({
            chunk: result.chunk,
            // Weighted combination; a side that did not return the chunk counts as 0.
            score: alpha * (vData?.score ?? 0) + (1 - alpha) * (bData?.score ?? 0),
            symbol: result.symbol,
        });
    }
    // 4. Sort and Slice
    return combined.sort((a, b) => b.score - a.score).slice(0, topK);
}
53
+ //# sourceMappingURL=hybrid.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hybrid.js","sourceRoot":"","sources":["../../src/search/hybrid.ts"],"names":[],"mappings":";;AAkBA,oCA2EC;AAtFD,iCAA2C;AAC3C,qCAAwC;AAOxC;;GAEG;AACI,KAAK,UAAU,YAAY,CAChC,KAAa,EACb,cAA+B,EAC/B,MAAe,EACf,KAAqB,EACrB,OAAoB,EACpB,UAAyB,EAAE;IAE3B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,GAAG,CAAC;IACnC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;IAEhC,uBAAuB;IACvB,IAAI,aAAa,GAAmB,EAAE,CAAC;IACvC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,CAAC;IAC5B,IAAI,cAAc,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChD,aAAa,GAAG,IAAA,qBAAY,EAC1B,cAAc,EACd,MAAM,EACN,KAAK,EACL,OAAO,EACP,UAAU,CACX,CAAC;IACJ,CAAC;IAED,qBAAqB;IACrB,MAAM,WAAW,GAAG,IAAA,wBAAiB,EAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;IAE1E,2BAA2B;IAC3B,MAAM,WAAW,GAAG,IAAI,GAAG,EAGxB,CAAC;IAEJ,yCAAyC;IACzC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAmD,CAAC;IACxE,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAC1B,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CACpD,CAAC;IAEF,MAAM,IAAI,GAAG,IAAI,GAAG,EAAmD,CAAC;IACxE,uEAAuE;IACvE,MAAM,OAAO,GACX,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5E,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CACxB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,OAAO,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAC9D,CAAC;IAEF,yDAAyD;IACzD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,EAAE,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAEzD,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;QACxB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAE3B,MAAM,MAAM,GAAG,KAAK,EAAE,KAAK,IAAI,CAAC,CAAC;QACjC,MAAM,MAAM,GAAG,KAAK,EAAE,KAAK,IAAI,CAAC,CAAC;QAEjC,uBAAuB;QACvB,MAAM,UAAU,GAAG,KAAK,GAAG,MAAM,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC,GAAG,MAAM,CAAC;QAEzD,oCAAoC;QACpC,MAAM,MAAM,GAAG,KAAK,EAAE,MAAM,IAAI,KAAK,EAAE,MAAM,CAAC;QAC9C,IAAI,MAAM,EAAE,CAAC;YACX,WAAW,CAAC,GAAG,CAAC,EAAE,E
AAE;gBAClB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,KAAK,EAAE,UAAU;gBACjB,MAAM,EAAE,MAAM,CAAC,MAAM;aACtB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,OAAO,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC;SACpC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;SACjC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AACpB,CAAC"}
@@ -0,0 +1,33 @@
1
+ import type { EmbeddingCache, SearchResult } from "../core/types";
2
+ /**
3
+ * Merges adjacent or overlapping chunks from the same file into a single chunk.
4
+ *
5
+ * Two chunks are merged when the next chunk's start line is within `gapThreshold`
6
+ * lines of the current chunk's end line. Overlapping lines (from sliding-window
7
+ * chunks) are deduplicated by skipping the already-covered lines when appending.
8
+ *
9
+ * Properties of the merged result:
10
+ * - `start` / `end` span the full combined line range
11
+ * - `content` is reconstructed without duplication
12
+ * - `score` is the maximum score among merged chunks
13
+ * - `symbol` is preserved only when all merged chunks share the same symbol name
14
+ *
15
+ * @param results Search results (any order).
16
+ * @param gapThreshold Max line gap between chunks to still merge them (default: 3).
17
+ */
18
+ export declare function mergeAdjacentChunks(results: SearchResult[], gapThreshold?: number): SearchResult[];
19
+ /**
20
+ * Removes near-duplicate chunks before token budget is applied.
21
+ *
22
+ * Two chunks are considered near-duplicates when:
23
+ * - Embeddings available: cosineSimilarity(a, b) > threshold (default 0.92)
24
+ * - No embeddings: Jaccard similarity of word-token sets > 0.65
25
+ *
26
+ * Processes results in score order (highest first); earlier chunks win.
27
+ * The first result is always kept regardless of similarity.
28
+ *
29
+ * @param cache Embedding cache (may be null — falls back to Jaccard)
30
+ * @param threshold Cosine similarity above which two chunks are near-duplicates
31
+ */
32
+ export declare function deduplicateBySimilarity(results: SearchResult[], cache: EmbeddingCache | null, threshold?: number): SearchResult[];
33
+ //# sourceMappingURL=merge.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"merge.d.ts","sourceRoot":"","sources":["../../src/search/merge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAGlE;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,mBAAmB,CACjC,OAAO,EAAE,YAAY,EAAE,EACvB,YAAY,SAAI,GACf,YAAY,EAAE,CAwEhB;AAID;;;;;;;;;;;;GAYG;AACH,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,YAAY,EAAE,EACvB,KAAK,EAAE,cAAc,GAAG,IAAI,EAC5B,SAAS,SAAO,GACf,YAAY,EAAE,CAgDhB"}
@@ -0,0 +1,158 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.mergeAdjacentChunks = mergeAdjacentChunks;
4
+ exports.deduplicateBySimilarity = deduplicateBySimilarity;
5
+ const vector_1 = require("./vector");
6
/**
 * Collapses search hits that sit next to (or on top of) each other in the
 * same file into one combined hit.
 *
 * Hits are bucketed per file and walked in ascending `start` order. A hit is
 * folded into the running one when its `start` is no further than
 * `gapThreshold` lines past the running hit's `end`. Lines of the incoming
 * hit that the running hit already covers (per line numbers, capped at the
 * incoming hit's actual line count) are dropped before its content is
 * appended.
 *
 * The combined hit has:
 *  - `chunk.id`      both ids joined with "+"
 *  - `chunk.start`   the running hit's start; `chunk.end` the larger end
 *  - `chunk.content` running content plus the non-overlapping tail
 *  - `score`         the larger of the two scores
 *  - `symbol`        kept only while every merged chunk carries the same
 *                    defined `chunk.symbol`; otherwise `undefined`
 *
 * @param results Search results, in any order. Returned untouched (same
 *                array) when it holds at most one entry.
 * @param gapThreshold Largest line gap that still triggers a merge (default 3).
 * @returns A new array ordered by descending score.
 */
function mergeAdjacentChunks(results, gapThreshold = 3) {
    if (results.length <= 1) {
        return results;
    }

    // Fold `right` into `left`; `left` is known to start no later than `right`.
    const combine = (left, right) => {
        const rightLines = right.chunk.content.split("\n");
        // Lines of `right` already present in `left`, never more than `right` has.
        const alreadyCovered = Math.min(
            Math.max(0, left.chunk.end - right.chunk.start + 1),
            rightLines.length,
        );
        const tail = rightLines.slice(alreadyCovered);
        const content = tail.length > 0
            ? left.chunk.content + "\n" + tail.join("\n")
            : left.chunk.content;
        const sameSymbol = left.chunk.symbol !== undefined &&
            left.chunk.symbol === right.chunk.symbol;
        const sharedSymbol = sameSymbol ? left.chunk.symbol : undefined;
        return {
            chunk: {
                id: left.chunk.id + "+" + right.chunk.id,
                file: left.chunk.file,
                start: left.chunk.start,
                end: Math.max(left.chunk.end, right.chunk.end),
                symbol: sharedSymbol,
                content,
            },
            score: Math.max(left.score, right.score),
            symbol: sharedSymbol !== undefined
                ? (left.symbol ?? right.symbol)
                : undefined,
        };
    };

    // Bucket hits per file, keeping first-seen file order.
    const perFile = new Map();
    for (const hit of results) {
        const bucket = perFile.get(hit.chunk.file) ?? [];
        bucket.push(hit);
        perFile.set(hit.chunk.file, bucket);
    }

    const out = [];
    for (const bucket of perFile.values()) {
        // Ascending start line lets us sweep once, left to right.
        const ordered = bucket.slice().sort((a, b) => a.chunk.start - b.chunk.start);
        let running = ordered[0];
        for (const hit of ordered.slice(1)) {
            const withinGap = hit.chunk.start <= running.chunk.end + gapThreshold;
            if (withinGap) {
                running = combine(running, hit);
            }
            else {
                out.push(running);
                running = hit;
            }
        }
        out.push(running);
    }

    // Callers expect best-first ordering.
    return out.sort((a, b) => b.score - a.score);
}
79
+ // ─── Semantic deduplication ───────────────────────────────────────────────────
80
/**
 * Removes near-duplicate chunks before the token budget is applied.
 *
 * A candidate is dropped when it is too similar to any chunk already kept:
 *  - both chunks have an embedding: cosineSimilarity(a, b) > `threshold`
 *  - otherwise: Jaccard similarity of their word-token sets > 0.65
 *
 * Results are processed in the order given (callers pass them best-first), so
 * earlier chunks win. The first result is always kept.
 *
 * Embeddings are read from the dense layout (`chunkIds` + flat `vectors`
 * buffer + `dimensions`) when all three are usable, and from the legacy
 * `embeddings` map otherwise. A cache whose `dimensions` is missing or not a
 * positive integer is treated as having no dense vectors, so the comparison
 * falls back to the legacy map / Jaccard instead of comparing empty vectors.
 *
 * @param results Search results; returned untouched (same array) when it
 *                holds at most one entry.
 * @param cache Embedding cache (may be null — falls back to Jaccard).
 * @param threshold Cosine similarity above which two chunks are
 *                  near-duplicates (default 0.92).
 * @returns The kept results, in their original relative order.
 */
function deduplicateBySimilarity(results, cache, threshold = 0.92) {
    if (results.length <= 1)
        return results;
    // Cut-off for the token-set fallback used when an embedding is missing.
    const jaccardThreshold = 0.65;
    // Only trust the dense layout when the row width is a usable number;
    // otherwise subarray(NaN, NaN) would hand back empty (but truthy) vectors.
    const dims = cache ? cache.dimensions : undefined;
    const hasDenseVectors = Boolean(cache && cache.chunkIds && cache.vectors) &&
        Number.isInteger(dims) &&
        dims > 0;
    // chunk id -> row index in the flat buffer, for O(1) lookups.
    const idToIndex = hasDenseVectors
        ? new Map(cache.chunkIds.map((id, i) => [id, i]))
        : null;
    const embeddingOf = (id) => {
        if (!cache)
            return null;
        // Dense binary format (chunkIds + vectors buffer)
        if (idToIndex) {
            const idx = idToIndex.get(id);
            if (idx !== undefined) {
                const offset = idx * dims;
                // Skip rows that would run past the end of the buffer.
                if (offset + dims <= cache.vectors.length) {
                    return Array.from(cache.vectors.subarray(offset, offset + dims));
                }
            }
        }
        // Legacy map format
        return cache.embeddings?.[id] ?? null;
    };
    // Tokenise each chunk at most once, and only if the Jaccard path needs it.
    const tokenMemo = new Map();
    const tokensOf = (result) => {
        let tokens = tokenMemo.get(result);
        if (tokens === undefined) {
            tokens = wordTokens(result.chunk.content);
            tokenMemo.set(result, tokens);
        }
        return tokens;
    };
    const selected = [];
    const selectedEmbeddings = [];
    for (const candidate of results) {
        const candidateEmb = embeddingOf(candidate.chunk.id);
        const tooSimilar = selected.some((kept, i) => {
            const keptEmb = selectedEmbeddings[i];
            if (candidateEmb && keptEmb) {
                return (0, vector_1.cosineSimilarity)(candidateEmb, keptEmb) > threshold;
            }
            return jaccardSimilarity(tokensOf(candidate), tokensOf(kept)) > jaccardThreshold;
        });
        if (!tooSimilar) {
            selected.push(candidate);
            selectedEmbeddings.push(candidateEmb);
        }
    }
    return selected;
}
/**
 * Jaccard similarity |A ∩ B| / |A ∪ B| of two token sets.
 * Two empty sets are considered identical (returns 1).
 */
function jaccardSimilarity(tokA, tokB) {
    if (tokA.size === 0 && tokB.size === 0)
        return 1;
    let intersection = 0;
    for (const t of tokA)
        if (tokB.has(t))
            intersection++;
    // At least one set is non-empty here, so the union is never 0.
    return intersection / (tokA.size + tokB.size - intersection);
}
/**
 * Lower-cases `text` and returns the set of its word tokens of length >= 2.
 * A word is a run of letters, combining marks, digits or underscores in any
 * script, so non-ASCII text is tokenised too; ASCII input yields the same
 * tokens as splitting on \W.
 */
function wordTokens(text) {
    return new Set(text
        .toLowerCase()
        .split(/[^\p{L}\p{M}\p{N}_]+/u)
        .filter((t) => t.length >= 2));
}
158
+ //# sourceMappingURL=merge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"merge.js","sourceRoot":"","sources":["../../src/search/merge.ts"],"names":[],"mappings":";;AAmBA,kDA2EC;AAiBD,0DAoDC;AAlKD,qCAA4C;AAE5C;;;;;;;;;;;;;;;GAeG;AACH,SAAgB,mBAAmB,CACjC,OAAuB,EACvB,YAAY,GAAG,CAAC;IAEhB,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IAExC,wBAAwB;IACxB,MAAM,MAAM,GAAG,IAAI,GAAG,EAA0B,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACjD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClB,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACtC,CAAC;IAED,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,KAAK,MAAM,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;QAC1C,6DAA6D;QAC7D,MAAM,MAAM,GAAG,CAAC,GAAG,WAAW,CAAC,CAAC,IAAI,CAClC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,CACxC,CAAC;QACF,IAAI,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAEpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YAEvB,IAAI,IAAI,CAAC,KAAK,CAAC,KAAK,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,YAAY,EAAE,CAAC;gBACrD,sEAAsE;gBACtE,wEAAwE;gBACxE,qEAAqE;gBACrE,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAC/B,CAAC,EACD,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CACrC,CAAC;gBACF,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACjD,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;gBAClE,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;gBAElD,MAAM,aAAa,GACjB,WAAW,CAAC,MAAM,GAAG,CAAC;oBACpB,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC;oBACnD,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;gBAExB,MAAM,YAAY,GAChB,GAAG,CAAC,KAAK,CAAC,MAAM,KAAK,SAAS;oBAC9B,GAAG,CAAC,KAAK,CAAC,MAAM,KAAK,IAAI,CAAC,KAAK,CAAC,MAAM;oBACpC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM;oBAClB,CAAC,CAAC,SAAS,CAAC;gBAEhB,GAAG,GAAG;oBACJ,KAAK,EAAE;wBACL,EAAE,EAAE,GAAG,CAAC,KAAK,CAAC,EAAE,GAAG,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE;wBACtC,IAAI,EAAE,GA
AG,CAAC,KAAK,CAAC,IAAI;wBACpB,KAAK,EAAE,GAAG,CAAC,KAAK,CAAC,KAAK;wBACtB,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC;wBAC5C,MAAM,EAAE,YAAY;wBACpB,OAAO,EAAE,aAAa;qBACvB;oBACD,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC;oBACtC,MAAM,EACJ,YAAY,KAAK,SAAS;wBACxB,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC;wBAC7B,CAAC,CAAC,SAAS;iBAChB,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACjB,GAAG,GAAG,IAAI,CAAC;YACb,CAAC;QACH,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;IAED,iCAAiC;IACjC,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;AAClD,CAAC;AAED,iFAAiF;AAEjF;;;;;;;;;;;;GAYG;AACH,SAAgB,uBAAuB,CACrC,OAAuB,EACvB,KAA4B,EAC5B,SAAS,GAAG,IAAI;IAEhB,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IAExC,2FAA2F;IAC3F,IAAI,SAA0C,CAAC;IAC/C,IAAI,KAAK,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC5B,SAAS,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC9D,CAAC;IACD,MAAM,WAAW,GAAG,CAAC,EAAU,EAAmB,EAAE;QAClD,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QACxB,kDAAkD;QAClD,IAAI,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,OAAO,IAAI,SAAS,EAAE,CAAC;YACjD,MAAM,GAAG,GAAG,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC9B,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;gBACtB,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC;gBAC9B,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,GAAG,IAAI,EAAE,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC;YAC3E,CAAC;QACH,CAAC;QACD,oBAAoB;QACpB,OAAO,KAAK,CAAC,UAAU,EAAE,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC;IACxC,CAAC,CAAC;IAEF,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,MAAM,kBAAkB,GAA2B,EAAE,CAAC;IAEtD,KAAK,MAAM,SAAS,IAAI,OAAO,EAAE,CAAC;QAChC,MAAM,YAAY,GAAG,WAAW,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACrD,IAAI,UAAU,GAAG,KAAK,CAAC;QAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,MAAM,MAAM,GAAG,kBAAkB,CAAC,CAAC,CAAC,CAAC;YACrC,IAAI,YAAY,IAAI,MAAM,EAAE,CAAC;gBAC3B,U
AAU,GAAG,IAAA,yBAAgB,EAAC,YAAY,EAAE,MAAM,CAAC,GAAG,SAAS,CAAC;YAClE,CAAC;iBAAM,CAAC;gBACN,UAAU;oBACR,iBAAiB,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;wBACrE,IAAI,CAAC;YACT,CAAC;YACD,IAAI,UAAU;gBAAE,MAAM;QACxB,CAAC;QAED,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACzB,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,iBAAiB,CAAC,CAAS,EAAE,CAAS;IAC7C,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAC3B,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAC3B,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEjD,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,MAAM,CAAC,IAAI,IAAI;QAAE,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,YAAY,EAAE,CAAC;IACtD,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,YAAY,CAAC;IACnD,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,KAAK,CAAC;AAChD,CAAC;AAED,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI,GAAG,CACZ,IAAI;SACD,WAAW,EAAE;SACb,KAAK,CAAC,SAAS,CAAC;SAChB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAChC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,23 @@
1
+ import type { EmbeddingCache, SearchResult } from "../core/types";
2
+ /**
3
+ * Maximal Marginal Relevance (MMR) reranking.
4
+ *
5
+ * Selects up to `topK` results from `candidates` by iteratively picking
6
+ * the candidate that maximises:
7
+ *
8
+ * MMR(d) = lambda * relevance(d) - (1 - lambda) * max_sim(d, selected)
9
+ *
10
+ * where:
11
+ * - relevance(d) is the original retrieval score (normalised to [0, 1])
12
+ * - max_sim(d, selected) is the maximum cosine similarity between d and
13
+ * any already-selected result (requires embeddings in the cache)
14
+ *
15
+ * lambda=1.0 → pure relevance ordering (identical to original ranking)
16
+ * lambda=0.5 → balanced relevance / diversity (default)
17
+ * lambda=0.0 → maximum diversity, ignores relevance
18
+ *
19
+ * Falls back to returning `candidates.slice(0, topK)` when embeddings
20
+ * are unavailable (keyword-only mode).
21
+ */
22
+ export declare function applyMMR(candidates: SearchResult[], cache: EmbeddingCache | null, topK: number, lambda?: number): SearchResult[];
23
+ //# sourceMappingURL=mmr.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mmr.d.ts","sourceRoot":"","sources":["../../src/search/mmr.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAGlE;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,QAAQ,CACtB,UAAU,EAAE,YAAY,EAAE,EAC1B,KAAK,EAAE,cAAc,GAAG,IAAI,EAC5B,IAAI,EAAE,MAAM,EACZ,MAAM,SAAM,GACX,YAAY,EAAE,CAmFhB"}
@@ -0,0 +1,95 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.applyMMR = applyMMR;
4
+ const vector_1 = require("./vector");
5
/**
 * Maximal Marginal Relevance (MMR) reranking.
 *
 * Selects up to `topK` results from `candidates` by iteratively picking
 * the candidate that maximises:
 *
 *   MMR(d) = lambda * relevance(d) - (1 - lambda) * max_sim(d, selected)
 *
 * where:
 *   - relevance(d) is the original retrieval score, min-max normalised to
 *     [0, 1] over `candidates`
 *   - max_sim(d, selected) is the maximum cosine similarity between d and
 *     any already-selected result that has a vector in the cache (0 when d
 *     itself has no vector or nothing with a vector has been selected yet)
 *
 * lambda=1.0 → pure relevance ordering
 * lambda=0.5 → balanced relevance / diversity (default)
 * lambda=0.0 → maximum diversity, ignores relevance
 *
 * Falls back to returning `candidates.slice(0, topK)` when there is at most
 * one candidate or the cache lacks `vectors`, `chunkIds` or `dimensions`
 * (keyword-only mode).
 *
 * Edge cases:
 *   - A negative `topK` selects nothing on every path.
 *   - When no remaining candidate has a comparable MMR score (e.g. NaN
 *     scores), the remaining candidates are taken in their input order
 *     instead of failing.
 *
 * @param candidates Search results to rerank (not mutated).
 * @param cache Embedding cache in dense format, or null.
 * @param topK Maximum number of results to return.
 * @param lambda Relevance/diversity trade-off in [0, 1] (default 0.5).
 * @returns A new array with the selected results in pick order.
 */
function applyMMR(candidates, cache, topK, lambda = 0.5) {
    // slice(0, -k) would return "all but the last k"; clamp so that a negative
    // topK means "none", which is what the selection loop below yields too.
    const limit = topK < 0 ? 0 : topK;
    if (candidates.length <= 1)
        return candidates.slice(0, limit);
    // Without vector data MMR cannot compute inter-result similarity.
    if (!cache?.vectors || !cache.chunkIds || !cache.dimensions) {
        return candidates.slice(0, limit);
    }
    const { vectors, chunkIds, dimensions } = cache;
    // Build a fast chunk-id → flat-buffer-index map.
    const idToIdx = new Map();
    chunkIds.forEach((id, i) => idToIdx.set(id, i));
    // Normalise relevance scores to [0, 1] so lambda is meaningful regardless
    // of whether scores come from cosine similarity (already ~[0,1]) or BM25.
    // A plain loop (rather than Math.max(...spread)) avoids call-stack
    // overflow on large arrays.
    let maxScore = -Infinity;
    let minScore = Infinity;
    for (const r of candidates) {
        if (r.score > maxScore)
            maxScore = r.score;
        if (r.score < minScore)
            minScore = r.score;
    }
    // All-equal scores give a zero range; use 1 to avoid dividing by zero.
    const range = maxScore - minScore || 1;
    const relNorm = candidates.map((r) => (r.score - minScore) / range);
    // Pre-extract each candidate's vector as number[] so the inner loop avoids
    // repeated Array.from() allocations.
    const candidateVecs = candidates.map((r) => {
        const idx = idToIdx.get(r.chunk.id);
        if (idx === undefined)
            return null;
        const offset = idx * dimensions;
        return Array.from(vectors.subarray(offset, offset + dimensions));
    });
    const selected = [];
    // Flat-buffer indices of already-selected results (for similarity queries).
    const selectedVecIdx = [];
    // Track remaining candidates as index positions into `candidates`.
    const remaining = candidates.map((_, i) => i);
    while (selected.length < limit && remaining.length > 0) {
        let bestPos = -1; // position in `remaining`
        let bestMMR = -Infinity;
        for (let pos = 0; pos < remaining.length; pos++) {
            const candIdx = remaining[pos];
            const relevance = relNorm[candIdx];
            let maxSim = 0;
            if (selectedVecIdx.length > 0) {
                const vecA = candidateVecs[candIdx];
                if (vecA !== null) {
                    for (const selIdx of selectedVecIdx) {
                        const sim = (0, vector_1.cosineSimilarityBinary)(vecA, vectors, selIdx * dimensions, dimensions);
                        if (sim > maxSim)
                            maxSim = sim;
                    }
                }
            }
            const mmrScore = lambda * relevance - (1 - lambda) * maxSim;
            if (mmrScore > bestMMR) {
                bestMMR = mmrScore;
                bestPos = pos;
            }
        }
        // No candidate beat -Infinity (all MMR scores NaN / -Infinity): keep
        // the input order rather than indexing with -1.
        if (bestPos === -1)
            bestPos = 0;
        const chosenIdx = remaining[bestPos];
        selected.push(candidates[chosenIdx]);
        const vecIdx = idToIdx.get(candidates[chosenIdx].chunk.id);
        if (vecIdx !== undefined)
            selectedVecIdx.push(vecIdx);
        remaining.splice(bestPos, 1);
    }
    return selected;
}
95
+ //# sourceMappingURL=mmr.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mmr.js","sourceRoot":"","sources":["../../src/search/mmr.ts"],"names":[],"mappings":";;AAuBA,4BAwFC;AA9GD,qCAAkD;AAElD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,SAAgB,QAAQ,CACtB,UAA0B,EAC1B,KAA4B,EAC5B,IAAY,EACZ,MAAM,GAAG,GAAG;IAEZ,IAAI,UAAU,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAE7D,kEAAkE;IAClE,IAAI,CAAC,KAAK,EAAE,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;QAC5D,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,KAAK,CAAC;IAEhD,iDAAiD;IACjD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC1C,QAAQ,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAEhD,0EAA0E;IAC1E,0EAA0E;IAC1E,0FAA0F;IAC1F,IAAI,QAAQ,GAAG,CAAC,QAAQ,CAAC;IACzB,IAAI,QAAQ,GAAG,QAAQ,CAAC;IACxB,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,IAAI,CAAC,CAAC,KAAK,GAAG,QAAQ;YAAE,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC;QAC3C,IAAI,CAAC,CAAC,KAAK,GAAG,QAAQ;YAAE,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC;IAC7C,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,GAAG,QAAQ,IAAI,CAAC,CAAC;IACvC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,QAAQ,CAAC,GAAG,KAAK,CAAC,CAAC;IAEpE,2EAA2E;IAC3E,wEAAwE;IACxE,MAAM,aAAa,GAA2B,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACjE,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACpC,IAAI,GAAG,KAAK,SAAS;YAAE,OAAO,IAAI,CAAC;QACnC,MAAM,MAAM,GAAG,GAAG,GAAG,UAAU,CAAC;QAChC,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,4EAA4E;IAC5E,MAAM,cAAc,GAAa,EAAE,CAAC;IAEpC,mEAAmE;IACnE,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAE9C,OAAO,QAAQ,CAAC,MAAM,GAAG,IAAI,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,IAAI,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,0BAA0B;QAC5C,IAAI,OAAO,GAAG,CAAC,QAAQ,CAAC;QAExB,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,SAAS,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,G
AAG,SAAS,CAAC,GAAG,CAAC,CAAC;YAC/B,MAAM,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;YAEnC,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9B,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;gBACpC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;oBAClB,KAAK,MAAM,MAAM,IAAI,cAAc,EAAE,CAAC;wBACpC,MAAM,GAAG,GAAG,IAAA,+BAAsB,EAChC,IAAI,EACJ,OAAO,EACP,MAAM,GAAG,UAAU,EACnB,UAAU,CACX,CAAC;wBACF,IAAI,GAAG,GAAG,MAAM;4BAAE,MAAM,GAAG,GAAG,CAAC;oBACjC,CAAC;gBACH,CAAC;YACH,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,MAAM,CAAC;YAC5D,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;gBACvB,OAAO,GAAG,QAAQ,CAAC;gBACnB,OAAO,GAAG,GAAG,CAAC;YAChB,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACrC,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC;QAErC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC3D,IAAI,MAAM,KAAK,SAAS;YAAE,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEtD,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,12 @@
1
+ import type { RerankConfig, SearchResult } from "../core/types";
2
+ /**
3
+ * Re-scores search results using the configured reranker.
4
+ *
5
+ * @param query The user's natural language query
6
+ * @param results Initial search results (from vector or keyword search)
7
+ * @param topK Number of results to return after reranking
8
+ * @param config Reranker config (default: xenova cross-encoder)
9
+ * @param fallbackModel Model name to use when config.model is not set (ollama only)
10
+ */
11
+ export declare function rerankResults(query: string, results: SearchResult[], topK?: number, config?: RerankConfig, fallbackModel?: string): Promise<SearchResult[]>;
12
+ //# sourceMappingURL=rerank.d.ts.map