@gmickel/gno 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/README.md +256 -0
  2. package/assets/skill/SKILL.md +112 -0
  3. package/assets/skill/cli-reference.md +327 -0
  4. package/assets/skill/examples.md +234 -0
  5. package/assets/skill/mcp-reference.md +159 -0
  6. package/package.json +90 -0
  7. package/src/app/constants.ts +313 -0
  8. package/src/cli/colors.ts +65 -0
  9. package/src/cli/commands/ask.ts +545 -0
  10. package/src/cli/commands/cleanup.ts +105 -0
  11. package/src/cli/commands/collection/add.ts +120 -0
  12. package/src/cli/commands/collection/index.ts +10 -0
  13. package/src/cli/commands/collection/list.ts +108 -0
  14. package/src/cli/commands/collection/remove.ts +64 -0
  15. package/src/cli/commands/collection/rename.ts +95 -0
  16. package/src/cli/commands/context/add.ts +67 -0
  17. package/src/cli/commands/context/check.ts +153 -0
  18. package/src/cli/commands/context/index.ts +10 -0
  19. package/src/cli/commands/context/list.ts +109 -0
  20. package/src/cli/commands/context/rm.ts +52 -0
  21. package/src/cli/commands/doctor.ts +393 -0
  22. package/src/cli/commands/embed.ts +462 -0
  23. package/src/cli/commands/get.ts +356 -0
  24. package/src/cli/commands/index-cmd.ts +119 -0
  25. package/src/cli/commands/index.ts +102 -0
  26. package/src/cli/commands/init.ts +328 -0
  27. package/src/cli/commands/ls.ts +217 -0
  28. package/src/cli/commands/mcp/config.ts +300 -0
  29. package/src/cli/commands/mcp/index.ts +24 -0
  30. package/src/cli/commands/mcp/install.ts +203 -0
  31. package/src/cli/commands/mcp/paths.ts +470 -0
  32. package/src/cli/commands/mcp/status.ts +222 -0
  33. package/src/cli/commands/mcp/uninstall.ts +158 -0
  34. package/src/cli/commands/mcp.ts +20 -0
  35. package/src/cli/commands/models/clear.ts +103 -0
  36. package/src/cli/commands/models/index.ts +32 -0
  37. package/src/cli/commands/models/list.ts +214 -0
  38. package/src/cli/commands/models/path.ts +51 -0
  39. package/src/cli/commands/models/pull.ts +199 -0
  40. package/src/cli/commands/models/use.ts +85 -0
  41. package/src/cli/commands/multi-get.ts +400 -0
  42. package/src/cli/commands/query.ts +220 -0
  43. package/src/cli/commands/ref-parser.ts +108 -0
  44. package/src/cli/commands/reset.ts +191 -0
  45. package/src/cli/commands/search.ts +136 -0
  46. package/src/cli/commands/shared.ts +156 -0
  47. package/src/cli/commands/skill/index.ts +19 -0
  48. package/src/cli/commands/skill/install.ts +197 -0
  49. package/src/cli/commands/skill/paths-cmd.ts +81 -0
  50. package/src/cli/commands/skill/paths.ts +191 -0
  51. package/src/cli/commands/skill/show.ts +73 -0
  52. package/src/cli/commands/skill/uninstall.ts +141 -0
  53. package/src/cli/commands/status.ts +205 -0
  54. package/src/cli/commands/update.ts +68 -0
  55. package/src/cli/commands/vsearch.ts +188 -0
  56. package/src/cli/context.ts +64 -0
  57. package/src/cli/errors.ts +64 -0
  58. package/src/cli/format/search-results.ts +211 -0
  59. package/src/cli/options.ts +183 -0
  60. package/src/cli/program.ts +1330 -0
  61. package/src/cli/run.ts +213 -0
  62. package/src/cli/ui.ts +92 -0
  63. package/src/config/defaults.ts +20 -0
  64. package/src/config/index.ts +55 -0
  65. package/src/config/loader.ts +161 -0
  66. package/src/config/paths.ts +87 -0
  67. package/src/config/saver.ts +153 -0
  68. package/src/config/types.ts +280 -0
  69. package/src/converters/adapters/markitdownTs/adapter.ts +140 -0
  70. package/src/converters/adapters/officeparser/adapter.ts +126 -0
  71. package/src/converters/canonicalize.ts +89 -0
  72. package/src/converters/errors.ts +218 -0
  73. package/src/converters/index.ts +51 -0
  74. package/src/converters/mime.ts +163 -0
  75. package/src/converters/native/markdown.ts +115 -0
  76. package/src/converters/native/plaintext.ts +56 -0
  77. package/src/converters/path.ts +48 -0
  78. package/src/converters/pipeline.ts +159 -0
  79. package/src/converters/registry.ts +74 -0
  80. package/src/converters/types.ts +123 -0
  81. package/src/converters/versions.ts +24 -0
  82. package/src/index.ts +27 -0
  83. package/src/ingestion/chunker.ts +238 -0
  84. package/src/ingestion/index.ts +32 -0
  85. package/src/ingestion/language.ts +276 -0
  86. package/src/ingestion/sync.ts +671 -0
  87. package/src/ingestion/types.ts +219 -0
  88. package/src/ingestion/walker.ts +235 -0
  89. package/src/llm/cache.ts +467 -0
  90. package/src/llm/errors.ts +191 -0
  91. package/src/llm/index.ts +58 -0
  92. package/src/llm/nodeLlamaCpp/adapter.ts +133 -0
  93. package/src/llm/nodeLlamaCpp/embedding.ts +165 -0
  94. package/src/llm/nodeLlamaCpp/generation.ts +88 -0
  95. package/src/llm/nodeLlamaCpp/lifecycle.ts +317 -0
  96. package/src/llm/nodeLlamaCpp/rerank.ts +94 -0
  97. package/src/llm/registry.ts +86 -0
  98. package/src/llm/types.ts +129 -0
  99. package/src/mcp/resources/index.ts +151 -0
  100. package/src/mcp/server.ts +229 -0
  101. package/src/mcp/tools/get.ts +220 -0
  102. package/src/mcp/tools/index.ts +160 -0
  103. package/src/mcp/tools/multi-get.ts +263 -0
  104. package/src/mcp/tools/query.ts +226 -0
  105. package/src/mcp/tools/search.ts +119 -0
  106. package/src/mcp/tools/status.ts +81 -0
  107. package/src/mcp/tools/vsearch.ts +198 -0
  108. package/src/pipeline/chunk-lookup.ts +44 -0
  109. package/src/pipeline/expansion.ts +256 -0
  110. package/src/pipeline/explain.ts +115 -0
  111. package/src/pipeline/fusion.ts +185 -0
  112. package/src/pipeline/hybrid.ts +535 -0
  113. package/src/pipeline/index.ts +64 -0
  114. package/src/pipeline/query-language.ts +118 -0
  115. package/src/pipeline/rerank.ts +223 -0
  116. package/src/pipeline/search.ts +261 -0
  117. package/src/pipeline/types.ts +328 -0
  118. package/src/pipeline/vsearch.ts +348 -0
  119. package/src/store/index.ts +41 -0
  120. package/src/store/migrations/001-initial.ts +196 -0
  121. package/src/store/migrations/index.ts +20 -0
  122. package/src/store/migrations/runner.ts +187 -0
  123. package/src/store/sqlite/adapter.ts +1242 -0
  124. package/src/store/sqlite/index.ts +7 -0
  125. package/src/store/sqlite/setup.ts +129 -0
  126. package/src/store/sqlite/types.ts +28 -0
  127. package/src/store/types.ts +506 -0
  128. package/src/store/vector/index.ts +13 -0
  129. package/src/store/vector/sqlite-vec.ts +373 -0
  130. package/src/store/vector/stats.ts +152 -0
  131. package/src/store/vector/types.ts +115 -0
@@ -0,0 +1,535 @@
1
+ /**
2
+ * Hybrid search orchestrator.
3
+ * Combines BM25, vector search, expansion, fusion, and reranking.
4
+ *
5
+ * @module src/pipeline/hybrid
6
+ */
7
+
8
+ import type { Config } from '../config/types';
9
+ import type { EmbeddingPort, GenerationPort, RerankPort } from '../llm/types';
10
+ import type { StorePort } from '../store/types';
11
+ import { err, ok } from '../store/types';
12
+ import type { VectorIndexPort } from '../store/vector/types';
13
+ import { createChunkLookup } from './chunk-lookup';
14
+ import { expandQuery } from './expansion';
15
+ import {
16
+ buildExplainResults,
17
+ explainBm25,
18
+ explainExpansion,
19
+ explainFusion,
20
+ explainRerank,
21
+ explainVector,
22
+ } from './explain';
23
+ import { type RankedInput, rrfFuse, toRankedInput } from './fusion';
24
+ import { detectQueryLanguage } from './query-language';
25
+ import { rerankCandidates } from './rerank';
26
+ import type {
27
+ ExpansionResult,
28
+ ExplainLine,
29
+ HybridSearchOptions,
30
+ PipelineConfig,
31
+ SearchResult,
32
+ SearchResults,
33
+ } from './types';
34
+ import { DEFAULT_PIPELINE_CONFIG } from './types';
35
+
36
+ // ─────────────────────────────────────────────────────────────────────────────
37
+ // Dependencies
38
+ // ─────────────────────────────────────────────────────────────────────────────
39
+
/**
 * Dependencies injected into `searchHybrid`.
 *
 * The nullable ports are optional capabilities; see each field for what a
 * `null` value switches off.
 */
export interface HybridSearchDeps {
  /** Store used for FTS queries and for document, collection, chunk, and content lookups. */
  store: StorePort;
  /** Application configuration. `searchHybrid` in this module does not read it. */
  config: Config;
  /** Vector index; vector retrieval is skipped when this is `null` or reports `searchAvailable` as falsy. */
  vectorIndex: VectorIndexPort | null;
  /** Embedding port used to embed query text; vector retrieval is skipped when `null`. */
  embedPort: EmbeddingPort | null;
  /** Generation port used for query expansion; expansion is skipped when `null`. */
  genPort: GenerationPort | null;
  /** Rerank port forwarded to the rerank stage (replaced by `null` when `noRerank` is set). */
  rerankPort: RerankPort | null;
  /** Pipeline tuning; `DEFAULT_PIPELINE_CONFIG` is used when omitted. */
  pipelineConfig?: PipelineConfig;
}
49
+
50
+ // ─────────────────────────────────────────────────────────────────────────────
51
+ // Score Normalization
52
+ // ─────────────────────────────────────────────────────────────────────────────
53
+
/**
 * Convert a vector distance into a similarity score clamped to [0, 1].
 *
 * A distance of 0 maps to 1 and a distance of 2 maps to 0; values outside
 * that span are clamped. Presumably the input is a cosine distance in
 * [0, 2] — confirm against the vector index implementation.
 *
 * @param distance - Raw distance reported by the vector search.
 * @returns Similarity in [0, 1] (NaN is passed through unchanged).
 */
function _normalizeVectorScore(distance: number): number {
  const similarity = 1 - distance / 2;
  return Math.min(1, Math.max(0, similarity));
}
57
+
58
+ // ─────────────────────────────────────────────────────────────────────────────
59
+ // BM25 Strength Check
60
+ // ─────────────────────────────────────────────────────────────────────────────
61
+
/**
 * Estimate how dominant the top BM25 hit is, to decide whether query
 * expansion can be skipped.
 *
 * Runs an FTS query limited to 5 hits and compares raw scores, where a
 * smaller (more negative) score is better. The strength is the gap between
 * the best and second-best score divided by the spread between best and
 * worst, clamped to [0, 1].
 *
 * Special cases:
 * - failed query or no hits -> 0
 * - exactly one hit -> 1
 * - all scores equal -> 0
 * - exactly two hits with different scores -> always 1, because the
 *   second-best score is also the worst one
 *
 * @param store - Store providing `searchFts`.
 * @param query - Query text passed to FTS unchanged.
 * @param options - Optional collection and language filters.
 * @returns Strength in [0, 1]; 1 means the top hit clearly stands out.
 */
async function checkBm25Strength(
  store: StorePort,
  query: string,
  options?: { collection?: string; lang?: string }
): Promise<number> {
  const fts = await store.searchFts(query, {
    limit: 5,
    collection: options?.collection,
    language: options?.lang,
  });

  if (!fts.ok) {
    return 0;
  }

  const hits = fts.value;
  if (hits.length === 0) {
    return 0;
  }
  if (hits.length === 1) {
    // A lone hit counts as a strong signal
    return 1;
  }

  // Ascending order puts the best (smallest) score first
  const ascending = hits.map((hit) => hit.score);
  ascending.sort((left, right) => left - right);

  const top = ascending[0] ?? 0;
  const runnerUp = ascending[1] ?? top;
  const bottom = ascending.at(-1) ?? top;

  const spread = bottom - top;
  if (spread === 0) {
    // Identical scores: nothing stands out
    return 0;
  }

  // Share of the total spread that separates #1 from #2, clamped for safety
  const ratio = (runnerUp - top) / spread;
  return Math.min(1, Math.max(0, ratio));
}
105
+
106
+ // ─────────────────────────────────────────────────────────────────────────────
107
+ // FTS Retrieval (returns ChunkIds)
108
+ // ─────────────────────────────────────────────────────────────────────────────
109
+
/**
 * Identifies one chunk: the mirror it belongs to plus its `seq` value
 * within that mirror.
 */
interface ChunkId {
  /** Hash of the mirror the chunk belongs to. */
  mirrorHash: string;
  /** Chunk sequence value within the mirror. */
  seq: number;
}
114
+
/**
 * Outcome of an FTS chunk lookup.
 *
 * Success carries the matched chunk ids. Failure carries a coarse code:
 * `INVALID_INPUT` is kept as-is from the store error, every other store
 * error code is reported as `OTHER`.
 */
type FtsChunksResult =
  | { ok: true; chunks: ChunkId[] }
  | { ok: false; code: 'INVALID_INPUT' | 'OTHER'; message: string };
118
+
/**
 * Run an FTS query and reduce each hit to its chunk identifier.
 *
 * @param store - Store providing `searchFts`.
 * @param query - Query text passed to FTS unchanged.
 * @param options - Result limit plus optional collection and language filters.
 * @returns The matched chunk ids in the order the store returned them, or a
 *   failure whose code is `INVALID_INPUT` when the store reported that code
 *   and `OTHER` for anything else.
 */
async function searchFtsChunks(
  store: StorePort,
  query: string,
  options: { limit: number; collection?: string; lang?: string }
): Promise<FtsChunksResult> {
  const { limit, collection, lang } = options;
  const fts = await store.searchFts(query, {
    limit,
    collection,
    language: lang,
  });

  if (fts.ok) {
    const chunks: ChunkId[] = fts.value.map(({ mirrorHash, seq }) => ({
      mirrorHash,
      seq,
    }));
    return { ok: true, chunks };
  }

  // Keep INVALID_INPUT distinguishable so callers can surface FTS syntax errors
  const { code, message } = fts.error;
  return {
    ok: false,
    code: code === 'INVALID_INPUT' ? 'INVALID_INPUT' : 'OTHER',
    message,
  };
}
143
+
144
+ // ─────────────────────────────────────────────────────────────────────────────
145
+ // Vector Retrieval (returns ChunkIds)
146
+ // ─────────────────────────────────────────────────────────────────────────────
147
+
/**
 * Embed a query and return the chunk ids of its nearest vectors.
 *
 * Every failure mode is treated as "no vector results": an unavailable
 * index, a failed embedding, or a failed nearest-neighbour search all
 * produce an empty array rather than an error.
 *
 * @param vectorIndex - Index providing `searchAvailable` and `searchNearest`.
 * @param embedPort - Port used to embed the query text.
 * @param query - Text to embed.
 * @param options - Result limit and optional minimum score forwarded to the index.
 * @returns Chunk ids in the order the index returned them (possibly empty).
 */
async function searchVectorChunks(
  vectorIndex: VectorIndexPort,
  embedPort: EmbeddingPort,
  query: string,
  options: { limit: number; minScore?: number }
): Promise<ChunkId[]> {
  if (!vectorIndex.searchAvailable) {
    return [];
  }

  const embedded = await embedPort.embed(query);
  if (!embedded.ok) {
    return [];
  }

  const vector = new Float32Array(embedded.value);
  const { limit, minScore } = options;
  const nearest = await vectorIndex.searchNearest(vector, limit, { minScore });

  return nearest.ok
    ? nearest.value.map(({ mirrorHash, seq }) => ({ mirrorHash, seq }))
    : [];
}
179
+
180
+ // ─────────────────────────────────────────────────────────────────────────────
181
+ // Hybrid Search
182
+ // ─────────────────────────────────────────────────────────────────────────────
183
+
/**
 * Execute hybrid search with the full pipeline.
 *
 * Stages, in order:
 *  0. Resolve the query language used for expansion prompt selection
 *     (`queryLanguageHint` > `lang` > detection). Detection never changes
 *     retrieval filters; only `options.lang` does.
 *  1. Expand the query, unless expansion is disabled, no generation port is
 *     available, or BM25 already yields a strong top hit.
 *  2. Retrieve chunk ids via BM25 and, when available, vector search, for the
 *     original query plus any expansion variants and HyDE text.
 *  3. Fuse the ranked lists with RRF.
 *  4. Rerank the fused candidates, then apply `minScore` to the blended score.
 *  5. Materialize results from batch-loaded documents, collections and chunks.
 *  6. Attach explain data when requested; it describes exactly the candidates
 *     that were turned into results.
 *
 * @param deps - Store, optional ports and optional pipeline config.
 * @param query - Raw user query.
 * @param options - Search options; `limit` defaults to 20, `minScore` to 0.
 * @returns `ok(SearchResults)` on success; `err('INVALID_INPUT', ...)` when
 *   the primary FTS query is rejected as invalid input;
 *   `err('QUERY_FAILED', ...)` when documents or chunks cannot be loaded.
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: search orchestration with BM25, vector, expansion, fusion, and reranking
export async function searchHybrid(
  deps: HybridSearchDeps,
  query: string,
  options: HybridSearchOptions = {}
): Promise<
  ReturnType<typeof ok<SearchResults>> | ReturnType<typeof err<SearchResults>>
> {
  const { store, vectorIndex, embedPort, genPort, rerankPort } = deps;
  const pipelineConfig = deps.pipelineConfig ?? DEFAULT_PIPELINE_CONFIG;

  const limit = options.limit ?? 20;
  const explainLines: ExplainLine[] = [];
  let expansion: ExpansionResult | null = null;

  // ─────────────────────────────────────────────────────────────────────────
  // 0. Detect query language for PROMPT SELECTION only
  // CRITICAL: Detection does NOT change retrieval filters - options.lang does
  // Priority: queryLanguageHint (MCP) > lang (CLI) > detection
  // ─────────────────────────────────────────────────────────────────────────
  const detection = detectQueryLanguage(query);
  // Use explicit hint > lang filter > detected language
  const queryLanguage =
    options.queryLanguageHint ?? options.lang ?? detection.bcp47;

  // Build explain message for language detection
  let langMessage: string;
  if (options.queryLanguageHint) {
    langMessage = `queryLanguage=${queryLanguage} (hint)`;
  } else if (options.lang) {
    langMessage = `queryLanguage=${queryLanguage} (explicit)`;
  } else {
    const confidence = detection.confident ? '' : ', low confidence';
    langMessage = `queryLanguage=${queryLanguage} (detected${confidence})`;
  }
  explainLines.push({ stage: 'lang', message: langMessage });

  // ─────────────────────────────────────────────────────────────────────────
  // 1. Check if expansion needed
  // ─────────────────────────────────────────────────────────────────────────
  const shouldExpand = !options.noExpand && genPort !== null;
  let skipExpansionDueToStrength = false;

  if (shouldExpand) {
    // A clearly dominant BM25 top hit makes expansion unnecessary
    const bm25Strength = await checkBm25Strength(store, query, {
      collection: options.collection,
      lang: options.lang,
    });
    skipExpansionDueToStrength =
      bm25Strength >= pipelineConfig.strongBm25Threshold;

    if (!skipExpansionDueToStrength) {
      const expandResult = await expandQuery(genPort, query, {
        // Use queryLanguage for prompt selection, NOT options.lang (retrieval filter)
        lang: queryLanguage,
        timeout: pipelineConfig.expansionTimeout,
      });
      // A failed expansion is not fatal: search continues unexpanded
      if (expandResult.ok) {
        expansion = expandResult.value;
      }
    }
  }

  explainLines.push(
    explainExpansion(shouldExpand && !skipExpansionDueToStrength, expansion)
  );

  // ─────────────────────────────────────────────────────────────────────────
  // 2. Retrieval using raw store/vector APIs for correct seq tracking
  //    (queries are awaited one at a time, not concurrently)
  // ─────────────────────────────────────────────────────────────────────────
  const rankedInputs: RankedInput[] = [];

  // BM25: original query (over-fetch so fusion has more to work with)
  const bm25Result = await searchFtsChunks(store, query, {
    limit: limit * 2,
    collection: options.collection,
    lang: options.lang,
  });

  // Propagate FTS syntax errors as INVALID_INPUT
  if (!bm25Result.ok && bm25Result.code === 'INVALID_INPUT') {
    return err('INVALID_INPUT', `Invalid search query: ${bm25Result.message}`);
  }
  // Other errors: continue with empty BM25 results

  const bm25Chunks = bm25Result.ok ? bm25Result.chunks : [];
  const bm25Count = bm25Chunks.length;
  if (bm25Count > 0) {
    rankedInputs.push(toRankedInput('bm25', bm25Chunks));
  }

  // BM25: lexical variants (syntax errors here are ignored - variants are optional)
  if (expansion?.lexicalQueries) {
    for (const variant of expansion.lexicalQueries) {
      const variantResult = await searchFtsChunks(store, variant, {
        limit,
        collection: options.collection,
        lang: options.lang,
      });
      if (variantResult.ok && variantResult.chunks.length > 0) {
        rankedInputs.push(toRankedInput('bm25_variant', variantResult.chunks));
      }
    }
  }

  explainLines.push(explainBm25(bm25Count));

  // Vector search
  let vecCount = 0;
  // `undefined` when vectorIndex is null; coerced with `?? false` where a boolean is required
  const vectorAvailable = vectorIndex?.searchAvailable && embedPort !== null;

  if (vectorAvailable && vectorIndex && embedPort) {
    // Original query
    const vecChunks = await searchVectorChunks(vectorIndex, embedPort, query, {
      limit: limit * 2,
    });

    vecCount = vecChunks.length;
    if (vecCount > 0) {
      rankedInputs.push(toRankedInput('vector', vecChunks));
    }

    // Semantic variants
    if (expansion?.vectorQueries) {
      for (const variant of expansion.vectorQueries) {
        const variantChunks = await searchVectorChunks(
          vectorIndex,
          embedPort,
          variant,
          { limit }
        );
        if (variantChunks.length > 0) {
          rankedInputs.push(toRankedInput('vector_variant', variantChunks));
        }
      }
    }

    // HyDE
    if (expansion?.hyde) {
      const hydeChunks = await searchVectorChunks(
        vectorIndex,
        embedPort,
        expansion.hyde,
        { limit }
      );
      if (hydeChunks.length > 0) {
        rankedInputs.push(toRankedInput('hyde', hydeChunks));
      }
    }
  }

  explainLines.push(explainVector(vecCount, vectorAvailable ?? false));

  // ─────────────────────────────────────────────────────────────────────────
  // 3. RRF Fusion
  // ─────────────────────────────────────────────────────────────────────────
  const fusedCandidates = rrfFuse(rankedInputs, pipelineConfig.rrf);
  explainLines.push(
    explainFusion(pipelineConfig.rrf.k, fusedCandidates.length)
  );

  // ─────────────────────────────────────────────────────────────────────────
  // 4. Reranking
  // ─────────────────────────────────────────────────────────────────────────
  const rerankResult = await rerankCandidates(
    { rerankPort: options.noRerank ? null : rerankPort, store },
    query,
    fusedCandidates,
    { maxCandidates: pipelineConfig.rerankCandidates }
  );

  explainLines.push(
    explainRerank(
      !options.noRerank && rerankPort !== null,
      pipelineConfig.rerankCandidates
    )
  );

  // ─────────────────────────────────────────────────────────────────────────
  // 4b. Apply minScore filter (blendedScore is now normalized to [0,1])
  // ─────────────────────────────────────────────────────────────────────────
  const minScore = options.minScore ?? 0;
  const filteredCandidates =
    minScore > 0
      ? rerankResult.candidates.filter((c) => c.blendedScore >= minScore)
      : rerankResult.candidates;

  // ─────────────────────────────────────────────────────────────────────────
  // 5. Build final results (optimized: batch lookups, no per-candidate queries)
  // ─────────────────────────────────────────────────────────────────────────

  // Collect unique mirrorHashes needed from candidates
  // TODO: For large corpora (100k+ docs), add store.getDocumentsByMirrorHashes
  // batch lookup to avoid loading all documents into memory.
  const neededHashes = new Set(filteredCandidates.map((c) => c.mirrorHash));

  // Fetch documents and collections
  const docsResult = await store.listDocuments(options.collection);
  const collectionsResult = await store.getCollections();

  if (!docsResult.ok) {
    return err('QUERY_FAILED', docsResult.error.message);
  }

  // Build lookup maps - only include docs needed by candidates
  const docByMirrorHash = new Map<string, (typeof docsResult.value)[number]>();
  for (const doc of docsResult.value) {
    if (doc.active && doc.mirrorHash && neededHashes.has(doc.mirrorHash)) {
      docByMirrorHash.set(doc.mirrorHash, doc);
    }
  }

  // Collection lookup failure is tolerated: results simply carry no absPath
  const collectionPaths = new Map<string, string>();
  if (collectionsResult.ok) {
    for (const c of collectionsResult.value) {
      collectionPaths.set(c.name, c.path);
    }
  }

  // Pre-fetch all chunks in one batch query (eliminates N+1)
  const chunksMapResult = await store.getChunksBatch([...neededHashes]);
  if (!chunksMapResult.ok) {
    return err('QUERY_FAILED', chunksMapResult.error.message);
  }
  const chunksMap = chunksMapResult.value;
  const getChunk = createChunkLookup(chunksMap);

  // Cache full content by mirrorHash for --full mode
  const contentCache = new Map<
    string,
    Awaited<ReturnType<typeof store.getContent>>
  >();

  const results: SearchResult[] = [];
  const docidMap = new Map<string, string>();
  // Track seen docids for --full de-duplication
  const seenDocids = new Set<string>();
  // Candidates that actually became results, in result order. Explain data is
  // built from this list so it lines up with `results` even when earlier
  // candidates were skipped (missing doc/chunk or --full de-duplication).
  const returnedCandidates: typeof filteredCandidates = [];

  // Iterate until we have enough results (don't slice early - deduping may skip candidates)
  for (const candidate of filteredCandidates) {
    // Stop when we have enough results
    if (results.length >= limit) {
      break;
    }

    // Find document from pre-fetched map
    const doc = docByMirrorHash.get(candidate.mirrorHash);
    if (!doc) {
      continue;
    }

    // For --full mode, de-dupe by docid (keep best scoring candidate per doc)
    if (options.full && seenDocids.has(doc.docid)) {
      continue;
    }

    // Get chunk via O(1) lookup
    const chunk = getChunk(candidate.mirrorHash, candidate.seq);
    if (!chunk) {
      continue;
    }

    docidMap.set(`${candidate.mirrorHash}:${candidate.seq}`, doc.docid);

    const collectionPath = collectionPaths.get(doc.collection);

    // For --full mode, fetch full mirror content
    let snippet = chunk.text;
    let snippetRange: { startLine: number; endLine: number } | undefined = {
      startLine: chunk.startLine,
      endLine: chunk.endLine,
    };

    if (options.full) {
      // Get or fetch full content for this mirrorHash
      let contentResult = contentCache.get(candidate.mirrorHash);
      if (!contentResult) {
        contentResult = await store.getContent(candidate.mirrorHash);
        contentCache.set(candidate.mirrorHash, contentResult);
      }

      if (contentResult.ok && contentResult.value) {
        snippet = contentResult.value;
        snippetRange = undefined; // Full content has no range
      }
      // Fallback to chunk text if content unavailable
    }

    seenDocids.add(doc.docid);
    returnedCandidates.push(candidate);

    results.push({
      docid: doc.docid,
      score: candidate.blendedScore,
      uri: doc.uri,
      title: doc.title ?? undefined,
      snippet,
      snippetLanguage: chunk.language ?? undefined,
      snippetRange,
      source: {
        relPath: doc.relPath,
        absPath: collectionPath
          ? `${collectionPath}/${doc.relPath}`
          : undefined,
        mime: doc.sourceMime,
        ext: doc.sourceExt,
        modifiedAt: doc.sourceMtime,
        sizeBytes: doc.sourceSize,
        sourceHash: doc.sourceHash,
      },
      conversion: {
        mirrorHash: candidate.mirrorHash,
        converterId: doc.converterId ?? undefined,
        converterVersion: doc.converterVersion ?? undefined,
      },
    });
  }

  // ─────────────────────────────────────────────────────────────────────────
  // 6. Build explain data (if requested)
  // ─────────────────────────────────────────────────────────────────────────
  const explainData = options.explain
    ? {
        lines: explainLines,
        // Already capped at `limit` by the loop above
        results: buildExplainResults(returnedCandidates, docidMap),
      }
    : undefined;

  // ─────────────────────────────────────────────────────────────────────────
  // 7. Return results
  // ─────────────────────────────────────────────────────────────────────────
  return ok({
    results,
    meta: {
      query,
      mode: vectorAvailable ? 'hybrid' : 'bm25_only',
      expanded: expansion !== null,
      reranked: rerankResult.reranked,
      vectorsUsed: vectorAvailable ?? false,
      totalResults: results.length,
      collection: options.collection,
      lang: options.lang,
      queryLanguage,
      explain: explainData,
    },
  });
}
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Search pipeline public API.
3
+ *
4
+ * @module src/pipeline
5
+ */
6
+
7
+ // Expansion
8
+ export { expandQuery, generateCacheKey } from './expansion';
9
+ // Explain
10
+ export {
11
+ buildExplainResults,
12
+ explainBm25,
13
+ explainExpansion,
14
+ explainFusion,
15
+ explainRerank,
16
+ explainVector,
17
+ formatExplain,
18
+ formatResultExplain,
19
+ } from './explain';
20
+ // Fusion
21
+ export { type RankedInput, rrfFuse, toRankedInput } from './fusion';
22
+ // Hybrid search
23
+ export { type HybridSearchDeps, searchHybrid } from './hybrid';
24
+ // Rerank
25
+ export { rerankCandidates } from './rerank';
26
+ // BM25 search
27
+ export { searchBm25 } from './search';
28
+ // Types
29
+ export type {
30
+ AskMeta,
31
+ AskOptions,
32
+ AskPort,
33
+ AskResult,
34
+ BlendingTier,
35
+ Bm25SearchPort,
36
+ Citation,
37
+ ExpansionPort,
38
+ ExpansionResult,
39
+ ExplainLine,
40
+ ExplainResult,
41
+ FusionCandidate,
42
+ FusionSource,
43
+ HybridSearchOptions,
44
+ HybridSearchPort,
45
+ PipelineConfig,
46
+ RerankedCandidate,
47
+ RrfConfig,
48
+ SearchMeta,
49
+ SearchMode,
50
+ SearchOptions,
51
+ SearchResult,
52
+ SearchResultConversion,
53
+ SearchResultSource,
54
+ SearchResults,
55
+ SnippetRange,
56
+ VectorSearchPort,
57
+ } from './types';
58
+ export {
59
+ DEFAULT_BLENDING_SCHEDULE,
60
+ DEFAULT_PIPELINE_CONFIG,
61
+ DEFAULT_RRF_CONFIG,
62
+ } from './types';
63
+ // Vector search
64
+ export { searchVector, type VectorSearchDeps } from './vsearch';