@gmickel/gno 0.15.1 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -1
- package/package.json +7 -4
- package/src/cli/commands/ask.ts +9 -0
- package/src/cli/commands/query.ts +3 -2
- package/src/cli/pager.ts +1 -1
- package/src/cli/program.ts +89 -0
- package/src/core/links.ts +92 -20
- package/src/ingestion/sync.ts +267 -23
- package/src/ingestion/types.ts +2 -0
- package/src/ingestion/walker.ts +2 -1
- package/src/llm/nodeLlamaCpp/embedding.ts +53 -10
- package/src/mcp/tools/index.ts +30 -1
- package/src/mcp/tools/query.ts +22 -2
- package/src/mcp/tools/search.ts +8 -0
- package/src/mcp/tools/vsearch.ts +8 -0
- package/src/pipeline/answer.ts +324 -7
- package/src/pipeline/expansion.ts +243 -7
- package/src/pipeline/explain.ts +93 -5
- package/src/pipeline/hybrid.ts +240 -57
- package/src/pipeline/query-modes.ts +125 -0
- package/src/pipeline/rerank.ts +34 -13
- package/src/pipeline/search.ts +41 -3
- package/src/pipeline/temporal.ts +257 -0
- package/src/pipeline/types.ts +58 -0
- package/src/pipeline/vsearch.ts +107 -9
- package/src/serve/public/app.tsx +1 -3
- package/src/serve/public/globals.built.css +2 -2
- package/src/serve/public/lib/retrieval-filters.ts +167 -0
- package/src/serve/public/pages/Ask.tsx +339 -109
- package/src/serve/public/pages/Browse.tsx +71 -5
- package/src/serve/public/pages/DocView.tsx +2 -21
- package/src/serve/public/pages/Search.tsx +507 -120
- package/src/serve/routes/api.ts +202 -2
- package/src/store/migrations/006-document-metadata.ts +104 -0
- package/src/store/migrations/007-document-date-fields.ts +24 -0
- package/src/store/migrations/index.ts +3 -1
- package/src/store/sqlite/adapter.ts +218 -5
- package/src/store/types.ts +46 -0
package/src/pipeline/hybrid.ts
CHANGED
|
@@ -26,14 +26,27 @@ import {
|
|
|
26
26
|
buildExplainResults,
|
|
27
27
|
type ExpansionStatus,
|
|
28
28
|
explainBm25,
|
|
29
|
+
explainCounters,
|
|
29
30
|
explainExpansion,
|
|
30
31
|
explainFusion,
|
|
32
|
+
explainQueryModes,
|
|
31
33
|
explainRerank,
|
|
34
|
+
explainTimings,
|
|
32
35
|
explainVector,
|
|
33
36
|
} from "./explain";
|
|
34
37
|
import { type RankedInput, rrfFuse, toRankedInput } from "./fusion";
|
|
35
38
|
import { detectQueryLanguage } from "./query-language";
|
|
39
|
+
import {
|
|
40
|
+
buildExpansionFromQueryModes,
|
|
41
|
+
summarizeQueryModes,
|
|
42
|
+
} from "./query-modes";
|
|
36
43
|
import { rerankCandidates } from "./rerank";
|
|
44
|
+
import {
|
|
45
|
+
isWithinTemporalRange,
|
|
46
|
+
resolveRecencyTimestamp,
|
|
47
|
+
resolveTemporalRange,
|
|
48
|
+
shouldSortByRecency,
|
|
49
|
+
} from "./temporal";
|
|
37
50
|
import { DEFAULT_PIPELINE_CONFIG } from "./types";
|
|
38
51
|
|
|
39
52
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -94,6 +107,10 @@ async function checkBm25Strength(
|
|
|
94
107
|
lang?: string;
|
|
95
108
|
tagsAll?: string[];
|
|
96
109
|
tagsAny?: string[];
|
|
110
|
+
since?: string;
|
|
111
|
+
until?: string;
|
|
112
|
+
categories?: string[];
|
|
113
|
+
author?: string;
|
|
97
114
|
}
|
|
98
115
|
): Promise<boolean> {
|
|
99
116
|
const result = await store.searchFts(query, {
|
|
@@ -102,6 +119,10 @@ async function checkBm25Strength(
|
|
|
102
119
|
language: options?.lang,
|
|
103
120
|
tagsAll: options?.tagsAll,
|
|
104
121
|
tagsAny: options?.tagsAny,
|
|
122
|
+
since: options?.since,
|
|
123
|
+
until: options?.until,
|
|
124
|
+
categories: options?.categories,
|
|
125
|
+
author: options?.author,
|
|
105
126
|
});
|
|
106
127
|
|
|
107
128
|
if (!result.ok || result.value.length === 0) {
|
|
@@ -143,6 +164,10 @@ async function searchFtsChunks(
|
|
|
143
164
|
lang?: string;
|
|
144
165
|
tagsAll?: string[];
|
|
145
166
|
tagsAny?: string[];
|
|
167
|
+
since?: string;
|
|
168
|
+
until?: string;
|
|
169
|
+
categories?: string[];
|
|
170
|
+
author?: string;
|
|
146
171
|
}
|
|
147
172
|
): Promise<FtsChunksResult> {
|
|
148
173
|
const result = await store.searchFts(query, {
|
|
@@ -151,6 +176,10 @@ async function searchFtsChunks(
|
|
|
151
176
|
language: options.lang,
|
|
152
177
|
tagsAll: options.tagsAll,
|
|
153
178
|
tagsAny: options.tagsAny,
|
|
179
|
+
since: options.since,
|
|
180
|
+
until: options.until,
|
|
181
|
+
categories: options.categories,
|
|
182
|
+
author: options.author,
|
|
154
183
|
});
|
|
155
184
|
if (!result.ok) {
|
|
156
185
|
// Propagate INVALID_INPUT for FTS syntax errors
|
|
@@ -217,23 +246,54 @@ export async function searchHybrid(
|
|
|
217
246
|
query: string,
|
|
218
247
|
options: HybridSearchOptions = {}
|
|
219
248
|
): Promise<ReturnType<typeof ok<SearchResults>>> {
|
|
249
|
+
const runStartedAt = performance.now();
|
|
220
250
|
const { store, vectorIndex, embedPort, genPort, rerankPort } = deps;
|
|
221
251
|
const pipelineConfig = deps.pipelineConfig ?? DEFAULT_PIPELINE_CONFIG;
|
|
222
252
|
|
|
223
253
|
const limit = options.limit ?? 20;
|
|
254
|
+
const recencySort = shouldSortByRecency(query);
|
|
255
|
+
const temporalRange = resolveTemporalRange(
|
|
256
|
+
query,
|
|
257
|
+
options.since,
|
|
258
|
+
options.until
|
|
259
|
+
);
|
|
224
260
|
const explainLines: ExplainLine[] = [];
|
|
225
261
|
let expansion: ExpansionResult | null = null;
|
|
262
|
+
const timings = {
|
|
263
|
+
langMs: 0,
|
|
264
|
+
expansionMs: 0,
|
|
265
|
+
bm25Ms: 0,
|
|
266
|
+
vectorMs: 0,
|
|
267
|
+
fusionMs: 0,
|
|
268
|
+
rerankMs: 0,
|
|
269
|
+
assemblyMs: 0,
|
|
270
|
+
totalMs: 0,
|
|
271
|
+
};
|
|
272
|
+
const counters = {
|
|
273
|
+
expansionCacheHits: 0,
|
|
274
|
+
expansionCacheLookups: 0,
|
|
275
|
+
rerankCacheHits: 0,
|
|
276
|
+
rerankCacheLookups: 0,
|
|
277
|
+
fallbackEvents: [] as string[],
|
|
278
|
+
};
|
|
226
279
|
|
|
227
|
-
//
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
280
|
+
// Increase retrieval limits when post-retrieval filters are active.
|
|
281
|
+
const hasPostFilters = Boolean(
|
|
282
|
+
options.tagsAll?.length ||
|
|
283
|
+
options.tagsAny?.length ||
|
|
284
|
+
options.categories?.length ||
|
|
285
|
+
options.author ||
|
|
286
|
+
temporalRange.since ||
|
|
287
|
+
temporalRange.until
|
|
288
|
+
);
|
|
289
|
+
const retrievalMultiplier = hasPostFilters || recencySort ? 3 : 1;
|
|
231
290
|
|
|
232
291
|
// ─────────────────────────────────────────────────────────────────────────
|
|
233
292
|
// 0. Detect query language for PROMPT SELECTION only
|
|
234
293
|
// CRITICAL: Detection does NOT change retrieval filters - options.lang does
|
|
235
294
|
// Priority: queryLanguageHint (MCP) > lang (CLI) > detection
|
|
236
295
|
// ─────────────────────────────────────────────────────────────────────────
|
|
296
|
+
const langStartedAt = performance.now();
|
|
237
297
|
const detection = detectQueryLanguage(query);
|
|
238
298
|
// Use explicit hint > lang filter > detected language
|
|
239
299
|
const queryLanguage =
|
|
@@ -250,23 +310,39 @@ export async function searchHybrid(
|
|
|
250
310
|
langMessage = `queryLanguage=${queryLanguage} (detected${confidence})`;
|
|
251
311
|
}
|
|
252
312
|
explainLines.push({ stage: "lang", message: langMessage });
|
|
313
|
+
timings.langMs = performance.now() - langStartedAt;
|
|
253
314
|
|
|
254
315
|
// ─────────────────────────────────────────────────────────────────────────
|
|
255
316
|
// 1. Check if expansion needed
|
|
256
317
|
// ─────────────────────────────────────────────────────────────────────────
|
|
318
|
+
const expansionStartedAt = performance.now();
|
|
257
319
|
const shouldExpand = !options.noExpand && genPort !== null;
|
|
258
320
|
let expansionStatus: ExpansionStatus = "disabled";
|
|
321
|
+
let queryModeSummary: ReturnType<typeof summarizeQueryModes> | undefined =
|
|
322
|
+
undefined;
|
|
323
|
+
|
|
324
|
+
if (options.queryModes?.length) {
|
|
325
|
+
queryModeSummary = summarizeQueryModes(options.queryModes);
|
|
326
|
+
explainLines.push(explainQueryModes(queryModeSummary));
|
|
327
|
+
expansion = buildExpansionFromQueryModes(options.queryModes);
|
|
328
|
+
expansionStatus = "provided";
|
|
329
|
+
}
|
|
259
330
|
|
|
260
|
-
if (shouldExpand) {
|
|
331
|
+
if (expansionStatus !== "provided" && shouldExpand) {
|
|
261
332
|
const hasStrongSignal = await checkBm25Strength(store, query, {
|
|
262
333
|
collection: options.collection,
|
|
263
334
|
lang: options.lang,
|
|
264
335
|
tagsAll: options.tagsAll,
|
|
265
336
|
tagsAny: options.tagsAny,
|
|
337
|
+
since: temporalRange.since,
|
|
338
|
+
until: temporalRange.until,
|
|
339
|
+
categories: options.categories,
|
|
340
|
+
author: options.author,
|
|
266
341
|
});
|
|
267
342
|
|
|
268
343
|
if (hasStrongSignal) {
|
|
269
344
|
expansionStatus = "skipped_strong";
|
|
345
|
+
counters.fallbackEvents.push("expansion_skipped_strong");
|
|
270
346
|
} else {
|
|
271
347
|
expansionStatus = "attempted";
|
|
272
348
|
const expandResult = await expandQuery(genPort, query, {
|
|
@@ -279,21 +355,31 @@ export async function searchHybrid(
|
|
|
279
355
|
}
|
|
280
356
|
}
|
|
281
357
|
}
|
|
358
|
+
if (expansionStatus === "disabled") {
|
|
359
|
+
counters.fallbackEvents.push("expansion_disabled");
|
|
360
|
+
}
|
|
282
361
|
|
|
283
362
|
explainLines.push(explainExpansion(expansionStatus, expansion));
|
|
363
|
+
timings.expansionMs = performance.now() - expansionStartedAt;
|
|
284
364
|
|
|
285
365
|
// ─────────────────────────────────────────────────────────────────────────
|
|
286
366
|
// 2. Parallel retrieval using raw store/vector APIs for correct seq tracking
|
|
287
367
|
// ─────────────────────────────────────────────────────────────────────────
|
|
288
368
|
const rankedInputs: RankedInput[] = [];
|
|
289
369
|
|
|
370
|
+
const bm25StartedAt = performance.now();
|
|
371
|
+
|
|
290
372
|
// BM25: original query
|
|
291
373
|
const bm25Result = await searchFtsChunks(store, query, {
|
|
292
|
-
limit: limit * 2,
|
|
374
|
+
limit: limit * 2 * retrievalMultiplier,
|
|
293
375
|
collection: options.collection,
|
|
294
376
|
lang: options.lang,
|
|
295
377
|
tagsAll: options.tagsAll,
|
|
296
378
|
tagsAny: options.tagsAny,
|
|
379
|
+
since: temporalRange.since,
|
|
380
|
+
until: temporalRange.until,
|
|
381
|
+
categories: options.categories,
|
|
382
|
+
author: options.author,
|
|
297
383
|
});
|
|
298
384
|
|
|
299
385
|
// Propagate FTS syntax errors as INVALID_INPUT
|
|
@@ -308,21 +394,35 @@ export async function searchHybrid(
|
|
|
308
394
|
rankedInputs.push(toRankedInput("bm25", bm25Chunks));
|
|
309
395
|
}
|
|
310
396
|
|
|
311
|
-
// BM25: lexical variants (
|
|
312
|
-
if (expansion?.lexicalQueries) {
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
397
|
+
// BM25: lexical variants (optional; run in parallel and ignore failures)
|
|
398
|
+
if (expansion?.lexicalQueries?.length) {
|
|
399
|
+
const lexicalVariantResults = await Promise.allSettled(
|
|
400
|
+
expansion.lexicalQueries.map((variant) =>
|
|
401
|
+
searchFtsChunks(store, variant, {
|
|
402
|
+
limit: limit * retrievalMultiplier,
|
|
403
|
+
collection: options.collection,
|
|
404
|
+
lang: options.lang,
|
|
405
|
+
tagsAll: options.tagsAll,
|
|
406
|
+
tagsAny: options.tagsAny,
|
|
407
|
+
since: temporalRange.since,
|
|
408
|
+
until: temporalRange.until,
|
|
409
|
+
categories: options.categories,
|
|
410
|
+
author: options.author,
|
|
411
|
+
})
|
|
412
|
+
)
|
|
413
|
+
);
|
|
414
|
+
|
|
415
|
+
for (const settled of lexicalVariantResults) {
|
|
416
|
+
if (settled.status !== "fulfilled") {
|
|
417
|
+
continue;
|
|
418
|
+
}
|
|
419
|
+
const variantResult = settled.value;
|
|
321
420
|
if (variantResult.ok && variantResult.chunks.length > 0) {
|
|
322
421
|
rankedInputs.push(toRankedInput("bm25_variant", variantResult.chunks));
|
|
323
422
|
}
|
|
324
423
|
}
|
|
325
424
|
}
|
|
425
|
+
timings.bm25Ms = performance.now() - bm25StartedAt;
|
|
326
426
|
|
|
327
427
|
explainLines.push(explainBm25(bm25Count));
|
|
328
428
|
|
|
@@ -330,9 +430,14 @@ export async function searchHybrid(
|
|
|
330
430
|
let vecCount = 0;
|
|
331
431
|
const vectorAvailable =
|
|
332
432
|
(vectorIndex?.searchAvailable && embedPort !== null) ?? false;
|
|
433
|
+
if (!vectorAvailable) {
|
|
434
|
+
counters.fallbackEvents.push("vector_unavailable");
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
const vectorStartedAt = performance.now();
|
|
333
438
|
|
|
334
439
|
if (vectorAvailable && vectorIndex && embedPort) {
|
|
335
|
-
// Original query (increase limit when
|
|
440
|
+
// Original query (increase limit when post-filters are active).
|
|
336
441
|
const vecChunks = await searchVectorChunks(vectorIndex, embedPort, query, {
|
|
337
442
|
limit: limit * 2 * retrievalMultiplier,
|
|
338
443
|
});
|
|
@@ -342,41 +447,47 @@ export async function searchHybrid(
|
|
|
342
447
|
rankedInputs.push(toRankedInput("vector", vecChunks));
|
|
343
448
|
}
|
|
344
449
|
|
|
345
|
-
// Semantic variants
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
)
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
vectorIndex,
|
|
364
|
-
embedPort,
|
|
365
|
-
expansion.hyde,
|
|
366
|
-
{ limit: limit * retrievalMultiplier }
|
|
450
|
+
// Semantic variants + HyDE (optional; run in parallel and ignore failures)
|
|
451
|
+
const vectorVariantQueries = [
|
|
452
|
+
...(expansion?.vectorQueries?.map((query) => ({
|
|
453
|
+
source: "vector_variant" as const,
|
|
454
|
+
query,
|
|
455
|
+
})) ?? []),
|
|
456
|
+
...(expansion?.hyde
|
|
457
|
+
? [{ source: "hyde" as const, query: expansion.hyde }]
|
|
458
|
+
: []),
|
|
459
|
+
];
|
|
460
|
+
|
|
461
|
+
if (vectorVariantQueries.length > 0) {
|
|
462
|
+
const optionalVectorResults = await Promise.allSettled(
|
|
463
|
+
vectorVariantQueries.map((variant) =>
|
|
464
|
+
searchVectorChunks(vectorIndex, embedPort, variant.query, {
|
|
465
|
+
limit: limit * retrievalMultiplier,
|
|
466
|
+
})
|
|
467
|
+
)
|
|
367
468
|
);
|
|
368
|
-
|
|
369
|
-
|
|
469
|
+
|
|
470
|
+
for (const [index, settled] of optionalVectorResults.entries()) {
|
|
471
|
+
if (settled.status !== "fulfilled" || settled.value.length === 0) {
|
|
472
|
+
continue;
|
|
473
|
+
}
|
|
474
|
+
const variant = vectorVariantQueries[index];
|
|
475
|
+
if (variant) {
|
|
476
|
+
rankedInputs.push(toRankedInput(variant.source, settled.value));
|
|
477
|
+
}
|
|
370
478
|
}
|
|
371
479
|
}
|
|
372
480
|
}
|
|
481
|
+
timings.vectorMs = performance.now() - vectorStartedAt;
|
|
373
482
|
|
|
374
483
|
explainLines.push(explainVector(vecCount, vectorAvailable));
|
|
375
484
|
|
|
376
485
|
// ─────────────────────────────────────────────────────────────────────────
|
|
377
486
|
// 3. RRF Fusion
|
|
378
487
|
// ─────────────────────────────────────────────────────────────────────────
|
|
488
|
+
const fusionStartedAt = performance.now();
|
|
379
489
|
const fusedCandidates = rrfFuse(rankedInputs, pipelineConfig.rrf);
|
|
490
|
+
timings.fusionMs = performance.now() - fusionStartedAt;
|
|
380
491
|
explainLines.push(
|
|
381
492
|
explainFusion(pipelineConfig.rrf.k, fusedCandidates.length)
|
|
382
493
|
);
|
|
@@ -384,12 +495,22 @@ export async function searchHybrid(
|
|
|
384
495
|
// ─────────────────────────────────────────────────────────────────────────
|
|
385
496
|
// 4. Reranking
|
|
386
497
|
// ─────────────────────────────────────────────────────────────────────────
|
|
498
|
+
const rerankStartedAt = performance.now();
|
|
387
499
|
const rerankResult = await rerankCandidates(
|
|
388
500
|
{ rerankPort: options.noRerank ? null : rerankPort, store },
|
|
389
501
|
query,
|
|
390
502
|
fusedCandidates,
|
|
391
|
-
{
|
|
503
|
+
{
|
|
504
|
+
maxCandidates: pipelineConfig.rerankCandidates,
|
|
505
|
+
blendingSchedule: pipelineConfig.blendingSchedule,
|
|
506
|
+
}
|
|
392
507
|
);
|
|
508
|
+
if (rerankResult.fallbackReason === "disabled") {
|
|
509
|
+
counters.fallbackEvents.push("rerank_disabled");
|
|
510
|
+
} else if (rerankResult.fallbackReason === "error") {
|
|
511
|
+
counters.fallbackEvents.push("rerank_error");
|
|
512
|
+
}
|
|
513
|
+
timings.rerankMs = performance.now() - rerankStartedAt;
|
|
393
514
|
|
|
394
515
|
explainLines.push(
|
|
395
516
|
explainRerank(
|
|
@@ -411,21 +532,50 @@ export async function searchHybrid(
|
|
|
411
532
|
// 5. Build final results (optimized: batch lookups, no per-candidate queries)
|
|
412
533
|
// ─────────────────────────────────────────────────────────────────────────
|
|
413
534
|
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
//
|
|
535
|
+
const assemblyStartedAt = performance.now();
|
|
536
|
+
|
|
537
|
+
// Collect unique mirrorHashes needed from candidates.
|
|
417
538
|
const neededHashes = new Set(filteredCandidates.map((c) => c.mirrorHash));
|
|
418
539
|
|
|
419
|
-
// Fetch documents and collections
|
|
420
|
-
const docsResult = await store.
|
|
540
|
+
// Fetch only needed documents and collections.
|
|
541
|
+
const docsResult = await store.getDocumentsByMirrorHashes([...neededHashes], {
|
|
542
|
+
collection: options.collection,
|
|
543
|
+
activeOnly: true,
|
|
544
|
+
});
|
|
421
545
|
const collectionsResult = await store.getCollections();
|
|
422
546
|
|
|
423
547
|
if (!docsResult.ok) {
|
|
424
548
|
return err("QUERY_FAILED", docsResult.error.message);
|
|
425
549
|
}
|
|
426
550
|
|
|
427
|
-
// Build lookup maps
|
|
551
|
+
// Build lookup maps.
|
|
428
552
|
const docByMirrorHash = new Map<string, (typeof docsResult.value)[number]>();
|
|
553
|
+
const matchesMetadataFilters = (
|
|
554
|
+
doc: (typeof docsResult.value)[number]
|
|
555
|
+
): boolean => {
|
|
556
|
+
if (!isWithinTemporalRange(doc.sourceMtime, temporalRange)) {
|
|
557
|
+
return false;
|
|
558
|
+
}
|
|
559
|
+
if (
|
|
560
|
+
options.author &&
|
|
561
|
+
!doc.author?.toLowerCase().includes(options.author.toLowerCase())
|
|
562
|
+
) {
|
|
563
|
+
return false;
|
|
564
|
+
}
|
|
565
|
+
if (options.categories?.length) {
|
|
566
|
+
const allowed = new Set(options.categories.map((c) => c.toLowerCase()));
|
|
567
|
+
const contentTypeMatch = doc.contentType
|
|
568
|
+
? allowed.has(doc.contentType.toLowerCase())
|
|
569
|
+
: false;
|
|
570
|
+
const categoryMatch = (doc.categories ?? []).some((c) =>
|
|
571
|
+
allowed.has(c.toLowerCase())
|
|
572
|
+
);
|
|
573
|
+
if (!contentTypeMatch && !categoryMatch) {
|
|
574
|
+
return false;
|
|
575
|
+
}
|
|
576
|
+
}
|
|
577
|
+
return true;
|
|
578
|
+
};
|
|
429
579
|
|
|
430
580
|
// Collect doc IDs that need tag filtering
|
|
431
581
|
const needsTagFilter = options.tagsAll?.length || options.tagsAny?.length;
|
|
@@ -433,11 +583,14 @@ export async function searchHybrid(
|
|
|
433
583
|
const candidateDocs: (typeof docsResult.value)[number][] = [];
|
|
434
584
|
|
|
435
585
|
for (const doc of docsResult.value) {
|
|
436
|
-
if (doc.
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
586
|
+
if (!doc.mirrorHash) {
|
|
587
|
+
continue;
|
|
588
|
+
}
|
|
589
|
+
if (needsTagFilter) {
|
|
590
|
+
docIdsForTagCheck.push(doc.id);
|
|
591
|
+
candidateDocs.push(doc);
|
|
592
|
+
} else {
|
|
593
|
+
if (matchesMetadataFilters(doc)) {
|
|
441
594
|
docByMirrorHash.set(doc.mirrorHash, doc);
|
|
442
595
|
}
|
|
443
596
|
}
|
|
@@ -465,7 +618,7 @@ export async function searchHybrid(
|
|
|
465
618
|
if (!hasAny) continue;
|
|
466
619
|
}
|
|
467
620
|
|
|
468
|
-
if (doc.mirrorHash) {
|
|
621
|
+
if (doc.mirrorHash && matchesMetadataFilters(doc)) {
|
|
469
622
|
docByMirrorHash.set(doc.mirrorHash, doc);
|
|
470
623
|
}
|
|
471
624
|
}
|
|
@@ -494,6 +647,7 @@ export async function searchHybrid(
|
|
|
494
647
|
>();
|
|
495
648
|
|
|
496
649
|
const results: SearchResult[] = [];
|
|
650
|
+
const assemblyLimit = recencySort ? limit * 3 : limit;
|
|
497
651
|
const docidMap = new Map<string, string>();
|
|
498
652
|
// Track seen docids for --full de-duplication
|
|
499
653
|
const seenDocids = new Set<string>();
|
|
@@ -501,7 +655,7 @@ export async function searchHybrid(
|
|
|
501
655
|
// Iterate until we have enough results (don't slice early - deduping may skip candidates)
|
|
502
656
|
for (const candidate of filteredCandidates) {
|
|
503
657
|
// Stop when we have enough results
|
|
504
|
-
if (results.length >=
|
|
658
|
+
if (results.length >= assemblyLimit) {
|
|
505
659
|
break;
|
|
506
660
|
}
|
|
507
661
|
|
|
@@ -577,6 +731,7 @@ export async function searchHybrid(
|
|
|
577
731
|
mime: doc.sourceMime,
|
|
578
732
|
ext: doc.sourceExt,
|
|
579
733
|
modifiedAt: doc.sourceMtime,
|
|
734
|
+
documentDate: doc.frontmatterDate ?? undefined,
|
|
580
735
|
sizeBytes: doc.sourceSize,
|
|
581
736
|
sourceHash: doc.sourceHash,
|
|
582
737
|
},
|
|
@@ -587,6 +742,10 @@ export async function searchHybrid(
|
|
|
587
742
|
},
|
|
588
743
|
});
|
|
589
744
|
}
|
|
745
|
+
timings.assemblyMs = performance.now() - assemblyStartedAt;
|
|
746
|
+
timings.totalMs = performance.now() - runStartedAt;
|
|
747
|
+
explainLines.push(explainTimings(timings));
|
|
748
|
+
explainLines.push(explainCounters(counters));
|
|
590
749
|
|
|
591
750
|
// ─────────────────────────────────────────────────────────────────────────
|
|
592
751
|
// 6. Build explain data (if requested)
|
|
@@ -604,18 +763,42 @@ export async function searchHybrid(
|
|
|
604
763
|
// ─────────────────────────────────────────────────────────────────────────
|
|
605
764
|
// 7. Return results
|
|
606
765
|
// ─────────────────────────────────────────────────────────────────────────
|
|
766
|
+
if (recencySort) {
|
|
767
|
+
results.sort((a, b) => {
|
|
768
|
+
const aTs = resolveRecencyTimestamp(
|
|
769
|
+
a.source.documentDate,
|
|
770
|
+
a.source.modifiedAt
|
|
771
|
+
);
|
|
772
|
+
const bTs = resolveRecencyTimestamp(
|
|
773
|
+
b.source.documentDate,
|
|
774
|
+
b.source.modifiedAt
|
|
775
|
+
);
|
|
776
|
+
if (aTs !== bTs) {
|
|
777
|
+
return bTs - aTs;
|
|
778
|
+
}
|
|
779
|
+
return b.score - a.score;
|
|
780
|
+
});
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
const finalResults = results.slice(0, limit);
|
|
784
|
+
|
|
607
785
|
return ok({
|
|
608
|
-
results,
|
|
786
|
+
results: finalResults,
|
|
609
787
|
meta: {
|
|
610
788
|
query,
|
|
611
789
|
mode: vectorAvailable ? "hybrid" : "bm25_only",
|
|
612
790
|
expanded: expansion !== null,
|
|
613
791
|
reranked: rerankResult.reranked,
|
|
614
792
|
vectorsUsed: vectorAvailable,
|
|
615
|
-
totalResults:
|
|
793
|
+
totalResults: finalResults.length,
|
|
616
794
|
collection: options.collection,
|
|
617
795
|
lang: options.lang,
|
|
796
|
+
since: temporalRange.since,
|
|
797
|
+
until: temporalRange.until,
|
|
798
|
+
categories: options.categories,
|
|
799
|
+
author: options.author,
|
|
618
800
|
queryLanguage,
|
|
801
|
+
queryModes: queryModeSummary,
|
|
619
802
|
explain: explainData,
|
|
620
803
|
},
|
|
621
804
|
});
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured query mode parsing and normalization.
|
|
3
|
+
*
|
|
4
|
+
* @module src/pipeline/query-modes
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type {
|
|
8
|
+
ExpansionResult,
|
|
9
|
+
QueryMode,
|
|
10
|
+
QueryModeInput,
|
|
11
|
+
QueryModeSummary,
|
|
12
|
+
} from "./types";
|
|
13
|
+
|
|
14
|
+
import { err, ok, type StoreResult } from "../store/types";
|
|
15
|
+
|
|
16
|
+
/**
 * Matches the `mode:` prefix of a query mode spec and captures the mode keyword
 * plus everything that follows the first colon.
 *
 * The text is captured untrimmed and validated in code: a pattern that stacks
 * `\s*` next to `[\s\S]*` has many equivalent ways to split a run of
 * whitespace, which makes whitespace-only text backtrack super-linearly.
 */
const QUERY_MODE_ENTRY = /^\s*(term|intent|hyde)\s*:([\s\S]*)$/i;

/**
 * Parse a single CLI/API query mode spec in `mode:text` form.
 *
 * The mode keyword is matched case-insensitively and returned lower-cased.
 * Whitespace around the keyword, the colon, and the text is ignored.
 *
 * @param spec - Raw spec, e.g. `term:sqlite fts5` or `Intent: how do I sync`.
 * @returns The parsed `{ mode, text }` pair, or an `INVALID_INPUT` error when
 *   the prefix is not `term`, `intent`, or `hyde`, or when no text follows it.
 */
export function parseQueryModeSpec(spec: string): StoreResult<QueryModeInput> {
  const match = QUERY_MODE_ENTRY.exec(spec);
  if (!match) {
    return err(
      "INVALID_INPUT",
      `Invalid --query-mode value "${spec}". Expected "term:<text>", "intent:<text>", or "hyde:<text>".`
    );
  }

  const mode = match[1]?.toLowerCase() as QueryMode | undefined;
  // Trimming here (instead of inside the pattern) is what rejects
  // whitespace-only text such as `term:   `.
  const text = match[2]?.trim();
  if (!mode || !text) {
    return err(
      "INVALID_INPUT",
      `Invalid --query-mode value "${spec}". Expected non-empty text after mode prefix.`
    );
  }

  return ok({ mode, text });
}
|
|
41
|
+
|
|
42
|
+
/**
 * Parse a list of `mode:text` specs in order.
 *
 * Parsing stops at the first spec that fails to parse and that error is
 * returned unchanged. A second `hyde` entry is rejected with `INVALID_INPUT`.
 *
 * @param specs - Raw specs, one per repeated option/field.
 * @returns The parsed entries in input order, or the first error encountered.
 */
export function parseQueryModeSpecs(
  specs: string[]
): StoreResult<QueryModeInput[]> {
  const entries: QueryModeInput[] = [];
  let sawHyde = false;

  for (const rawSpec of specs) {
    const outcome = parseQueryModeSpec(rawSpec);
    if (!outcome.ok) {
      return outcome;
    }

    const input = outcome.value;
    if (input.mode === "hyde") {
      if (sawHyde) {
        return err(
          "INVALID_INPUT",
          "Only one hyde mode is allowed in structured query input."
        );
      }
      sawHyde = true;
    }

    entries.push(input);
  }

  return ok(entries);
}
|
|
70
|
+
|
|
71
|
+
/**
 * Count query modes for metadata/explain output.
 *
 * `term` and `intent` entries are counted; any other entry sets the `hyde`
 * flag.
 *
 * @param queryModes - Structured query mode entries.
 * @returns Counts of `term` and `intent` entries plus whether a `hyde` entry
 *   was seen.
 */
export function summarizeQueryModes(
  queryModes: QueryModeInput[]
): QueryModeSummary {
  let termCount = 0;
  let intentCount = 0;
  let sawHyde = false;

  for (const { mode } of queryModes) {
    switch (mode) {
      case "term":
        termCount += 1;
        break;
      case "intent":
        intentCount += 1;
        break;
      default:
        sawHyde = true;
    }
  }

  return { term: termCount, intent: intentCount, hyde: sawHyde };
}
|
|
89
|
+
|
|
90
|
+
/**
 * Convert structured query modes into the ExpansionResult shape consumed by
 * the hybrid pipeline.
 *
 * `term` texts become lexical queries and `intent` texts become vector
 * queries; each list is de-duplicated in first-seen order and capped at five
 * entries. The first remaining entry with non-empty text supplies `hyde`,
 * which is omitted from the result when none is present.
 *
 * @param queryModes - Structured query mode entries.
 * @returns The expansion built from the entries, or `null` for an empty list.
 */
export function buildExpansionFromQueryModes(
  queryModes: QueryModeInput[]
): ExpansionResult | null {
  if (queryModes.length === 0) {
    return null;
  }

  // Sets keep insertion order, so de-duplication preserves first occurrence.
  const lexical = new Set<string>();
  const semantic = new Set<string>();
  let hydeText: string | undefined;

  for (const { mode, text } of queryModes) {
    if (mode === "term") {
      lexical.add(text);
      continue;
    }
    if (mode === "intent") {
      semantic.add(text);
      continue;
    }
    if (!hydeText) {
      hydeText = text;
    }
  }

  // Preserve existing expansion constraints (max 5 lexical/vector variants).
  const expansion: ExpansionResult = {
    lexicalQueries: Array.from(lexical).slice(0, 5),
    vectorQueries: Array.from(semantic).slice(0, 5),
  };

  if (hydeText) {
    expansion.hyde = hydeText;
  }

  return expansion;
}
|