@gmickel/gno 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -2
- package/package.json +4 -1
- package/src/cli/commands/ask.ts +13 -0
- package/src/cli/commands/models/use.ts +1 -0
- package/src/cli/commands/query.ts +3 -2
- package/src/cli/pager.ts +1 -1
- package/src/cli/program.ts +107 -0
- package/src/config/types.ts +2 -0
- package/src/core/links.ts +92 -20
- package/src/ingestion/sync.ts +267 -23
- package/src/ingestion/types.ts +2 -0
- package/src/ingestion/walker.ts +2 -1
- package/src/llm/nodeLlamaCpp/generation.ts +3 -1
- package/src/llm/registry.ts +1 -0
- package/src/llm/types.ts +2 -0
- package/src/mcp/tools/index.ts +34 -1
- package/src/mcp/tools/query.ts +26 -2
- package/src/mcp/tools/search.ts +10 -0
- package/src/mcp/tools/vsearch.ts +10 -0
- package/src/pipeline/answer.ts +324 -7
- package/src/pipeline/expansion.ts +282 -11
- package/src/pipeline/explain.ts +93 -5
- package/src/pipeline/hybrid.ts +273 -70
- package/src/pipeline/intent.ts +152 -0
- package/src/pipeline/query-modes.ts +125 -0
- package/src/pipeline/rerank.ts +109 -51
- package/src/pipeline/search.ts +58 -4
- package/src/pipeline/temporal.ts +257 -0
- package/src/pipeline/types.ts +67 -0
- package/src/pipeline/vsearch.ts +121 -10
- package/src/serve/public/app.tsx +1 -3
- package/src/serve/public/globals.built.css +2 -2
- package/src/serve/public/lib/retrieval-filters.ts +174 -0
- package/src/serve/public/pages/Ask.tsx +378 -109
- package/src/serve/public/pages/Browse.tsx +71 -5
- package/src/serve/public/pages/DocView.tsx +2 -21
- package/src/serve/public/pages/Search.tsx +561 -120
- package/src/serve/routes/api.ts +247 -2
- package/src/store/migrations/006-document-metadata.ts +104 -0
- package/src/store/migrations/007-document-date-fields.ts +24 -0
- package/src/store/migrations/index.ts +3 -1
- package/src/store/sqlite/adapter.ts +218 -5
- package/src/store/types.ts +46 -0
package/src/pipeline/hybrid.ts
CHANGED
|
@@ -26,14 +26,28 @@ import {
|
|
|
26
26
|
buildExplainResults,
|
|
27
27
|
type ExpansionStatus,
|
|
28
28
|
explainBm25,
|
|
29
|
+
explainCounters,
|
|
29
30
|
explainExpansion,
|
|
30
31
|
explainFusion,
|
|
32
|
+
explainQueryModes,
|
|
31
33
|
explainRerank,
|
|
34
|
+
explainTimings,
|
|
32
35
|
explainVector,
|
|
33
36
|
} from "./explain";
|
|
34
37
|
import { type RankedInput, rrfFuse, toRankedInput } from "./fusion";
|
|
38
|
+
import { selectBestChunkForSteering } from "./intent";
|
|
35
39
|
import { detectQueryLanguage } from "./query-language";
|
|
40
|
+
import {
|
|
41
|
+
buildExpansionFromQueryModes,
|
|
42
|
+
summarizeQueryModes,
|
|
43
|
+
} from "./query-modes";
|
|
36
44
|
import { rerankCandidates } from "./rerank";
|
|
45
|
+
import {
|
|
46
|
+
isWithinTemporalRange,
|
|
47
|
+
resolveRecencyTimestamp,
|
|
48
|
+
resolveTemporalRange,
|
|
49
|
+
shouldSortByRecency,
|
|
50
|
+
} from "./temporal";
|
|
37
51
|
import { DEFAULT_PIPELINE_CONFIG } from "./types";
|
|
38
52
|
|
|
39
53
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -94,6 +108,10 @@ async function checkBm25Strength(
|
|
|
94
108
|
lang?: string;
|
|
95
109
|
tagsAll?: string[];
|
|
96
110
|
tagsAny?: string[];
|
|
111
|
+
since?: string;
|
|
112
|
+
until?: string;
|
|
113
|
+
categories?: string[];
|
|
114
|
+
author?: string;
|
|
97
115
|
}
|
|
98
116
|
): Promise<boolean> {
|
|
99
117
|
const result = await store.searchFts(query, {
|
|
@@ -102,6 +120,10 @@ async function checkBm25Strength(
|
|
|
102
120
|
language: options?.lang,
|
|
103
121
|
tagsAll: options?.tagsAll,
|
|
104
122
|
tagsAny: options?.tagsAny,
|
|
123
|
+
since: options?.since,
|
|
124
|
+
until: options?.until,
|
|
125
|
+
categories: options?.categories,
|
|
126
|
+
author: options?.author,
|
|
105
127
|
});
|
|
106
128
|
|
|
107
129
|
if (!result.ok || result.value.length === 0) {
|
|
@@ -143,6 +165,10 @@ async function searchFtsChunks(
|
|
|
143
165
|
lang?: string;
|
|
144
166
|
tagsAll?: string[];
|
|
145
167
|
tagsAny?: string[];
|
|
168
|
+
since?: string;
|
|
169
|
+
until?: string;
|
|
170
|
+
categories?: string[];
|
|
171
|
+
author?: string;
|
|
146
172
|
}
|
|
147
173
|
): Promise<FtsChunksResult> {
|
|
148
174
|
const result = await store.searchFts(query, {
|
|
@@ -151,6 +177,10 @@ async function searchFtsChunks(
|
|
|
151
177
|
language: options.lang,
|
|
152
178
|
tagsAll: options.tagsAll,
|
|
153
179
|
tagsAny: options.tagsAny,
|
|
180
|
+
since: options.since,
|
|
181
|
+
until: options.until,
|
|
182
|
+
categories: options.categories,
|
|
183
|
+
author: options.author,
|
|
154
184
|
});
|
|
155
185
|
if (!result.ok) {
|
|
156
186
|
// Propagate INVALID_INPUT for FTS syntax errors
|
|
@@ -217,23 +247,54 @@ export async function searchHybrid(
|
|
|
217
247
|
query: string,
|
|
218
248
|
options: HybridSearchOptions = {}
|
|
219
249
|
): Promise<ReturnType<typeof ok<SearchResults>>> {
|
|
250
|
+
const runStartedAt = performance.now();
|
|
220
251
|
const { store, vectorIndex, embedPort, genPort, rerankPort } = deps;
|
|
221
252
|
const pipelineConfig = deps.pipelineConfig ?? DEFAULT_PIPELINE_CONFIG;
|
|
222
253
|
|
|
223
254
|
const limit = options.limit ?? 20;
|
|
255
|
+
const recencySort = shouldSortByRecency(query);
|
|
256
|
+
const temporalRange = resolveTemporalRange(
|
|
257
|
+
query,
|
|
258
|
+
options.since,
|
|
259
|
+
options.until
|
|
260
|
+
);
|
|
224
261
|
const explainLines: ExplainLine[] = [];
|
|
225
262
|
let expansion: ExpansionResult | null = null;
|
|
263
|
+
const timings = {
|
|
264
|
+
langMs: 0,
|
|
265
|
+
expansionMs: 0,
|
|
266
|
+
bm25Ms: 0,
|
|
267
|
+
vectorMs: 0,
|
|
268
|
+
fusionMs: 0,
|
|
269
|
+
rerankMs: 0,
|
|
270
|
+
assemblyMs: 0,
|
|
271
|
+
totalMs: 0,
|
|
272
|
+
};
|
|
273
|
+
const counters = {
|
|
274
|
+
expansionCacheHits: 0,
|
|
275
|
+
expansionCacheLookups: 0,
|
|
276
|
+
rerankCacheHits: 0,
|
|
277
|
+
rerankCacheLookups: 0,
|
|
278
|
+
fallbackEvents: [] as string[],
|
|
279
|
+
};
|
|
226
280
|
|
|
227
|
-
//
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
281
|
+
// Increase retrieval limits when post-retrieval filters are active.
|
|
282
|
+
const hasPostFilters = Boolean(
|
|
283
|
+
options.tagsAll?.length ||
|
|
284
|
+
options.tagsAny?.length ||
|
|
285
|
+
options.categories?.length ||
|
|
286
|
+
options.author ||
|
|
287
|
+
temporalRange.since ||
|
|
288
|
+
temporalRange.until
|
|
289
|
+
);
|
|
290
|
+
const retrievalMultiplier = hasPostFilters || recencySort ? 3 : 1;
|
|
231
291
|
|
|
232
292
|
// ─────────────────────────────────────────────────────────────────────────
|
|
233
293
|
// 0. Detect query language for PROMPT SELECTION only
|
|
234
294
|
// CRITICAL: Detection does NOT change retrieval filters - options.lang does
|
|
235
295
|
// Priority: queryLanguageHint (MCP) > lang (CLI) > detection
|
|
236
296
|
// ─────────────────────────────────────────────────────────────────────────
|
|
297
|
+
const langStartedAt = performance.now();
|
|
237
298
|
const detection = detectQueryLanguage(query);
|
|
238
299
|
// Use explicit hint > lang filter > detected language
|
|
239
300
|
const queryLanguage =
|
|
@@ -250,50 +311,80 @@ export async function searchHybrid(
|
|
|
250
311
|
langMessage = `queryLanguage=${queryLanguage} (detected${confidence})`;
|
|
251
312
|
}
|
|
252
313
|
explainLines.push({ stage: "lang", message: langMessage });
|
|
314
|
+
timings.langMs = performance.now() - langStartedAt;
|
|
253
315
|
|
|
254
316
|
// ─────────────────────────────────────────────────────────────────────────
|
|
255
317
|
// 1. Check if expansion needed
|
|
256
318
|
// ─────────────────────────────────────────────────────────────────────────
|
|
319
|
+
const expansionStartedAt = performance.now();
|
|
257
320
|
const shouldExpand = !options.noExpand && genPort !== null;
|
|
258
321
|
let expansionStatus: ExpansionStatus = "disabled";
|
|
322
|
+
let queryModeSummary: ReturnType<typeof summarizeQueryModes> | undefined =
|
|
323
|
+
undefined;
|
|
324
|
+
|
|
325
|
+
if (options.queryModes?.length) {
|
|
326
|
+
queryModeSummary = summarizeQueryModes(options.queryModes);
|
|
327
|
+
explainLines.push(explainQueryModes(queryModeSummary));
|
|
328
|
+
expansion = buildExpansionFromQueryModes(options.queryModes);
|
|
329
|
+
expansionStatus = "provided";
|
|
330
|
+
}
|
|
259
331
|
|
|
260
|
-
if (shouldExpand) {
|
|
261
|
-
const hasStrongSignal =
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
332
|
+
if (expansionStatus !== "provided" && shouldExpand) {
|
|
333
|
+
const hasStrongSignal = options.intent?.trim()
|
|
334
|
+
? false
|
|
335
|
+
: await checkBm25Strength(store, query, {
|
|
336
|
+
collection: options.collection,
|
|
337
|
+
lang: options.lang,
|
|
338
|
+
tagsAll: options.tagsAll,
|
|
339
|
+
tagsAny: options.tagsAny,
|
|
340
|
+
since: temporalRange.since,
|
|
341
|
+
until: temporalRange.until,
|
|
342
|
+
categories: options.categories,
|
|
343
|
+
author: options.author,
|
|
344
|
+
});
|
|
267
345
|
|
|
268
346
|
if (hasStrongSignal) {
|
|
269
347
|
expansionStatus = "skipped_strong";
|
|
348
|
+
counters.fallbackEvents.push("expansion_skipped_strong");
|
|
270
349
|
} else {
|
|
271
350
|
expansionStatus = "attempted";
|
|
272
351
|
const expandResult = await expandQuery(genPort, query, {
|
|
273
352
|
// Use queryLanguage for prompt selection, NOT options.lang (retrieval filter)
|
|
274
353
|
lang: queryLanguage,
|
|
275
354
|
timeout: pipelineConfig.expansionTimeout,
|
|
355
|
+
intent: options.intent,
|
|
356
|
+
contextSize: deps.config.models?.expandContextSize,
|
|
276
357
|
});
|
|
277
358
|
if (expandResult.ok) {
|
|
278
359
|
expansion = expandResult.value;
|
|
279
360
|
}
|
|
280
361
|
}
|
|
281
362
|
}
|
|
363
|
+
if (expansionStatus === "disabled") {
|
|
364
|
+
counters.fallbackEvents.push("expansion_disabled");
|
|
365
|
+
}
|
|
282
366
|
|
|
283
367
|
explainLines.push(explainExpansion(expansionStatus, expansion));
|
|
368
|
+
timings.expansionMs = performance.now() - expansionStartedAt;
|
|
284
369
|
|
|
285
370
|
// ─────────────────────────────────────────────────────────────────────────
|
|
286
371
|
// 2. Parallel retrieval using raw store/vector APIs for correct seq tracking
|
|
287
372
|
// ─────────────────────────────────────────────────────────────────────────
|
|
288
373
|
const rankedInputs: RankedInput[] = [];
|
|
289
374
|
|
|
375
|
+
const bm25StartedAt = performance.now();
|
|
376
|
+
|
|
290
377
|
// BM25: original query
|
|
291
378
|
const bm25Result = await searchFtsChunks(store, query, {
|
|
292
|
-
limit: limit * 2,
|
|
379
|
+
limit: limit * 2 * retrievalMultiplier,
|
|
293
380
|
collection: options.collection,
|
|
294
381
|
lang: options.lang,
|
|
295
382
|
tagsAll: options.tagsAll,
|
|
296
383
|
tagsAny: options.tagsAny,
|
|
384
|
+
since: temporalRange.since,
|
|
385
|
+
until: temporalRange.until,
|
|
386
|
+
categories: options.categories,
|
|
387
|
+
author: options.author,
|
|
297
388
|
});
|
|
298
389
|
|
|
299
390
|
// Propagate FTS syntax errors as INVALID_INPUT
|
|
@@ -308,21 +399,35 @@ export async function searchHybrid(
|
|
|
308
399
|
rankedInputs.push(toRankedInput("bm25", bm25Chunks));
|
|
309
400
|
}
|
|
310
401
|
|
|
311
|
-
// BM25: lexical variants (
|
|
312
|
-
if (expansion?.lexicalQueries) {
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
402
|
+
// BM25: lexical variants (optional; run in parallel and ignore failures)
|
|
403
|
+
if (expansion?.lexicalQueries?.length) {
|
|
404
|
+
const lexicalVariantResults = await Promise.allSettled(
|
|
405
|
+
expansion.lexicalQueries.map((variant) =>
|
|
406
|
+
searchFtsChunks(store, variant, {
|
|
407
|
+
limit: limit * retrievalMultiplier,
|
|
408
|
+
collection: options.collection,
|
|
409
|
+
lang: options.lang,
|
|
410
|
+
tagsAll: options.tagsAll,
|
|
411
|
+
tagsAny: options.tagsAny,
|
|
412
|
+
since: temporalRange.since,
|
|
413
|
+
until: temporalRange.until,
|
|
414
|
+
categories: options.categories,
|
|
415
|
+
author: options.author,
|
|
416
|
+
})
|
|
417
|
+
)
|
|
418
|
+
);
|
|
419
|
+
|
|
420
|
+
for (const settled of lexicalVariantResults) {
|
|
421
|
+
if (settled.status !== "fulfilled") {
|
|
422
|
+
continue;
|
|
423
|
+
}
|
|
424
|
+
const variantResult = settled.value;
|
|
321
425
|
if (variantResult.ok && variantResult.chunks.length > 0) {
|
|
322
426
|
rankedInputs.push(toRankedInput("bm25_variant", variantResult.chunks));
|
|
323
427
|
}
|
|
324
428
|
}
|
|
325
429
|
}
|
|
430
|
+
timings.bm25Ms = performance.now() - bm25StartedAt;
|
|
326
431
|
|
|
327
432
|
explainLines.push(explainBm25(bm25Count));
|
|
328
433
|
|
|
@@ -330,9 +435,14 @@ export async function searchHybrid(
|
|
|
330
435
|
let vecCount = 0;
|
|
331
436
|
const vectorAvailable =
|
|
332
437
|
(vectorIndex?.searchAvailable && embedPort !== null) ?? false;
|
|
438
|
+
if (!vectorAvailable) {
|
|
439
|
+
counters.fallbackEvents.push("vector_unavailable");
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
const vectorStartedAt = performance.now();
|
|
333
443
|
|
|
334
444
|
if (vectorAvailable && vectorIndex && embedPort) {
|
|
335
|
-
// Original query (increase limit when
|
|
445
|
+
// Original query (increase limit when post-filters are active).
|
|
336
446
|
const vecChunks = await searchVectorChunks(vectorIndex, embedPort, query, {
|
|
337
447
|
limit: limit * 2 * retrievalMultiplier,
|
|
338
448
|
});
|
|
@@ -342,41 +452,47 @@ export async function searchHybrid(
|
|
|
342
452
|
rankedInputs.push(toRankedInput("vector", vecChunks));
|
|
343
453
|
}
|
|
344
454
|
|
|
345
|
-
// Semantic variants
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
)
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
vectorIndex,
|
|
364
|
-
embedPort,
|
|
365
|
-
expansion.hyde,
|
|
366
|
-
{ limit: limit * retrievalMultiplier }
|
|
455
|
+
// Semantic variants + HyDE (optional; run in parallel and ignore failures)
|
|
456
|
+
const vectorVariantQueries = [
|
|
457
|
+
...(expansion?.vectorQueries?.map((query) => ({
|
|
458
|
+
source: "vector_variant" as const,
|
|
459
|
+
query,
|
|
460
|
+
})) ?? []),
|
|
461
|
+
...(expansion?.hyde
|
|
462
|
+
? [{ source: "hyde" as const, query: expansion.hyde }]
|
|
463
|
+
: []),
|
|
464
|
+
];
|
|
465
|
+
|
|
466
|
+
if (vectorVariantQueries.length > 0) {
|
|
467
|
+
const optionalVectorResults = await Promise.allSettled(
|
|
468
|
+
vectorVariantQueries.map((variant) =>
|
|
469
|
+
searchVectorChunks(vectorIndex, embedPort, variant.query, {
|
|
470
|
+
limit: limit * retrievalMultiplier,
|
|
471
|
+
})
|
|
472
|
+
)
|
|
367
473
|
);
|
|
368
|
-
|
|
369
|
-
|
|
474
|
+
|
|
475
|
+
for (const [index, settled] of optionalVectorResults.entries()) {
|
|
476
|
+
if (settled.status !== "fulfilled" || settled.value.length === 0) {
|
|
477
|
+
continue;
|
|
478
|
+
}
|
|
479
|
+
const variant = vectorVariantQueries[index];
|
|
480
|
+
if (variant) {
|
|
481
|
+
rankedInputs.push(toRankedInput(variant.source, settled.value));
|
|
482
|
+
}
|
|
370
483
|
}
|
|
371
484
|
}
|
|
372
485
|
}
|
|
486
|
+
timings.vectorMs = performance.now() - vectorStartedAt;
|
|
373
487
|
|
|
374
488
|
explainLines.push(explainVector(vecCount, vectorAvailable));
|
|
375
489
|
|
|
376
490
|
// ─────────────────────────────────────────────────────────────────────────
|
|
377
491
|
// 3. RRF Fusion
|
|
378
492
|
// ─────────────────────────────────────────────────────────────────────────
|
|
493
|
+
const fusionStartedAt = performance.now();
|
|
379
494
|
const fusedCandidates = rrfFuse(rankedInputs, pipelineConfig.rrf);
|
|
495
|
+
timings.fusionMs = performance.now() - fusionStartedAt;
|
|
380
496
|
explainLines.push(
|
|
381
497
|
explainFusion(pipelineConfig.rrf.k, fusedCandidates.length)
|
|
382
498
|
);
|
|
@@ -384,18 +500,28 @@ export async function searchHybrid(
|
|
|
384
500
|
// ─────────────────────────────────────────────────────────────────────────
|
|
385
501
|
// 4. Reranking
|
|
386
502
|
// ─────────────────────────────────────────────────────────────────────────
|
|
503
|
+
const rerankStartedAt = performance.now();
|
|
504
|
+
const candidateLimit =
|
|
505
|
+
options.candidateLimit ?? pipelineConfig.rerankCandidates;
|
|
387
506
|
const rerankResult = await rerankCandidates(
|
|
388
507
|
{ rerankPort: options.noRerank ? null : rerankPort, store },
|
|
389
508
|
query,
|
|
390
509
|
fusedCandidates,
|
|
391
|
-
{
|
|
510
|
+
{
|
|
511
|
+
maxCandidates: candidateLimit,
|
|
512
|
+
blendingSchedule: pipelineConfig.blendingSchedule,
|
|
513
|
+
intent: options.intent,
|
|
514
|
+
}
|
|
392
515
|
);
|
|
516
|
+
if (rerankResult.fallbackReason === "disabled") {
|
|
517
|
+
counters.fallbackEvents.push("rerank_disabled");
|
|
518
|
+
} else if (rerankResult.fallbackReason === "error") {
|
|
519
|
+
counters.fallbackEvents.push("rerank_error");
|
|
520
|
+
}
|
|
521
|
+
timings.rerankMs = performance.now() - rerankStartedAt;
|
|
393
522
|
|
|
394
523
|
explainLines.push(
|
|
395
|
-
explainRerank(
|
|
396
|
-
!options.noRerank && rerankPort !== null,
|
|
397
|
-
pipelineConfig.rerankCandidates
|
|
398
|
-
)
|
|
524
|
+
explainRerank(!options.noRerank && rerankPort !== null, candidateLimit)
|
|
399
525
|
);
|
|
400
526
|
|
|
401
527
|
// ─────────────────────────────────────────────────────────────────────────
|
|
@@ -411,21 +537,50 @@ export async function searchHybrid(
|
|
|
411
537
|
// 5. Build final results (optimized: batch lookups, no per-candidate queries)
|
|
412
538
|
// ─────────────────────────────────────────────────────────────────────────
|
|
413
539
|
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
//
|
|
540
|
+
const assemblyStartedAt = performance.now();
|
|
541
|
+
|
|
542
|
+
// Collect unique mirrorHashes needed from candidates.
|
|
417
543
|
const neededHashes = new Set(filteredCandidates.map((c) => c.mirrorHash));
|
|
418
544
|
|
|
419
|
-
// Fetch documents and collections
|
|
420
|
-
const docsResult = await store.
|
|
545
|
+
// Fetch only needed documents and collections.
|
|
546
|
+
const docsResult = await store.getDocumentsByMirrorHashes([...neededHashes], {
|
|
547
|
+
collection: options.collection,
|
|
548
|
+
activeOnly: true,
|
|
549
|
+
});
|
|
421
550
|
const collectionsResult = await store.getCollections();
|
|
422
551
|
|
|
423
552
|
if (!docsResult.ok) {
|
|
424
553
|
return err("QUERY_FAILED", docsResult.error.message);
|
|
425
554
|
}
|
|
426
555
|
|
|
427
|
-
// Build lookup maps
|
|
556
|
+
// Build lookup maps.
|
|
428
557
|
const docByMirrorHash = new Map<string, (typeof docsResult.value)[number]>();
|
|
558
|
+
const matchesMetadataFilters = (
|
|
559
|
+
doc: (typeof docsResult.value)[number]
|
|
560
|
+
): boolean => {
|
|
561
|
+
if (!isWithinTemporalRange(doc.sourceMtime, temporalRange)) {
|
|
562
|
+
return false;
|
|
563
|
+
}
|
|
564
|
+
if (
|
|
565
|
+
options.author &&
|
|
566
|
+
!doc.author?.toLowerCase().includes(options.author.toLowerCase())
|
|
567
|
+
) {
|
|
568
|
+
return false;
|
|
569
|
+
}
|
|
570
|
+
if (options.categories?.length) {
|
|
571
|
+
const allowed = new Set(options.categories.map((c) => c.toLowerCase()));
|
|
572
|
+
const contentTypeMatch = doc.contentType
|
|
573
|
+
? allowed.has(doc.contentType.toLowerCase())
|
|
574
|
+
: false;
|
|
575
|
+
const categoryMatch = (doc.categories ?? []).some((c) =>
|
|
576
|
+
allowed.has(c.toLowerCase())
|
|
577
|
+
);
|
|
578
|
+
if (!contentTypeMatch && !categoryMatch) {
|
|
579
|
+
return false;
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
return true;
|
|
583
|
+
};
|
|
429
584
|
|
|
430
585
|
// Collect doc IDs that need tag filtering
|
|
431
586
|
const needsTagFilter = options.tagsAll?.length || options.tagsAny?.length;
|
|
@@ -433,11 +588,14 @@ export async function searchHybrid(
|
|
|
433
588
|
const candidateDocs: (typeof docsResult.value)[number][] = [];
|
|
434
589
|
|
|
435
590
|
for (const doc of docsResult.value) {
|
|
436
|
-
if (doc.
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
591
|
+
if (!doc.mirrorHash) {
|
|
592
|
+
continue;
|
|
593
|
+
}
|
|
594
|
+
if (needsTagFilter) {
|
|
595
|
+
docIdsForTagCheck.push(doc.id);
|
|
596
|
+
candidateDocs.push(doc);
|
|
597
|
+
} else {
|
|
598
|
+
if (matchesMetadataFilters(doc)) {
|
|
441
599
|
docByMirrorHash.set(doc.mirrorHash, doc);
|
|
442
600
|
}
|
|
443
601
|
}
|
|
@@ -465,7 +623,7 @@ export async function searchHybrid(
|
|
|
465
623
|
if (!hasAny) continue;
|
|
466
624
|
}
|
|
467
625
|
|
|
468
|
-
if (doc.mirrorHash) {
|
|
626
|
+
if (doc.mirrorHash && matchesMetadataFilters(doc)) {
|
|
469
627
|
docByMirrorHash.set(doc.mirrorHash, doc);
|
|
470
628
|
}
|
|
471
629
|
}
|
|
@@ -494,6 +652,7 @@ export async function searchHybrid(
|
|
|
494
652
|
>();
|
|
495
653
|
|
|
496
654
|
const results: SearchResult[] = [];
|
|
655
|
+
const assemblyLimit = recencySort ? limit * 3 : limit;
|
|
497
656
|
const docidMap = new Map<string, string>();
|
|
498
657
|
// Track seen docids for --full de-duplication
|
|
499
658
|
const seenDocids = new Set<string>();
|
|
@@ -501,7 +660,7 @@ export async function searchHybrid(
|
|
|
501
660
|
// Iterate until we have enough results (don't slice early - deduping may skip candidates)
|
|
502
661
|
for (const candidate of filteredCandidates) {
|
|
503
662
|
// Stop when we have enough results
|
|
504
|
-
if (results.length >=
|
|
663
|
+
if (results.length >= assemblyLimit) {
|
|
505
664
|
break;
|
|
506
665
|
}
|
|
507
666
|
|
|
@@ -538,10 +697,23 @@ export async function searchHybrid(
|
|
|
538
697
|
const collectionPath = collectionPaths.get(doc.collection);
|
|
539
698
|
|
|
540
699
|
// For --full mode, fetch full mirror content
|
|
541
|
-
|
|
700
|
+
const snippetChunk =
|
|
701
|
+
options.full || !options.intent?.trim()
|
|
702
|
+
? chunk
|
|
703
|
+
: (selectBestChunkForSteering(
|
|
704
|
+
chunksMap.get(candidate.mirrorHash) ?? [],
|
|
705
|
+
query,
|
|
706
|
+
options.intent,
|
|
707
|
+
{
|
|
708
|
+
preferredSeq: chunk.seq,
|
|
709
|
+
intentWeight: 0.3,
|
|
710
|
+
}
|
|
711
|
+
) ?? chunk);
|
|
712
|
+
|
|
713
|
+
let snippet = snippetChunk.text;
|
|
542
714
|
let snippetRange: { startLine: number; endLine: number } | undefined = {
|
|
543
|
-
startLine:
|
|
544
|
-
endLine:
|
|
715
|
+
startLine: snippetChunk.startLine,
|
|
716
|
+
endLine: snippetChunk.endLine,
|
|
545
717
|
};
|
|
546
718
|
|
|
547
719
|
if (options.full) {
|
|
@@ -577,6 +749,7 @@ export async function searchHybrid(
|
|
|
577
749
|
mime: doc.sourceMime,
|
|
578
750
|
ext: doc.sourceExt,
|
|
579
751
|
modifiedAt: doc.sourceMtime,
|
|
752
|
+
documentDate: doc.frontmatterDate ?? undefined,
|
|
580
753
|
sizeBytes: doc.sourceSize,
|
|
581
754
|
sourceHash: doc.sourceHash,
|
|
582
755
|
},
|
|
@@ -587,6 +760,10 @@ export async function searchHybrid(
|
|
|
587
760
|
},
|
|
588
761
|
});
|
|
589
762
|
}
|
|
763
|
+
timings.assemblyMs = performance.now() - assemblyStartedAt;
|
|
764
|
+
timings.totalMs = performance.now() - runStartedAt;
|
|
765
|
+
explainLines.push(explainTimings(timings));
|
|
766
|
+
explainLines.push(explainCounters(counters));
|
|
590
767
|
|
|
591
768
|
// ─────────────────────────────────────────────────────────────────────────
|
|
592
769
|
// 6. Build explain data (if requested)
|
|
@@ -604,18 +781,44 @@ export async function searchHybrid(
|
|
|
604
781
|
// ─────────────────────────────────────────────────────────────────────────
|
|
605
782
|
// 7. Return results
|
|
606
783
|
// ─────────────────────────────────────────────────────────────────────────
|
|
784
|
+
if (recencySort) {
|
|
785
|
+
results.sort((a, b) => {
|
|
786
|
+
const aTs = resolveRecencyTimestamp(
|
|
787
|
+
a.source.documentDate,
|
|
788
|
+
a.source.modifiedAt
|
|
789
|
+
);
|
|
790
|
+
const bTs = resolveRecencyTimestamp(
|
|
791
|
+
b.source.documentDate,
|
|
792
|
+
b.source.modifiedAt
|
|
793
|
+
);
|
|
794
|
+
if (aTs !== bTs) {
|
|
795
|
+
return bTs - aTs;
|
|
796
|
+
}
|
|
797
|
+
return b.score - a.score;
|
|
798
|
+
});
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
const finalResults = results.slice(0, limit);
|
|
802
|
+
|
|
607
803
|
return ok({
|
|
608
|
-
results,
|
|
804
|
+
results: finalResults,
|
|
609
805
|
meta: {
|
|
610
806
|
query,
|
|
611
807
|
mode: vectorAvailable ? "hybrid" : "bm25_only",
|
|
612
808
|
expanded: expansion !== null,
|
|
613
809
|
reranked: rerankResult.reranked,
|
|
614
810
|
vectorsUsed: vectorAvailable,
|
|
615
|
-
totalResults:
|
|
811
|
+
totalResults: finalResults.length,
|
|
812
|
+
intent: options.intent,
|
|
616
813
|
collection: options.collection,
|
|
617
814
|
lang: options.lang,
|
|
815
|
+
since: temporalRange.since,
|
|
816
|
+
until: temporalRange.until,
|
|
817
|
+
categories: options.categories,
|
|
818
|
+
author: options.author,
|
|
819
|
+
candidateLimit,
|
|
618
820
|
queryLanguage,
|
|
821
|
+
queryModes: queryModeSummary,
|
|
619
822
|
explain: explainData,
|
|
620
823
|
},
|
|
621
824
|
});
|