@oomkapwn/enquire-mcp 2.7.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +97 -0
- package/README.md +5 -3
- package/dist/embed-db.d.ts +17 -5
- package/dist/embed-db.d.ts.map +1 -1
- package/dist/embed-db.js +28 -14
- package/dist/embed-db.js.map +1 -1
- package/dist/embeddings.d.ts +34 -0
- package/dist/embeddings.d.ts.map +1 -1
- package/dist/embeddings.js +79 -0
- package/dist/embeddings.js.map +1 -1
- package/dist/fts5.d.ts +29 -2
- package/dist/fts5.d.ts.map +1 -1
- package/dist/fts5.js +51 -6
- package/dist/fts5.js.map +1 -1
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +221 -9
- package/dist/index.js.map +1 -1
- package/dist/tools.d.ts +45 -1
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +95 -9
- package/dist/tools.js.map +1 -1
- package/docs/api.md +1 -1
- package/package.json +1 -1
package/dist/tools.js
CHANGED
|
@@ -2366,7 +2366,8 @@ export async function embeddingsSearch(vault, args, embedFile) {
|
|
|
2366
2366
|
snippet: h.text_preview.slice(0, 240),
|
|
2367
2367
|
chunk_index: h.chunk_index,
|
|
2368
2368
|
line_start: h.line_start,
|
|
2369
|
-
line_end: h.line_end
|
|
2369
|
+
line_end: h.line_end,
|
|
2370
|
+
kind: h.kind
|
|
2370
2371
|
}));
|
|
2371
2372
|
return { query: args.query, method: "embeddings-cosine", model: model.alias, total_chunks: total, matches };
|
|
2372
2373
|
}
|
|
@@ -2411,7 +2412,8 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2411
2412
|
snippet: h.snippet,
|
|
2412
2413
|
chunk_index: h.chunk_index,
|
|
2413
2414
|
line_start: h.line_start,
|
|
2414
|
-
line_end: h.line_end
|
|
2415
|
+
line_end: h.line_end,
|
|
2416
|
+
kind: h.kind
|
|
2415
2417
|
}));
|
|
2416
2418
|
}
|
|
2417
2419
|
else {
|
|
@@ -2425,7 +2427,8 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2425
2427
|
snippet: h.snippet,
|
|
2426
2428
|
chunk_index: h.chunk_index,
|
|
2427
2429
|
line_start: h.line_start,
|
|
2428
|
-
line_end: h.line_end
|
|
2430
|
+
line_end: h.line_end,
|
|
2431
|
+
kind: h.kind
|
|
2429
2432
|
});
|
|
2430
2433
|
}
|
|
2431
2434
|
});
|
|
@@ -2436,7 +2439,8 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2436
2439
|
snippet: b.snippet,
|
|
2437
2440
|
chunk_index: b.chunk_index,
|
|
2438
2441
|
line_start: b.line_start,
|
|
2439
|
-
line_end: b.line_end
|
|
2442
|
+
line_end: b.line_end,
|
|
2443
|
+
kind: b.kind
|
|
2440
2444
|
}));
|
|
2441
2445
|
// Re-sort to ensure 1-based ranks are consecutive after dedup.
|
|
2442
2446
|
bm25Ranked.sort((a, b) => a.rank - b.rank);
|
|
@@ -2499,7 +2503,8 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2499
2503
|
snippet: m.snippet,
|
|
2500
2504
|
chunk_index: m.chunk_index,
|
|
2501
2505
|
line_start: m.line_start,
|
|
2502
|
-
line_end: m.line_end
|
|
2506
|
+
line_end: m.line_end,
|
|
2507
|
+
kind: m.kind
|
|
2503
2508
|
}));
|
|
2504
2509
|
}
|
|
2505
2510
|
else {
|
|
@@ -2513,7 +2518,8 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2513
2518
|
snippet: m.snippet,
|
|
2514
2519
|
chunk_index: m.chunk_index,
|
|
2515
2520
|
line_start: m.line_start,
|
|
2516
|
-
line_end: m.line_end
|
|
2521
|
+
line_end: m.line_end,
|
|
2522
|
+
kind: m.kind
|
|
2517
2523
|
});
|
|
2518
2524
|
}
|
|
2519
2525
|
});
|
|
@@ -2524,7 +2530,8 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2524
2530
|
snippet: b.snippet,
|
|
2525
2531
|
chunk_index: b.chunk_index,
|
|
2526
2532
|
line_start: b.line_start,
|
|
2527
|
-
line_end: b.line_end
|
|
2533
|
+
line_end: b.line_end,
|
|
2534
|
+
kind: b.kind
|
|
2528
2535
|
}));
|
|
2529
2536
|
embedRanked.sort((a, b) => a.rank - b.rank);
|
|
2530
2537
|
for (let i = 0; i < embedRanked.length; i++) {
|
|
@@ -2605,6 +2612,71 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2605
2612
|
const bm25Map = new Map(bm25Ranked.map((h) => [h.id, h]));
|
|
2606
2613
|
const tfidfMap = new Map(tfidfRanked.map((h) => [h.id, h]));
|
|
2607
2614
|
const embedMap = new Map(embedRanked.map((h) => [h.id, h]));
|
|
2615
|
+
// ─── v2.9.0: Cross-encoder reranking (post-RRF, post-graph-boost) ────────
|
|
2616
|
+
// Take the top-N fused candidates, score each (query, snippet) pair with a
|
|
2617
|
+
// BGE-style cross-encoder, and re-sort. Cross-encoder is far more accurate
|
|
2618
|
+
// than bi-encoder cosine for relevance ranking — it sees query+document
|
|
2619
|
+
// interaction directly. ~30-50ms per query overhead on M1 CPU at N=50.
|
|
2620
|
+
//
|
|
2621
|
+
// Failures are caught and surfaced as `signal_errors.reranker` so a model
|
|
2622
|
+
// load problem doesn't poison the whole search response. The fused order
|
|
2623
|
+
// (RRF + graph-boost) is preserved if reranking fails.
|
|
2624
|
+
let rerankerScores = null;
|
|
2625
|
+
if ((ctx.reranker || ctx.rerankerOverride) && fused.length > 0) {
|
|
2626
|
+
const topN = ctx.reranker?.topN ?? 50;
|
|
2627
|
+
const rerankBatch = fused.slice(0, topN);
|
|
2628
|
+
try {
|
|
2629
|
+
// Prefer the test-injected reranker when present; otherwise lazy-load.
|
|
2630
|
+
let reranker;
|
|
2631
|
+
if (ctx.rerankerOverride) {
|
|
2632
|
+
reranker = ctx.rerankerOverride;
|
|
2633
|
+
}
|
|
2634
|
+
else {
|
|
2635
|
+
const { loadReranker } = await import("./embeddings.js");
|
|
2636
|
+
reranker = await loadReranker(ctx.reranker?.alias);
|
|
2637
|
+
}
|
|
2638
|
+
// For each candidate, find the best snippet (BM25 > embeddings > TF-IDF)
|
|
2639
|
+
// and pair it with the query. A missing snippet becomes ""; candidates whose
|
|
2640
|
+
// score is not a number sort last (treated as -Infinity by the comparator).
|
|
2641
|
+
const passages = rerankBatch.map((f) => {
|
|
2642
|
+
const bm = bm25Map.get(f.id);
|
|
2643
|
+
const emb = embedMap.get(f.id);
|
|
2644
|
+
const tf = tfidfMap.get(f.id);
|
|
2645
|
+
const snippet = bm?.snippet ?? emb?.snippet ?? tf?.snippet ?? "";
|
|
2646
|
+
// Strip FTS5 «…» highlight markers — they're cosmetic and the
|
|
2647
|
+
// reranker should see clean prose. Limit to ~600 chars to stay
|
|
2648
|
+
// safely under the model's 512-token budget (rough char/token ratio
|
|
2649
|
+
// varies by language; 600 chars ≈ 200 tokens for English / Cyrillic
|
|
2650
|
+
// per the multilingual model's tokenizer, well under 512).
|
|
2651
|
+
return snippet.replace(/[«»]/g, "").slice(0, 600);
|
|
2652
|
+
});
|
|
2653
|
+
const scores = await reranker.score(args.query, passages);
|
|
2654
|
+
rerankerScores = new Map();
|
|
2655
|
+
for (let i = 0; i < rerankBatch.length; i++) {
|
|
2656
|
+
const f = rerankBatch[i];
|
|
2657
|
+
const s = scores[i];
|
|
2658
|
+
if (f && typeof s === "number")
|
|
2659
|
+
rerankerScores.set(f.id, s);
|
|
2660
|
+
}
|
|
2661
|
+
// Sort the top-N by reranker score; everything below top-N keeps RRF
|
|
2662
|
+
// order. We do this by re-ordering fused[0..topN] in place.
|
|
2663
|
+
const reordered = [...rerankBatch].sort((a, b) => {
|
|
2664
|
+
const sa = rerankerScores?.get(a.id) ?? -Infinity;
|
|
2665
|
+
const sb = rerankerScores?.get(b.id) ?? -Infinity;
|
|
2666
|
+
return sb - sa;
|
|
2667
|
+
});
|
|
2668
|
+
for (let i = 0; i < reordered.length; i++) {
|
|
2669
|
+
fused[i] = reordered[i];
|
|
2670
|
+
}
|
|
2671
|
+
}
|
|
2672
|
+
catch (err) {
|
|
2673
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
2674
|
+
// Add to signalErrors so it surfaces in the response. Reranker is not
|
|
2675
|
+
// a "signal" per se but the existing dict is the right home.
|
|
2676
|
+
signalErrors.reranker = msg;
|
|
2677
|
+
process.stderr.write(`obsidian_search: reranker failed — ${msg}\n`);
|
|
2678
|
+
}
|
|
2679
|
+
}
|
|
2608
2680
|
const matches = [];
|
|
2609
2681
|
for (const f of fused) {
|
|
2610
2682
|
const numSignals = Object.keys(f.per_signal).length;
|
|
@@ -2642,15 +2714,29 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2642
2714
|
chunkFromId = parsed;
|
|
2643
2715
|
}
|
|
2644
2716
|
}
|
|
2717
|
+
// v2.8.0: derive content-source kind. BM25 / embeddings hits carry it
|
|
2718
|
+
// explicitly; TF-IDF doesn't (it only runs over markdown). Either
|
|
2719
|
+
// ranker reporting "pdf" wins; otherwise fall back to "md".
|
|
2720
|
+
const kind = bm?.kind === "pdf" || emb?.kind === "pdf" ? "pdf" : "md";
|
|
2721
|
+
// For PDFs, the title is best derived from the filename without
|
|
2722
|
+
// `.md`-stripping (PDFs don't have that extension); use the .pdf-stripped
|
|
2723
|
+
// form so titles read naturally in agent output.
|
|
2724
|
+
const baseName = path.basename(pathPart);
|
|
2725
|
+
const title = kind === "pdf" ? baseName.replace(/\.pdf$/i, "") : stripMd(baseName);
|
|
2726
|
+
const rerankerScore = rerankerScores?.get(f.id);
|
|
2645
2727
|
matches.push({
|
|
2646
2728
|
path: pathPart,
|
|
2647
|
-
title
|
|
2729
|
+
title,
|
|
2648
2730
|
score: Math.round(f.score * 100000) / 100000,
|
|
2649
2731
|
snippet: bestEvidence?.snippet ?? "",
|
|
2650
2732
|
chunk_index: chunkFromId ?? bm?.chunk_index ?? emb?.chunk_index,
|
|
2651
2733
|
line_start: bm?.line_start ?? emb?.line_start,
|
|
2652
2734
|
line_end: bm?.line_end ?? emb?.line_end,
|
|
2653
|
-
|
|
2735
|
+
kind,
|
|
2736
|
+
per_signal: perSignal,
|
|
2737
|
+
...(typeof rerankerScore === "number" && Number.isFinite(rerankerScore)
|
|
2738
|
+
? { reranker_score: Math.round(rerankerScore * 100000) / 100000 }
|
|
2739
|
+
: {})
|
|
2654
2740
|
});
|
|
2655
2741
|
if (matches.length >= limit)
|
|
2656
2742
|
break;
|