npm - @oomkapwn/enquire-mcp - Versions diffs - 3.5.13 → 3.6.0-rc.1 - Mend

@oomkapwn/enquire-mcp 3.5.13 → 3.6.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/CHANGELOG.md +106 -0
package/dist/eval.js +1 -1
package/dist/eval.js.map +1 -1
package/dist/index.js +2 -2
package/dist/index.js.map +1 -1
package/dist/tools/index.d.ts +6 -0
package/dist/tools/index.d.ts.map +1 -0
package/dist/tools/index.js +6 -0
package/dist/tools/index.js.map +1 -0
package/dist/tools/media.d.ts +182 -0
package/dist/tools/media.d.ts.map +1 -0
package/dist/tools/media.js +304 -0
package/dist/tools/media.js.map +1 -0
package/dist/tools/meta.d.ts +201 -0
package/dist/tools/meta.d.ts.map +1 -0
package/dist/tools/meta.js +752 -0
package/dist/tools/meta.js.map +1 -0
package/dist/tools/read.d.ts +251 -0
package/dist/tools/read.d.ts.map +1 -0
package/dist/tools/read.js +643 -0
package/dist/tools/read.js.map +1 -0
package/dist/tools/search.d.ts +279 -0
package/dist/tools/search.d.ts.map +1 -0
package/dist/tools/search.js +891 -0
package/dist/tools/search.js.map +1 -0
package/dist/tools/write.d.ts +145 -0
package/dist/tools/write.d.ts.map +1 -0
package/dist/tools/write.js +560 -0
package/dist/tools/write.js.map +1 -0
package/package.json +1 -1
package/dist/tools.d.ts +0 -980
package/dist/tools.d.ts.map +0 -1
package/dist/tools.js +0 -3132
package/dist/tools.js.map +0 -1

package/dist/tools/search.js ADDED Viewed

@@ -0,0 +1,891 @@
+import * as path from "node:path";
+import { findBestMatch, intersectionSize, jaccard, ngrams, stripMd } from "./meta.js";
+import { resolveTarget } from "./write.js";
+/**
+ * Case-insensitive literal-substring search over the vault's markdown notes.
+ *
+ * `args.mode` (default "all") controls matching:
+ *   - "phrase": the raw query is searched for as a single term;
+ *   - "all":    the query is split on whitespace and every term must occur;
+ *   - any other value: whitespace-split terms, at least one must occur.
+ *
+ * A note's score is the total count of non-overlapping occurrences of all
+ * terms. The snippet is cut around the earliest occurrence in the file.
+ *
+ * @param vault Vault handle; ensureExists, listMarkdown and readNote are called on it.
+ * @param args  `{ query, mode?, limit? (default 25), folder? }`.
+ * @returns `{ query, mode, scanned_notes, matches }`; matches are ordered by
+ *          descending score and truncated to `limit`.
+ * @throws Error("query must not be empty") when the query is blank.
+ */
+export async function searchText(vault, args) {
+    await vault.ensureExists();
+    const { query } = args;
+    const mode = args.mode ?? "all";
+    const maxResults = args.limit ?? 25;
+    if (query.trim() === "")
+        throw new Error("query must not be empty");
+    // Phrase mode keeps the raw query; the other modes split on whitespace.
+    const terms = mode === "phrase" ? [query] : query.trim().split(/\s+/);
+    const needles = terms.map((term) => term.toLowerCase());
+    // Number of non-empty terms a note has to contain in "all" mode.
+    const requiredTermCount = needles.filter(Boolean).length;
+    const entries = await vault.listMarkdown(args.folder);
+    // Scores one note; resolves to a hit object, or null when the note does
+    // not satisfy the mode policy.
+    const scanNote = async (entry) => {
+        const { content } = await vault.readNote(entry.absPath, entry.mtimeMs);
+        const haystack = content.toLowerCase();
+        const matchedTerms = [];
+        let occurrences = 0;
+        let earliest = -1;
+        let earliestLen = 0;
+        needles.forEach((needle, position) => {
+            if (!needle)
+                return;
+            let hitsForTerm = 0;
+            // Walk non-overlapping occurrences: resume right after each hit.
+            for (let at = haystack.indexOf(needle); at !== -1; at = haystack.indexOf(needle, at + needle.length)) {
+                hitsForTerm += 1;
+                if (earliest === -1 || at < earliest) {
+                    earliest = at;
+                    earliestLen = needle.length;
+                }
+            }
+            if (hitsForTerm === 0)
+                return;
+            occurrences += hitsForTerm;
+            // Report the term as the caller typed it, not the lowered form.
+            matchedTerms.push(terms[position] ?? needle);
+        });
+        const missingRequiredTerm = mode === "all" && matchedTerms.length !== requiredTermCount;
+        if (missingRequiredTerm || occurrences === 0)
+            return null;
+        const { snippet, line } = sliceSnippet(content, earliest, earliestLen);
+        return {
+            path: entry.relPath,
+            snippet,
+            score: occurrences,
+            line,
+            matched_terms: matchedTerms
+        };
+    };
+    // Read notes in fixed-size batches: parallel within a batch, sequential
+    // across batches, so the number of simultaneously open files stays bounded.
+    const BATCH_SIZE = 16;
+    const hits = [];
+    for (let start = 0; start < entries.length; start += BATCH_SIZE) {
+        const batch = await Promise.all(entries.slice(start, start + BATCH_SIZE).map((entry) => scanNote(entry)));
+        hits.push(...batch.filter((hit) => hit));
+    }
+    hits.sort((left, right) => right.score - left.score);
+    return {
+        query,
+        mode,
+        scanned_notes: entries.length,
+        matches: hits.slice(0, maxResults)
+    };
+}
+/**
+ * Rank the other notes in the vault by structural similarity to a target note.
+ *
+ * Four signals are combined into one weighted score:
+ *   3.0 x Jaccard of the (lower-cased) tag sets
+ *   1.5 x Jaccard of the title character 3-grams
+ *   2.0 x fraction of the target's resolved outbound links the candidate shares
+ *   2.0 x Jaccard of the sets of notes linking to each (co-backlinks)
+ *
+ * @param vault Vault handle; ensureExists, listMarkdown and readNote are called on it.
+ * @param args  Target selector forwarded to resolveTarget, plus
+ *              `limit?` (default 10) and `min_score?` (default 0.05).
+ * @returns Array of `{ path, title, score, signals, shared_tags, mtime }`,
+ *          best first, at most `limit` long. Empty when the resolved target
+ *          is not among the notes returned by listMarkdown.
+ */
+export async function findSimilar(vault, args) {
+    await vault.ensureExists();
+    const maxResults = args.limit ?? 10;
+    const scoreFloor = args.min_score ?? 0.05;
+    const target = await resolveTarget(vault, args);
+    const entries = await vault.listMarkdown();
+    const round4 = (value) => Math.round(value * 10000) / 10000;
+    // Per-note profile: tag set, title 3-grams, resolved outbound link paths.
+    const profiles = new Map();
+    for (const entry of entries) {
+        const { parsed } = await vault.readNote(entry.absPath, entry.mtimeMs);
+        const tags = new Set(parsed.tags.map((tag) => tag.toLowerCase()));
+        const title3grams = ngrams(stripMd(entry.basename).toLowerCase(), 3);
+        const outbound = new Set();
+        for (const link of parsed.wikilinks) {
+            const resolved = findBestMatch(entries, link.target, entry.relPath);
+            if (resolved)
+                outbound.add(resolved.relPath);
+        }
+        profiles.set(entry.relPath, { entry, tags, title3grams, outbound });
+    }
+    const targetProfile = profiles.get(target.relPath);
+    if (targetProfile === undefined) {
+        // resolveTarget found the note but listMarkdown did not return it
+        // (e.g. filtered out by an exclusion rule): answer with no results
+        // instead of failing.
+        return [];
+    }
+    // Invert the outbound sets once to get "who links to X?" for every note.
+    const inbound = new Map();
+    for (const [source, profile] of profiles) {
+        for (const destination of profile.outbound) {
+            const linkers = inbound.get(destination) ?? new Set();
+            linkers.add(source);
+            inbound.set(destination, linkers);
+        }
+    }
+    const targetInbound = inbound.get(target.relPath) ?? new Set();
+    const ranked = [];
+    for (const [relPath, candidate] of profiles) {
+        if (relPath === target.relPath)
+            continue;
+        const tagJ = jaccard(targetProfile.tags, candidate.tags);
+        const titleJ = jaccard(targetProfile.title3grams, candidate.title3grams);
+        const candidateInbound = inbound.get(relPath) ?? new Set();
+        // Share of the target's outbound links that the candidate also has.
+        const sharedOut = targetProfile.outbound.size === 0
+            ? 0
+            : intersectionSize(targetProfile.outbound, candidate.outbound) / targetProfile.outbound.size;
+        // Notes linking to both target and candidate, over notes linking to either.
+        const coBack = jaccard(targetInbound, candidateInbound);
+        const score = 3.0 * tagJ + 1.5 * titleJ + 2.0 * sharedOut + 2.0 * coBack;
+        if (score < scoreFloor)
+            continue;
+        const sharedTags = [...targetProfile.tags].filter((tag) => candidate.tags.has(tag)).sort();
+        ranked.push({
+            path: candidate.entry.relPath,
+            title: stripMd(candidate.entry.basename),
+            score: round4(score),
+            signals: {
+                tag_jaccard: round4(tagJ),
+                title_3gram: round4(titleJ),
+                shared_outbound: round4(sharedOut),
+                co_backlink: round4(coBack)
+            },
+            shared_tags: sharedTags,
+            mtime: new Date(candidate.entry.mtimeMs).toISOString()
+        });
+    }
+    ranked.sort((left, right) => right.score - left.score);
+    return ranked.slice(0, maxResults);
+}
+// Memo of the last TF-IDF index built for each vault object. Keyed weakly,
+// so an entry does not keep its vault alive.
+const tfidfCache = new WeakMap();
+// English function words dropped by the TF-IDF tokenizer. Grouped by rough
+// category for readability; the resulting Set holds 58 words.
+const STOP_WORDS = new Set([
+    // articles, conjunctions, prepositions, auxiliaries
+    "a an and are as at be but by for from has have if in is it its of on or that the this to was were will with",
+    // pronouns
+    "i you we they he she",
+    // negation, more auxiliaries, determiners
+    "not no do does did had been being so than then there their them these those",
+    // interrogatives
+    "what when where which who why how"
+].flatMap((group) => group.split(" ")));
+// v2.1.0: detect Chinese / Japanese / Thai / Lao / Khmer via script ranges.
+// These languages don't use spaces between words, so the Unicode-regex
+// tokenizer falls back to character-level (or huge multi-word tokens),
+// which tanks BM25 + TF-IDF precision. Intl.Segmenter (Node 16+ ICU)
+// gives word-break per language. Detection is per-document, branching the
+// tokenizer.
+//
+// Ranges, in order: Hiragana + Katakana (U+3040–U+30FF), CJK Extension A
+// (U+3400–U+4DBF), CJK Unified Ideographs (U+4E00–U+9FFF), Hangul Syllables
+// (U+AC00–U+D7AF), Thai (U+0E00–U+0E7F), Lao (U+0E80–U+0EFF), Tibetan
+// (U+0F00–U+0FFF), Khmer (U+1780–U+17FF).
+//
+// Fix: the Lao block was promised by this comment but absent from the
+// character class, so Lao-only documents never reached the Segmenter path.
+const CJK_OR_THAI_RANGES = /[぀-ヿ㐀-䶿一-鿿가-힯฀-๿\u0E80-\u0EFFༀ-࿿ក-៿]/;
+/**
+ * Split text into lower-cased index terms for the TF-IDF ranker.
+ *
+ * Two strategies:
+ *   1. If the text contains any character matched by CJK_OR_THAI_RANGES and
+ *      `Intl.Segmenter` is available, word boundaries come from the
+ *      segmenter (word granularity, default locale). Only word-like
+ *      segments are kept; single-character words are allowed.
+ *   2. Otherwise a Unicode-aware regex picks runs that start with a letter
+ *      or number (`\p{L}` / `\p{N}`) and continue with letters, numbers,
+ *      `_` or `-`. Terms shorter than 2 characters are dropped.
+ *
+ * In both strategies terms longer than 40 characters and entries of
+ * STOP_WORDS are discarded.
+ *
+ * @param text Raw text to tokenize.
+ * @returns Array of terms in document order (duplicates preserved).
+ */
+export function tokenizeForTfidf(text) {
+    const normalized = text.toLowerCase();
+    const MAX_TERM_LENGTH = 40;
+    // Shared acceptance test; only the minimum length differs per strategy.
+    const isIndexable = (term, minLength) => term.length >= minLength && term.length <= MAX_TERM_LENGTH && !STOP_WORDS.has(term);
+    const segmenterAvailable = typeof Intl !== "undefined" && typeof Intl.Segmenter !== "undefined";
+    if (CJK_OR_THAI_RANGES.test(normalized) && segmenterAvailable) {
+        const wordSegmenter = new Intl.Segmenter(undefined, { granularity: "word" });
+        return Array.from(wordSegmenter.segment(normalized))
+            .filter((piece) => piece.isWordLike)
+            .map((piece) => piece.segment)
+            .filter((term) => isIndexable(term, 1));
+    }
+    return Array.from(normalized.matchAll(/[\p{L}\p{N}][\p{L}\p{N}_-]*/gu), (hit) => hit[0])
+        .filter((term) => isIndexable(term, 2));
+}
+/**
+ * Build (or reuse) the in-memory TF-IDF index for a vault.
+ *
+ * The previous index for the same vault object is returned unchanged when
+ * the current note listing has the same length and every position carries
+ * the same `relPath` and `mtimeMs` as when that index was built.
+ *
+ * Weighting: term weight = (1 + ln(tf)) * idf, where
+ * idf = ln(1 + N / (1 + df)); each document vector is then L2-normalised.
+ *
+ * @param vault Vault handle; listMarkdown and readNote are called on it.
+ * @returns `{ docs, idf, entriesRef }` — `docs` is an array of
+ *          `{ relPath, basename, mtimeMs, weights: Map<term, number> }`,
+ *          `idf` a Map<term, number>, `entriesRef` the listing used.
+ */
+export async function buildTfidfIndex(vault) {
+    const entries = await vault.listMarkdown();
+    const previous = tfidfCache.get(vault);
+    const stillValid = (index) => index.entriesRef.length === entries.length &&
+        index.entriesRef.every((old, position) => {
+            const current = entries[position];
+            return current?.relPath === old.relPath && current?.mtimeMs === old.mtimeMs;
+        });
+    if (previous && stillValid(previous)) {
+        return previous;
+    }
+    // Pass 1: per-document term counts plus corpus-wide document frequency.
+    const documentFrequency = new Map();
+    const termCounts = [];
+    for (const entry of entries) {
+        const { parsed } = await vault.readNote(entry.absPath, entry.mtimeMs);
+        const tf = new Map();
+        for (const term of tokenizeForTfidf(parsed.body)) {
+            // First sighting of the term in this document: bump its df once.
+            if (!tf.has(term))
+                documentFrequency.set(term, (documentFrequency.get(term) ?? 0) + 1);
+            tf.set(term, (tf.get(term) ?? 0) + 1);
+        }
+        termCounts.push({ entry, tf });
+    }
+    // Smoothed IDF: ln(1 + N / (1 + df)) stays positive even for terms that
+    // occur in every document. N falls back to 1 for an empty vault.
+    const docCount = termCounts.length || 1;
+    const idf = new Map();
+    documentFrequency.forEach((df, term) => {
+        idf.set(term, Math.log(1 + docCount / (1 + df)));
+    });
+    // Pass 2: weight each term and scale every document vector to unit length.
+    const docs = termCounts.map(({ entry, tf }) => {
+        const weights = new Map();
+        let sumOfSquares = 0;
+        tf.forEach((count, term) => {
+            const weight = (1 + Math.log(count)) * (idf.get(term) ?? 0);
+            if (weight === 0)
+                return;
+            weights.set(term, weight);
+            sumOfSquares += weight * weight;
+        });
+        const magnitude = Math.sqrt(sumOfSquares);
+        if (magnitude > 0) {
+            weights.forEach((weight, term) => {
+                weights.set(term, weight / magnitude);
+            });
+        }
+        return {
+            relPath: entry.relPath,
+            basename: entry.basename,
+            mtimeMs: entry.mtimeMs,
+            weights
+        };
+    });
+    const index = { docs, idf, entriesRef: entries };
+    tfidfCache.set(vault, index);
+    return index;
+}
+/**
+ * Rank notes against a free-text query by TF-IDF cosine similarity.
+ *
+ * The query is tokenized with tokenizeForTfidf, weighted with the corpus
+ * IDF from buildTfidfIndex and L2-normalised; each document's score is the
+ * dot product with its (already normalised) weight vector.
+ *
+ * @param vault Vault handle; ensureExists, readNote and resolveInside are
+ *              called on it (plus whatever buildTfidfIndex needs).
+ * @param args  `{ query, limit? (default 10), min_score? (default 0.05), folder? }`.
+ *              With `folder`, only notes whose relPath starts with
+ *              `<folder>/` (trailing slashes ignored) or equals `folder`
+ *              are scored.
+ * @returns `{ query, total_docs, method: "tfidf-cosine", matches }` where
+ *          each match is `{ path, title, score, snippet, matched_terms, mtime }`.
+ * @throws Error("query must not be empty") when the query is blank.
+ */
+export async function semanticSearch(vault, args) {
+    await vault.ensureExists();
+    const limit = args.limit ?? 10;
+    const minScore = args.min_score ?? 0.05;
+    if (!args.query.trim())
+        throw new Error("query must not be empty");
+    const { docs, idf } = await buildTfidfIndex(vault);
+    // Vectorize query: same tokenization, IDF from the corpus, L2 normalize.
+    const qTokens = tokenizeForTfidf(args.query);
+    const qTf = new Map();
+    for (const t of qTokens)
+        qTf.set(t, (qTf.get(t) ?? 0) + 1);
+    const qWeights = new Map();
+    let qNormSq = 0;
+    for (const [t, count] of qTf) {
+        // Terms unknown to the corpus have no IDF and drop out here.
+        const w = (1 + Math.log(count)) * (idf.get(t) ?? 0);
+        if (w === 0)
+            continue;
+        qWeights.set(t, w);
+        qNormSq += w * w;
+    }
+    const qNorm = Math.sqrt(qNormSq);
+    if (qNorm > 0) {
+        for (const [t, w] of qWeights)
+            qWeights.set(t, w / qNorm);
+    }
+    // Cosine = Σ q[t]·d[t] over shared terms (both vectors are L2-normed).
+    const folderPrefix = args.folder ? `${args.folder.replace(/\/+$/, "")}/` : null;
+    const scored = [];
+    for (const doc of docs) {
+        if (folderPrefix && !doc.relPath.startsWith(folderPrefix) && doc.relPath !== args.folder)
+            continue;
+        let s = 0;
+        const matched = [];
+        for (const [t, qw] of qWeights) {
+            const dw = doc.weights.get(t);
+            if (dw !== undefined) {
+                s += qw * dw;
+                matched.push(t);
+            }
+        }
+        if (s < minScore)
+            continue;
+        scored.push({ doc, score: s, matchedTerms: matched });
+    }
+    scored.sort((a, b) => b.score - a.score);
+    const matches = [];
+    for (const { doc, score, matchedTerms } of scored.slice(0, limit)) {
+        // Most distinctive (highest-IDF) terms first: they drive both the
+        // snippet choice and the reported matched_terms.
+        matchedTerms.sort((a, b) => (idf.get(b) ?? 0) - (idf.get(a) ?? 0));
+        // v1.8.1 fix: snippet was being built from `content` (full file with
+        // frontmatter), so a matched term that lived in the YAML block could leak
+        // YAML keys/values into the response. Use `parsed.body` instead — the
+        // TF-IDF index is built from the body too, so a term that contributed
+        // to the score is expected to be found by the indexOf below.
+        const { parsed } = await vault.readNote(vault.resolveInside(doc.relPath), doc.mtimeMs);
+        const body = parsed.body;
+        // Lower-case the body once per result instead of once per matched
+        // term (it was previously recomputed on every loop iteration).
+        const lowerBody = body.toLowerCase();
+        let snippetText = "";
+        for (const t of matchedTerms) {
+            const idx = lowerBody.indexOf(t);
+            if (idx >= 0) {
+                const { snippet } = sliceSnippet(body, idx, t.length);
+                snippetText = snippet;
+                break;
+            }
+        }
+        matches.push({
+            path: doc.relPath,
+            title: stripMd(doc.basename),
+            score: Math.round(score * 10000) / 10000,
+            snippet: snippetText,
+            matched_terms: matchedTerms.slice(0, 8),
+            mtime: new Date(doc.mtimeMs).toISOString()
+        });
+    }
+    return { query: args.query, total_docs: docs.length, method: "tfidf-cosine", matches };
+}
+/**
+ * v3.1.0 — choose which text an embeddings search should embed.
+ *
+ * With HyDE-style retrieval the caller may supply a `hypothetical_answer`;
+ * when that field, after trimming, is non-empty it is embedded in place of
+ * the query. A missing, null, empty or whitespace-only value falls back to
+ * the raw `query` (returned untrimmed).
+ *
+ * Kept free of I/O and model loading so the decision can be unit-tested
+ * without the embedder.
+ *
+ * @param args `{ query, hypothetical_answer? }`.
+ * @returns `{ text, usedHyde }` — `usedHyde` is true only when the
+ *          hypothetical answer was chosen.
+ */
+export function pickEmbedTextForHyde(args) {
+    const hypothetical = args.hypothetical_answer?.trim() ?? "";
+    const usedHyde = hypothetical.length > 0;
+    return usedHyde
+        ? { text: hypothetical, usedHyde }
+        : { text: args.query, usedHyde };
+}
+/**
+ * Nearest-neighbour search over the persisted embedding index.
+ *
+ * Embeds the query (or the HyDE hypothetical answer, see
+ * pickEmbedTextForHyde), retrieves candidate chunks either through the
+ * supplied HNSW index or through `EmbedDb.search`, drops hits whose path the
+ * vault reports as excluded, and returns at most `limit` matches.
+ *
+ * @param vault     Vault handle; ensureExists, isExcluded and `root` are used.
+ * @param args      `{ query, hypothetical_answer?, limit? (default 10),
+ *                  min_score? (default 0.3), folder?, model? }`.
+ * @param embedFile Path of the embedding database file.
+ * @param hnsw      Optional `{ index, rowByLabel, ef? }`; when given, the
+ *                  HNSW index is queried instead of `db.search`.
+ * @returns `{ query, method: "embeddings-cosine", model, total_chunks,
+ *          matches }`, plus `hyde: true` when the hypothetical answer was
+ *          embedded.
+ * @throws Error when the query is blank, when `embedFile` does not exist, or
+ *         when the embedder returns no vector.
+ */
+export async function embeddingsSearch(vault, args, embedFile, hnsw) {
+    await vault.ensureExists();
+    if (!args.query.trim())
+        throw new Error("query must not be empty");
+    // v3.1.0 — pick the actual text to embed. HyDE prefers the
+    // hypothetical answer when present; otherwise fall back to the query.
+    const { text: embedText, usedHyde } = pickEmbedTextForHyde(args);
+    const limit = args.limit ?? 10;
+    const minScore = args.min_score ?? 0.3;
+    // Lazy-load embed-db + embeddings only when the tool is actually called.
+    const [{ EmbedDb }, { loadEmbedder, resolveModel }] = await Promise.all([
+        import("../embed-db.js"),
+        import("../embeddings.js")
+    ]);
+    // Verify the embed db exists before doing anything heavy. This separates
+    // "user hasn't built the index yet" from "model failed to load".
+    const fsMod = await import("node:fs");
+    if (!fsMod.existsSync(embedFile)) {
+        throw new Error(`Embedding index not found at ${embedFile}. ` +
+            `Run: enquire-mcp build-embeddings --vault ${vault.root} ` +
+            `(first-time setup also needs: enquire-mcp install-model multilingual)`);
+    }
+    const model = resolveModel(args.model);
+    const db = new EmbedDb({
+        file: embedFile,
+        vaultRoot: vault.root,
+        modelAlias: model.alias,
+        dim: model.dim
+    });
+    await db.open();
+    // Everything after open() runs inside try/finally so the database is
+    // closed on every exit path, including the early empty-index return.
+    try {
+        const total = db.totalChunks();
+        if (total === 0) {
+            // Empty index: answer immediately without loading the embedder.
+            return { query: args.query, method: "embeddings-cosine", model: model.alias, total_chunks: 0, matches: [] };
+        }
+        const embedder = await loadEmbedder(args.model);
+        const [qVec] = await embedder.embed([embedText]);
+        if (!qVec)
+            throw new Error("Embedder returned no vectors for the query");
+        // v2.0.0-beta.2 P0 fix: filter excluded paths from the embedding-index
+        // hits BEFORE returning. The persistent .embed.db is built once and may
+        // contain entries for paths now excluded by --exclude-glob / --read-paths
+        // (added between build-embeddings and serve, or between two serve runs).
+        // Pre-fix, those entries leaked through `text_preview` and `rel_path`,
+        // bypassing the privacy contract — same shape as the writeNote bug.
+        // We over-fetch by 2× to keep top-K stable when many hits get filtered.
+        const overFetch = limit * 2;
+        let rawHits;
+        if (hnsw) {
+            // v2.13.0 — HNSW path. HNSW can occasionally miss a true nearest
+            // neighbor, so we request twice the brute-force over-fetch (i.e.
+            // 4× `limit`), never fewer than 30, capped at the number of rows
+            // in the index (minimum 1); the privacy filter then pares down.
+            const k = Math.min(Math.max(overFetch * 2, 30), Math.max(hnsw.rowByLabel.size, 1));
+            // Only pass an options object when the caller supplied `ef`.
+            const result = hnsw.index.searchKnn(qVec, k, hnsw.ef !== undefined ? { ef: hnsw.ef } : undefined);
+            const { hnswResultsToHits } = await import("../hnsw.js");
+            rawHits = hnswResultsToHits(result, hnsw.rowByLabel);
+            // The folder filter and min_score floor are applied here by hand;
+            // on the brute-force path they are handed to db.search as options.
+            if (args.folder) {
+                const prefix = `${args.folder.replace(/\/+$/, "")}/`;
+                rawHits = rawHits.filter((h) => h.rel_path.startsWith(prefix));
+            }
+            rawHits = rawHits.filter((h) => h.score >= minScore);
+        }
+        else {
+            // Brute-force path. NOTE(review): db.search is presumed to apply
+            // the folder and minScore options itself — confirm in embed-db.
+            rawHits = db.search(qVec, overFetch, { folder: args.folder, minScore });
+        }
+        // Privacy filter first, then cut to the caller's limit.
+        const hits = rawHits.filter((h) => !vault.isExcluded(h.rel_path)).slice(0, limit);
+        const matches = hits.map((h) => ({
+            path: h.rel_path,
+            title: stripMd(path.basename(h.rel_path)),
+            score: Math.round(h.score * 10000) / 10000,
+            snippet: h.text_preview.slice(0, 240),
+            chunk_index: h.chunk_index,
+            line_start: h.line_start,
+            line_end: h.line_end,
+            kind: h.kind
+        }));
+        return {
+            query: args.query,
+            method: "embeddings-cosine",
+            model: model.alias,
+            total_chunks: total,
+            matches,
+            // `hyde` is only present when the hypothetical answer was embedded.
+            ...(usedHyde ? { hyde: true } : {})
+        };
+    }
+    finally {
+        db.close();
+    }
+}
+export async function searchHybrid(vault, args, ctx) {
+    await vault.ensureExists();
+    if (!args.query.trim())
+        throw new Error("query must not be empty");
+    const limit = args.limit ?? 10;
+    const minSignals = args.min_signals ?? 1;
+    const granularity = args.granularity ?? "note";
+    // Fan-out per-ranker top-K. Bigger than user's `limit` so RRF has room
+    // to surface a doc that's mid-rank in one signal but top in another.
+    const fanOutK = Math.max(50, limit * 5);
+    const [{ reciprocalRankFusion, RRF_K }, { existsSync }] = await Promise.all([import("../rrf.js"), import("node:fs")]);
+    // v2.0.0-beta.2 P1 fix: collect per-signal errors for response-side observability.
+    const signalErrors = {};
+    const signalsUsed = [];
+    // ─── BM25 (FTS5) ────────────────────────────────────────────────────────
+    // Note-level: collapse multi-chunk hits to the best rank per note.
+    let bm25Ranked = [];
+    if (ctx.ftsIndex) {
+        try {
+            // v2.0.0-beta.2 P0 fix: filter excluded paths from FTS5 hits BEFORE
+            // chunk-collapse + RRF. The .fts5.db can contain entries from when the
+            // index was built without exclusion flags (or with different flags).
+            // Pre-fix, BM25 search returned excluded chunks via the hybrid pipeline.
+            const rawFtsHits = ctx.ftsIndex.search(args.query, { limit: fanOutK, folder: args.folder });
+            const ftsHits = rawFtsHits.filter((h) => !vault.isExcluded(h.rel_path));
+            // v2.2.0: granularity branch.
+            //   "note"  → collapse multi-chunk hits per note (best-rank wins),
+            //             RRF fuses on path key.
+            //   "block" → keep each chunk distinct, RRF fuses on `path#chunk_index`.
+            if (granularity === "block") {
+                bm25Ranked = ftsHits.map((h, i) => ({
+                    id: `${h.rel_path}#${h.chunk_index}`,
+                    rank: i + 1,
+                    score: h.score,
+                    snippet: h.snippet,
+                    chunk_index: h.chunk_index,
+                    line_start: h.line_start,
+                    line_end: h.line_end,
+                    kind: h.kind
+                }));
+            }
+            else {
+                const bestPerNote = new Map();
+                ftsHits.forEach((h, i) => {
+                    const existing = bestPerNote.get(h.rel_path);
+                    if (!existing || i < existing.rank) {
+                        bestPerNote.set(h.rel_path, {
+                            score: h.score,
+                            rank: i + 1,
+                            snippet: h.snippet,
+                            chunk_index: h.chunk_index,
+                            line_start: h.line_start,
+                            line_end: h.line_end,
+                            kind: h.kind
+                        });
+                    }
+                });
+                bm25Ranked = Array.from(bestPerNote.entries()).map(([id, b]) => ({
+                    id,
+                    rank: b.rank,
+                    score: b.score,
+                    snippet: b.snippet,
+                    chunk_index: b.chunk_index,
+                    line_start: b.line_start,
+                    line_end: b.line_end,
+                    kind: b.kind
+                }));
+                // Re-sort to ensure 1-based ranks are consecutive after dedup.
+                bm25Ranked.sort((a, b) => a.rank - b.rank);
+                for (let i = 0; i < bm25Ranked.length; i++) {
+                    const hit = bm25Ranked[i];
+                    if (hit)
+                        hit.rank = i + 1;
+                }
+            }
+            if (bm25Ranked.length > 0)
+                signalsUsed.push("bm25");
+        }
+        catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
+            signalErrors.bm25 = msg;
+            process.stderr.write(`obsidian_search: BM25 ranker failed — ${msg}\n`);
+        }
+    }
+    // ─── TF-IDF ─────────────────────────────────────────────────────────────
+    // Always available (in-memory, no native deps).
+    let tfidfRanked = [];
+    try {
+        const tfidf = await semanticSearch(vault, {
+            query: args.query,
+            folder: args.folder,
+            limit: fanOutK,
+            min_score: 0.05
+        });
+        tfidfRanked = tfidf.matches.map((m, i) => ({
+            id: m.path,
+            rank: i + 1,
+            score: m.score,
+            snippet: m.snippet
+        }));
+        if (tfidfRanked.length > 0)
+            signalsUsed.push("tfidf");
+    }
+    catch (err) {
+        const msg = err instanceof Error ? err.message : String(err);
+        signalErrors.tfidf = msg;
+        process.stderr.write(`obsidian_search: TF-IDF ranker failed — ${msg}\n`);
+    }
+    // ─── ML embeddings (if .embed.db exists) ────────────────────────────────
+    let embedRanked = [];
+    if (existsSync(ctx.embedFile)) {
+        try {
+            // v2.0.0-beta.1 P1 fix: pass `min_score: 0` to fan-out the embeddings
+            // ranker uniformly with BM25 (no floor) and TF-IDF (0.05 floor). The
+            // user-facing precision filter happens AFTER fusion via `min_signals`,
+            // not before — pre-fix, embeddings used the standalone tool's 0.3
+            // default which silently shrank the embedding-side candidate pool and
+            // starved RRF of cross-signal evidence.
+            const embed = await embeddingsSearch(vault, { query: args.query, folder: args.folder, limit: fanOutK, model: args.embedding_model, min_score: 0 }, ctx.embedFile, ctx.hnsw);
+            // v2.2.0: granularity branch — same shape as BM25 above.
+            if (granularity === "block") {
+                embedRanked = embed.matches.map((m, i) => ({
+                    id: `${m.path}#${m.chunk_index ?? 0}`,
+                    rank: i + 1,
+                    score: m.score,
+                    snippet: m.snippet,
+                    chunk_index: m.chunk_index,
+                    line_start: m.line_start,
+                    line_end: m.line_end,
+                    kind: m.kind
+                }));
+            }
+            else {
+                const bestPerNote = new Map();
+                embed.matches.forEach((m, i) => {
+                    const existing = bestPerNote.get(m.path);
+                    if (!existing || i < existing.rank) {
+                        bestPerNote.set(m.path, {
+                            score: m.score,
+                            rank: i + 1,
+                            snippet: m.snippet,
+                            chunk_index: m.chunk_index,
+                            line_start: m.line_start,
+                            line_end: m.line_end,
+                            kind: m.kind
+                        });
+                    }
+                });
+                embedRanked = Array.from(bestPerNote.entries()).map(([id, b]) => ({
+                    id,
+                    rank: b.rank,
+                    score: b.score,
+                    snippet: b.snippet,
+                    chunk_index: b.chunk_index,
+                    line_start: b.line_start,
+                    line_end: b.line_end,
+                    kind: b.kind
+                }));
+                embedRanked.sort((a, b) => a.rank - b.rank);
+                for (let i = 0; i < embedRanked.length; i++) {
+                    const hit = embedRanked[i];
+                    if (hit)
+                        hit.rank = i + 1;
+                }
+            }
+            if (embedRanked.length > 0)
+                signalsUsed.push("embeddings");
+        }
+        catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
+            signalErrors.embeddings = msg;
+            process.stderr.write(`obsidian_search: embeddings ranker failed — ${msg}\n`);
+        }
+    }
+    // ─── RRF fusion ─────────────────────────────────────────────────────────
+    const fused = reciprocalRankFusion({
+        bm25: bm25Ranked.map((h) => ({ id: h.id, rank: h.rank, score: h.score })),
+        tfidf: tfidfRanked.map((h) => ({ id: h.id, rank: h.rank, score: h.score })),
+        embeddings: embedRanked.map((h) => ({ id: h.id, rank: h.rank, score: h.score }))
+    }, { topK: Math.max(limit * 4, 30) } // overshoot — graph boost may rerank
+    );
+    // ─── v2.3.0: Wikilink graph-boost ───────────────────────────────────────
+    // Re-rank top-K by counting how many *other* top-K hits link to each one.
+    // Equivalent to a 1-step personalised PageRank seeded by the fused top-K.
+    // Boost is small (α=0.005) — enough to break ties but won't override
+    // strong single-ranker signals. Requires no new index — uses already-
+    // cached parsed wikilinks per note.
+    // This is the "only enquire-mcp does this" feature: generic vector stores
+    // can't do this without an Obsidian-aware layer; Smart Connections doesn't
+    // do it either. Wikilinks ARE the differentiating Obsidian primitive.
+    const graphBoost = args.graph_boost !== false; // default ON
+    if (graphBoost && fused.length > 1) {
+        const candidatePaths = new Set();
+        for (const f of fused) {
+            candidatePaths.add(f.id.includes("#") ? (f.id.split("#")[0] ?? f.id) : f.id);
+        }
+        const outLinks = new Map();
+        for (const candidatePath of candidatePaths) {
+            try {
+                const note = await vault.readNote(vault.resolveInside(candidatePath));
+                const targets = new Set();
+                for (const wl of note.parsed.wikilinks) {
+                    if (!wl.target)
+                        continue;
+                    // Wikilinks can be by basename ("Foo") or relative path ("Sub/Foo").
+                    // Normalize both forms so the membership test catches either.
+                    targets.add(wl.target);
+                    targets.add(stripMd(wl.target));
+                }
+                outLinks.set(candidatePath, targets);
+            }
+            catch {
+                // skip unreadable notes
+            }
+        }
+        const ALPHA = 0.005;
+        for (const f of fused) {
+            const fPath = f.id.includes("#") ? (f.id.split("#")[0] ?? f.id) : f.id;
+            const fBasename = stripMd(path.basename(fPath));
+            let inDegree = 0;
+            for (const [otherPath, targets] of outLinks) {
+                if (otherPath === fPath)
+                    continue;
+                if (targets.has(fPath) || targets.has(stripMd(fPath)) || targets.has(fBasename)) {
+                    inDegree += 1;
+                }
+            }
+            if (inDegree > 0)
+                f.score += ALPHA * inDegree;
+        }
+        fused.sort((a, b) => b.score - a.score);
+    }
+    // Build snippet/chunk lookup tables for attaching the best evidence per
+    // note in the final response.
+    const bm25Map = new Map(bm25Ranked.map((h) => [h.id, h]));
+    const tfidfMap = new Map(tfidfRanked.map((h) => [h.id, h]));
+    const embedMap = new Map(embedRanked.map((h) => [h.id, h]));
+    // ─── v2.9.0: Cross-encoder reranking (post-RRF, post-graph-boost) ────────
+    // Take the top-N fused candidates, score each (query, snippet) pair with a
+    // BGE-style cross-encoder, and re-sort. Cross-encoder is far more accurate
+    // than bi-encoder cosine for relevance ranking — it sees query+document
+    // interaction directly. ~30-50ms per query overhead on M1 CPU at N=50.
+    //
+    // Failures are caught and surfaced as `signal_errors.reranker` so a model
+    // load problem doesn't poison the whole search response. The fused order
+    // (RRF + graph-boost) is preserved if reranking fails.
+    let rerankerScores = null;
+    if ((ctx.reranker || ctx.rerankerOverride) && fused.length > 0) {
+        const topN = ctx.reranker?.topN ?? 50;
+        const rerankBatch = fused.slice(0, topN);
+        try {
+            // Prefer the test-injected reranker when present; otherwise lazy-load.
+            let reranker;
+            if (ctx.rerankerOverride) {
+                reranker = ctx.rerankerOverride;
+            }
+            else {
+                const { loadReranker } = await import("../embeddings.js");
+                reranker = await loadReranker(ctx.reranker?.alias);
+            }
+            // For each candidate, find the best snippet (BM25 > embeddings > TF-IDF)
+            // and pair it with the query. Empty-snippet candidates go to the bottom
+            // by getting a -Infinity score (sort below scored candidates).
+            const passages = rerankBatch.map((f) => {
+                const bm = bm25Map.get(f.id);
+                const emb = embedMap.get(f.id);
+                const tf = tfidfMap.get(f.id);
+                const snippet = bm?.snippet ?? emb?.snippet ?? tf?.snippet ?? "";
+                // Strip FTS5 «…» highlight markers — they're cosmetic and the
+                // reranker should see clean prose. Limit to ~600 chars to stay
+                // safely under the model's 512-token budget (rough char/token ratio
+                // varies by language; 600 chars ≈ 200 tokens for English / Cyrillic
+                // per the multilingual model's tokenizer, well under 512).
+                return snippet.replace(/[«»]/g, "").slice(0, 600);
+            });
+            const scores = await reranker.score(args.query, passages);
+            rerankerScores = new Map();
+            for (let i = 0; i < rerankBatch.length; i++) {
+                const f = rerankBatch[i];
+                const s = scores[i];
+                if (f && typeof s === "number")
+                    rerankerScores.set(f.id, s);
+            }
+            // Sort the top-N by reranker score; everything below top-N keeps RRF
+            // order. We do this by re-ordering fused[0..topN] in place.
+            const reordered = [...rerankBatch].sort((a, b) => {
+                const sa = rerankerScores?.get(a.id) ?? -Infinity;
+                const sb = rerankerScores?.get(b.id) ?? -Infinity;
+                return sb - sa;
+            });
+            for (let i = 0; i < reordered.length; i++) {
+                fused[i] = reordered[i];
+            }
+        }
+        catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
+            // Add to signalErrors so it surfaces in the response. Reranker is not
+            // a "signal" per se but the existing dict is the right home.
+            signalErrors.reranker = msg;
+            process.stderr.write(`obsidian_search: reranker failed — ${msg}\n`);
+        }
+    }
+    const matches = [];
+    for (const f of fused) {
+        const numSignals = Object.keys(f.per_signal).length;
+        if (numSignals < minSignals)
+            continue;
+        // Snippet preference: BM25 > embeddings > TF-IDF (BM25 snippets bracket
+        // the matched terms with «…», highest signal-to-noise).
+        const bm = bm25Map.get(f.id);
+        const emb = embedMap.get(f.id);
+        const tf = tfidfMap.get(f.id);
+        const bestEvidence = bm ?? emb ?? tf;
+        // Build per_signal as a Partial — only include keys that actually
+        // contributed. Setting `key: undefined` keeps the key visible in
+        // Object.keys() and JSON.stringify, which leaks "this signal exists
+        // but didn't match" instead of "this signal wasn't even running".
+        const perSignal = {};
+        if (f.per_signal.bm25)
+            perSignal.bm25 = { rank: f.per_signal.bm25.rank, score: f.per_signal.bm25.score };
+        if (f.per_signal.tfidf)
+            perSignal.tfidf = { rank: f.per_signal.tfidf.rank, score: f.per_signal.tfidf.score };
+        if (f.per_signal.embeddings) {
+            perSignal.embeddings = { rank: f.per_signal.embeddings.rank, score: f.per_signal.embeddings.score };
+        }
+        // v2.2.0: when granularity is "block", f.id is "path#chunk_index" — split
+        // back into path + chunk_index for the response. When "note", f.id is
+        // just the path.
+        let pathPart = f.id;
+        let chunkFromId;
+        if (granularity === "block") {
+            const hashIdx = f.id.lastIndexOf("#");
+            if (hashIdx > 0) {
+                pathPart = f.id.slice(0, hashIdx);
+                const parsed = Number.parseInt(f.id.slice(hashIdx + 1), 10);
+                if (Number.isInteger(parsed) && parsed >= 0)
+                    chunkFromId = parsed;
+            }
+        }
+        // v2.8.0: derive content-source kind. BM25 / embeddings hits carry it
+        // explicitly; TF-IDF doesn't (it only runs over markdown). Either
+        // ranker reporting "pdf" wins; otherwise fall back to "md".
+        const kind = bm?.kind === "pdf" || emb?.kind === "pdf" ? "pdf" : "md";
+        // For PDFs, the title is best derived from the filename without
+        // `.md`-stripping (PDFs don't have that extension); use the .pdf-stripped
+        // form so titles read naturally in agent output.
+        const baseName = path.basename(pathPart);
+        const title = kind === "pdf" ? baseName.replace(/\.pdf$/i, "") : stripMd(baseName);
+        const rerankerScore = rerankerScores?.get(f.id);
+        matches.push({
+            path: pathPart,
+            title,
+            score: Math.round(f.score * 100000) / 100000,
+            snippet: bestEvidence?.snippet ?? "",
+            chunk_index: chunkFromId ?? bm?.chunk_index ?? emb?.chunk_index,
+            line_start: bm?.line_start ?? emb?.line_start,
+            line_end: bm?.line_end ?? emb?.line_end,
+            kind,
+            per_signal: perSignal,
+            ...(typeof rerankerScore === "number" && Number.isFinite(rerankerScore)
+                ? { reranker_score: Math.round(rerankerScore * 100000) / 100000 }
+                : {})
+        });
+        if (matches.length >= limit)
+            break;
+    }
+    // v2.0.0-beta.2 P1 fix: surface signal_errors only when at least one
+    // ranker actually failed. Omit the key when all signals ran cleanly so
+    // happy-path responses stay narrow.
+    const response = {
+        query: args.query,
+        method: "rrf",
+        k: RRF_K,
+        signals_used: signalsUsed,
+        total_candidates: fused.length,
+        matches
+    };
+    if (Object.keys(signalErrors).length > 0) {
+        response.signal_errors = signalErrors;
+    }
+    return response;
+}
/**
 * Build a short, single-line context snippet around a match and report the
 * 1-based line number on which the match starts.
 *
 * The window spans up to 60 UTF-16 code units on either side of the match
 * (`idx` .. `idx + qLen`). Runs of whitespace inside the window are collapsed
 * to a single space, and an ellipsis (…) is added on each side where the
 * window stops short of the corresponding end of `text`.
 *
 * A window edge that would fall between the two halves of a surrogate pair is
 * widened by one code unit, so the snippet never begins or ends with a lone
 * surrogate (e.g. half of an emoji).
 *
 * @param {string} text - Full text that was searched.
 * @param {number} idx - Offset of the match within `text`; a negative value means "no match".
 * @param {number} qLen - Length of the matched query, in UTF-16 code units.
 * @returns {{ snippet: string, line: number }} The snippet and line number, or
 *   `{ snippet: "", line: 0 }` when `idx` is negative.
 */
export function sliceSnippet(text, idx, qLen) {
    if (idx < 0) {
        return { snippet: "", line: 0 };
    }
    const CONTEXT = 60;
    // True when `pos` sits between a high and a low surrogate, i.e. slicing at
    // `pos` would cut one astral character in half.
    const splitsSurrogatePair = (pos) => {
        if (pos <= 0 || pos >= text.length) {
            return false;
        }
        const prev = text.charCodeAt(pos - 1);
        const next = text.charCodeAt(pos);
        return prev >= 0xd800 && prev <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    };
    let before = Math.max(0, idx - CONTEXT);
    let after = Math.min(text.length, idx + qLen + CONTEXT);
    // Widen (rather than shrink) so the whole character is kept in the snippet.
    if (splitsSurrogatePair(before)) {
        before -= 1;
    }
    if (splitsSurrogatePair(after)) {
        after += 1;
    }
    let snippet = text.slice(before, after).replace(/\s+/g, " ").trim();
    if (before > 0) {
        snippet = `…${snippet}`;
    }
    if (after < text.length) {
        snippet = `${snippet}…`;
    }
    // Number of newlines before the match, plus one.
    const line = text.slice(0, idx).split("\n").length;
    return { snippet, line };
}
+//# sourceMappingURL=search.js.map