npm - @oomkapwn/enquire-mcp - Versions diffs - 2.14.0 → 2.16.0 - Mend

@oomkapwn/enquire-mcp 2.14.0 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/dist/index.js CHANGED Viewed

@@ -12,7 +12,7 @@ import { chunkContent, defaultIndexFile, FtsIndex } from "./fts5.js";
 import { appendToNote, archiveNote, chatThreadAppend, chatThreadRead, contextPack, createNote, dataviewQuery, embeddingsSearch, findPath, findSimilar, frontmatterGet, frontmatterSearch, frontmatterSet, getBacklinks, getNoteNeighbors, getOpenQuestions, getOutboundLinks, getRecentEdits, getUnresolvedWikilinks, getVaultStats, lintWiki, listCanvases, listNotes, listPdfs, listTags, ocrPdf, openInUi, paperAudit, readCanvas, readNote, readPdf, renameNote, replaceInNotes, resolveWikilink, searchHybrid, searchText, semanticSearch, validateNoteProposal } from "./tools.js";
 import { Vault } from "./vault.js";
 import { VaultWatcher } from "./watcher.js";
-const VERSION = "2.14.0";
+const VERSION = "2.16.0";
 /** Default location for the persistent embedding index, alongside .fts5.db. */
 function embedDbPath(vaultRoot) {
     // Match the FTS5 location convention by stripping the .fts5.db extension
@@ -49,6 +49,8 @@ async function main() {
         .option("--reranker-top-n <n>", "v2.9.0 — how many top RRF-fused candidates to rerank (default 50). Larger N improves recall ceiling but costs more reranker compute (~30-50ms per 50 pairs on M1). Only effective with `--enable-reranker`.")
         .option("--use-hnsw", "v2.13.0 — build an in-memory HNSW vector index on serve start (or rebuild if `.embed.db` is missing). Sub-10ms top-K queries at any vault scale, vs O(n) brute-force without it. Build cost: ~5s for 8K chunks, ~25s for 50K, ~4min for 500K (one-time per serve). Recall@10 ≥ 98% vs brute-force at default params. Requires the `hnswlib-wasm` optionalDependency (~340 KB, pure WASM, no native binding).")
         .option("--hnsw-ef <n>", "v2.13.0 — HNSW search-time beam width (default 100; must be ≥ requested k). Higher = more accurate, slightly slower. Common range: 50-500. Only effective with `--use-hnsw`.")
+        .option("--late-chunk-context <chars>", "v2.15.0 — late-chunking-style context windowing on embeddings. When > 0, prepends doc title + heading breadcrumb + tails of neighboring chunks (this many chars from each side) before sending to the embedder. Typical +2-5 NDCG@10 retrieval boost at zero new dep cost. Default 0 (off; matches v2.1.0+ breadcrumb-only behavior). Only effective during `build-embeddings` or auto-rebuild.")
+        .option("--no-hnsw-persist", "v2.16.0 — disable HNSW index persistence. By default (with --use-hnsw), the index is saved to a sidecar `.hnsw.bin` + `.meta.json` next to `.embed.db` after the first build, then re-loaded on subsequent serve starts when the embed-db signature matches. Skipping persistence means a fresh rebuild every serve start (~25s for 50K chunks). Pass this flag if you can't write to the cache dir or want diagnostic-fresh builds.")
         .action(async (opts) => {
         await startServer(opts);
     });
@@ -227,20 +229,24 @@ async function main() {
         .option("--exclude-glob <pattern...>", "Exclude paths matching glob (repeatable)")
         .option("--read-paths <pattern...>", "Strict allowlist of glob patterns (repeatable)")
         .option("--include-pdfs", "v2.8.0 — also embed PDF chunks. Off by default; PDF extraction + embedding is ~10-30x slower than markdown per file.")
+        .option("--late-chunk-context <chars>", "v2.15.0 — context-windowed embedding text (doc title + breadcrumb + neighbor-chunk tails of N chars). Default 0 (off). Typical 100-200 for +2-5 NDCG@10.")
         .action(async (opts) => {
         const model = resolveModel(opts.embeddingModel);
         const vault = new Vault(opts.vault, { excludeGlobs: opts.excludeGlob, readPaths: opts.readPaths });
         await vault.ensureExists();
         const embedFile = opts.embedFile ?? embedDbPath(vault.root);
         const db = new EmbedDb({ file: embedFile, vaultRoot: vault.root, modelAlias: model.alias, dim: model.dim });
+        const lateChunkContext = opts.lateChunkContext !== undefined
+            ? Math.max(0, parsePositiveInt(opts.lateChunkContext, "--late-chunk-context"))
+            : 0;
         await db.open();
         try {
             process.stderr.write(`enquire: loading embedder ${model.alias} (${model.hfId})...\n`);
             const embedder = await loadEmbedder(opts.embeddingModel);
-            const report = await syncEmbedDb(vault, db, embedder);
-            process.stdout.write(`enquire: embed db ${embedFile} (md) — added=${report.added} updated=${report.updated} deleted=${report.deleted} unchanged=${report.unchanged} total_chunks=${report.total_chunks}\n`);
+            const report = await syncEmbedDb(vault, db, embedder, { lateChunkContext });
+            process.stdout.write(`enquire: embed db ${embedFile} (md) — added=${report.added} updated=${report.updated} deleted=${report.deleted} unchanged=${report.unchanged} total_chunks=${report.total_chunks}${lateChunkContext > 0 ? ` late-chunk-context=${lateChunkContext}` : ""}\n`);
             if (opts.includePdfs) {
-                const pdfReport = await syncPdfEmbedDb(vault, db, embedder);
+                const pdfReport = await syncPdfEmbedDb(vault, db, embedder, { lateChunkContext });
                 process.stdout.write(`enquire: embed db ${embedFile} (pdf) — added=${pdfReport.added} updated=${pdfReport.updated} deleted=${pdfReport.deleted} unchanged=${pdfReport.unchanged} total_chunks=${pdfReport.total_chunks}\n`);
             }
         }
@@ -561,27 +567,62 @@ export async function prepareServerDeps(opts) {
                 await db.open();
                 try {
                     const startMs = Date.now();
-                    const rows = db.getAllVectors();
-                    if (rows.length === 0) {
-                        process.stderr.write(`enquire: --use-hnsw passed but embed-db is empty; skipping HNSW build.\n`);
+                    // v2.16.0 — try to load from disk first if persistence is enabled.
+                    // Skip-rebuild path: ~50ms read vs ~25s build for 50K-chunk
+                    // vault when nothing changed since last serve. Staleness
+                    // detected via `EmbedDb.computeSignature()` mismatch.
+                    const persistFile = `${embedFile.replace(/\.embed\.db$/, "")}.hnsw`;
+                    const signature = db.computeSignature();
+                    const efOverride = opts.hnswEf ? parsePositiveInt(opts.hnswEf, "--hnsw-ef") : undefined;
+                    let loaded = null;
+                    if (opts.hnswPersist !== false) {
+                        const { loadHnswFromDisk } = await import("./hnsw.js");
+                        const loadResult = await loadHnswFromDisk(persistFile, signature);
+                        if (loadResult) {
+                            loaded = { index: loadResult.index, rowByLabel: loadResult.rowsByLabel };
+                            process.stderr.write(`enquire: HNSW index loaded from disk (${loadResult.index.size} vectors, dim=${loadResult.index.dim}, ${Date.now() - startMs}ms — signature matched)\n`);
+                        }
+                    }
+                    if (loaded) {
+                        hnswContext = {
+                            index: loaded.index,
+                            rowByLabel: loaded.rowByLabel,
+                            ...(efOverride !== undefined ? { ef: efOverride } : {})
+                        };
                     }
                     else {
-                        const { buildHnsw } = await import("./hnsw.js");
-                        const index = await buildHnsw(rows.map((r) => ({ label: r.label, vector: r.vector })), { dim: model.dim, maxElements: rows.length });
-                        const rowByLabel = new Map();
-                        for (const r of rows) {
-                            rowByLabel.set(r.label, {
-                                rel_path: r.rel_path,
-                                chunk_index: r.chunk_index,
-                                line_start: r.line_start,
-                                line_end: r.line_end,
-                                text_preview: r.text_preview,
-                                kind: r.kind
-                            });
+                        const rows = db.getAllVectors();
+                        if (rows.length === 0) {
+                            process.stderr.write(`enquire: --use-hnsw passed but embed-db is empty; skipping HNSW build.\n`);
+                        }
+                        else {
+                            const { buildHnsw } = await import("./hnsw.js");
+                            const index = await buildHnsw(rows.map((r) => ({ label: r.label, vector: r.vector })), { dim: model.dim, maxElements: rows.length });
+                            const rowByLabel = new Map();
+                            for (const r of rows) {
+                                rowByLabel.set(r.label, {
+                                    rel_path: r.rel_path,
+                                    chunk_index: r.chunk_index,
+                                    line_start: r.line_start,
+                                    line_end: r.line_end,
+                                    text_preview: r.text_preview,
+                                    kind: r.kind
+                                });
+                            }
+                            hnswContext = { index, rowByLabel, ...(efOverride !== undefined ? { ef: efOverride } : {}) };
+                            process.stderr.write(`enquire: HNSW index built (${rows.length} vectors, dim=${model.dim}, ${Date.now() - startMs}ms)\n`);
+                            // v2.16.0 — persist the freshly-built index for next serve start.
+                            if (opts.hnswPersist !== false) {
+                                try {
+                                    await index.saveTo(persistFile, rowByLabel, signature);
+                                    process.stderr.write(`enquire: HNSW index persisted to ${persistFile}.bin (+ .meta.json)\n`);
+                                }
+                                catch (err) {
+                                    // Non-fatal — persistence is an optimization. Log + continue.
+                                    process.stderr.write(`enquire: HNSW persist failed (continuing with in-memory index) — ${err instanceof Error ? err.message : String(err)}\n`);
+                                }
+                            }
                         }
-                        const efOverride = opts.hnswEf ? parsePositiveInt(opts.hnswEf, "--hnsw-ef") : undefined;
-                        hnswContext = { index, rowByLabel, ...(efOverride !== undefined ? { ef: efOverride } : {}) };
-                        process.stderr.write(`enquire: HNSW index built (${rows.length} vectors, dim=${model.dim}, ${Date.now() - startMs}ms)\n`);
                     }
                 }
                 finally {
@@ -775,11 +816,60 @@ export function formatReadyBanner(deps) {
     const enabledMode = enabledTools.size > 0 ? `, enabled-tools=${enabledTools.size}` : "";
     return `enquire ${VERSION} ready (${writeMode}, vault=${vault.root}${cacheMode}${ftsMode}${privacyMode}${watchMode}${disabledMode}${enabledMode})`;
 }
+/**
+ * v2.15.0 — context-prefixed embedding text builder ("late-chunking-style"
+ * context windowing). Emits, when present and joined by blank lines:
+ * `[doc: <title>]`, heading breadcrumb, previous-chunk tail, the chunk
+ * itself, next-chunk head. Only the neighbor tail/head are capped (at
+ * `contextChars` chars) for the multilingual model's 128-token budget.
+ *
+ * Why: short standalone chunks ("Use Adam β=0.9, β=0.999") embed
+ * identically across documents, losing the surrounding context that
+ * disambiguates them. Adding ~50-100 chars of neighbor text + the
+ * doc title + breadcrumb gives the bi-encoder enough signal to keep
+ * cross-document semantic separation. Per Chroma 2024 + Jina AI's late
+ * chunking blog: +2-5 NDCG@10 typical at zero new dep cost.
+ *
+ * Returns the joined text, or "" when `chunks[i]` is missing. When
+ * `contextChars` ≤ 0, returns the legacy v2.1.0 form (breadcrumb + chunk
+ * text only), bit-for-bit unchanged for users who don't opt in.
+ */
+export function buildEmbedText(chunks, i, opts) {
+    const c = chunks[i];
+    if (!c)
+        return "";
+    if (opts.contextChars <= 0) {
+        // Opt-out path: legacy v2.1.0 text — breadcrumb (if any), blank line, then the chunk text.
+        return c.breadcrumb ? `${c.breadcrumb}\n\n${c.text}` : c.text;
+    }
+    const parts = [];
+    if (opts.docTitle)
+        parts.push(`[doc: ${opts.docTitle}]`);
+    if (c.breadcrumb)
+        parts.push(c.breadcrumb);
+    // Previous chunk tail — last N chars, minus everything through the first whitespace (a possibly cut word).
+    const prev = chunks[i - 1];
+    if (prev) {
+        const tail = prev.text.slice(-opts.contextChars).replace(/^\S*\s/, "");
+        if (tail.length > 0)
+            parts.push(`… ${tail}`);
+    }
+    parts.push(c.text);
+    // Next chunk head — first N chars, minus everything from the last whitespace on (a possibly cut word).
+    const next = chunks[i + 1];
+    if (next) {
+        const head = next.text.slice(0, opts.contextChars).replace(/\s\S*$/, "");
+        if (head.length > 0)
+            parts.push(`${head} …`);
+    }
+    return parts.join("\n\n");
+}
 // v2.0 alpha — sync the persistent embedding index. Same incremental-rebuild
 // pattern as syncFtsIndex (mtime tracked in source_state); we only re-embed
 // notes whose mtime changed. Embedding is the bottleneck (~5-30ms per chunk
 // CPU on M1), so incremental updates are critical for vaults of any size.
-async function syncEmbedDb(vault, db, embedder) {
+async function syncEmbedDb(vault, db, embedder, opts = {}) {
+    const contextChars = opts.lateChunkContext ?? 0;
     const entries = await vault.listMarkdown();
     const known = new Map();
     // v2.8.0: scope to kind="md" so the markdown-sync path doesn't see (and
@@ -824,9 +914,17 @@ async function syncEmbedDb(vault, db, embedder) {
             }
             // v2.1.0: prepend heading breadcrumb to embedded text so the model sees
             // structural context. Free win at zero token cost — Chroma 2024 +
-            // NAACL 2025 show +2-5 NDCG@10 from breadcrumb prepending. The text
-            // stored in `text_preview` (for snippets) stays clean.
-            const vectors = await embedder.embed(chunks.map((c) => (c.breadcrumb ? `${c.breadcrumb}\n\n${c.text}` : c.text)));
+            // NAACL 2025 show +2-5 NDCG@10 from breadcrumb prepending.
+            // v2.15.0: when `--late-chunk-context <n>` is set, also include
+            // doc title + neighbor-chunk tails so the embedding captures
+            // cross-paragraph context. The text stored in `text_preview`
+            // (for snippets) stays clean.
+            const docTitle = note.parsed.frontmatter?.title || path.basename(e.relPath, ".md");
+            const embedTexts = chunks.map((_c, i) => buildEmbedText(chunks, i, {
+                docTitle: typeof docTitle === "string" ? docTitle : undefined,
+                contextChars
+            }));
+            const vectors = await embedder.embed(embedTexts);
             const rows = chunks.map((c, i) => {
                 const vector = vectors[i];
                 if (!vector)
@@ -961,7 +1059,8 @@ async function syncPdfFtsIndex(vault, idx) {
  * but for PDFs. Page boundaries are preserved as `[page: N]` markers
  * before chunking so embeddings carry page-citation context.
  */
-async function syncPdfEmbedDb(vault, db, embedder) {
+async function syncPdfEmbedDb(vault, db, embedder, opts = {}) {
+    const contextChars = opts.lateChunkContext ?? 0;
     const pdfEntries = await vault.listFilesByExtension(".pdf");
     const known = new Map();
     for (const s of db.getSourceStates("pdf"))
@@ -1013,7 +1112,10 @@ async function syncPdfEmbedDb(vault, db, embedder) {
             }
             // Same breadcrumb-prepending logic as syncEmbedDb (no-op for PDFs
             // since chunkContent returns no breadcrumb on non-markdown).
-            const vectors = await embedder.embed(chunks.map((c) => (c.breadcrumb ? `${c.breadcrumb}\n\n${c.text}` : c.text)));
+            // v2.15.0: late-chunking context windowing applies here too.
+            const docTitle = path.basename(e.relPath, ".pdf");
+            const embedTexts = chunks.map((_c, i) => buildEmbedText(chunks, i, { docTitle, contextChars }));
+            const vectors = await embedder.embed(embedTexts);
             const rows = chunks.map((c, i) => {
                 const vector = vectors[i];
                 if (!vector)