@oomkapwn/enquire-mcp 2.14.0 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -12,7 +12,7 @@ import { chunkContent, defaultIndexFile, FtsIndex } from "./fts5.js";
12
12
  import { appendToNote, archiveNote, chatThreadAppend, chatThreadRead, contextPack, createNote, dataviewQuery, embeddingsSearch, findPath, findSimilar, frontmatterGet, frontmatterSearch, frontmatterSet, getBacklinks, getNoteNeighbors, getOpenQuestions, getOutboundLinks, getRecentEdits, getUnresolvedWikilinks, getVaultStats, lintWiki, listCanvases, listNotes, listPdfs, listTags, ocrPdf, openInUi, paperAudit, readCanvas, readNote, readPdf, renameNote, replaceInNotes, resolveWikilink, searchHybrid, searchText, semanticSearch, validateNoteProposal } from "./tools.js";
13
13
  import { Vault } from "./vault.js";
14
14
  import { VaultWatcher } from "./watcher.js";
15
- const VERSION = "2.14.0";
15
+ const VERSION = "2.16.0";
16
16
  /** Default location for the persistent embedding index, alongside .fts5.db. */
17
17
  function embedDbPath(vaultRoot) {
18
18
  // Match the FTS5 location convention by stripping the .fts5.db extension
@@ -49,6 +49,8 @@ async function main() {
49
49
  .option("--reranker-top-n <n>", "v2.9.0 — how many top RRF-fused candidates to rerank (default 50). Larger N improves recall ceiling but costs more reranker compute (~30-50ms per 50 pairs on M1). Only effective with `--enable-reranker`.")
50
50
  .option("--use-hnsw", "v2.13.0 — build an in-memory HNSW vector index on serve start (or rebuild if `.embed.db` is missing). Sub-10ms top-K queries at any vault scale, vs O(n) brute-force without it. Build cost: ~5s for 8K chunks, ~25s for 50K, ~4min for 500K (one-time per serve). Recall@10 ≥ 98% vs brute-force at default params. Requires the `hnswlib-wasm` optionalDependency (~340 KB, pure WASM, no native binding).")
51
51
  .option("--hnsw-ef <n>", "v2.13.0 — HNSW search-time beam width (default 100; must be ≥ requested k). Higher = more accurate, slightly slower. Common range: 50-500. Only effective with `--use-hnsw`.")
52
+ .option("--late-chunk-context <chars>", "v2.15.0 — late-chunking-style context windowing on embeddings. When > 0, prepends doc title + heading breadcrumb + tails of neighboring chunks (this many chars from each side) before sending to the embedder. Typical +2-5 NDCG@10 retrieval boost at zero new dep cost. Default 0 (off; matches v2.1.0+ breadcrumb-only behavior). Only effective during `build-embeddings` or auto-rebuild.")
53
+ .option("--no-hnsw-persist", "v2.16.0 — disable HNSW index persistence. By default (with --use-hnsw), the index is saved to a sidecar `.hnsw.bin` + `.meta.json` next to `.embed.db` after the first build, then re-loaded on subsequent serve starts when the embed-db signature matches. Skipping persistence means a fresh rebuild every serve start (~25s for 50K chunks). Pass this flag if you can't write to the cache dir or want diagnostic-fresh builds.")
52
54
  .action(async (opts) => {
53
55
  await startServer(opts);
54
56
  });
@@ -227,20 +229,24 @@ async function main() {
227
229
  .option("--exclude-glob <pattern...>", "Exclude paths matching glob (repeatable)")
228
230
  .option("--read-paths <pattern...>", "Strict allowlist of glob patterns (repeatable)")
229
231
  .option("--include-pdfs", "v2.8.0 — also embed PDF chunks. Off by default; PDF extraction + embedding is ~10-30x slower than markdown per file.")
232
+ .option("--late-chunk-context <chars>", "v2.15.0 — context-windowed embedding text (doc title + breadcrumb + neighbor-chunk tails of N chars). Default 0 (off). Typical 100-200 for +2-5 NDCG@10.")
230
233
  .action(async (opts) => {
231
234
  const model = resolveModel(opts.embeddingModel);
232
235
  const vault = new Vault(opts.vault, { excludeGlobs: opts.excludeGlob, readPaths: opts.readPaths });
233
236
  await vault.ensureExists();
234
237
  const embedFile = opts.embedFile ?? embedDbPath(vault.root);
235
238
  const db = new EmbedDb({ file: embedFile, vaultRoot: vault.root, modelAlias: model.alias, dim: model.dim });
239
+ const lateChunkContext = opts.lateChunkContext !== undefined
240
+ ? Math.max(0, parsePositiveInt(opts.lateChunkContext, "--late-chunk-context"))
241
+ : 0;
236
242
  await db.open();
237
243
  try {
238
244
  process.stderr.write(`enquire: loading embedder ${model.alias} (${model.hfId})...\n`);
239
245
  const embedder = await loadEmbedder(opts.embeddingModel);
240
- const report = await syncEmbedDb(vault, db, embedder);
241
- process.stdout.write(`enquire: embed db ${embedFile} (md) — added=${report.added} updated=${report.updated} deleted=${report.deleted} unchanged=${report.unchanged} total_chunks=${report.total_chunks}\n`);
246
+ const report = await syncEmbedDb(vault, db, embedder, { lateChunkContext });
247
+ process.stdout.write(`enquire: embed db ${embedFile} (md) — added=${report.added} updated=${report.updated} deleted=${report.deleted} unchanged=${report.unchanged} total_chunks=${report.total_chunks}${lateChunkContext > 0 ? ` late-chunk-context=${lateChunkContext}` : ""}\n`);
242
248
  if (opts.includePdfs) {
243
- const pdfReport = await syncPdfEmbedDb(vault, db, embedder);
249
+ const pdfReport = await syncPdfEmbedDb(vault, db, embedder, { lateChunkContext });
244
250
  process.stdout.write(`enquire: embed db ${embedFile} (pdf) — added=${pdfReport.added} updated=${pdfReport.updated} deleted=${pdfReport.deleted} unchanged=${pdfReport.unchanged} total_chunks=${pdfReport.total_chunks}\n`);
245
251
  }
246
252
  }
@@ -561,27 +567,62 @@ export async function prepareServerDeps(opts) {
561
567
  await db.open();
562
568
  try {
563
569
  const startMs = Date.now();
564
- const rows = db.getAllVectors();
565
- if (rows.length === 0) {
566
- process.stderr.write(`enquire: --use-hnsw passed but embed-db is empty; skipping HNSW build.\n`);
570
+ // v2.16.0 try to load from disk first if persistence is enabled.
571
+ // Skip-rebuild path: ~50ms read vs ~25s build for 50K-chunk
572
+ // vault when nothing changed since last serve. Staleness
573
+ // detected via `EmbedDb.computeSignature()` mismatch.
574
+ const persistFile = `${embedFile.replace(/\.embed\.db$/, "")}.hnsw`;
575
+ const signature = db.computeSignature();
576
+ const efOverride = opts.hnswEf ? parsePositiveInt(opts.hnswEf, "--hnsw-ef") : undefined;
577
+ let loaded = null;
578
+ if (opts.hnswPersist !== false) {
579
+ const { loadHnswFromDisk } = await import("./hnsw.js");
580
+ const loadResult = await loadHnswFromDisk(persistFile, signature);
581
+ if (loadResult) {
582
+ loaded = { index: loadResult.index, rowByLabel: loadResult.rowsByLabel };
583
+ process.stderr.write(`enquire: HNSW index loaded from disk (${loadResult.index.size} vectors, dim=${loadResult.index.dim}, ${Date.now() - startMs}ms — signature matched)\n`);
584
+ }
585
+ }
586
+ if (loaded) {
587
+ hnswContext = {
588
+ index: loaded.index,
589
+ rowByLabel: loaded.rowByLabel,
590
+ ...(efOverride !== undefined ? { ef: efOverride } : {})
591
+ };
567
592
  }
568
593
  else {
569
- const { buildHnsw } = await import("./hnsw.js");
570
- const index = await buildHnsw(rows.map((r) => ({ label: r.label, vector: r.vector })), { dim: model.dim, maxElements: rows.length });
571
- const rowByLabel = new Map();
572
- for (const r of rows) {
573
- rowByLabel.set(r.label, {
574
- rel_path: r.rel_path,
575
- chunk_index: r.chunk_index,
576
- line_start: r.line_start,
577
- line_end: r.line_end,
578
- text_preview: r.text_preview,
579
- kind: r.kind
580
- });
594
+ const rows = db.getAllVectors();
595
+ if (rows.length === 0) {
596
+ process.stderr.write(`enquire: --use-hnsw passed but embed-db is empty; skipping HNSW build.\n`);
597
+ }
598
+ else {
599
+ const { buildHnsw } = await import("./hnsw.js");
600
+ const index = await buildHnsw(rows.map((r) => ({ label: r.label, vector: r.vector })), { dim: model.dim, maxElements: rows.length });
601
+ const rowByLabel = new Map();
602
+ for (const r of rows) {
603
+ rowByLabel.set(r.label, {
604
+ rel_path: r.rel_path,
605
+ chunk_index: r.chunk_index,
606
+ line_start: r.line_start,
607
+ line_end: r.line_end,
608
+ text_preview: r.text_preview,
609
+ kind: r.kind
610
+ });
611
+ }
612
+ hnswContext = { index, rowByLabel, ...(efOverride !== undefined ? { ef: efOverride } : {}) };
613
+ process.stderr.write(`enquire: HNSW index built (${rows.length} vectors, dim=${model.dim}, ${Date.now() - startMs}ms)\n`);
614
+ // v2.16.0 — persist the freshly-built index for next serve start.
615
+ if (opts.hnswPersist !== false) {
616
+ try {
617
+ await index.saveTo(persistFile, rowByLabel, signature);
618
+ process.stderr.write(`enquire: HNSW index persisted to ${persistFile}.bin (+ .meta.json)\n`);
619
+ }
620
+ catch (err) {
621
+ // Non-fatal — persistence is an optimization. Log + continue.
622
+ process.stderr.write(`enquire: HNSW persist failed (continuing with in-memory index) — ${err instanceof Error ? err.message : String(err)}\n`);
623
+ }
624
+ }
581
625
  }
582
- const efOverride = opts.hnswEf ? parsePositiveInt(opts.hnswEf, "--hnsw-ef") : undefined;
583
- hnswContext = { index, rowByLabel, ...(efOverride !== undefined ? { ef: efOverride } : {}) };
584
- process.stderr.write(`enquire: HNSW index built (${rows.length} vectors, dim=${model.dim}, ${Date.now() - startMs}ms)\n`);
585
626
  }
586
627
  }
587
628
  finally {
@@ -775,11 +816,60 @@ export function formatReadyBanner(deps) {
775
816
  const enabledMode = enabledTools.size > 0 ? `, enabled-tools=${enabledTools.size}` : "";
776
817
  return `enquire ${VERSION} ready (${writeMode}, vault=${vault.root}${cacheMode}${ftsMode}${privacyMode}${watchMode}${disabledMode}${enabledMode})`;
777
818
  }
819
/**
 * v2.15.0 — context-prefixed embedding text builder ("late-chunking-style"
 * context windowing).
 *
 * Builds the text sent to the embedder for `chunks[i]`. With context
 * windowing enabled the parts are, in order and joined by blank lines:
 *
 *   1. `[doc: <docTitle>]`          (only when `opts.docTitle` is truthy)
 *   2. the chunk's heading breadcrumb (only when present)
 *   3. `… <tail of previous chunk>` (at most `contextChars` chars)
 *   4. the chunk text itself        (never truncated here)
 *   5. `<head of next chunk> …`     (at most `contextChars` chars)
 *
 * Why: short standalone chunks ("Use Adam β=0.9, β=0.999") embed nearly
 * identically across documents; a little neighbor text plus the title and
 * breadcrumb gives the bi-encoder signal to keep them apart.
 *
 * Neighbor snippets are trimmed to a word boundary only when the cut really
 * landed inside a word. A neighbor that already fits in the window is used
 * whole, so no complete word is dropped.
 *
 * NOTE: only the neighbor snippets are bounded. The total length still grows
 * with the chunk, title and breadcrumb, so callers must size `contextChars`
 * with the embedder's input budget in mind.
 *
 * @param {Array<{text: string, breadcrumb?: string}>} chunks - All chunks of one document, in order.
 * @param {number} i - Index of the chunk to build text for.
 * @param {{docTitle?: string, contextChars?: number}} [opts] - `contextChars`
 *   is the per-side neighbor window. Missing, non-numeric, or ≤ 0 values
 *   (including fractions below 1) select the legacy form.
 * @returns {string} The text to embed; `""` when `chunks[i]` does not exist.
 *   In legacy mode this is exactly the v2.1.0 form (breadcrumb + chunk text).
 */
export function buildEmbedText(chunks, i, opts = {}) {
    const current = chunks[i];
    if (!current)
        return "";
    // Normalize the window size. `NaN > 0` is false, so undefined / garbage
    // input falls back to the legacy form instead of silently pulling in
    // whole neighbor chunks via `slice(-NaN)`.
    const requested = Number(opts.contextChars);
    const contextChars = requested > 0 ? Math.trunc(requested) : 0;
    if (contextChars <= 0) {
        // Legacy v2.1.0 form — breadcrumb only. Must stay bit-for-bit stable
        // for users who do not opt in.
        return current.breadcrumb ? `${current.breadcrumb}\n\n${current.text}` : current.text;
    }
    const parts = [];
    if (opts.docTitle)
        parts.push(`[doc: ${opts.docTitle}]`);
    if (current.breadcrumb)
        parts.push(current.breadcrumb);
    const prev = chunks[i - 1];
    if (prev) {
        const tail = neighborTail(prev.text, contextChars);
        if (tail.length > 0)
            parts.push(`… ${tail}`);
    }
    parts.push(current.text);
    const next = chunks[i + 1];
    if (next) {
        const head = neighborHead(next.text, contextChars);
        if (head.length > 0)
            parts.push(`${head} …`);
    }
    return parts.join("\n\n");
}

/** True when `ch` is a single whitespace character. */
function isWhitespaceChar(ch) {
    return /\s/.test(ch);
}

/** Last `maxChars` chars of `text`, dropping a leading partial word only if the cut split one. */
function neighborTail(text, maxChars) {
    if (text.length <= maxChars)
        return text.trim();
    let start = text.length - maxChars;
    // Never begin on the low half of a surrogate pair (would emit a lone surrogate).
    const firstUnit = text.charCodeAt(start);
    if (firstUnit >= 0xdc00 && firstUnit <= 0xdfff)
        start += 1;
    let tail = text.slice(start);
    const cutMidWord = tail.length > 0 && !isWhitespaceChar(text[start - 1]) && !isWhitespaceChar(tail[0]);
    // The regex needs a whitespace to match, so unspaced scripts (e.g. CJK)
    // keep their tail instead of being erased.
    if (cutMidWord)
        tail = tail.replace(/^\S*\s/, "");
    return tail.trim();
}

/** First `maxChars` chars of `text`, dropping a trailing partial word only if the cut split one. */
function neighborHead(text, maxChars) {
    if (text.length <= maxChars)
        return text.trim();
    let end = maxChars;
    // Never end on the high half of a surrogate pair.
    const lastUnit = text.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
        end -= 1;
    let head = text.slice(0, end);
    const cutMidWord = head.length > 0 && !isWhitespaceChar(head[head.length - 1]) && !isWhitespaceChar(text[end]);
    if (cutMidWord)
        head = head.replace(/\s\S*$/, "");
    return head.trim();
}
778
867
  // v2.0 alpha — sync the persistent embedding index. Same incremental-rebuild
779
868
  // pattern as syncFtsIndex (mtime tracked in source_state); we only re-embed
780
869
  // notes whose mtime changed. Embedding is the bottleneck (~5-30ms per chunk
781
870
  // CPU on M1), so incremental updates are critical for vaults of any size.
782
- async function syncEmbedDb(vault, db, embedder) {
871
+ async function syncEmbedDb(vault, db, embedder, opts = {}) {
872
+ const contextChars = opts.lateChunkContext ?? 0;
783
873
  const entries = await vault.listMarkdown();
784
874
  const known = new Map();
785
875
  // v2.8.0: scope to kind="md" so the markdown-sync path doesn't see (and
@@ -824,9 +914,17 @@ async function syncEmbedDb(vault, db, embedder) {
824
914
  }
825
915
  // v2.1.0: prepend heading breadcrumb to embedded text so the model sees
826
916
  // structural context. Free win at zero token cost — Chroma 2024 +
827
- // NAACL 2025 show +2-5 NDCG@10 from breadcrumb prepending. The text
828
- // stored in `text_preview` (for snippets) stays clean.
829
- const vectors = await embedder.embed(chunks.map((c) => (c.breadcrumb ? `${c.breadcrumb}\n\n${c.text}` : c.text)));
917
+ // NAACL 2025 show +2-5 NDCG@10 from breadcrumb prepending.
918
+ // v2.15.0: when `--late-chunk-context <n>` is set, also include
919
+ // doc title + neighbor-chunk tails so the embedding captures
920
+ // cross-paragraph context. The text stored in `text_preview`
921
+ // (for snippets) stays clean.
922
+ const docTitle = note.parsed.frontmatter?.title || path.basename(e.relPath, ".md");
923
+ const embedTexts = chunks.map((_c, i) => buildEmbedText(chunks, i, {
924
+ docTitle: typeof docTitle === "string" ? docTitle : undefined,
925
+ contextChars
926
+ }));
927
+ const vectors = await embedder.embed(embedTexts);
830
928
  const rows = chunks.map((c, i) => {
831
929
  const vector = vectors[i];
832
930
  if (!vector)
@@ -961,7 +1059,8 @@ async function syncPdfFtsIndex(vault, idx) {
961
1059
  * but for PDFs. Page boundaries are preserved as `[page: N]` markers
962
1060
  * before chunking so embeddings carry page-citation context.
963
1061
  */
964
- async function syncPdfEmbedDb(vault, db, embedder) {
1062
+ async function syncPdfEmbedDb(vault, db, embedder, opts = {}) {
1063
+ const contextChars = opts.lateChunkContext ?? 0;
965
1064
  const pdfEntries = await vault.listFilesByExtension(".pdf");
966
1065
  const known = new Map();
967
1066
  for (const s of db.getSourceStates("pdf"))
@@ -1013,7 +1112,10 @@ async function syncPdfEmbedDb(vault, db, embedder) {
1013
1112
  }
1014
1113
  // Same breadcrumb-prepending logic as syncEmbedDb (no-op for PDFs
1015
1114
  // since chunkContent returns no breadcrumb on non-markdown).
1016
- const vectors = await embedder.embed(chunks.map((c) => (c.breadcrumb ? `${c.breadcrumb}\n\n${c.text}` : c.text)));
1115
+ // v2.15.0: late-chunking context windowing applies here too.
1116
+ const docTitle = path.basename(e.relPath, ".pdf");
1117
+ const embedTexts = chunks.map((_c, i) => buildEmbedText(chunks, i, { docTitle, contextChars }));
1118
+ const vectors = await embedder.embed(embedTexts);
1017
1119
  const rows = chunks.map((c, i) => {
1018
1120
  const vector = vectors[i];
1019
1121
  if (!vector)