npm - @tobilu/qmd - Versions diffs - 2.0.1 → 2.5.1 - Mend

@tobilu/qmd 2.0.1 → 2.5.1

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registry.

Files changed (39) hide show

package/CHANGELOG.md +177 -0
package/README.md +64 -1
package/bin/qmd +49 -4
package/dist/ast.d.ts +65 -0
package/dist/ast.js +334 -0
package/dist/bench/bench.d.ts +23 -0
package/dist/bench/bench.js +280 -0
package/dist/bench/score.d.ts +33 -0
package/dist/bench/score.js +88 -0
package/dist/bench/types.d.ts +80 -0
package/dist/bench/types.js +8 -0
package/dist/cli/formatter.js +5 -1
package/dist/cli/qmd.d.ts +27 -0
package/dist/cli/qmd.js +1328 -115
package/dist/collections.d.ts +20 -0
package/dist/collections.js +32 -7
package/dist/db.d.ts +14 -3
package/dist/db.js +45 -4
package/dist/index.d.ts +11 -1
package/dist/index.js +18 -5
package/dist/llm.d.ts +77 -6
package/dist/llm.js +445 -62
package/dist/mcp/server.d.ts +6 -3
package/dist/mcp/server.js +68 -29
package/dist/paths.d.ts +1 -0
package/dist/paths.js +4 -0
package/dist/store.d.ts +148 -23
package/dist/store.js +1018 -255
package/package.json +48 -20
package/scripts/build.mjs +29 -0
package/scripts/check-package-grammars.mjs +29 -0
package/scripts/package-smoke.mjs +65 -0
package/scripts/test-all.mjs +27 -0
package/skills/qmd/SKILL.md +203 -0
package/skills/qmd/references/mcp-setup.md +102 -0
package/skills/release/SKILL.md +139 -0
package/skills/release/scripts/install-hooks.sh +38 -0
package/dist/embedded-skills.d.ts +0 -6
package/dist/embedded-skills.js +0 -14

package/dist/store.js CHANGED Viewed

@@ -16,16 +16,21 @@ import { createHash } from "crypto";
 import { readFileSync, realpathSync, statSync, mkdirSync } from "node:fs";
 // Note: node:path resolve is not imported — we export our own cross-platform resolve()
 import fastGlob from "fast-glob";
-import { LlamaCpp, getDefaultLlamaCpp, formatQueryForEmbedding, formatDocForEmbedding, withLLMSessionForLlm, } from "./llm.js";
+import { qmdHomedir } from "./paths.js";
+import { LlamaCpp, getDefaultLlamaCpp, formatQueryForEmbedding, formatDocForEmbedding, withLLMSessionForLlm, DEFAULT_EMBED_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, } from "./llm.js";
 // =============================================================================
 // Configuration
 // =============================================================================
-const HOME = process.env.HOME || "/tmp";
-export const DEFAULT_EMBED_MODEL = "embeddinggemma";
-export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
-export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
+export const DEFAULT_EMBED_MODEL = DEFAULT_EMBED_MODEL_URI;
+export const DEFAULT_RERANK_MODEL = DEFAULT_RERANK_MODEL_URI;
+export const DEFAULT_QUERY_MODEL = DEFAULT_GENERATE_MODEL_URI;
 export const DEFAULT_GLOB = "**/*.md";
 export const DEFAULT_MULTI_GET_MAX_BYTES = 10 * 1024; // 10KB
+export const DEFAULT_EMBED_MAX_DOCS_PER_BATCH = 64;
+export const DEFAULT_EMBED_MAX_BATCH_BYTES = 64 * 1024 * 1024; // 64MB
+const EMBED_FINGERPRINT_PROBE_QUERY = "__qmd_embedding_query_probe__";
+const EMBED_FINGERPRINT_PROBE_TITLE = "__qmd_embedding_title_probe__";
+const EMBED_FINGERPRINT_PROBE_DOC = "__qmd_embedding_document_probe__";
 // Chunking: 900 tokens per chunk with 15% overlap
 // Increased from 800 to accommodate smart chunking finding natural break points
 export const CHUNK_SIZE_TOKENS = 900;
@@ -36,6 +41,16 @@ export const CHUNK_OVERLAP_CHARS = CHUNK_OVERLAP_TOKENS * 4; // 540 chars
 // Search window for finding optimal break points (in tokens, ~200 tokens)
 export const CHUNK_WINDOW_TOKENS = 200;
 export const CHUNK_WINDOW_CHARS = CHUNK_WINDOW_TOKENS * 4; // 800 chars
+export function getEmbeddingFingerprint(model = DEFAULT_EMBED_MODEL) {
+    const significant = [
+        `model:${model}`,
+        `query:${formatQueryForEmbedding(EMBED_FINGERPRINT_PROBE_QUERY, model)}`,
+        `doc:${formatDocForEmbedding(EMBED_FINGERPRINT_PROBE_DOC, EMBED_FINGERPRINT_PROBE_TITLE, model)}`,
+        `chunk_tokens:${CHUNK_SIZE_TOKENS}`,
+        `chunk_overlap_tokens:${CHUNK_OVERLAP_TOKENS}`,
+    ].join("\n");
+    return createHash("sha256").update(significant).digest("hex").slice(0, 6);
+}
 /**
  * Get the LlamaCpp instance for a store — prefers the store's own instance,
  * falls back to the global singleton.
@@ -161,6 +176,60 @@ export function findBestCutoff(breakPoints, targetCharPos, windowChars = CHUNK_W
     }
     return bestPos;
 }
+/**
+ * Merge two sets of break points (e.g. regex + AST), keeping the highest
+ * score at each position. Result is sorted by position.
+ */
+export function mergeBreakPoints(a, b) {
+    const seen = new Map();
+    for (const bp of a) {
+        const existing = seen.get(bp.pos);
+        if (!existing || bp.score > existing.score) {
+            seen.set(bp.pos, bp);
+        }
+    }
+    for (const bp of b) {
+        const existing = seen.get(bp.pos);
+        if (!existing || bp.score > existing.score) {
+            seen.set(bp.pos, bp);
+        }
+    }
+    return Array.from(seen.values()).sort((a, b) => a.pos - b.pos);
+}
+/**
+ * Core chunk algorithm that operates on precomputed break points and code fences.
+ * This is the shared implementation used by both regex-only and AST-aware chunking.
+ */
+export function chunkDocumentWithBreakPoints(content, breakPoints, codeFences, maxChars = CHUNK_SIZE_CHARS, overlapChars = CHUNK_OVERLAP_CHARS, windowChars = CHUNK_WINDOW_CHARS) {
+    if (content.length <= maxChars) {
+        return [{ text: content, pos: 0 }];
+    }
+    const chunks = [];
+    let charPos = 0;
+    while (charPos < content.length) {
+        const targetEndPos = Math.min(charPos + maxChars, content.length);
+        let endPos = targetEndPos;
+        if (endPos < content.length) {
+            const bestCutoff = findBestCutoff(breakPoints, targetEndPos, windowChars, 0.7, codeFences);
+            if (bestCutoff > charPos && bestCutoff <= targetEndPos) {
+                endPos = bestCutoff;
+            }
+        }
+        if (endPos <= charPos) {
+            endPos = Math.min(charPos + maxChars, content.length);
+        }
+        chunks.push({ text: content.slice(charPos, endPos), pos: charPos });
+        if (endPos >= content.length) {
+            break;
+        }
+        charPos = endPos - overlapChars;
+        const lastChunkPos = chunks.at(-1).pos;
+        if (charPos <= lastChunkPos) {
+            charPos = endPos;
+        }
+    }
+    return chunks;
+}
 // Hybrid query: strong BM25 signal detection thresholds
 // Skip expensive LLM expansion when top result is strong AND clearly separated from runner-up
 export const STRONG_SIGNAL_MIN_SCORE = 0.85;
@@ -172,7 +241,7 @@ export const RERANK_CANDIDATE_LIMIT = 40;
 // Path utilities
 // =============================================================================
 export function homedir() {
-    return HOME;
+    return qmdHomedir();
 }
 /**
  * Check if a path is absolute.
@@ -191,7 +260,8 @@ export function isAbsolutePath(path) {
     if (path.startsWith('/')) {
         // Check if it's a Git Bash style path like /c/ or /c/Users (C-Z only, not A or B)
         // Requires path[2] === '/' to distinguish from Unix paths like /c or /cache
-        if (path.length >= 3 && path[2] === '/') {
+        // Skipped on WSL where /c/ is a valid drvfs mount point, not a drive letter
+        if (!isWSL() && path.length >= 3 && path[2] === '/') {
             const driveLetter = path[1];
             if (driveLetter && /[c-zC-Z]/.test(driveLetter)) {
                 return true;
@@ -213,6 +283,13 @@ export function isAbsolutePath(path) {
 export function normalizePathSeparators(path) {
     return path.replace(/\\/g, '/');
 }
+/**
+ * Detect if running inside WSL (Windows Subsystem for Linux).
+ * On WSL, paths like /c/work/... are valid drvfs mount points, not Git Bash paths.
+ */
+function isWSL() {
+    return !!(process.env.WSL_DISTRO_NAME || process.env.WSL_INTEROP);
+}
 /**
  * Get the relative path from a prefix.
  * Returns null if path is not under prefix.
@@ -256,8 +333,9 @@ export function resolve(...paths) {
             windowsDrive = firstPath.slice(0, 2);
             result = firstPath.slice(2);
         }
-        else if (firstPath.startsWith('/') && firstPath.length >= 3 && firstPath[2] === '/') {
+        else if (!isWSL() && firstPath.startsWith('/') && firstPath.length >= 3 && firstPath[2] === '/') {
             // Git Bash style: /c/ -> C: (C-Z drives only, not A or B)
+            // Skipped on WSL where /c/ is a valid drvfs mount point, not a drive letter
             const driveLetter = firstPath[1];
             if (driveLetter && /[c-zC-Z]/.test(driveLetter)) {
                 windowsDrive = driveLetter.toUpperCase() + ':';
@@ -288,8 +366,9 @@ export function resolve(...paths) {
                 windowsDrive = p.slice(0, 2);
                 result = p.slice(2);
             }
-            else if (p.startsWith('/') && p.length >= 3 && p[2] === '/') {
+            else if (!isWSL() && p.startsWith('/') && p.length >= 3 && p[2] === '/') {
                 // Git Bash style (C-Z drives only, not A or B)
+                // Skipped on WSL where /c/ is a valid drvfs mount point, not a drive letter
                 const driveLetter = p[1];
                 if (driveLetter && /[c-zC-Z]/.test(driveLetter)) {
                     windowsDrive = driveLetter.toUpperCase() + ':';
@@ -332,6 +411,10 @@ let _productionMode = false;
 export function enableProductionMode() {
     _productionMode = true;
 }
+/** Reset production mode flag — only for testing. */
+export function _resetProductionModeForTesting() {
+    _productionMode = false;
+}
 export function getDefaultDbPath(indexName = "index") {
     // Always allow override via INDEX_PATH (for testing)
     if (process.env.INDEX_PATH) {
@@ -398,21 +481,25 @@ export function normalizeVirtualPath(input) {
 export function parseVirtualPath(virtualPath) {
     // Normalize the path first
     const normalized = normalizeVirtualPath(virtualPath);
+    const [pathPart = normalized, queryString = ""] = normalized.split("?");
     // Match: qmd://collection-name[/optional-path]
     // Allows: qmd://name, qmd://name/, qmd://name/path
-    const match = normalized.match(/^qmd:\/\/([^\/]+)\/?(.*)$/);
+    const match = pathPart.match(/^qmd:\/\/([^\/]+)\/?(.*)$/);
     if (!match?.[1])
         return null;
+    const indexName = new URLSearchParams(queryString).get("index")?.trim() || undefined;
     return {
         collectionName: match[1],
         path: match[2] ?? '', // Empty string for collection root
+        ...(indexName ? { indexName } : {}),
     };
 }
 /**
  * Build a virtual path from collection name and relative path.
  */
-export function buildVirtualPath(collectionName, path) {
-    return `qmd://${collectionName}/${path}`;
+export function buildVirtualPath(collectionName, path, indexName) {
+    const base = `qmd://${collectionName}/${path}`;
+    return indexName ? `${base}?index=${encodeURIComponent(indexName)}` : base;
 }
 /**
  * Check if a path is explicitly a virtual path.
@@ -482,6 +569,7 @@ function createSqliteVecUnavailableError(reason) {
         "Install Homebrew SQLite so the sqlite-vec extension can be loaded, " +
         "and set BREW_PREFIX if Homebrew is installed in a non-standard location.");
 }
+let _sqliteVecUnavailableReason = null;
 function getErrorMessage(err) {
     return err instanceof Error ? err.message : String(err);
 }
@@ -498,15 +586,76 @@ export function verifySqliteVecLoaded(db) {
     }
 }
 let _sqliteVecAvailable = null;
+const CJK_CHAR_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
+const CJK_RUN_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]+/gu;
+const FTS_CJK_NORMALIZED_VERSION = "1";
+/**
+ * FTS5's unicode61 tokenizer does not segment CJK text into searchable words.
+ * Normalize CJK runs by spacing every character so exact CJK queries can be
+ * translated into phrase queries while Latin text keeps the default tokenizer.
+ */
+export function normalizeCjkForFTS(text) {
+    return text.replace(CJK_RUN_PATTERN, run => ` ${Array.from(run).join(' ')} `);
+}
+function containsCjk(text) {
+    return CJK_CHAR_PATTERN.test(text);
+}
+function sanitizeFTS5Phrase(phrase) {
+    return normalizeCjkForFTS(phrase)
+        .split(/\s+/)
+        .map(t => sanitizeFTS5Term(t))
+        .filter(t => t)
+        .join(' ');
+}
+function rebuildFTSForCjkNormalization(db) {
+    const version = db.prepare(`SELECT value FROM store_config WHERE key = 'fts_cjk_normalized_version'`).get();
+    if (version?.value === FTS_CJK_NORMALIZED_VERSION)
+        return;
+    try {
+        db.exec(`DELETE FROM documents_fts WHERE rowid >= 0`);
+    }
+    catch {
+        // Some older/corrupt FTS5 shadow-table states can reject bulk deletes even
+        // though reads still work. Recreate the virtual table; documents_fts is a
+        // derived index, so rebuilding it from documents/content is safe.
+        db.exec(`DROP TABLE IF EXISTS documents_fts`);
+        db.exec(`
+      CREATE VIRTUAL TABLE documents_fts USING fts5(
+        filepath, title, body,
+        tokenize='porter unicode61'
+      )
+    `);
+    }
+    const rows = db.prepare(`
+    SELECT d.id, d.collection, d.path, d.title, content.doc as body
+    FROM documents d
+    JOIN content ON content.hash = d.hash
+    WHERE d.active = 1
+  `).all();
+    const insert = db.prepare(`INSERT INTO documents_fts(rowid, filepath, title, body) VALUES (?, ?, ?, ?)`);
+    const rebuild = db.transaction(() => {
+        for (const row of rows) {
+            insert.run(row.id, normalizeCjkForFTS(`${row.collection}/${row.path}`), normalizeCjkForFTS(row.title), normalizeCjkForFTS(row.body));
+        }
+    });
+    rebuild();
+    db.prepare(`
+    INSERT OR REPLACE INTO store_config(key, value)
+    VALUES ('fts_cjk_normalized_version', ?)
+  `).run(FTS_CJK_NORMALIZED_VERSION);
+}
 function initializeDatabase(db) {
     try {
         loadSqliteVec(db);
         verifySqliteVecLoaded(db);
         _sqliteVecAvailable = true;
+        _sqliteVecUnavailableReason = null;
     }
-    catch {
+    catch (err) {
         // sqlite-vec is optional — vector search won't work but FTS is fine
         _sqliteVecAvailable = false;
+        _sqliteVecUnavailableReason = getErrorMessage(err);
+        console.warn(_sqliteVecUnavailableReason);
     }
     db.exec("PRAGMA journal_mode = WAL");
     db.exec("PRAGMA foreign_keys = ON");
@@ -548,19 +697,16 @@ function initializeDatabase(db) {
       created_at TEXT NOT NULL
     )
   `);
-    // Content vectors
-    const cvInfo = db.prepare(`PRAGMA table_info(content_vectors)`).all();
-    const hasSeqColumn = cvInfo.some(col => col.name === 'seq');
-    if (cvInfo.length > 0 && !hasSeqColumn) {
-        db.exec(`DROP TABLE IF EXISTS content_vectors`);
-        db.exec(`DROP TABLE IF EXISTS vectors_vec`);
-    }
+    // Content vectors. Avoid PRAGMA schema probes during startup; legacy vector
+    // columns are repaired lazily when a vector/embedding query first needs them.
     db.exec(`
     CREATE TABLE IF NOT EXISTS content_vectors (
       hash TEXT NOT NULL,
       seq INTEGER NOT NULL DEFAULT 0,
       pos INTEGER NOT NULL DEFAULT 0,
       model TEXT NOT NULL,
+      embed_fingerprint TEXT NOT NULL DEFAULT '',
+      total_chunks INTEGER NOT NULL DEFAULT 1,
       embedded_at TEXT NOT NULL,
       PRIMARY KEY (hash, seq)
     )
@@ -591,9 +737,12 @@ function initializeDatabase(db) {
       tokenize='porter unicode61'
     )
   `);
-    // Triggers to keep FTS in sync
+    // Triggers keep FTS in sync for callers that write directly to documents.
+    // Production indexing paths rebuild entries in TypeScript so CJK text can be
+    // normalized before it reaches the unicode61 tokenizer.
+    db.exec(`DROP TRIGGER IF EXISTS documents_ai`);
     db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents
+    CREATE TRIGGER documents_ai AFTER INSERT ON documents
     WHEN new.active = 1
     BEGIN
       INSERT INTO documents_fts(rowid, filepath, title, body)
@@ -605,13 +754,15 @@ function initializeDatabase(db) {
       WHERE new.active = 1;
     END
   `);
+    db.exec(`DROP TRIGGER IF EXISTS documents_ad`);
     db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
+    CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN
       DELETE FROM documents_fts WHERE rowid = old.id;
     END
   `);
+    db.exec(`DROP TRIGGER IF EXISTS documents_au`);
     db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents
+    CREATE TRIGGER documents_au AFTER UPDATE ON documents
     BEGIN
       -- Delete from FTS if no longer active
       DELETE FROM documents_fts WHERE rowid = old.id AND new.active = 0;
@@ -626,6 +777,7 @@ function initializeDatabase(db) {
       WHERE new.active = 1;
     END
   `);
+    rebuildFTSForCjkNormalization(db);
 }
 function rowToNamedCollection(row) {
     return {
@@ -767,7 +919,7 @@ export function isSqliteVecAvailable() {
 }
 function ensureVecTableInternal(db, dimensions) {
     if (!_sqliteVecAvailable) {
-        throw new Error("sqlite-vec is not available. Vector operations require a SQLite build with extension loading support.");
+        throw createSqliteVecUnavailableError(_sqliteVecUnavailableReason ?? "vector operations require a SQLite build with extension loading support");
     }
     const tableInfo = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
     if (tableInfo) {
@@ -777,7 +929,10 @@ function ensureVecTableInternal(db, dimensions) {
         const existingDims = match?.[1] ? parseInt(match[1], 10) : null;
         if (existingDims === dimensions && hasHashSeq && hasCosine)
             return;
-        // Table exists but wrong schema - need to rebuild
+        if (existingDims !== null && existingDims !== dimensions) {
+            throw new Error(`Embedding dimension mismatch: existing vectors are ${existingDims}d but the current model produces ${dimensions}d. ` +
+                `Run 'qmd embed -f' to re-embed with the new model.`);
+        }
         db.exec("DROP TABLE IF EXISTS vectors_vec");
     }
     db.exec(`CREATE VIRTUAL TABLE vectors_vec USING vec0(hash_seq TEXT PRIMARY KEY, embedding float[${dimensions}] distance_metric=cosine)`);
@@ -828,7 +983,7 @@ export async function reindexCollection(store, collectionPath, globPattern, coll
         }
         const hash = await hashContent(content);
         const title = extractTitle(content, relativeFile);
-        const existing = findActiveDocument(db, collectionName, path);
+        const existing = findOrMigrateLegacyDocument(db, collectionName, path);
         if (existing) {
             if (existing.hash === hash) {
                 if (existing.title !== title) {
@@ -867,6 +1022,125 @@ export async function reindexCollection(store, collectionPath, globPattern, coll
     const orphanedCleaned = cleanupOrphanedContent(db);
     return { indexed, updated, unchanged, removed, orphanedCleaned };
 }
+function validatePositiveIntegerOption(name, value, fallback) {
+    if (value === undefined)
+        return fallback;
+    if (!Number.isInteger(value) || value < 1) {
+        throw new Error(`${name} must be a positive integer`);
+    }
+    return value;
+}
+function resolveEmbedOptions(options) {
+    return {
+        maxDocsPerBatch: validatePositiveIntegerOption("maxDocsPerBatch", options?.maxDocsPerBatch, DEFAULT_EMBED_MAX_DOCS_PER_BATCH),
+        maxBatchBytes: validatePositiveIntegerOption("maxBatchBytes", options?.maxBatchBytes, DEFAULT_EMBED_MAX_BATCH_BYTES),
+    };
+}
+const CONTENT_VECTOR_DESIRED_COLUMNS = [
+    { name: "seq", definition: "INTEGER NOT NULL DEFAULT 0" },
+    { name: "pos", definition: "INTEGER NOT NULL DEFAULT 0" },
+    { name: "model", definition: "TEXT NOT NULL DEFAULT ''" },
+    { name: "embed_fingerprint", definition: "TEXT NOT NULL DEFAULT ''" },
+    { name: "total_chunks", definition: "INTEGER NOT NULL DEFAULT 1" },
+    { name: "embedded_at", definition: "TEXT NOT NULL DEFAULT ''" },
+];
+function isContentVectorColumnError(error) {
+    const message = error instanceof Error ? error.message : String(error);
+    if (!/(no such column|has no column named)/i.test(message)) {
+        return false;
+    }
+    return CONTENT_VECTOR_DESIRED_COLUMNS.some(col => message.includes(col.name));
+}
+function runContentVectorColumnRepairs(db) {
+    for (const column of CONTENT_VECTOR_DESIRED_COLUMNS) {
+        try {
+            db.exec(`ALTER TABLE content_vectors ADD COLUMN ${column.name} ${column.definition}`);
+        }
+        catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            // The repair series is intentionally idempotent: most columns should
+            // already exist, and another caller may have repaired a missing column
+            // between the failed query and this ALTER series.
+            if (!message.includes("duplicate column name")) {
+                throw error;
+            }
+        }
+    }
+}
+function withLazyContentVectorMigration(db, operation) {
+    let repaired = false;
+    while (true) {
+        try {
+            return operation();
+        }
+        catch (error) {
+            if (repaired || !isContentVectorColumnError(error)) {
+                throw error;
+            }
+            runContentVectorColumnRepairs(db);
+            repaired = true;
+        }
+    }
+}
+function getPendingEmbeddingDocs(db, collection, model = DEFAULT_EMBED_MODEL) {
+    const collectionFilter = collection ? `AND d.collection = ?` : ``;
+    const fingerprint = getEmbeddingFingerprint(model);
+    return withLazyContentVectorMigration(db, () => {
+        const stmt = db.prepare(`
+      SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
+      FROM documents d
+      JOIN content c ON d.hash = c.hash
+      LEFT JOIN (
+        SELECT hash, model, COUNT(*) AS chunk_count, MAX(total_chunks) AS expected_chunks
+        FROM content_vectors
+        WHERE model = ? AND embed_fingerprint = ?
+        GROUP BY hash, model, embed_fingerprint
+      ) v ON d.hash = v.hash
+      WHERE d.active = 1
+        AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
+        ${collectionFilter}
+      GROUP BY d.hash
+      ORDER BY MIN(d.path)
+    `);
+        return (collection ? stmt.all(model, fingerprint, collection) : stmt.all(model, fingerprint));
+    });
+}
+function buildEmbeddingBatches(docs, maxDocsPerBatch, maxBatchBytes) {
+    const batches = [];
+    let currentBatch = [];
+    let currentBytes = 0;
+    for (const doc of docs) {
+        const docBytes = Math.max(0, doc.bytes);
+        const wouldExceedDocs = currentBatch.length >= maxDocsPerBatch;
+        const wouldExceedBytes = currentBatch.length > 0 && (currentBytes + docBytes) > maxBatchBytes;
+        if (wouldExceedDocs || wouldExceedBytes) {
+            batches.push(currentBatch);
+            currentBatch = [];
+            currentBytes = 0;
+        }
+        currentBatch.push(doc);
+        currentBytes += docBytes;
+    }
+    if (currentBatch.length > 0) {
+        batches.push(currentBatch);
+    }
+    return batches;
+}
+function getEmbeddingDocsForBatch(db, batch) {
+    if (batch.length === 0)
+        return [];
+    const placeholders = batch.map(() => "?").join(",");
+    const rows = db.prepare(`
+    SELECT hash, doc as body
+    FROM content
+    WHERE hash IN (${placeholders})
+  `).all(...batch.map(doc => doc.hash));
+    const bodyByHash = new Map(rows.map(row => [row.hash, row.body]));
+    return batch.map((doc) => ({
+        ...doc,
+        body: bodyByHash.get(doc.hash) ?? "",
+    }));
+}
 /**
  * Generate vector embeddings for documents that need them.
  * Pure function — no console output, no db lifecycle management.
@@ -874,104 +1148,238 @@ export async function reindexCollection(store, collectionPath, globPattern, coll
  */
 export async function generateEmbeddings(store, options) {
     const db = store.db;
-    const model = options?.model ?? DEFAULT_EMBED_MODEL;
+    const llm = getLlm(store);
+    const model = options?.model ?? llm.embedModelName ?? DEFAULT_EMBED_MODEL;
+    const fingerprint = getEmbeddingFingerprint(model);
     const now = new Date().toISOString();
+    const { maxDocsPerBatch, maxBatchBytes } = resolveEmbedOptions(options);
+    const encoder = new TextEncoder();
     if (options?.force) {
-        clearAllEmbeddings(db);
+        clearAllEmbeddings(db, options?.collection);
     }
-    const hashesToEmbed = getHashesForEmbedding(db);
-    if (hashesToEmbed.length === 0) {
+    const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection, model);
+    if (docsToEmbed.length === 0) {
         return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
     }
-    const allChunks = [];
-    for (const item of hashesToEmbed) {
-        const encoder = new TextEncoder();
-        const bodyBytes = encoder.encode(item.body).length;
-        if (bodyBytes === 0)
-            continue;
-        const title = extractTitle(item.body, item.path);
-        const chunks = await chunkDocumentByTokens(item.body);
-        for (let seq = 0; seq < chunks.length; seq++) {
-            allChunks.push({
-                hash: item.hash,
-                title,
-                text: chunks[seq].text,
-                seq,
-                pos: chunks[seq].pos,
-                tokens: chunks[seq].tokens,
-                bytes: encoder.encode(chunks[seq].text).length,
-            });
-        }
-    }
-    if (allChunks.length === 0) {
-        return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
-    }
-    const totalBytes = allChunks.reduce((sum, chk) => sum + chk.bytes, 0);
-    const totalChunks = allChunks.length;
-    const totalDocs = hashesToEmbed.length;
+    const totalBytes = docsToEmbed.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0);
+    const totalDocs = docsToEmbed.length;
     const startTime = Date.now();
     // Use store's LlamaCpp or global singleton, wrapped in a session
-    const llm = getLlm(store);
-    const sessionOptions = { maxDuration: 30 * 60 * 1000, name: 'generateEmbeddings' };
+    const embedModelUri = model;
     // Create a session manager for this llm instance
     const result = await withLLMSessionForLlm(llm, async (session) => {
-        // Get embedding dimensions from first chunk
-        const firstChunk = allChunks[0];
-        const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title);
-        const firstResult = await session.embed(firstText);
-        if (!firstResult) {
-            throw new Error("Failed to get embedding dimensions from first chunk");
-        }
-        store.ensureVecTable(firstResult.embedding.length);
-        let chunksEmbedded = 0, errors = 0, bytesProcessed = 0;
+        let chunksEmbedded = 0;
+        let bytesProcessed = 0;
+        let totalChunks = 0;
+        let vectorTableInitialized = false;
         const BATCH_SIZE = 32;
-        for (let batchStart = 0; batchStart < allChunks.length; batchStart += BATCH_SIZE) {
-            const batchEnd = Math.min(batchStart + BATCH_SIZE, allChunks.length);
-            const batch = allChunks.slice(batchStart, batchEnd);
-            const texts = batch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title));
+        const RETRY_AFTER_SUCCESSFUL_CHUNKS = 64;
+        const MAX_RETRY_ATTEMPTS = 3;
+        const failures = new Map();
+        const retryQueue = new Map();
+        let successesSinceRetry = 0;
+        const failureList = () => [...failures.values()];
+        const activeErrorCount = () => failures.size;
+        const chunkKey = (chunk) => `${chunk.hash}:${chunk.seq}`;
+        const reasonFromError = (error) => {
+            const raw = error instanceof Error ? error.message : String(error);
+            return raw.length > 180 ? `${raw.slice(0, 177)}...` : raw;
+        };
+        const recordFailure = (chunk, reason) => {
+            const key = chunkKey(chunk);
+            const previous = failures.get(key);
+            failures.set(key, {
+                path: chunk.path,
+                hash: chunk.hash,
+                seq: chunk.seq,
+                attempts: (previous?.attempts ?? 0) + 1,
+                reason,
+            });
+            retryQueue.set(key, chunk);
+        };
+        const clearFailure = (chunk) => {
+            const key = chunkKey(chunk);
+            failures.delete(key);
+            retryQueue.delete(key);
+        };
+        const tryEmbedChunk = async (chunk) => {
             try {
-                const embeddings = await session.embedBatch(texts);
-                for (let i = 0; i < batch.length; i++) {
-                    const chunk = batch[i];
-                    const embedding = embeddings[i];
-                    if (embedding) {
-                        insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
-                        chunksEmbedded++;
-                    }
-                    else {
-                        errors++;
-                    }
-                    bytesProcessed += chunk.bytes;
+                const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
+                const result = await session.embed(text, { model });
+                if (!result) {
+                    recordFailure(chunk, "embedding returned no vector");
+                    return false;
                 }
+                insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now, chunk.expectedTotalChunks, fingerprint);
+                chunksEmbedded++;
+                successesSinceRetry++;
+                clearFailure(chunk);
+                return true;
             }
-            catch {
-                // Batch failed — try individual embeddings as fallback
-                for (const chunk of batch) {
-                    try {
-                        const text = formatDocForEmbedding(chunk.text, chunk.title);
-                        const result = await session.embed(text);
-                        if (result) {
-                            insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
+            catch (error) {
+                recordFailure(chunk, reasonFromError(error));
+                return false;
+            }
+        };
+        const retryFailedChunks = async (force = false) => {
+            if (!session.isValid || retryQueue.size === 0)
+                return;
+            if (!force && successesSinceRetry < RETRY_AFTER_SUCCESSFUL_CHUNKS)
+                return;
+            successesSinceRetry = 0;
+            // Normal mode: one retry pass after enough unrelated chunks succeeded.
+            // Force mode: we have run out of other chunks for this batch, so keep
+            // retrying outstanding failures until they recover or hit the cap. The
+            // cap prevents endless loops on permanently bad chunks.
+            do {
+                let retried = 0;
+                for (const [key, chunk] of [...retryQueue]) {
+                    const failure = failures.get(key);
+                    if (!failure || failure.attempts >= MAX_RETRY_ATTEMPTS)
+                        continue;
+                    retried++;
+                    await tryEmbedChunk(chunk);
+                }
+                if (!force || retried === 0)
+                    break;
+            } while (session.isValid && [...retryQueue].some(([key]) => {
+                const failure = failures.get(key);
+                return !!failure && failure.attempts < MAX_RETRY_ATTEMPTS;
+            }));
+        };
+        const batches = buildEmbeddingBatches(docsToEmbed, maxDocsPerBatch, maxBatchBytes);
+        for (const batchMeta of batches) {
+            // Abort early if session has been invalidated
+            if (!session.isValid) {
+                console.warn(`⚠ Session expired — skipping remaining document batches`);
+                break;
+            }
+            const batchDocs = getEmbeddingDocsForBatch(db, batchMeta);
+            const batchChunks = [];
+            const expectedChunksByHash = new Map();
+            const batchBytes = batchMeta.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0);
+            for (const doc of batchDocs) {
+                if (!doc.body.trim())
+                    continue;
+                const title = extractTitle(doc.body, doc.path);
+                const chunks = await chunkDocumentByTokens(doc.body, undefined, undefined, undefined, doc.path, options?.chunkStrategy, session.signal);
+                for (let seq = 0; seq < chunks.length; seq++) {
+                    batchChunks.push({
+                        hash: doc.hash,
+                        path: doc.path,
+                        title,
+                        text: chunks[seq].text,
+                        seq,
+                        pos: chunks[seq].pos,
+                        tokens: chunks[seq].tokens,
+                        bytes: encoder.encode(chunks[seq].text).length,
+                        expectedTotalChunks: chunks.length,
+                    });
+                }
+                expectedChunksByHash.set(doc.hash, chunks.length);
+            }
+            totalChunks += batchChunks.length;
+            if (batchChunks.length === 0) {
+                bytesProcessed += batchBytes;
+                options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors: activeErrorCount(), failures: failureList() });
+                continue;
+            }
+            if (!vectorTableInitialized) {
+                const firstChunk = batchChunks[0];
+                const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title, embedModelUri);
+                const firstResult = await session.embed(firstText, { model });
+                if (!firstResult) {
+                    throw new Error("Failed to get embedding dimensions from first chunk");
+                }
+                store.ensureVecTable(firstResult.embedding.length);
+                vectorTableInitialized = true;
+            }
+            const totalBatchChunkBytes = batchChunks.reduce((sum, chunk) => sum + chunk.bytes, 0);
+            let batchChunkBytesProcessed = 0;
+            for (let batchStart = 0; batchStart < batchChunks.length; batchStart += BATCH_SIZE) {
+                // Abort early if session has been invalidated (e.g. max duration exceeded)
+                if (!session.isValid) {
+                    const remainingChunks = batchChunks.slice(batchStart);
+                    for (const chunk of remainingChunks)
+                        recordFailure(chunk, "LLM session expired before embedding chunk");
+                    console.warn(`⚠ Session expired — skipping ${remainingChunks.length} remaining chunks`);
+                    break;
+                }
+                // Abort early if active error rate is too high (>80% of attempted chunks failed)
+                const processed = chunksEmbedded + activeErrorCount();
+                if (processed >= BATCH_SIZE && activeErrorCount() > processed * 0.8) {
+                    const remainingChunks = batchChunks.slice(batchStart);
+                    for (const chunk of remainingChunks)
+                        recordFailure(chunk, "embedding aborted because error rate was too high");
+                    console.warn(`⚠ Error rate too high (${activeErrorCount()}/${processed}) — aborting embedding`);
+                    break;
+                }
+                const batchEnd = Math.min(batchStart + BATCH_SIZE, batchChunks.length);
+                const chunkBatch = batchChunks.slice(batchStart, batchEnd);
+                const texts = chunkBatch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title, embedModelUri));
+                try {
+                    const embeddings = await session.embedBatch(texts, { model });
+                    for (let i = 0; i < chunkBatch.length; i++) {
+                        const chunk = chunkBatch[i];
+                        const embedding = embeddings[i];
+                        if (embedding) {
+                            insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now, chunk.expectedTotalChunks, fingerprint);
                             chunksEmbedded++;
+                            successesSinceRetry++;
+                            clearFailure(chunk);
                         }
                         else {
-                            errors++;
+                            recordFailure(chunk, "batch embedding returned no vector");
                         }
+                        batchChunkBytesProcessed += chunk.bytes;
+                    }
+                    await retryFailedChunks();
+                }
+                catch (error) {
+                    // Batch failed — try individual embeddings as fallback. If an
+                    // individual retry succeeds, any prior failure for that chunk is
+                    // cleared, so the visible error count reflects outstanding failures.
+                    const batchReason = reasonFromError(error);
+                    if (!session.isValid) {
+                        for (const chunk of chunkBatch)
+                            recordFailure(chunk, `batch failed and session expired: ${batchReason}`);
+                        batchChunkBytesProcessed += chunkBatch.reduce((sum, c) => sum + c.bytes, 0);
                     }
-                    catch {
-                        errors++;
+                    else {
+                        for (const chunk of chunkBatch) {
+                            await tryEmbedChunk(chunk);
+                            batchChunkBytesProcessed += chunk.bytes;
+                            await retryFailedChunks();
+                        }
                     }
-                    bytesProcessed += chunk.bytes;
                 }
+                const proportionalBytes = totalBatchChunkBytes === 0
+                    ? batchBytes
+                    : Math.min(batchBytes, Math.round((batchChunkBytesProcessed / totalBatchChunkBytes) * batchBytes));
+                options?.onProgress?.({
+                    chunksEmbedded,
+                    totalChunks,
+                    bytesProcessed: bytesProcessed + proportionalBytes,
+                    totalBytes,
+                    errors: activeErrorCount(),
+                    failures: failureList(),
+                });
+            }
+            await retryFailedChunks(true);
+            const removedPartialChunks = removeIncompleteEmbeddings(db, expectedChunksByHash, model);
+            if (removedPartialChunks > 0) {
+                chunksEmbedded = Math.max(0, chunksEmbedded - removedPartialChunks);
             }
-            options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
+            bytesProcessed += batchBytes;
+            options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors: activeErrorCount(), failures: failureList() });
         }
-        return { chunksEmbedded, errors };
-    }, sessionOptions);
+        return { chunksEmbedded, errors: activeErrorCount(), failures: failureList() };
+    }, { maxDuration: 30 * 60 * 1000, name: 'generateEmbeddings' });
     return {
         docsProcessed: totalDocs,
         chunksEmbedded: result.chunksEmbedded,
         errors: result.errors,
+        failures: result.failures,
         durationMs: Date.now() - startTime,
     };
 }
@@ -992,9 +1400,9 @@ export function createStore(dbPath) {
         close: () => db.close(),
         ensureVecTable: (dimensions) => ensureVecTableInternal(db, dimensions),
         // Index health
-        getHashesNeedingEmbedding: () => getHashesNeedingEmbedding(db),
-        getIndexHealth: () => getIndexHealth(db),
-        getStatus: () => getStatus(db),
+        getHashesNeedingEmbedding: (model) => getHashesNeedingEmbedding(db, undefined, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
+        getIndexHealth: (model) => getIndexHealth(db, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
+        getStatus: (model) => getStatus(db, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
         // Caching
         getCacheKey,
         getCachedResult: (cacheKey) => getCachedResult(db, cacheKey),
@@ -1022,8 +1430,8 @@ export function createStore(dbPath) {
         searchFTS: (query, limit, collectionName) => searchFTS(db, query, limit, collectionName),
         searchVec: (query, model, limit, collectionName, session, precomputedEmbedding) => searchVec(db, query, model, limit, collectionName, session, precomputedEmbedding),
         // Query expansion & reranking
-        expandQuery: (query, model, intent) => expandQuery(query, model, db, intent, store.llm),
-        rerank: (query, documents, model, intent) => rerank(query, documents, model, db, intent, store.llm),
+        expandQuery: (query, model, intent) => expandQuery(query, model ?? store.llm?.generateModelName ?? DEFAULT_QUERY_MODEL, db, intent, store.llm),
+        rerank: (query, documents, model, intent) => rerank(query, documents, model ?? store.llm?.rerankModelName ?? DEFAULT_RERANK_MODEL, db, intent, store.llm),
         // Document retrieval
         findDocument: (filename, options) => findDocument(db, filename, options),
         getDocumentBody: (doc, fromLine, maxLines) => getDocumentBody(db, doc, fromLine, maxLines),
@@ -1036,6 +1444,7 @@ export function createStore(dbPath) {
         insertContent: (hash, content, createdAt) => insertContent(db, hash, content, createdAt),
         insertDocument: (collectionName, path, title, hash, createdAt, modifiedAt) => insertDocument(db, collectionName, path, title, hash, createdAt, modifiedAt),
         findActiveDocument: (collectionName, path) => findActiveDocument(db, collectionName, path),
+        findOrMigrateLegacyDocument: (collectionName, path) => findOrMigrateLegacyDocument(db, collectionName, path),
         updateDocumentTitle: (documentId, title, modifiedAt) => updateDocumentTitle(db, documentId, title, modifiedAt),
         updateDocument: (documentId, title, hash, modifiedAt) => updateDocument(db, documentId, title, hash, modifiedAt),
         deactivateDocument: (collectionName, path) => deactivateDocument(db, collectionName, path),
@@ -1043,7 +1452,7 @@ export function createStore(dbPath) {
         // Vector/embedding operations
         getHashesForEmbedding: () => getHashesForEmbedding(db),
         clearAllEmbeddings: () => clearAllEmbeddings(db),
-        insertEmbedding: (hash, seq, pos, embedding, model, embeddedAt) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt),
+        insertEmbedding: (hash, seq, pos, embedding, model, embeddedAt, totalChunks, fingerprint) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt, totalChunks, fingerprint),
     };
     return store;
 }
@@ -1056,11 +1465,11 @@ export function getDocid(hash) {
 /**
  * Handelize a filename to be more token-friendly.
  * - Convert triple underscore `___` to `/` (folder separator)
- * - Convert to lowercase
  * - Replace sequences of non-word chars (except /) with single dash
  * - Remove leading/trailing dashes from path segments
  * - Preserve folder structure (a/b/c/d.md stays structured)
  * - Preserve file extension
+ * - Preserve original case (important for case-sensitive filesystems)
  */
 /** Replace emoji/symbol codepoints with their hex representation (e.g. 🐘 → 1f418) */
 function emojiToHex(str) {
@@ -1085,7 +1494,6 @@ export function handelize(path) {
     }
     const result = path
         .replace(/___/g, '/') // Triple underscore becomes folder separator
-        .toLowerCase()
         .split('/')
         .map((segment, idx, arr) => {
         const isLastSegment = idx === arr.length - 1;
@@ -1097,7 +1505,7 @@ export function handelize(path) {
             const ext = extMatch ? extMatch[1] : '';
             const nameWithoutExt = ext ? segment.slice(0, -ext.length) : segment;
             const cleanedName = nameWithoutExt
-                .replace(/[^\p{L}\p{N}$]+/gu, '-') // Keep route marker "$", dash-separate other chars
+                .replace(/[^\p{L}\p{N}$]+/gu, '-') // Keep letters, numbers, "$"; dash-separate rest (including dots)
                 .replace(/^-+|-+$/g, ''); // Remove leading/trailing dashes
             return cleanedName + ext;
         }
@@ -1118,17 +1526,85 @@ export function handelize(path) {
 // =============================================================================
 // Index health
 // =============================================================================
-export function getHashesNeedingEmbedding(db) {
-    const result = db.prepare(`
-    SELECT COUNT(DISTINCT d.hash) as count
-    FROM documents d
-    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
-    WHERE d.active = 1 AND v.hash IS NULL
-  `).get();
-    return result.count;
+export function getHashesNeedingEmbedding(db, collection, model = DEFAULT_EMBED_MODEL) { // Counts distinct active document hashes that have no content_vectors rows for this model + fingerprint, or fewer rows than the recorded total_chunks.
+    const collectionFilter = collection ? `AND d.collection = ?` : ``; // Optional restriction; its placeholder is the last one in the statement.
+    const fingerprint = getEmbeddingFingerprint(model);
+    return withLazyContentVectorMigration(db, () => {
+        const stmt = db.prepare(`
+      SELECT COUNT(DISTINCT d.hash) as count
+      FROM documents d
+      LEFT JOIN (
+        SELECT hash, model, COUNT(*) AS chunk_count, MAX(total_chunks) AS expected_chunks
+        FROM content_vectors
+        WHERE model = ? AND embed_fingerprint = ?
+        GROUP BY hash, model, embed_fingerprint
+      ) v ON d.hash = v.hash
+      WHERE d.active = 1
+        AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
+        ${collectionFilter}
+    `);
+        const result = (collection ? stmt.get(model, fingerprint, collection) : stmt.get(model, fingerprint)); // Bind order follows placeholder order: model, fingerprint, then collection when filtered.
+        return result.count;
+    });
 }
-export function getIndexHealth(db) {
-    const needsEmbedding = getHashesNeedingEmbedding(db);
+export async function maybeAdoptLegacyEmbeddingFingerprint(store, model = DEFAULT_EMBED_MODEL) { // Re-embeds one sample chunk stored with an empty fingerprint; on a match, stamps all such rows for this model with the current fingerprint. Resolves to { checked, adopted, reason }.
+    const db = store.db;
+    const fingerprint = getEmbeddingFingerprint(model);
+    const legacyCount = withLazyContentVectorMigration(db, () => {
+        const row = db.prepare(`SELECT COUNT(DISTINCT hash) AS count FROM content_vectors WHERE model = ? AND embed_fingerprint = ''`).get(model);
+        return row.count;
+    });
+    if (legacyCount === 0) { // No rows with an empty fingerprint for this model: nothing to adopt.
+        return { checked: false, adopted: 0, reason: "no legacy empty-fingerprint embeddings" };
+    }
+    const sample = withLazyContentVectorMigration(db, () => db.prepare(`
+    SELECT cv.hash, cv.seq, cv.pos, cv.total_chunks, c.doc AS body, MIN(d.path) AS path
+    FROM content_vectors cv
+    JOIN documents d ON d.hash = cv.hash AND d.active = 1
+    JOIN content c ON c.hash = cv.hash
+    WHERE cv.model = ? AND cv.embed_fingerprint = ''
+    GROUP BY cv.hash, cv.seq, cv.pos, cv.total_chunks, c.doc
+    ORDER BY cv.hash, cv.seq
+    LIMIT 1
+  `).get(model));
+    if (!sample) { // Legacy rows exist, but none joins to an active document that still has content.
+        return { checked: false, adopted: 0, reason: `${legacyCount} legacy docs have no active sample` };
+    }
+    const tableExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
+    if (!tableExists) {
+        return { checked: false, adopted: 0, reason: "vectors_vec table is missing" };
+    }
+    const expectedHashSeq = `${sample.hash}_${sample.seq}`; // Compared against vectors_vec.hash_seq of the nearest neighbour below.
+    const title = extractTitle(sample.body, sample.path);
+    const llm = getLlm(store);
+    return await withLLMSessionForLlm(llm, async (session) => {
+        const chunks = await chunkDocumentByTokens(sample.body, undefined, undefined, undefined, sample.path, undefined, session.signal); // NOTE(review): chunk strategy is left at its default here; confirm that matches how the legacy rows were chunked.
+        const chunk = chunks[sample.seq]; // The stored seq is used as the index into the fresh chunk list.
+        if (!chunk) {
+            return { checked: true, adopted: 0, reason: `sample chunk ${expectedHashSeq} no longer exists` };
+        }
+        const result = await session.embed(formatDocForEmbedding(chunk.text, title, model), { model });
+        if (!result) {
+            return { checked: true, adopted: 0, reason: "failed to embed legacy sample" };
+        }
+        const nearest = db.prepare(`
+      SELECT hash_seq, distance
+      FROM vectors_vec
+      WHERE embedding MATCH ? AND k = 1
+    `).get(new Float32Array(result.embedding));
+        if (!nearest) {
+            return { checked: true, adopted: 0, reason: "legacy sample vector not found" };
+        }
+        const threshold = 0.0001; // Largest nearest-neighbour distance still accepted as a match.
+        if (nearest.hash_seq !== expectedHashSeq || nearest.distance > threshold) { // Adopt only when the closest stored vector is the sample itself and within the threshold.
+            return { checked: true, adopted: 0, reason: `legacy sample differs from current fingerprint (nearest ${nearest.hash_seq}, distance ${nearest.distance.toFixed(6)})` };
+        }
+        const update = withLazyContentVectorMigration(db, () => db.prepare(`UPDATE content_vectors SET embed_fingerprint = ? WHERE model = ? AND embed_fingerprint = ''`).run(fingerprint, model)); // One verified sample stamps every empty-fingerprint row for this model.
+        return { checked: true, adopted: update.changes, reason: `sample ${expectedHashSeq} matched current fingerprint at distance ${nearest.distance.toFixed(6)}` };
+    });
+}
+export function getIndexHealth(db, model = DEFAULT_EMBED_MODEL) {
+    const needsEmbedding = getHashesNeedingEmbedding(db, undefined, model);
     const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get().count;
     const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get();
     let daysStale = null;
@@ -1181,13 +1657,15 @@ export function deleteInactiveDocuments(db) {
     return result.changes;
 }
 /**
- * Remove orphaned content hashes that are not referenced by any active document.
+ * Remove orphaned content hashes that are not referenced by any document.
+ * Inactive documents are soft-deleted tombstones, so their content rows must
+ * remain referenced until deleteInactiveDocuments() hard-deletes them.
+ * A content row is deleted only when no documents row, active or inactive,
+ * carries the same hash.
  * Returns the number of orphaned content hashes deleted.
  */
 export function cleanupOrphanedContent(db) {
     const result = db.prepare(`
     DELETE FROM content
-    WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
+    WHERE hash NOT IN (SELECT DISTINCT hash FROM documents)
   `).run();
     return result.changes; // The change count reported by run() for the DELETE.
 }
@@ -1196,39 +1674,50 @@ export function cleanupOrphanedContent(db) {
  * Returns the number of orphaned embedding chunks deleted.
  */
 export function cleanupOrphanedVectors(db) {
-    // Check if vectors_vec table exists
-    const tableExists = db.prepare(`
-    SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'
-  `).get();
-    if (!tableExists) {
+    // sqlite-vec may not be loaded (e.g. Bun's bun:sqlite lacks loadExtension).
+    // The vectors_vec virtual table can appear in sqlite_master from a prior
+    // session, but querying it without the vec0 module loaded will crash (#380).
+    if (!isSqliteVecAvailable()) { // First guard: skip all cleanup, including content_vectors, when the extension is unavailable.
         return 0;
     }
-    // Count orphaned vectors first
-    const countResult = db.prepare(`
-    SELECT COUNT(*) as c FROM content_vectors cv
-    WHERE NOT EXISTS (
-      SELECT 1 FROM documents d WHERE d.hash = cv.hash AND d.active = 1
-    )
-  `).get();
-    if (countResult.c === 0) {
+    // The schema entry can exist even when sqlite-vec itself is unavailable
+    // (for example when reopening a DB without vec0 loaded). In that case,
+    // touching the virtual table throws "no such module: vec0" and cleanup
+    // should degrade gracefully like the rest of the vector features.
+    try {
+        db.prepare(`SELECT 1 FROM vectors_vec LIMIT 0`).get(); // Second guard: a probe that returns no rows; any throw is treated as "vector table unusable".
+    }
+    catch {
         return 0;
     }
-    // Delete from vectors_vec first
-    db.exec(`
-    DELETE FROM vectors_vec WHERE hash_seq IN (
-      SELECT cv.hash || '_' || cv.seq FROM content_vectors cv
+    return withLazyContentVectorMigration(db, () => {
+        // Count orphaned vectors first
+        const countResult = db.prepare(`
+      SELECT COUNT(*) as c FROM content_vectors cv
       WHERE NOT EXISTS (
         SELECT 1 FROM documents d WHERE d.hash = cv.hash AND d.active = 1
       )
-    )
-  `);
-    // Delete from content_vectors
-    db.exec(`
-    DELETE FROM content_vectors WHERE hash NOT IN (
-      SELECT hash FROM documents WHERE active = 1
-    )
-  `);
-    return countResult.c;
+    `).get();
+        if (countResult.c === 0) {
+            return 0;
+        }
+        // Delete from vectors_vec first
+        db.exec(`
+      DELETE FROM vectors_vec WHERE hash_seq IN (
+        SELECT cv.hash || '_' || cv.seq FROM content_vectors cv
+        WHERE NOT EXISTS (
+          SELECT 1 FROM documents d WHERE d.hash = cv.hash AND d.active = 1
+        )
+      )
+    `);
+        // Delete from content_vectors
+        db.exec(`
+      DELETE FROM content_vectors WHERE hash NOT IN (
+        SELECT hash FROM documents WHERE active = 1
+      )
+    `);
+        return countResult.c; // The count taken before the deletes is what gets reported.
+    });
 }
 /**
  * Run VACUUM to reclaim unused space in the database.
@@ -1290,6 +1779,21 @@ export function insertContent(db, hash, content, createdAt) {
     db.prepare(`INSERT OR IGNORE INTO content (hash, doc, created_at) VALUES (?, ?, ?)`)
         .run(hash, content, createdAt);
 }
+function rebuildDocumentFTS(db, documentId) { // Replaces the documents_fts row for one document id: delete by rowid, then re-insert from documents + content if the document is active.
+    const row = db.prepare(`
+    SELECT d.id, d.collection, d.path, d.title, content.doc as body
+    FROM documents d
+    JOIN content ON content.hash = d.hash
+    WHERE d.id = ? AND d.active = 1
+  `).get(documentId);
+    db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`).run(documentId); // The old row is removed unconditionally, even when nothing is re-inserted.
+    if (!row) // No active document with a matching content row: leave it out of the FTS table.
+        return;
+    db.prepare(`
+    INSERT INTO documents_fts(rowid, filepath, title, body)
+    VALUES (?, ?, ?, ?)
+  `).run(row.id, normalizeCjkForFTS(`${row.collection}/${row.path}`), normalizeCjkForFTS(row.title), normalizeCjkForFTS(row.body)); // filepath is "collection/path"; all three text fields pass through normalizeCjkForFTS.
+}
 /**
  * Insert a new document into the documents table.
  */
@@ -1303,6 +1807,9 @@ export function insertDocument(db, collectionName, path, title, hash, createdAt,
       modified_at = excluded.modified_at,
       active = 1
   `).run(collectionName, path, title, hash, createdAt, modifiedAt);
+    const row = db.prepare(`SELECT id FROM documents WHERE collection = ? AND path = ?`).get(collectionName, path);
+    if (row)
+        rebuildDocumentFTS(db, row.id);
 }
 /**
  * Find an active document by collection name and path.
@@ -1314,12 +1821,48 @@ export function findActiveDocument(db, collectionName, path) {
   `).get(collectionName, path);
     return row ?? null;
 }
+/**
+ * Find an active document, falling back to a case-insensitive path match.
+ * If found under a different casing, renames it in-place and rebuilds the
+ * FTS entry. Embeddings are keyed by content hash, so the rename is
+ * safe — no re-embedding required.
+ *
+ * @internal Used by reindexCollection and indexFiles during qmd update.
+ * Returns null if the document does not exist under either path.
+ * Also returns null when the rename UPDATE changes no rows, e.g. because
+ * OR IGNORE suppressed a UNIQUE conflict with an already-existing path.
+ * When several active rows match case-insensitively, the lowest id is the
+ * one migrated.
+ */
+export function findOrMigrateLegacyDocument(db, collectionName, path) {
+    const existing = findActiveDocument(db, collectionName, path);
+    if (existing)
+        return existing;
+    const legacy = db.prepare(`
+    SELECT id, hash, title FROM documents
+    WHERE collection = ? AND path COLLATE NOCASE = ? AND active = 1
+    ORDER BY id
+    LIMIT 1
+  `).get(collectionName, path);
+    if (!legacy)
+        return null;
+    // Wrap rename + FTS rebuild in a transaction for atomicity.
+    const migrate = db.transaction(() => {
+        // Use OR IGNORE so a UNIQUE conflict (e.g. both "readme.md" and
+        // "README.md" already exist) is a no-op rather than crashing.
+        const result = db.prepare(`UPDATE OR IGNORE documents SET path = ? WHERE id = ? AND active = 1`).run(path, legacy.id);
+        if (result.changes === 0)
+            return false;
+        rebuildDocumentFTS(db, legacy.id);
+        return true;
+    });
+    if (!migrate()) // Rename was skipped; report "not found" to the caller.
+        return null;
+    return findActiveDocument(db, collectionName, path); // Re-read so the returned row reflects the new path.
+}
 /**
  * Update the title and modified_at timestamp for a document.
+ * Afterwards calls rebuildDocumentFTS() for the same document id.
  */
 export function updateDocumentTitle(db, documentId, title, modifiedAt) {
     db.prepare(`UPDATE documents SET title = ?, modified_at = ? WHERE id = ?`)
         .run(title, modifiedAt, documentId);
+    rebuildDocumentFTS(db, documentId);
 }
 /**
  * Update an existing document's hash, title, and modified_at timestamp.
@@ -1328,6 +1871,7 @@ export function updateDocumentTitle(db, documentId, title, modifiedAt) {
 export function updateDocument(db, documentId, title, hash, modifiedAt) {
     db.prepare(`UPDATE documents SET title = ?, hash = ?, modified_at = ? WHERE id = ?`)
         .run(title, hash, modifiedAt, documentId);
+    rebuildDocumentFTS(db, documentId); // Rebuild this document's FTS row after the title/hash change.
 }
 /**
  * Deactivate a document (mark as inactive but don't delete).
@@ -1346,52 +1890,44 @@ export function getActiveDocumentPaths(db, collectionName) {
     return rows.map(r => r.path);
 }
 export { formatQueryForEmbedding, formatDocForEmbedding };
+/**
+ * Chunk a document using regex-only break point detection.
+ * This is the sync, backward-compatible API used by tests and legacy callers.
+ * Break points and code fences are scanned here; the actual splitting is
+ * delegated to chunkDocumentWithBreakPoints() with the given size, overlap
+ * and window limits (all in characters).
+ */
 export function chunkDocument(content, maxChars = CHUNK_SIZE_CHARS, overlapChars = CHUNK_OVERLAP_CHARS, windowChars = CHUNK_WINDOW_CHARS) {
-    if (content.length <= maxChars) {
-        return [{ text: content, pos: 0 }];
-    }
-    // Pre-scan all break points and code fences once
     const breakPoints = scanBreakPoints(content);
     const codeFences = findCodeFences(content);
-    const chunks = [];
-    let charPos = 0;
-    while (charPos < content.length) {
-        // Calculate target end position for this chunk
-        const targetEndPos = Math.min(charPos + maxChars, content.length);
-        let endPos = targetEndPos;
-        // If not at the end, find the best break point
-        if (endPos < content.length) {
-            // Find best cutoff using scored algorithm
-            const bestCutoff = findBestCutoff(breakPoints, targetEndPos, windowChars, 0.7, codeFences);
-            // Only use the cutoff if it's within our current chunk
-            if (bestCutoff > charPos && bestCutoff <= targetEndPos) {
-                endPos = bestCutoff;
-            }
-        }
-        // Ensure we make progress
-        if (endPos <= charPos) {
-            endPos = Math.min(charPos + maxChars, content.length);
-        }
-        chunks.push({ text: content.slice(charPos, endPos), pos: charPos });
-        // Move forward, but overlap with previous chunk
-        // For last chunk, don't overlap (just go to the end)
-        if (endPos >= content.length) {
-            break;
-        }
-        charPos = endPos - overlapChars;
-        const lastChunkPos = chunks.at(-1).pos;
-        if (charPos <= lastChunkPos) {
-            // Prevent infinite loop - move forward at least a bit
-            charPos = endPos;
+    return chunkDocumentWithBreakPoints(content, breakPoints, codeFences, maxChars, overlapChars, windowChars);
+}
+/**
+ * Async AST-aware chunking. Detects language from filepath, computes AST
+ * break points for supported code files, merges with regex break points,
+ * and delegates to the shared chunk algorithm.
+ *
+ * Falls back to regex-only when strategy is "regex", filepath is absent,
+ * or language is unsupported.
+ *
+ * AST break points are requested only when chunkStrategy is "auto" (the
+ * default is "regex"), and ./ast.js is imported lazily on that path. If
+ * getASTBreakPoints() returns an empty list, the regex break points are
+ * used unchanged.
+ */
+export async function chunkDocumentAsync(content, maxChars = CHUNK_SIZE_CHARS, overlapChars = CHUNK_OVERLAP_CHARS, windowChars = CHUNK_WINDOW_CHARS, filepath, chunkStrategy = "regex") {
+    const regexPoints = scanBreakPoints(content);
+    const codeFences = findCodeFences(content);
+    let breakPoints = regexPoints;
+    if (chunkStrategy === "auto" && filepath) {
+        const { getASTBreakPoints } = await import("./ast.js");
+        const astPoints = await getASTBreakPoints(content, filepath);
+        if (astPoints.length > 0) {
+            breakPoints = mergeBreakPoints(regexPoints, astPoints);
         }
     }
-    return chunks;
+    return chunkDocumentWithBreakPoints(content, breakPoints, codeFences, maxChars, overlapChars, windowChars);
 }
 /**
  * Chunk a document by actual token count using the LLM tokenizer.
  * More accurate than character-based chunking but requires async.
+ *
+ * When filepath and chunkStrategy are provided, uses AST-aware break points
+ * for supported code files.
  */
-export async function chunkDocumentByTokens(content, maxTokens = CHUNK_SIZE_TOKENS, overlapTokens = CHUNK_OVERLAP_TOKENS, windowTokens = CHUNK_WINDOW_TOKENS) {
+export async function chunkDocumentByTokens(content, maxTokens = CHUNK_SIZE_TOKENS, overlapTokens = CHUNK_OVERLAP_TOKENS, windowTokens = CHUNK_WINDOW_TOKENS, filepath, chunkStrategy = "regex", signal) {
     const llm = getDefaultLlamaCpp();
     // Use moderate chars/token estimate (prose ~4, code ~2, mixed ~3)
     // If chunks exceed limit, they'll be re-split with actual ratio
@@ -1400,29 +1936,58 @@ export async function chunkDocumentByTokens(content, maxTokens = CHUNK_SIZE_TOKE
     const overlapChars = overlapTokens * avgCharsPerToken;
     const windowChars = windowTokens * avgCharsPerToken;
     // Chunk in character space with conservative estimate
-    let charChunks = chunkDocument(content, maxChars, overlapChars, windowChars);
+    // Use AST-aware chunking for the first pass when filepath/strategy provided
+    let charChunks = await chunkDocumentAsync(content, maxChars, overlapChars, windowChars, filepath, chunkStrategy);
     // Tokenize and split any chunks that still exceed limit
     const results = [];
-    for (const chunk of charChunks) {
-        const tokens = await llm.tokenize(chunk.text);
-        if (tokens.length <= maxTokens) {
-            results.push({ text: chunk.text, pos: chunk.pos, tokens: tokens.length });
+    const clampOverlapChars = (value, maxChars) => {
+        if (maxChars <= 1)
+            return 0;
+        return Math.max(0, Math.min(maxChars - 1, Math.floor(value)));
+    };
+    const pushChunkWithinTokenLimit = async (text, pos) => {
+        if (signal?.aborted)
+            return;
+        const tokens = await llm.tokenize(text);
+        if (tokens.length <= maxTokens || text.length <= 1) {
+            results.push({ text, pos, tokens: tokens.length });
+            return;
         }
-        else {
-            // Chunk is still too large - split it further
-            // Use actual token count to estimate better char limit
-            const actualCharsPerToken = chunk.text.length / tokens.length;
-            const safeMaxChars = Math.floor(maxTokens * actualCharsPerToken * 0.95); // 5% safety margin
-            const subChunks = chunkDocument(chunk.text, safeMaxChars, Math.floor(overlapChars * actualCharsPerToken / 2), Math.floor(windowChars * actualCharsPerToken / 2));
-            for (const subChunk of subChunks) {
-                const subTokens = await llm.tokenize(subChunk.text);
-                results.push({
-                    text: subChunk.text,
-                    pos: chunk.pos + subChunk.pos,
-                    tokens: subTokens.length,
-                });
-            }
+        const actualCharsPerToken = text.length / tokens.length;
+        let safeMaxChars = Math.floor(maxTokens * actualCharsPerToken * 0.95);
+        if (!Number.isFinite(safeMaxChars) || safeMaxChars < 1) {
+            safeMaxChars = Math.floor(text.length / 2);
+        }
+        safeMaxChars = Math.max(1, Math.min(text.length - 1, safeMaxChars));
+        let nextOverlapChars = clampOverlapChars(overlapChars * actualCharsPerToken / 2, safeMaxChars);
+        let nextWindowChars = Math.max(0, Math.floor(windowChars * actualCharsPerToken / 2));
+        let subChunks = chunkDocument(text, safeMaxChars, nextOverlapChars, nextWindowChars);
+        // Pathological single-line blobs can produce no meaningful breakpoint progress.
+        // Fall back to a simple half split so every recursion step strictly shrinks.
+        if (subChunks.length <= 1
+            || subChunks[0]?.text.length === text.length) {
+            safeMaxChars = Math.max(1, Math.floor(text.length / 2));
+            nextOverlapChars = 0;
+            nextWindowChars = 0;
+            subChunks = chunkDocument(text, safeMaxChars, nextOverlapChars, nextWindowChars);
+        }
+        if (subChunks.length <= 1
+            || subChunks[0]?.text.length === text.length) {
+            const fallbackTokens = tokens.slice(0, Math.max(1, maxTokens));
+            const truncatedText = await llm.detokenize(fallbackTokens);
+            results.push({
+                text: truncatedText,
+                pos,
+                tokens: fallbackTokens.length,
+            });
+            return;
         }
+        for (const subChunk of subChunks) {
+            await pushChunkWithinTokenLimit(text.slice(subChunk.pos, subChunk.pos + subChunk.text.length), pos + subChunk.pos);
+        }
+    };
+    for (const chunk of charChunks) {
+        await pushChunkWithinTokenLimit(chunk.text, chunk.pos);
     }
     return results;
 }
@@ -1523,7 +2088,7 @@ export function matchFilesByGlob(db, pattern) {
   `).all();
     const isMatch = picomatch(pattern);
     return allFiles
-        .filter(f => isMatch(f.virtual_path) || isMatch(f.path))
+        .filter(f => isMatch(f.virtual_path) || isMatch(f.path) || isMatch(f.collection + '/' + f.path))
         .map(f => ({
         filepath: f.virtual_path, // Virtual path for precise lookup
         displayPath: f.path, // Relative path for display
@@ -1874,8 +2439,23 @@ export function getTopLevelPathsWithoutContext(db, collectionName) {
 // =============================================================================
 // FTS Search
 // =============================================================================
-function sanitizeFTS5Term(term) {
-    return term.replace(/[^\p{L}\p{N}']/gu, '').toLowerCase();
+export function sanitizeFTS5Term(term) { // removes every character that is not a Unicode letter, a Unicode number, an apostrophe or an underscore, then lowercases; may return an empty string
+    return term.replace(/[^\p{L}\p{N}'_]/gu, '').toLowerCase();
+}
+/**
+ * Check if a token is a hyphenated compound word (e.g., multi-agent, DEC-0054, gpt-4).
+ * Returns true if the token contains internal hyphens between word/digit characters.
+ *
+ * Exact contract of the pattern: the token must start with a Unicode letter or
+ * number and contain at least one hyphen that is immediately followed by a
+ * letter or number. After the first character, letters, numbers, apostrophes
+ * and further hyphens are all accepted, so a token with a trailing hyphen
+ * (e.g. "foo-bar-") still matches, while a token with a leading hyphen
+ * (e.g. "-foo") never does.
+ *
+ * @param token Candidate token string to test.
+ * @returns true when the whole token matches the pattern above, false otherwise.
+ */
+function isHyphenatedToken(token) {
+    return /^[\p{L}\p{N}][\p{L}\p{N}'-]*-[\p{L}\p{N}][\p{L}\p{N}'-]*$/u.test(token);
+}
+/**
+ * Sanitize a hyphenated term into an FTS5 phrase by splitting on hyphens
+ * and sanitizing each part. Returns the parts joined by spaces for use
+ * inside FTS5 quotes: "multi agent" matches "multi-agent" in porter tokenizer.
+ *
+ * Each part is passed through sanitizeFTS5Term; parts that sanitize to an
+ * empty string are dropped, so the result is an empty string when no part
+ * survives. The surrounding quotes are NOT added here.
+ *
+ * @param term Hyphenated token, e.g. "multi-agent".
+ * @returns The sanitized, non-empty parts joined by single spaces.
+ */
+function sanitizeHyphenatedTerm(term) {
+    return term.split('-').map(t => sanitizeFTS5Term(t)).filter(t => t).join(' ');
 }
 /**
  * Parse lex query syntax into FTS5 query.
@@ -1883,14 +2463,23 @@ function sanitizeFTS5Term(term) {
  * Supports:
  * - Quoted phrases: "exact phrase" → "exact phrase" (exact match)
  * - Negation: -term or -"phrase" → uses FTS5 NOT operator
+ * - Hyphenated tokens: multi-agent, DEC-0054, gpt-4 → treated as phrases
  * - Plain terms: term → "term"* (prefix match)
  *
  * FTS5 NOT is a binary operator: `term1 NOT term2` means "match term1 but not term2".
  * So `-term` only works when there are also positive terms.
  *
+ * Hyphen disambiguation: `-sports` at a word boundary is negation, but `multi-agent`
+ * (where `-` is between word characters) is treated as a hyphenated phrase.
+ * When a leading `-` is followed by what looks like a hyphenated compound word
+ * (e.g., `-multi-agent`), the entire token is treated as a negated phrase.
+ *
  * Examples:
  *   performance -sports     → "performance"* NOT "sports"*
  *   "machine learning"      → "machine learning"
+ *   multi-agent memory      → "multi agent" AND "memory"*
+ *   DEC-0054               → "dec 0054"
+ *   -multi-agent            → NOT "multi agent"
  */
 function buildFTS5Query(query) {
     const positive = [];
@@ -1916,7 +2505,7 @@ function buildFTS5Query(query) {
             const phrase = s.slice(start, i).trim();
             i++; // skip closing quote
             if (phrase.length > 0) {
-                const sanitized = phrase.split(/\s+/).map(t => sanitizeFTS5Term(t)).filter(t => t).join(' ');
+                const sanitized = sanitizeFTS5Phrase(phrase);
                 if (sanitized) {
                     const ftsPhrase = `"${sanitized}"`; // Exact phrase, no prefix match
                     if (negated) {
@@ -1934,14 +2523,42 @@ function buildFTS5Query(query) {
             while (i < s.length && !/[\s"]/.test(s[i]))
                 i++;
             const term = s.slice(start, i);
-            const sanitized = sanitizeFTS5Term(term);
-            if (sanitized) {
-                const ftsTerm = `"${sanitized}"*`; // Prefix match
-                if (negated) {
-                    negative.push(ftsTerm);
+            // Handle hyphenated tokens: multi-agent, DEC-0054, gpt-4
+            // These get split into phrase queries so FTS5 porter tokenizer matches them.
+            if (isHyphenatedToken(term)) {
+                const sanitized = sanitizeHyphenatedTerm(term);
+                if (sanitized) {
+                    const ftsPhrase = `"${sanitized}"`; // Phrase match (no prefix)
+                    if (negated) {
+                        negative.push(ftsPhrase);
+                    }
+                    else {
+                        positive.push(ftsPhrase);
+                    }
                 }
-                else {
-                    positive.push(ftsTerm);
+            }
+            else if (containsCjk(term)) {
+                const sanitized = sanitizeFTS5Phrase(term);
+                if (sanitized) {
+                    const ftsPhrase = `"${sanitized}"`; // CJK phrase over character tokens
+                    if (negated) {
+                        negative.push(ftsPhrase);
+                    }
+                    else {
+                        positive.push(ftsPhrase);
+                    }
+                }
+            }
+            else {
+                const sanitized = sanitizeFTS5Term(term);
+                if (sanitized) {
+                    const ftsTerm = `"${sanitized}"*`; // Prefix match
+                    if (negated) {
+                        negative.push(ftsTerm);
+                    }
+                    else {
+                        positive.push(ftsTerm);
+                    }
                 }
             }
         }
@@ -1964,8 +2581,9 @@ function buildFTS5Query(query) {
  * Returns error message if invalid, null if valid.
  */
 export function validateSemanticQuery(query) {
-    // Check for negation syntax
-    if (/-\w/.test(query) || /-"/.test(query)) {
+    // Check for negation syntax — only at token boundaries (start of string or after whitespace).
+    // Hyphenated words like "real-time" or "write-ahead" must not trigger this.
+    if (/(^|\s)-[\w"]/.test(query)) {
         return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.';
     }
     return null;
@@ -1984,26 +2602,42 @@ export function searchFTS(db, query, limit = 20, collectionName) {
     const ftsQuery = buildFTS5Query(query);
     if (!ftsQuery)
         return [];
+    // Use a CTE to force FTS5 to run first, then filter by collection.
+    // Without the CTE, SQLite's query planner combines FTS5 MATCH with the
+    // collection filter in a single WHERE clause, which can cause it to
+    // abandon the FTS5 index and fall back to a full scan — turning an 8ms
+    // query into a 17-second query on large collections.
+    const params = [ftsQuery];
+    // When filtering by collection, fetch extra candidates from the FTS index
+    // since some will be filtered out. Without a collection filter we can
+    // fetch exactly the requested limit.
+    const ftsLimit = collectionName ? limit * 10 : limit;
     let sql = `
+    WITH fts_matches AS (
+      SELECT rowid, bm25(documents_fts, 1.5, 4.0, 1.0) as bm25_score
+      FROM documents_fts
+      WHERE documents_fts MATCH ?
+      ORDER BY bm25_score ASC
+      LIMIT ${ftsLimit}
+    )
     SELECT
       'qmd://' || d.collection || '/' || d.path as filepath,
       d.collection || '/' || d.path as display_path,
       d.title,
       content.doc as body,
       d.hash,
-      bm25(documents_fts, 10.0, 1.0) as bm25_score
-    FROM documents_fts f
-    JOIN documents d ON d.id = f.rowid
+      fm.bm25_score
+    FROM fts_matches fm
+    JOIN documents d ON d.id = fm.rowid
     JOIN content ON content.hash = d.hash
-    WHERE documents_fts MATCH ? AND d.active = 1
+    WHERE d.active = 1
   `;
-    const params = [ftsQuery];
     if (collectionName) {
         sql += ` AND d.collection = ?`;
         params.push(String(collectionName));
     }
     // bm25 lower is better; sort ascending.
-    sql += ` ORDER BY bm25_score ASC LIMIT ?`;
+    sql += ` ORDER BY fm.bm25_score ASC LIMIT ?`;
     params.push(limit);
     const rows = db.prepare(sql).all(...params);
     return rows.map(row => {
@@ -2075,7 +2709,7 @@ export async function searchVec(db, query, model, limit = 20, collectionName, se
         docSql += ` AND d.collection = ?`;
         params.push(collectionName);
     }
-    const docRows = db.prepare(docSql).all(...params);
+    const docRows = withLazyContentVectorMigration(db, () => db.prepare(docSql).all(...params));
     // Combine with distances and dedupe by filepath
     const seen = new Map();
     for (const row of docRows) {
@@ -2122,34 +2756,124 @@ async function getEmbedding(text, model, isQuery, session, llmOverride) {
  * Get all unique content hashes that need embeddings (from active documents).
  * Returns hash, document body, and a sample path for display purposes.
  */
-export function getHashesForEmbedding(db) {
-    return db.prepare(`
+export function getHashesForEmbedding(db, model = DEFAULT_EMBED_MODEL) {
+    const fingerprint = getEmbeddingFingerprint(model);
+    return withLazyContentVectorMigration(db, () => db.prepare(`
     SELECT d.hash, c.doc as body, MIN(d.path) as path
     FROM documents d
     JOIN content c ON d.hash = c.hash
-    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
-    WHERE d.active = 1 AND v.hash IS NULL
+    LEFT JOIN (
+      SELECT hash, model, COUNT(*) AS chunk_count, MAX(total_chunks) AS expected_chunks
+      FROM content_vectors
+      WHERE model = ? AND embed_fingerprint = ?
+      GROUP BY hash, model, embed_fingerprint
+    ) v ON d.hash = v.hash
+    WHERE d.active = 1
+      AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
     GROUP BY d.hash
-  `).all();
+  `).all(model, fingerprint)); // binds model and fingerprint; an active hash is returned when it has no content_vectors rows for that pair, or fewer rows than MAX(total_chunks)
 }
 /**
- * Clear all embeddings from the database (force re-index).
- * Deletes all rows from content_vectors and drops the vectors_vec table.
- */
-export function clearAllEmbeddings(db) {
-    db.exec(`DELETE FROM content_vectors`);
-    db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+ * Clear embeddings for the whole index, or just for one collection.
+ *
+ * When `collection` is omitted the entire content_vectors table is emptied and
+ * the vectors_vec virtual table is dropped (it is recreated with the right
+ * dimensions on the next embed run).
+ *
+ * When `collection` is provided, only vectors whose hash is referenced
+ * exclusively by active documents in that collection are removed. Hashes
+ * shared with active documents in other collections are left in place so
+ * vector search keeps working there (content_vectors is keyed globally by
+ * content hash; identical document bodies across collections share a row).
+ * vectors_vec is preserved so other collections keep working unless the scoped
+ * clear empties content_vectors entirely, in which case it is dropped so the
+ * next embed can recreate the table with the current dimensions.
+ */
+export function clearAllEmbeddings(db, collection) {
+    if (!collection) { // any falsy value (undefined, empty string) means "clear the whole index"
+        db.exec(`DELETE FROM content_vectors`);
+        db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+        return;
+    }
+    const exclusiveHashesQuery = `
+    SELECT DISTINCT d.hash
+    FROM documents d
+    WHERE d.collection = ? AND d.active = 1
+      AND NOT EXISTS (
+        SELECT 1 FROM documents d2
+        WHERE d2.hash = d.hash
+          AND d2.active = 1
+          AND d2.collection != d.collection
+      )
+  `; // hashes of active documents in the collection that no active document in another collection shares; takes one bound parameter (the collection)
+    const vecTableExists = db
+        .prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='vectors_vec'`)
+        .get(); // checked once, before the migration wrapper runs
+    withLazyContentVectorMigration(db, () => {
+        if (vecTableExists) { // vectors_vec rows are only deleted when the table exists
+            const hashSeqRows = db.prepare(`
+        SELECT cv.hash, cv.seq
+        FROM content_vectors cv
+        WHERE cv.hash IN (${exclusiveHashesQuery})
+      `).all(collection);
+            const delVec = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
+            for (const row of hashSeqRows) {
+                delVec.run(`${row.hash}_${row.seq}`); // vectors_vec key has the form <hash>_<seq>
+            }
+        }
+        db.prepare(`
+      DELETE FROM content_vectors
+      WHERE hash IN (${exclusiveHashesQuery})
+    `).run(collection); // must run after the vectors_vec cleanup, which reads the seq values from content_vectors
+        const remaining = db
+            .prepare(`SELECT COUNT(*) AS n FROM content_vectors`)
+            .get();
+        if (remaining.n === 0) { // nothing left for any collection: drop vectors_vec as well
+            db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+        }
+    });
+}
 /**
  * Insert a single embedding into both content_vectors and vectors_vec tables.
  * The hash_seq key is formatted as "hash_seq" for the vectors_vec table.
+ *
+ * content_vectors is inserted first so that getHashesForEmbedding (which checks
+ * only content_vectors) won't re-select the hash on a crash between the two inserts.
+ *
+ * vectors_vec uses DELETE + INSERT instead of INSERT OR REPLACE because sqlite-vec's
+ * vec0 virtual tables silently ignore the OR REPLACE conflict clause.
+ *
+ * @param db Database handle; must provide prepare().
+ * @param hash Content hash; stored in content_vectors.hash and used as the first half of the vectors_vec key.
+ * @param seq Chunk sequence number; stored in content_vectors.seq and used as the second half of the vectors_vec key.
+ * @param pos Stored in content_vectors.pos.
+ * @param embedding Stored in vectors_vec.embedding.
+ * @param model Stored in content_vectors.model; also used to derive the default fingerprint.
+ * @param embeddedAt Stored in content_vectors.embedded_at.
+ * @param totalChunks Stored in content_vectors.total_chunks (default 1).
+ * @param fingerprint Stored in content_vectors.embed_fingerprint (default getEmbeddingFingerprint(model)).
  */
-export function insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt) {
+export function insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt, totalChunks = 1, fingerprint = getEmbeddingFingerprint(model)) {
     const hashSeq = `${hash}_${seq}`;
-    const insertVecStmt = db.prepare(`INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
-    const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, ?, ?, ?, ?)`);
-    insertVecStmt.run(hashSeq, embedding);
-    insertContentVectorStmt.run(hash, seq, pos, model, embeddedAt);
+    withLazyContentVectorMigration(db, () => {
+        // Insert content_vectors first — crash-safe ordering (see getHashesForEmbedding)
+        const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embed_fingerprint, total_chunks, embedded_at) VALUES (?, ?, ?, ?, ?, ?, ?)`);
+        insertContentVectorStmt.run(hash, seq, pos, model, fingerprint, totalChunks, embeddedAt);
+        // vec0 virtual tables don't support OR REPLACE — use DELETE + INSERT
+        const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
+        const insertVecStmt = db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
+        deleteVecStmt.run(hashSeq);
+        insertVecStmt.run(hashSeq, embedding);
+    });
+}
+function removeIncompleteEmbeddings(db, expectedChunksByHash, model) { // deletes stored embeddings for hashes whose row count differs from the expected chunk count
+    return withLazyContentVectorMigration(db, () => {
+        let removed = 0;
+        const rowsStmt = db.prepare(`SELECT seq FROM content_vectors WHERE hash = ? AND model = ?`); // NOTE(review): matches on model only, not on embed_fingerprint — confirm this is intended
+        const deleteContentStmt = db.prepare(`DELETE FROM content_vectors WHERE hash = ? AND model = ?`);
+        const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
+        for (const [hash, expectedChunks] of expectedChunksByHash) { // iterated as [hash, expectedChunks] pairs
+            const rows = rowsStmt.all(hash, model);
+            if (rows.length === 0 || rows.length === expectedChunks)
+                continue; // nothing stored for this hash, or exactly the expected number of rows is present
+            for (const row of rows) {
+                deleteVecStmt.run(`${hash}_${row.seq}`); // vectors_vec key has the form <hash>_<seq>
+            }
+            deleteContentStmt.run(hash, model);
+            removed += rows.length;
+        }
+        return removed; // number of content_vectors rows selected for deletion across all hashes
+    });
 }
 // =============================================================================
 // Query expansion
@@ -2161,12 +2885,15 @@ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db, intent
     if (cached) {
         try {
             const parsed = JSON.parse(cached);
+            if (!Array.isArray(parsed))
+                return [];
+            const rows = parsed;
             // Migrate old cache format: { type, text } → { type, query }
-            if (parsed.length > 0 && parsed[0].query) {
-                return parsed;
+            if (rows.length > 0 && typeof rows[0]?.query === "string") {
+                return rows.map((r) => ({ type: r.type, query: String(r.query) }));
             }
-            else if (parsed.length > 0 && parsed[0].text) {
-                return parsed.map((r) => ({ type: r.type, query: r.text }));
+            else if (rows.length > 0 && typeof rows[0]?.text === "string") {
+                return rows.map((r) => ({ type: r.type, query: String(r.text) }));
             }
         }
         catch {
@@ -2473,7 +3200,7 @@ export function getDocumentBody(db, doc, fromLine, maxLines) {
     let body = row.body;
     if (fromLine !== undefined || maxLines !== undefined) {
         const lines = body.split('\n');
-        const start = (fromLine || 1) - 1;
+        const start = Math.max(0, (fromLine || 1) - 1);
         const end = maxLines !== undefined ? start + maxLines : lines.length;
         body = lines.slice(start, end).join('\n');
     }
@@ -2484,7 +3211,7 @@ export function getDocumentBody(db, doc, fromLine, maxLines) {
  * Returns documents without body by default (use getDocumentBody to load)
  */
 export function findDocuments(db, pattern, options = {}) {
-    const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?');
+    const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?') && !pattern.includes('{');
     const errors = [];
     const maxBytes = options.maxBytes ?? DEFAULT_MULTI_GET_MAX_BYTES;
     const bodyCol = options.includeBody ? `, content.doc as body` : ``;
@@ -2581,7 +3308,7 @@ export function findDocuments(db, pattern, options = {}) {
 // =============================================================================
 // Status
 // =============================================================================
-export function getStatus(db) {
+export function getStatus(db, model = DEFAULT_EMBED_MODEL) {
     // DB is source of truth for collections — config provides supplementary metadata
     const dbCollections = db.prepare(`
     SELECT
@@ -2614,7 +3341,7 @@ export function getStatus(db) {
         return new Date(b.lastUpdated).getTime() - new Date(a.lastUpdated).getTime();
     });
     const totalDocs = db.prepare(`SELECT COUNT(*) as c FROM documents WHERE active = 1`).get().c;
-    const needsEmbedding = getHashesNeedingEmbedding(db);
+    const needsEmbedding = getHashesNeedingEmbedding(db, undefined, model);
     const hasVectors = !!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
     return {
         totalDocuments: totalDocs,
@@ -2661,7 +3388,7 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen, in
     const totalLines = body.split('\n').length;
     let searchBody = body;
     let lineOffset = 0;
-    if (chunkPos && chunkPos > 0) {
+    if (chunkPos !== undefined && chunkPos >= 0) {
         // Search within the chunk region, with some padding for context
         // Use provided chunkLen or fall back to max chunk size (covers variable-length chunks)
         const searchLen = chunkLen || CHUNK_SIZE_CHARS;
@@ -2692,6 +3419,22 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen, in
             bestLine = i;
         }
     }
+    if (chunkPos !== undefined && chunkPos >= 0 && bestScore <= 0) {
+        if (chunkPos === 0) {
+            // chunkPos=0 may be the chunk selector's initialization default for queries
+            // where lexical chunk scoring found no winner (e.g. tokens filtered to empty
+            // by the length>2 guard). Retry with full body so the real match isn't missed.
+            return extractSnippet(body, query, maxLen, undefined, undefined, intent);
+        }
+        // For chunkPos > 0 the reranker actively picked this chunk. Tokens failing to
+        // match literally is most likely a tokenizer limitation (quoted phrases, FTS5
+        // syntax, HYDE passages, semantic hits), so anchor on the chunk start rather
+        // than disregarding the reranker's pick.
+        const contextStart = Math.max(0, chunkPos - 100);
+        bestLine = chunkPos > contextStart
+            ? searchBody.slice(0, chunkPos - contextStart).split('\n').length - 1
+            : 0;
+    }
     const start = Math.max(0, bestLine - 1);
     const end = Math.min(lines.length, bestLine + 3);
     const snippetLines = lines.slice(start, end);
@@ -2729,6 +3472,20 @@ export function addLineNumbers(text, startLine = 1) {
     const lines = text.split('\n');
     return lines.map((line, i) => `${startLine + i}: ${line}`).join('\n');
 }
+/**
+ * RRF list weights for hybridQuery.
+ *
+ * Original-query retrieval paths are the primary evidence and get 2x weight:
+ * - original FTS
+ * - original vector search
+ *
+ * Expansion-derived lists (lex/vec/hyde) stay at 1x regardless of list order,
+ * so a lex expansion inserted before original vector search cannot steal the
+ * original vector boost.
+ *
+ * The weight depends only on each entry's `queryType` field: exactly
+ * "original" yields 2.0, any other value (including a missing field) yields 1.0.
+ *
+ * @param rankedListMeta Array of per-list metadata objects; only `queryType` is read.
+ * @returns Array of weights with the same length and order as rankedListMeta.
+ */
+export function getHybridRrfWeights(rankedListMeta) {
+    return rankedListMeta.map(meta => meta.queryType === "original" ? 2.0 : 1.0);
+}
 /**
  * Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
  *
@@ -2817,7 +3574,8 @@ export async function hybridQuery(store, query, options) {
         }
         // Batch embed all vector queries in a single call
         const llm = getLlm(store);
-        const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text));
+        const embedModel = llm.embedModelName;
+        const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, embedModel));
         hooks?.onEmbedStart?.(textsToEmbed.length);
         const embedStart = Date.now();
         const embeddings = await llm.embedBatch(textsToEmbed);
@@ -2827,7 +3585,7 @@ export async function hybridQuery(store, query, options) {
             const embedding = embeddings[i]?.embedding;
             if (!embedding)
                 continue;
-            const vecResults = await store.searchVec(vecQueries[i].text, DEFAULT_EMBED_MODEL, 20, collection, undefined, embedding);
+            const vecResults = await store.searchVec(vecQueries[i].text, embedModel, 20, collection, undefined, embedding);
             if (vecResults.length > 0) {
                 for (const r of vecResults)
                     docidMap.set(r.filepath, r.docid);
@@ -2843,8 +3601,9 @@ export async function hybridQuery(store, query, options) {
             }
         }
     }
-    // Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
-    const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
+    // Step 4: RRF fusion — original-query FTS and vector lists get 2x weight;
+    // expansion-derived lists stay at 1x independent of insertion order.
+    const weights = getHybridRrfWeights(rankedListMeta);
     const fused = reciprocalRankFusion(rankedLists, weights);
     const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
     const candidates = fused.slice(0, candidateLimit);
@@ -2855,8 +3614,9 @@ export async function hybridQuery(store, query, options) {
     const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
     const intentTerms = intent ? extractIntentTerms(intent) : [];
     const docChunkMap = new Map();
+    const chunkStrategy = options?.chunkStrategy;
     for (const cand of candidates) {
-        const chunks = chunkDocument(cand.body);
+        const chunks = await chunkDocumentAsync(cand.body, undefined, undefined, undefined, cand.file, chunkStrategy);
         if (chunks.length === 0)
             continue;
         // Pick chunk with most keyword overlap (fallback: first chunk)
@@ -3024,10 +3784,11 @@ export async function vectorSearchQuery(store, query, options) {
     const vecExpanded = allExpanded.filter(q => q.type !== 'lex');
     options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
     // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
+    const embedModel = getLlm(store).embedModelName;
     const queryTexts = [query, ...vecExpanded.map(q => q.query)];
     const allResults = new Map();
     for (const q of queryTexts) {
-        const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection);
+        const vecResults = await store.searchVec(q, embedModel, limit, collection);
         for (const r of vecResults) {
             const existing = allResults.get(r.filepath);
             if (!existing || r.score > existing.score) {
@@ -3128,7 +3889,8 @@ export async function structuredSearch(store, searches, options) {
         const vecSearches = searches.filter((s) => s.type === 'vec' || s.type === 'hyde');
         if (vecSearches.length > 0) {
             const llm = getLlm(store);
-            const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query));
+            const embedModel = llm.embedModelName;
+            const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, embedModel));
             hooks?.onEmbedStart?.(textsToEmbed.length);
             const embedStart = Date.now();
             const embeddings = await llm.embedBatch(textsToEmbed);
@@ -3138,7 +3900,7 @@ export async function structuredSearch(store, searches, options) {
                 if (!embedding)
                     continue;
                 for (const coll of collectionList) {
-                    const vecResults = await store.searchVec(vecSearches[i].query, DEFAULT_EMBED_MODEL, 20, coll, undefined, embedding);
+                    const vecResults = await store.searchVec(vecSearches[i].query, embedModel, 20, coll, undefined, embedding);
                     if (vecResults.length > 0) {
                         for (const r of vecResults)
                             docidMap.set(r.filepath, r.docid);
@@ -3174,8 +3936,9 @@ export async function structuredSearch(store, searches, options) {
     const queryTerms = primaryQuery.toLowerCase().split(/\s+/).filter(t => t.length > 2);
     const intentTerms = intent ? extractIntentTerms(intent) : [];
     const docChunkMap = new Map();
+    const ssChunkStrategy = options?.chunkStrategy;
     for (const cand of candidates) {
-        const chunks = chunkDocument(cand.body);
+        const chunks = await chunkDocumentAsync(cand.body, undefined, undefined, undefined, cand.file, ssChunkStrategy);
         if (chunks.length === 0)
             continue;
         // Pick chunk with most keyword overlap