npm - @tobilu/qmd - Versions diffs - 2.1.0 → 2.5.1 - Mend

@tobilu/qmd 2.1.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/CHANGELOG.md +81 -0
package/README.md +3 -0
package/bin/qmd +39 -3
package/dist/ast.d.ts +1 -0
package/dist/ast.js +18 -8
package/dist/bench/bench.d.ts +2 -0
package/dist/bench/bench.js +108 -13
package/dist/bench/score.d.ts +11 -4
package/dist/bench/score.js +34 -13
package/dist/bench/types.d.ts +13 -0
package/dist/cli/qmd.d.ts +26 -0
package/dist/cli/qmd.js +1172 -121
package/dist/collections.d.ts +9 -0
package/dist/collections.js +32 -7
package/dist/db.d.ts +6 -3
package/dist/db.js +1 -1
package/dist/index.d.ts +4 -0
package/dist/index.js +5 -2
package/dist/llm.d.ts +65 -3
package/dist/llm.js +376 -63
package/dist/mcp/server.d.ts +6 -3
package/dist/mcp/server.js +41 -26
package/dist/paths.d.ts +1 -0
package/dist/paths.js +4 -0
package/dist/store.d.ts +92 -17
package/dist/store.js +676 -176
package/package.json +23 -12
package/scripts/build.mjs +29 -0
package/scripts/check-package-grammars.mjs +29 -0
package/scripts/package-smoke.mjs +65 -0
package/scripts/test-all.mjs +27 -0
package/skills/qmd/SKILL.md +203 -0
package/skills/qmd/references/mcp-setup.md +102 -0
package/skills/release/SKILL.md +139 -0
package/skills/release/scripts/install-hooks.sh +38 -0
package/dist/embedded-skills.d.ts +0 -6
package/dist/embedded-skills.js +0 -14

package/dist/store.js CHANGED

@@ -16,18 +16,21 @@ import { createHash } from "crypto";
 import { readFileSync, realpathSync, statSync, mkdirSync } from "node:fs";
 // Note: node:path resolve is not imported — we export our own cross-platform resolve()
 import fastGlob from "fast-glob";
-import { LlamaCpp, getDefaultLlamaCpp, formatQueryForEmbedding, formatDocForEmbedding, withLLMSessionForLlm, } from "./llm.js";
+import { qmdHomedir } from "./paths.js";
+import { LlamaCpp, getDefaultLlamaCpp, formatQueryForEmbedding, formatDocForEmbedding, withLLMSessionForLlm, DEFAULT_EMBED_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, } from "./llm.js";
 // =============================================================================
 // Configuration
 // =============================================================================
-const HOME = process.env.HOME || "/tmp";
-export const DEFAULT_EMBED_MODEL = "embeddinggemma";
-export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
-export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
+export const DEFAULT_EMBED_MODEL = DEFAULT_EMBED_MODEL_URI;
+export const DEFAULT_RERANK_MODEL = DEFAULT_RERANK_MODEL_URI;
+export const DEFAULT_QUERY_MODEL = DEFAULT_GENERATE_MODEL_URI;
 export const DEFAULT_GLOB = "**/*.md";
 export const DEFAULT_MULTI_GET_MAX_BYTES = 10 * 1024; // 10KB
 export const DEFAULT_EMBED_MAX_DOCS_PER_BATCH = 64;
 export const DEFAULT_EMBED_MAX_BATCH_BYTES = 64 * 1024 * 1024; // 64MB
+const EMBED_FINGERPRINT_PROBE_QUERY = "__qmd_embedding_query_probe__";
+const EMBED_FINGERPRINT_PROBE_TITLE = "__qmd_embedding_title_probe__";
+const EMBED_FINGERPRINT_PROBE_DOC = "__qmd_embedding_document_probe__";
 // Chunking: 900 tokens per chunk with 15% overlap
 // Increased from 800 to accommodate smart chunking finding natural break points
 export const CHUNK_SIZE_TOKENS = 900;
@@ -38,6 +41,16 @@ export const CHUNK_OVERLAP_CHARS = CHUNK_OVERLAP_TOKENS * 4; // 540 chars
 // Search window for finding optimal break points (in tokens, ~200 tokens)
 export const CHUNK_WINDOW_TOKENS = 200;
 export const CHUNK_WINDOW_CHARS = CHUNK_WINDOW_TOKENS * 4; // 800 chars
+export function getEmbeddingFingerprint(model = DEFAULT_EMBED_MODEL) {
+    const significant = [
+        `model:${model}`,
+        `query:${formatQueryForEmbedding(EMBED_FINGERPRINT_PROBE_QUERY, model)}`,
+        `doc:${formatDocForEmbedding(EMBED_FINGERPRINT_PROBE_DOC, EMBED_FINGERPRINT_PROBE_TITLE, model)}`,
+        `chunk_tokens:${CHUNK_SIZE_TOKENS}`,
+        `chunk_overlap_tokens:${CHUNK_OVERLAP_TOKENS}`,
+    ].join("\n");
+    return createHash("sha256").update(significant).digest("hex").slice(0, 6);
+}
 /**
  * Get the LlamaCpp instance for a store — prefers the store's own instance,
  * falls back to the global singleton.
@@ -228,7 +241,7 @@ export const RERANK_CANDIDATE_LIMIT = 40;
 // Path utilities
 // =============================================================================
 export function homedir() {
-    return HOME;
+    return qmdHomedir();
 }
 /**
  * Check if a path is absolute.
@@ -468,21 +481,25 @@ export function normalizeVirtualPath(input) {
 export function parseVirtualPath(virtualPath) {
     // Normalize the path first
     const normalized = normalizeVirtualPath(virtualPath);
+    const [pathPart = normalized, queryString = ""] = normalized.split("?");
     // Match: qmd://collection-name[/optional-path]
     // Allows: qmd://name, qmd://name/, qmd://name/path
-    const match = normalized.match(/^qmd:\/\/([^\/]+)\/?(.*)$/);
+    const match = pathPart.match(/^qmd:\/\/([^\/]+)\/?(.*)$/);
     if (!match?.[1])
         return null;
+    const indexName = new URLSearchParams(queryString).get("index")?.trim() || undefined;
     return {
         collectionName: match[1],
         path: match[2] ?? '', // Empty string for collection root
+        ...(indexName ? { indexName } : {}),
     };
 }
 /**
  * Build a virtual path from collection name and relative path.
  */
-export function buildVirtualPath(collectionName, path) {
-    return `qmd://${collectionName}/${path}`;
+export function buildVirtualPath(collectionName, path, indexName) {
+    const base = `qmd://${collectionName}/${path}`;
+    return indexName ? `${base}?index=${encodeURIComponent(indexName)}` : base;
 }
 /**
  * Check if a path is explicitly a virtual path.
@@ -552,6 +569,7 @@ function createSqliteVecUnavailableError(reason) {
         "Install Homebrew SQLite so the sqlite-vec extension can be loaded, " +
         "and set BREW_PREFIX if Homebrew is installed in a non-standard location.");
 }
+let _sqliteVecUnavailableReason = null;
 function getErrorMessage(err) {
     return err instanceof Error ? err.message : String(err);
 }
@@ -568,16 +586,76 @@ export function verifySqliteVecLoaded(db) {
     }
 }
 let _sqliteVecAvailable = null;
+const CJK_CHAR_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
+const CJK_RUN_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]+/gu;
+const FTS_CJK_NORMALIZED_VERSION = "1";
+/**
+ * FTS5's unicode61 tokenizer does not segment CJK text into searchable words.
+ * Normalize CJK runs by spacing every character so exact CJK queries can be
+ * translated into phrase queries while Latin text keeps the default tokenizer.
+ */
+export function normalizeCjkForFTS(text) {
+    return text.replace(CJK_RUN_PATTERN, run => ` ${Array.from(run).join(' ')} `);
+}
+function containsCjk(text) {
+    return CJK_CHAR_PATTERN.test(text);
+}
+function sanitizeFTS5Phrase(phrase) {
+    return normalizeCjkForFTS(phrase)
+        .split(/\s+/)
+        .map(t => sanitizeFTS5Term(t))
+        .filter(t => t)
+        .join(' ');
+}
+function rebuildFTSForCjkNormalization(db) {
+    const version = db.prepare(`SELECT value FROM store_config WHERE key = 'fts_cjk_normalized_version'`).get();
+    if (version?.value === FTS_CJK_NORMALIZED_VERSION)
+        return;
+    try {
+        db.exec(`DELETE FROM documents_fts WHERE rowid >= 0`);
+    }
+    catch {
+        // Some older/corrupt FTS5 shadow-table states can reject bulk deletes even
+        // though reads still work. Recreate the virtual table; documents_fts is a
+        // derived index, so rebuilding it from documents/content is safe.
+        db.exec(`DROP TABLE IF EXISTS documents_fts`);
+        db.exec(`
+      CREATE VIRTUAL TABLE documents_fts USING fts5(
+        filepath, title, body,
+        tokenize='porter unicode61'
+      )
+    `);
+    }
+    const rows = db.prepare(`
+    SELECT d.id, d.collection, d.path, d.title, content.doc as body
+    FROM documents d
+    JOIN content ON content.hash = d.hash
+    WHERE d.active = 1
+  `).all();
+    const insert = db.prepare(`INSERT INTO documents_fts(rowid, filepath, title, body) VALUES (?, ?, ?, ?)`);
+    const rebuild = db.transaction(() => {
+        for (const row of rows) {
+            insert.run(row.id, normalizeCjkForFTS(`${row.collection}/${row.path}`), normalizeCjkForFTS(row.title), normalizeCjkForFTS(row.body));
+        }
+    });
+    rebuild();
+    db.prepare(`
+    INSERT OR REPLACE INTO store_config(key, value)
+    VALUES ('fts_cjk_normalized_version', ?)
+  `).run(FTS_CJK_NORMALIZED_VERSION);
+}
 function initializeDatabase(db) {
     try {
         loadSqliteVec(db);
         verifySqliteVecLoaded(db);
         _sqliteVecAvailable = true;
+        _sqliteVecUnavailableReason = null;
     }
     catch (err) {
         // sqlite-vec is optional — vector search won't work but FTS is fine
         _sqliteVecAvailable = false;
-        console.warn(getErrorMessage(err));
+        _sqliteVecUnavailableReason = getErrorMessage(err);
+        console.warn(_sqliteVecUnavailableReason);
     }
     db.exec("PRAGMA journal_mode = WAL");
     db.exec("PRAGMA foreign_keys = ON");
@@ -619,19 +697,16 @@ function initializeDatabase(db) {
       created_at TEXT NOT NULL
     )
   `);
-    // Content vectors
-    const cvInfo = db.prepare(`PRAGMA table_info(content_vectors)`).all();
-    const hasSeqColumn = cvInfo.some(col => col.name === 'seq');
-    if (cvInfo.length > 0 && !hasSeqColumn) {
-        db.exec(`DROP TABLE IF EXISTS content_vectors`);
-        db.exec(`DROP TABLE IF EXISTS vectors_vec`);
-    }
+    // Content vectors. Avoid PRAGMA schema probes during startup; legacy vector
+    // columns are repaired lazily when a vector/embedding query first needs them.
     db.exec(`
     CREATE TABLE IF NOT EXISTS content_vectors (
       hash TEXT NOT NULL,
       seq INTEGER NOT NULL DEFAULT 0,
       pos INTEGER NOT NULL DEFAULT 0,
       model TEXT NOT NULL,
+      embed_fingerprint TEXT NOT NULL DEFAULT '',
+      total_chunks INTEGER NOT NULL DEFAULT 1,
       embedded_at TEXT NOT NULL,
       PRIMARY KEY (hash, seq)
     )
@@ -662,9 +737,12 @@ function initializeDatabase(db) {
       tokenize='porter unicode61'
     )
   `);
-    // Triggers to keep FTS in sync
+    // Triggers keep FTS in sync for callers that write directly to documents.
+    // Production indexing paths rebuild entries in TypeScript so CJK text can be
+    // normalized before it reaches the unicode61 tokenizer.
+    db.exec(`DROP TRIGGER IF EXISTS documents_ai`);
     db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents
+    CREATE TRIGGER documents_ai AFTER INSERT ON documents
     WHEN new.active = 1
     BEGIN
       INSERT INTO documents_fts(rowid, filepath, title, body)
@@ -676,13 +754,15 @@ function initializeDatabase(db) {
       WHERE new.active = 1;
     END
   `);
+    db.exec(`DROP TRIGGER IF EXISTS documents_ad`);
     db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
+    CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN
       DELETE FROM documents_fts WHERE rowid = old.id;
     END
   `);
+    db.exec(`DROP TRIGGER IF EXISTS documents_au`);
     db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents
+    CREATE TRIGGER documents_au AFTER UPDATE ON documents
     BEGIN
       -- Delete from FTS if no longer active
       DELETE FROM documents_fts WHERE rowid = old.id AND new.active = 0;
@@ -697,6 +777,7 @@ function initializeDatabase(db) {
       WHERE new.active = 1;
     END
   `);
+    rebuildFTSForCjkNormalization(db);
 }
 function rowToNamedCollection(row) {
     return {
@@ -838,7 +919,7 @@ export function isSqliteVecAvailable() {
 }
 function ensureVecTableInternal(db, dimensions) {
     if (!_sqliteVecAvailable) {
-        throw new Error("sqlite-vec is not available. Vector operations require a SQLite build with extension loading support.");
+        throw createSqliteVecUnavailableError(_sqliteVecUnavailableReason ?? "vector operations require a SQLite build with extension loading support");
     }
     const tableInfo = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
     if (tableInfo) {
@@ -902,7 +983,7 @@ export async function reindexCollection(store, collectionPath, globPattern, coll
         }
         const hash = await hashContent(content);
         const title = extractTitle(content, relativeFile);
-        const existing = findActiveDocument(db, collectionName, path);
+        const existing = findOrMigrateLegacyDocument(db, collectionName, path);
         if (existing) {
             if (existing.hash === hash) {
                 if (existing.title !== title) {
@@ -955,16 +1036,74 @@ function resolveEmbedOptions(options) {
         maxBatchBytes: validatePositiveIntegerOption("maxBatchBytes", options?.maxBatchBytes, DEFAULT_EMBED_MAX_BATCH_BYTES),
     };
 }
-function getPendingEmbeddingDocs(db) {
-    return db.prepare(`
-    SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
-    FROM documents d
-    JOIN content c ON d.hash = c.hash
-    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
-    WHERE d.active = 1 AND v.hash IS NULL
-    GROUP BY d.hash
-    ORDER BY MIN(d.path)
-  `).all();
+const CONTENT_VECTOR_DESIRED_COLUMNS = [
+    { name: "seq", definition: "INTEGER NOT NULL DEFAULT 0" },
+    { name: "pos", definition: "INTEGER NOT NULL DEFAULT 0" },
+    { name: "model", definition: "TEXT NOT NULL DEFAULT ''" },
+    { name: "embed_fingerprint", definition: "TEXT NOT NULL DEFAULT ''" },
+    { name: "total_chunks", definition: "INTEGER NOT NULL DEFAULT 1" },
+    { name: "embedded_at", definition: "TEXT NOT NULL DEFAULT ''" },
+];
+function isContentVectorColumnError(error) {
+    const message = error instanceof Error ? error.message : String(error);
+    if (!/(no such column|has no column named)/i.test(message)) {
+        return false;
+    }
+    return CONTENT_VECTOR_DESIRED_COLUMNS.some(col => message.includes(col.name));
+}
+function runContentVectorColumnRepairs(db) {
+    for (const column of CONTENT_VECTOR_DESIRED_COLUMNS) {
+        try {
+            db.exec(`ALTER TABLE content_vectors ADD COLUMN ${column.name} ${column.definition}`);
+        }
+        catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            // The repair series is intentionally idempotent: most columns should
+            // already exist, and another caller may have repaired a missing column
+            // between the failed query and this ALTER series.
+            if (!message.includes("duplicate column name")) {
+                throw error;
+            }
+        }
+    }
+}
+function withLazyContentVectorMigration(db, operation) {
+    let repaired = false;
+    while (true) {
+        try {
+            return operation();
+        }
+        catch (error) {
+            if (repaired || !isContentVectorColumnError(error)) {
+                throw error;
+            }
+            runContentVectorColumnRepairs(db);
+            repaired = true;
+        }
+    }
+}
+function getPendingEmbeddingDocs(db, collection, model = DEFAULT_EMBED_MODEL) {
+    const collectionFilter = collection ? `AND d.collection = ?` : ``;
+    const fingerprint = getEmbeddingFingerprint(model);
+    return withLazyContentVectorMigration(db, () => {
+        const stmt = db.prepare(`
+      SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
+      FROM documents d
+      JOIN content c ON d.hash = c.hash
+      LEFT JOIN (
+        SELECT hash, model, COUNT(*) AS chunk_count, MAX(total_chunks) AS expected_chunks
+        FROM content_vectors
+        WHERE model = ? AND embed_fingerprint = ?
+        GROUP BY hash, model, embed_fingerprint
+      ) v ON d.hash = v.hash
+      WHERE d.active = 1
+        AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
+        ${collectionFilter}
+      GROUP BY d.hash
+      ORDER BY MIN(d.path)
+    `);
+        return (collection ? stmt.all(model, fingerprint, collection) : stmt.all(model, fingerprint));
+    });
 }
 function buildEmbeddingBatches(docs, maxDocsPerBatch, maxBatchBytes) {
     const batches = [];
@@ -1009,14 +1148,16 @@ function getEmbeddingDocsForBatch(db, batch) {
  */
 export async function generateEmbeddings(store, options) {
     const db = store.db;
-    const model = options?.model ?? DEFAULT_EMBED_MODEL;
+    const llm = getLlm(store);
+    const model = options?.model ?? llm.embedModelName ?? DEFAULT_EMBED_MODEL;
+    const fingerprint = getEmbeddingFingerprint(model);
     const now = new Date().toISOString();
     const { maxDocsPerBatch, maxBatchBytes } = resolveEmbedOptions(options);
     const encoder = new TextEncoder();
     if (options?.force) {
-        clearAllEmbeddings(db);
+        clearAllEmbeddings(db, options?.collection);
     }
-    const docsToEmbed = getPendingEmbeddingDocs(db);
+    const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection, model);
     if (docsToEmbed.length === 0) {
         return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
     }
@@ -1024,16 +1165,88 @@ export async function generateEmbeddings(store, options) {
     const totalDocs = docsToEmbed.length;
     const startTime = Date.now();
     // Use store's LlamaCpp or global singleton, wrapped in a session
-    const llm = getLlm(store);
-    const embedModelUri = llm.embedModelName;
+    const embedModelUri = model;
     // Create a session manager for this llm instance
     const result = await withLLMSessionForLlm(llm, async (session) => {
         let chunksEmbedded = 0;
-        let errors = 0;
         let bytesProcessed = 0;
         let totalChunks = 0;
         let vectorTableInitialized = false;
         const BATCH_SIZE = 32;
+        const RETRY_AFTER_SUCCESSFUL_CHUNKS = 64;
+        const MAX_RETRY_ATTEMPTS = 3;
+        const failures = new Map();
+        const retryQueue = new Map();
+        let successesSinceRetry = 0;
+        const failureList = () => [...failures.values()];
+        const activeErrorCount = () => failures.size;
+        const chunkKey = (chunk) => `${chunk.hash}:${chunk.seq}`;
+        const reasonFromError = (error) => {
+            const raw = error instanceof Error ? error.message : String(error);
+            return raw.length > 180 ? `${raw.slice(0, 177)}...` : raw;
+        };
+        const recordFailure = (chunk, reason) => {
+            const key = chunkKey(chunk);
+            const previous = failures.get(key);
+            failures.set(key, {
+                path: chunk.path,
+                hash: chunk.hash,
+                seq: chunk.seq,
+                attempts: (previous?.attempts ?? 0) + 1,
+                reason,
+            });
+            retryQueue.set(key, chunk);
+        };
+        const clearFailure = (chunk) => {
+            const key = chunkKey(chunk);
+            failures.delete(key);
+            retryQueue.delete(key);
+        };
+        const tryEmbedChunk = async (chunk) => {
+            try {
+                const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
+                const result = await session.embed(text, { model });
+                if (!result) {
+                    recordFailure(chunk, "embedding returned no vector");
+                    return false;
+                }
+                insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now, chunk.expectedTotalChunks, fingerprint);
+                chunksEmbedded++;
+                successesSinceRetry++;
+                clearFailure(chunk);
+                return true;
+            }
+            catch (error) {
+                recordFailure(chunk, reasonFromError(error));
+                return false;
+            }
+        };
+        const retryFailedChunks = async (force = false) => {
+            if (!session.isValid || retryQueue.size === 0)
+                return;
+            if (!force && successesSinceRetry < RETRY_AFTER_SUCCESSFUL_CHUNKS)
+                return;
+            successesSinceRetry = 0;
+            // Normal mode: one retry pass after enough unrelated chunks succeeded.
+            // Force mode: we have run out of other chunks for this batch, so keep
+            // retrying outstanding failures until they recover or hit the cap. The
+            // cap prevents endless loops on permanently bad chunks.
+            do {
+                let retried = 0;
+                for (const [key, chunk] of [...retryQueue]) {
+                    const failure = failures.get(key);
+                    if (!failure || failure.attempts >= MAX_RETRY_ATTEMPTS)
+                        continue;
+                    retried++;
+                    await tryEmbedChunk(chunk);
+                }
+                if (!force || retried === 0)
+                    break;
+            } while (session.isValid && [...retryQueue].some(([key]) => {
+                const failure = failures.get(key);
+                return !!failure && failure.attempts < MAX_RETRY_ATTEMPTS;
+            }));
+        };
         const batches = buildEmbeddingBatches(docsToEmbed, maxDocsPerBatch, maxBatchBytes);
         for (const batchMeta of batches) {
             // Abort early if session has been invalidated
@@ -1043,6 +1256,7 @@ export async function generateEmbeddings(store, options) {
             }
             const batchDocs = getEmbeddingDocsForBatch(db, batchMeta);
             const batchChunks = [];
+            const expectedChunksByHash = new Map();
             const batchBytes = batchMeta.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0);
             for (const doc of batchDocs) {
                 if (!doc.body.trim())
@@ -1052,19 +1266,22 @@ export async function generateEmbeddings(store, options) {
                 for (let seq = 0; seq < chunks.length; seq++) {
                     batchChunks.push({
                         hash: doc.hash,
+                        path: doc.path,
                         title,
                         text: chunks[seq].text,
                         seq,
                         pos: chunks[seq].pos,
                         tokens: chunks[seq].tokens,
                         bytes: encoder.encode(chunks[seq].text).length,
+                        expectedTotalChunks: chunks.length,
                     });
                 }
+                expectedChunksByHash.set(doc.hash, chunks.length);
             }
             totalChunks += batchChunks.length;
             if (batchChunks.length === 0) {
                 bytesProcessed += batchBytes;
-                options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
+                options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors: activeErrorCount(), failures: failureList() });
                 continue;
             }
             if (!vectorTableInitialized) {
@@ -1082,17 +1299,19 @@ export async function generateEmbeddings(store, options) {
             for (let batchStart = 0; batchStart < batchChunks.length; batchStart += BATCH_SIZE) {
                 // Abort early if session has been invalidated (e.g. max duration exceeded)
                 if (!session.isValid) {
-                    const remaining = batchChunks.length - batchStart;
-                    errors += remaining;
-                    console.warn(`⚠ Session expired — skipping ${remaining} remaining chunks`);
+                    const remainingChunks = batchChunks.slice(batchStart);
+                    for (const chunk of remainingChunks)
+                        recordFailure(chunk, "LLM session expired before embedding chunk");
+                    console.warn(`⚠ Session expired — skipping ${remainingChunks.length} remaining chunks`);
                     break;
                 }
-                // Abort early if error rate is too high (>80% of processed chunks failed)
-                const processed = chunksEmbedded + errors;
-                if (processed >= BATCH_SIZE && errors > processed * 0.8) {
-                    const remaining = batchChunks.length - batchStart;
-                    errors += remaining;
-                    console.warn(`⚠ Error rate too high (${errors}/${processed}) — aborting embedding`);
+                // Abort early if active error rate is too high (>80% of attempted chunks failed)
+                const processed = chunksEmbedded + activeErrorCount();
+                if (processed >= BATCH_SIZE && activeErrorCount() > processed * 0.8) {
+                    const remainingChunks = batchChunks.slice(batchStart);
+                    for (const chunk of remainingChunks)
+                        recordFailure(chunk, "embedding aborted because error rate was too high");
+                    console.warn(`⚠ Error rate too high (${activeErrorCount()}/${processed}) — aborting embedding`);
                     break;
                 }
                 const batchEnd = Math.min(batchStart + BATCH_SIZE, batchChunks.length);
@@ -1104,39 +1323,33 @@ export async function generateEmbeddings(store, options) {
                         const chunk = chunkBatch[i];
                         const embedding = embeddings[i];
                         if (embedding) {
-                            insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
+                            insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now, chunk.expectedTotalChunks, fingerprint);
                             chunksEmbedded++;
+                            successesSinceRetry++;
+                            clearFailure(chunk);
                         }
                         else {
-                            errors++;
+                            recordFailure(chunk, "batch embedding returned no vector");
                         }
                         batchChunkBytesProcessed += chunk.bytes;
                     }
+                    await retryFailedChunks();
                 }
-                catch {
-                    // Batch failed — try individual embeddings as fallback
-                    // But skip if session is already invalid (avoids N doomed retries)
+                catch (error) {
+                    // Batch failed — try individual embeddings as fallback. If an
+                    // individual retry succeeds, any prior failure for that chunk is
+                    // cleared, so the visible error count reflects outstanding failures.
+                    const batchReason = reasonFromError(error);
                     if (!session.isValid) {
-                        errors += chunkBatch.length;
+                        for (const chunk of chunkBatch)
+                            recordFailure(chunk, `batch failed and session expired: ${batchReason}`);
                         batchChunkBytesProcessed += chunkBatch.reduce((sum, c) => sum + c.bytes, 0);
                     }
                     else {
                         for (const chunk of chunkBatch) {
-                            try {
-                                const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
-                                const result = await session.embed(text, { model });
-                                if (result) {
-                                    insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
-                                    chunksEmbedded++;
-                                }
-                                else {
-                                    errors++;
-                                }
-                            }
-                            catch {
-                                errors++;
-                            }
+                            await tryEmbedChunk(chunk);
                             batchChunkBytesProcessed += chunk.bytes;
+                            await retryFailedChunks();
                         }
                     }
                 }
@@ -1148,18 +1361,25 @@ export async function generateEmbeddings(store, options) {
                     totalChunks,
                     bytesProcessed: bytesProcessed + proportionalBytes,
                     totalBytes,
-                    errors,
+                    errors: activeErrorCount(),
+                    failures: failureList(),
                 });
             }
+            await retryFailedChunks(true);
+            const removedPartialChunks = removeIncompleteEmbeddings(db, expectedChunksByHash, model);
+            if (removedPartialChunks > 0) {
+                chunksEmbedded = Math.max(0, chunksEmbedded - removedPartialChunks);
+            }
             bytesProcessed += batchBytes;
-            options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
+            options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors: activeErrorCount(), failures: failureList() });
         }
-        return { chunksEmbedded, errors };
+        return { chunksEmbedded, errors: activeErrorCount(), failures: failureList() };
     }, { maxDuration: 30 * 60 * 1000, name: 'generateEmbeddings' });
     return {
         docsProcessed: totalDocs,
         chunksEmbedded: result.chunksEmbedded,
         errors: result.errors,
+        failures: result.failures,
         durationMs: Date.now() - startTime,
     };
 }
@@ -1180,9 +1400,9 @@ export function createStore(dbPath) {
         close: () => db.close(),
         ensureVecTable: (dimensions) => ensureVecTableInternal(db, dimensions),
         // Index health
-        getHashesNeedingEmbedding: () => getHashesNeedingEmbedding(db),
-        getIndexHealth: () => getIndexHealth(db),
-        getStatus: () => getStatus(db),
+        getHashesNeedingEmbedding: (model) => getHashesNeedingEmbedding(db, undefined, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
+        getIndexHealth: (model) => getIndexHealth(db, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
+        getStatus: (model) => getStatus(db, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
         // Caching
         getCacheKey,
         getCachedResult: (cacheKey) => getCachedResult(db, cacheKey),
@@ -1210,8 +1430,8 @@ export function createStore(dbPath) {
         searchFTS: (query, limit, collectionName) => searchFTS(db, query, limit, collectionName),
         searchVec: (query, model, limit, collectionName, session, precomputedEmbedding) => searchVec(db, query, model, limit, collectionName, session, precomputedEmbedding),
         // Query expansion & reranking
-        expandQuery: (query, model, intent) => expandQuery(query, model, db, intent, store.llm),
-        rerank: (query, documents, model, intent) => rerank(query, documents, model, db, intent, store.llm),
+        expandQuery: (query, model, intent) => expandQuery(query, model ?? store.llm?.generateModelName ?? DEFAULT_QUERY_MODEL, db, intent, store.llm),
+        rerank: (query, documents, model, intent) => rerank(query, documents, model ?? store.llm?.rerankModelName ?? DEFAULT_RERANK_MODEL, db, intent, store.llm),
         // Document retrieval
         findDocument: (filename, options) => findDocument(db, filename, options),
         getDocumentBody: (doc, fromLine, maxLines) => getDocumentBody(db, doc, fromLine, maxLines),
@@ -1224,6 +1444,7 @@ export function createStore(dbPath) {
         insertContent: (hash, content, createdAt) => insertContent(db, hash, content, createdAt),
         insertDocument: (collectionName, path, title, hash, createdAt, modifiedAt) => insertDocument(db, collectionName, path, title, hash, createdAt, modifiedAt),
         findActiveDocument: (collectionName, path) => findActiveDocument(db, collectionName, path),
+        findOrMigrateLegacyDocument: (collectionName, path) => findOrMigrateLegacyDocument(db, collectionName, path),
         updateDocumentTitle: (documentId, title, modifiedAt) => updateDocumentTitle(db, documentId, title, modifiedAt),
         updateDocument: (documentId, title, hash, modifiedAt) => updateDocument(db, documentId, title, hash, modifiedAt),
         deactivateDocument: (collectionName, path) => deactivateDocument(db, collectionName, path),
@@ -1231,7 +1452,7 @@ export function createStore(dbPath) {
         // Vector/embedding operations
         getHashesForEmbedding: () => getHashesForEmbedding(db),
         clearAllEmbeddings: () => clearAllEmbeddings(db),
-        insertEmbedding: (hash, seq, pos, embedding, model, embeddedAt) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt),
+        insertEmbedding: (hash, seq, pos, embedding, model, embeddedAt, totalChunks, fingerprint) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt, totalChunks, fingerprint),
     };
     return store;
 }
@@ -1244,11 +1465,11 @@ export function getDocid(hash) {
 /**
  * Handelize a filename to be more token-friendly.
  * - Convert triple underscore `___` to `/` (folder separator)
- * - Convert to lowercase
  * - Replace sequences of non-word chars (except /) with single dash
  * - Remove leading/trailing dashes from path segments
  * - Preserve folder structure (a/b/c/d.md stays structured)
  * - Preserve file extension
+ * - Preserve original case (important for case-sensitive filesystems)
  */
 /** Replace emoji/symbol codepoints with their hex representation (e.g. 🐘 → 1f418) */
 function emojiToHex(str) {
@@ -1273,7 +1494,6 @@ export function handelize(path) {
     }
     const result = path
         .replace(/___/g, '/') // Triple underscore becomes folder separator
-        .toLowerCase()
         .split('/')
         .map((segment, idx, arr) => {
         const isLastSegment = idx === arr.length - 1;
@@ -1306,17 +1526,85 @@ export function handelize(path) {
 // =============================================================================
 // Index health
 // =============================================================================
-export function getHashesNeedingEmbedding(db) {
-    const result = db.prepare(`
-    SELECT COUNT(DISTINCT d.hash) as count
-    FROM documents d
-    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
-    WHERE d.active = 1 AND v.hash IS NULL
-  `).get();
-    return result.count;
+export function getHashesNeedingEmbedding(db, collection, model = DEFAULT_EMBED_MODEL) {
+    const collectionFilter = collection ? `AND d.collection = ?` : ``;
+    const fingerprint = getEmbeddingFingerprint(model);
+    return withLazyContentVectorMigration(db, () => {
+        const stmt = db.prepare(`
+      SELECT COUNT(DISTINCT d.hash) as count
+      FROM documents d
+      LEFT JOIN (
+        SELECT hash, model, COUNT(*) AS chunk_count, MAX(total_chunks) AS expected_chunks
+        FROM content_vectors
+        WHERE model = ? AND embed_fingerprint = ?
+        GROUP BY hash, model, embed_fingerprint
+      ) v ON d.hash = v.hash
+      WHERE d.active = 1
+        AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
+        ${collectionFilter}
+    `);
+        const result = (collection ? stmt.get(model, fingerprint, collection) : stmt.get(model, fingerprint));
+        return result.count;
+    });
 }
-export function getIndexHealth(db) {
-    const needsEmbedding = getHashesNeedingEmbedding(db);
+export async function maybeAdoptLegacyEmbeddingFingerprint(store, model = DEFAULT_EMBED_MODEL) {
+    const db = store.db;
+    const fingerprint = getEmbeddingFingerprint(model);
+    const legacyCount = withLazyContentVectorMigration(db, () => {
+        const row = db.prepare(`SELECT COUNT(DISTINCT hash) AS count FROM content_vectors WHERE model = ? AND embed_fingerprint = ''`).get(model);
+        return row.count;
+    });
+    if (legacyCount === 0) {
+        return { checked: false, adopted: 0, reason: "no legacy empty-fingerprint embeddings" };
+    }
+    const sample = withLazyContentVectorMigration(db, () => db.prepare(`
+    SELECT cv.hash, cv.seq, cv.pos, cv.total_chunks, c.doc AS body, MIN(d.path) AS path
+    FROM content_vectors cv
+    JOIN documents d ON d.hash = cv.hash AND d.active = 1
+    JOIN content c ON c.hash = cv.hash
+    WHERE cv.model = ? AND cv.embed_fingerprint = ''
+    GROUP BY cv.hash, cv.seq, cv.pos, cv.total_chunks, c.doc
+    ORDER BY cv.hash, cv.seq
+    LIMIT 1
+  `).get(model));
+    if (!sample) {
+        return { checked: false, adopted: 0, reason: `${legacyCount} legacy docs have no active sample` };
+    }
+    const tableExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
+    if (!tableExists) {
+        return { checked: false, adopted: 0, reason: "vectors_vec table is missing" };
+    }
+    const expectedHashSeq = `${sample.hash}_${sample.seq}`;
+    const title = extractTitle(sample.body, sample.path);
+    const llm = getLlm(store);
+    return await withLLMSessionForLlm(llm, async (session) => {
+        const chunks = await chunkDocumentByTokens(sample.body, undefined, undefined, undefined, sample.path, undefined, session.signal);
+        const chunk = chunks[sample.seq];
+        if (!chunk) {
+            return { checked: true, adopted: 0, reason: `sample chunk ${expectedHashSeq} no longer exists` };
+        }
+        const result = await session.embed(formatDocForEmbedding(chunk.text, title, model), { model });
+        if (!result) {
+            return { checked: true, adopted: 0, reason: "failed to embed legacy sample" };
+        }
+        const nearest = db.prepare(`
+      SELECT hash_seq, distance
+      FROM vectors_vec
+      WHERE embedding MATCH ? AND k = 1
+    `).get(new Float32Array(result.embedding));
+        if (!nearest) {
+            return { checked: true, adopted: 0, reason: "legacy sample vector not found" };
+        }
+        const threshold = 0.0001;
+        if (nearest.hash_seq !== expectedHashSeq || nearest.distance > threshold) {
+            return { checked: true, adopted: 0, reason: `legacy sample differs from current fingerprint (nearest ${nearest.hash_seq}, distance ${nearest.distance.toFixed(6)})` };
+        }
+        const update = withLazyContentVectorMigration(db, () => db.prepare(`UPDATE content_vectors SET embed_fingerprint = ? WHERE model = ? AND embed_fingerprint = ''`).run(fingerprint, model));
+        return { checked: true, adopted: update.changes, reason: `sample ${expectedHashSeq} matched current fingerprint at distance ${nearest.distance.toFixed(6)}` };
+    });
+}
+export function getIndexHealth(db, model = DEFAULT_EMBED_MODEL) {
+    const needsEmbedding = getHashesNeedingEmbedding(db, undefined, model);
     const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get().count;
     const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get();
     let daysStale = null;
@@ -1369,13 +1657,15 @@ export function deleteInactiveDocuments(db) {
     return result.changes;
 }
 /**
- * Remove orphaned content hashes that are not referenced by any active document.
+ * Remove orphaned content hashes that are not referenced by any document.
+ * Inactive documents are soft-deleted tombstones, so their content rows must
+ * remain referenced until deleteInactiveDocuments() hard-deletes them.
  * Returns the number of orphaned content hashes deleted.
  */
 export function cleanupOrphanedContent(db) {
     const result = db.prepare(`
     DELETE FROM content
-    WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
+    WHERE hash NOT IN (SELECT DISTINCT hash FROM documents)
   `).run();
     return result.changes;
 }
@@ -1400,32 +1690,34 @@ export function cleanupOrphanedVectors(db) {
     catch {
         return 0;
     }
-    // Count orphaned vectors first
-    const countResult = db.prepare(`
-    SELECT COUNT(*) as c FROM content_vectors cv
-    WHERE NOT EXISTS (
-      SELECT 1 FROM documents d WHERE d.hash = cv.hash AND d.active = 1
-    )
-  `).get();
-    if (countResult.c === 0) {
-        return 0;
-    }
-    // Delete from vectors_vec first
-    db.exec(`
-    DELETE FROM vectors_vec WHERE hash_seq IN (
-      SELECT cv.hash || '_' || cv.seq FROM content_vectors cv
+    return withLazyContentVectorMigration(db, () => {
+        // Count orphaned vectors first
+        const countResult = db.prepare(`
+      SELECT COUNT(*) as c FROM content_vectors cv
       WHERE NOT EXISTS (
         SELECT 1 FROM documents d WHERE d.hash = cv.hash AND d.active = 1
       )
-    )
-  `);
-    // Delete from content_vectors
-    db.exec(`
-    DELETE FROM content_vectors WHERE hash NOT IN (
-      SELECT hash FROM documents WHERE active = 1
-    )
-  `);
-    return countResult.c;
+    `).get();
+        if (countResult.c === 0) {
+            return 0;
+        }
+        // Delete from vectors_vec first
+        db.exec(`
+      DELETE FROM vectors_vec WHERE hash_seq IN (
+        SELECT cv.hash || '_' || cv.seq FROM content_vectors cv
+        WHERE NOT EXISTS (
+          SELECT 1 FROM documents d WHERE d.hash = cv.hash AND d.active = 1
+        )
+      )
+    `);
+        // Delete from content_vectors
+        db.exec(`
+      DELETE FROM content_vectors WHERE hash NOT IN (
+        SELECT hash FROM documents WHERE active = 1
+      )
+    `);
+        return countResult.c;
+    });
 }
 /**
  * Run VACUUM to reclaim unused space in the database.
@@ -1487,6 +1779,21 @@ export function insertContent(db, hash, content, createdAt) {
     db.prepare(`INSERT OR IGNORE INTO content (hash, doc, created_at) VALUES (?, ?, ?)`)
         .run(hash, content, createdAt);
 }
+function rebuildDocumentFTS(db, documentId) {
+    const row = db.prepare(`
+    SELECT d.id, d.collection, d.path, d.title, content.doc as body
+    FROM documents d
+    JOIN content ON content.hash = d.hash
+    WHERE d.id = ? AND d.active = 1
+  `).get(documentId);
+    db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`).run(documentId);
+    if (!row)
+        return;
+    db.prepare(`
+    INSERT INTO documents_fts(rowid, filepath, title, body)
+    VALUES (?, ?, ?, ?)
+  `).run(row.id, normalizeCjkForFTS(`${row.collection}/${row.path}`), normalizeCjkForFTS(row.title), normalizeCjkForFTS(row.body));
+}
 /**
  * Insert a new document into the documents table.
  */
@@ -1500,6 +1807,9 @@ export function insertDocument(db, collectionName, path, title, hash, createdAt,
       modified_at = excluded.modified_at,
       active = 1
   `).run(collectionName, path, title, hash, createdAt, modifiedAt);
+    const row = db.prepare(`SELECT id FROM documents WHERE collection = ? AND path = ?`).get(collectionName, path);
+    if (row)
+        rebuildDocumentFTS(db, row.id);
 }
 /**
  * Find an active document by collection name and path.
@@ -1511,12 +1821,48 @@ export function findActiveDocument(db, collectionName, path) {
   `).get(collectionName, path);
     return row ?? null;
 }
+/**
+ * Find an active document, falling back to a case-insensitive path match.
+ * If found under a different casing, renames it in-place and rebuilds the
+ * FTS entry. Embeddings are keyed by content hash, so the rename is
+ * safe — no re-embedding required.
+ *
+ * @internal Used by reindexCollection and indexFiles during qmd update.
+ * Returns null if the document does not exist under either path.
+ */
+export function findOrMigrateLegacyDocument(db, collectionName, path) {
+    const existing = findActiveDocument(db, collectionName, path);
+    if (existing)
+        return existing;
+    const legacy = db.prepare(`
+    SELECT id, hash, title FROM documents
+    WHERE collection = ? AND path COLLATE NOCASE = ? AND active = 1
+    ORDER BY id
+    LIMIT 1
+  `).get(collectionName, path);
+    if (!legacy)
+        return null;
+    // Wrap rename + FTS rebuild in a transaction for atomicity.
+    const migrate = db.transaction(() => {
+        // Use OR IGNORE so a UNIQUE conflict (e.g. both "readme.md" and
+        // "README.md" already exist) is a no-op rather than crashing.
+        const result = db.prepare(`UPDATE OR IGNORE documents SET path = ? WHERE id = ? AND active = 1`).run(path, legacy.id);
+        if (result.changes === 0)
+            return false;
+        rebuildDocumentFTS(db, legacy.id);
+        return true;
+    });
+    if (!migrate())
+        return null;
+    return findActiveDocument(db, collectionName, path);
+}
 /**
  * Update the title and modified_at timestamp for a document.
  */
 export function updateDocumentTitle(db, documentId, title, modifiedAt) {
     db.prepare(`UPDATE documents SET title = ?, modified_at = ? WHERE id = ?`)
         .run(title, modifiedAt, documentId);
+    rebuildDocumentFTS(db, documentId);
 }
 /**
  * Update an existing document's hash, title, and modified_at timestamp.
@@ -1525,6 +1871,7 @@ export function updateDocumentTitle(db, documentId, title, modifiedAt) {
 export function updateDocument(db, documentId, title, hash, modifiedAt) {
     db.prepare(`UPDATE documents SET title = ?, hash = ?, modified_at = ? WHERE id = ?`)
         .run(title, hash, modifiedAt, documentId);
+    rebuildDocumentFTS(db, documentId);
 }
 /**
  * Deactivate a document (mark as inactive but don't delete).
@@ -1593,31 +1940,54 @@ export async function chunkDocumentByTokens(content, maxTokens = CHUNK_SIZE_TOKE
     let charChunks = await chunkDocumentAsync(content, maxChars, overlapChars, windowChars, filepath, chunkStrategy);
     // Tokenize and split any chunks that still exceed limit
     const results = [];
-    for (const chunk of charChunks) {
-        // Respect abort signal to avoid runaway tokenization
+    const clampOverlapChars = (value, maxChars) => {
+        if (maxChars <= 1)
+            return 0;
+        return Math.max(0, Math.min(maxChars - 1, Math.floor(value)));
+    };
+    const pushChunkWithinTokenLimit = async (text, pos) => {
         if (signal?.aborted)
-            break;
-        const tokens = await llm.tokenize(chunk.text);
-        if (tokens.length <= maxTokens) {
-            results.push({ text: chunk.text, pos: chunk.pos, tokens: tokens.length });
+            return;
+        const tokens = await llm.tokenize(text);
+        if (tokens.length <= maxTokens || text.length <= 1) {
+            results.push({ text, pos, tokens: tokens.length });
+            return;
         }
-        else {
-            // Chunk is still too large - split it further
-            // Use actual token count to estimate better char limit
-            const actualCharsPerToken = chunk.text.length / tokens.length;
-            const safeMaxChars = Math.floor(maxTokens * actualCharsPerToken * 0.95); // 5% safety margin
-            const subChunks = chunkDocument(chunk.text, safeMaxChars, Math.floor(overlapChars * actualCharsPerToken / 2), Math.floor(windowChars * actualCharsPerToken / 2));
-            for (const subChunk of subChunks) {
-                if (signal?.aborted)
-                    break;
-                const subTokens = await llm.tokenize(subChunk.text);
-                results.push({
-                    text: subChunk.text,
-                    pos: chunk.pos + subChunk.pos,
-                    tokens: subTokens.length,
-                });
-            }
+        const actualCharsPerToken = text.length / tokens.length;
+        let safeMaxChars = Math.floor(maxTokens * actualCharsPerToken * 0.95);
+        if (!Number.isFinite(safeMaxChars) || safeMaxChars < 1) {
+            safeMaxChars = Math.floor(text.length / 2);
+        }
+        safeMaxChars = Math.max(1, Math.min(text.length - 1, safeMaxChars));
+        let nextOverlapChars = clampOverlapChars(overlapChars * actualCharsPerToken / 2, safeMaxChars);
+        let nextWindowChars = Math.max(0, Math.floor(windowChars * actualCharsPerToken / 2));
+        let subChunks = chunkDocument(text, safeMaxChars, nextOverlapChars, nextWindowChars);
+        // Pathological single-line blobs can produce no meaningful breakpoint progress.
+        // Fall back to a simple half split so every recursion step strictly shrinks.
+        if (subChunks.length <= 1
+            || subChunks[0]?.text.length === text.length) {
+            safeMaxChars = Math.max(1, Math.floor(text.length / 2));
+            nextOverlapChars = 0;
+            nextWindowChars = 0;
+            subChunks = chunkDocument(text, safeMaxChars, nextOverlapChars, nextWindowChars);
+        }
+        if (subChunks.length <= 1
+            || subChunks[0]?.text.length === text.length) {
+            const fallbackTokens = tokens.slice(0, Math.max(1, maxTokens));
+            const truncatedText = await llm.detokenize(fallbackTokens);
+            results.push({
+                text: truncatedText,
+                pos,
+                tokens: fallbackTokens.length,
+            });
+            return;
         }
+        for (const subChunk of subChunks) {
+            await pushChunkWithinTokenLimit(text.slice(subChunk.pos, subChunk.pos + subChunk.text.length), pos + subChunk.pos);
+        }
+    };
+    for (const chunk of charChunks) {
+        await pushChunkWithinTokenLimit(chunk.text, chunk.pos);
     }
     return results;
 }
@@ -2135,7 +2505,7 @@ function buildFTS5Query(query) {
             const phrase = s.slice(start, i).trim();
             i++; // skip closing quote
             if (phrase.length > 0) {
-                const sanitized = phrase.split(/\s+/).map(t => sanitizeFTS5Term(t)).filter(t => t).join(' ');
+                const sanitized = sanitizeFTS5Phrase(phrase);
                 if (sanitized) {
                     const ftsPhrase = `"${sanitized}"`; // Exact phrase, no prefix match
                     if (negated) {
@@ -2167,6 +2537,18 @@ function buildFTS5Query(query) {
                     }
                 }
             }
+            else if (containsCjk(term)) {
+                const sanitized = sanitizeFTS5Phrase(term);
+                if (sanitized) {
+                    const ftsPhrase = `"${sanitized}"`; // CJK phrase over character tokens
+                    if (negated) {
+                        negative.push(ftsPhrase);
+                    }
+                    else {
+                        positive.push(ftsPhrase);
+                    }
+                }
+            }
             else {
                 const sanitized = sanitizeFTS5Term(term);
                 if (sanitized) {
@@ -2199,8 +2581,9 @@ function buildFTS5Query(query) {
  * Returns error message if invalid, null if valid.
  */
 export function validateSemanticQuery(query) {
-    // Check for negation syntax
-    if (/-\w/.test(query) || /-"/.test(query)) {
+    // Check for negation syntax — only at token boundaries (start of string or after whitespace).
+    // Hyphenated words like "real-time" or "write-ahead" must not trigger this.
+    if (/(^|\s)-[\w"]/.test(query)) {
         return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.';
     }
     return null;
@@ -2326,7 +2709,7 @@ export async function searchVec(db, query, model, limit = 20, collectionName, se
         docSql += ` AND d.collection = ?`;
         params.push(collectionName);
     }
-    const docRows = db.prepare(docSql).all(...params);
+    const docRows = withLazyContentVectorMigration(db, () => db.prepare(docSql).all(...params));
     // Combine with distances and dedupe by filepath
     const seen = new Map();
     for (const row of docRows) {
@@ -2373,23 +2756,82 @@ async function getEmbedding(text, model, isQuery, session, llmOverride) {
  * Get all unique content hashes that need embeddings (from active documents).
  * Returns hash, document body, and a sample path for display purposes.
  */
-export function getHashesForEmbedding(db) {
-    return db.prepare(`
+export function getHashesForEmbedding(db, model = DEFAULT_EMBED_MODEL) {
+    const fingerprint = getEmbeddingFingerprint(model);
+    return withLazyContentVectorMigration(db, () => db.prepare(`
     SELECT d.hash, c.doc as body, MIN(d.path) as path
     FROM documents d
     JOIN content c ON d.hash = c.hash
-    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
-    WHERE d.active = 1 AND v.hash IS NULL
+    LEFT JOIN (
+      SELECT hash, model, COUNT(*) AS chunk_count, MAX(total_chunks) AS expected_chunks
+      FROM content_vectors
+      WHERE model = ? AND embed_fingerprint = ?
+      GROUP BY hash, model, embed_fingerprint
+    ) v ON d.hash = v.hash
+    WHERE d.active = 1
+      AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
     GROUP BY d.hash
-  `).all();
+  `).all(model, fingerprint));
 }
 /**
- * Clear all embeddings from the database (force re-index).
- * Deletes all rows from content_vectors and drops the vectors_vec table.
- */
-export function clearAllEmbeddings(db) {
-    db.exec(`DELETE FROM content_vectors`);
-    db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+ * Clear embeddings for the whole index, or just for one collection.
+ *
+ * When `collection` is omitted the entire content_vectors table is emptied and
+ * the vectors_vec virtual table is dropped (it is recreated with the right
+ * dimensions on the next embed run).
+ *
+ * When `collection` is provided, only vectors whose hash is referenced
+ * exclusively by active documents in that collection are removed. Hashes
+ * shared with active documents in other collections are left in place so
+ * vector search keeps working there (content_vectors is keyed globally by
+ * content hash; identical document bodies across collections share a row).
+ * vectors_vec is preserved so other collections keep working unless the scoped
+ * clear empties content_vectors entirely, in which case it is dropped so the
+ * next embed can recreate the table with the current dimensions.
+ */
+export function clearAllEmbeddings(db, collection) {
+    if (!collection) {
+        db.exec(`DELETE FROM content_vectors`);
+        db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+        return;
+    }
+    const exclusiveHashesQuery = `
+    SELECT DISTINCT d.hash
+    FROM documents d
+    WHERE d.collection = ? AND d.active = 1
+      AND NOT EXISTS (
+        SELECT 1 FROM documents d2
+        WHERE d2.hash = d.hash
+          AND d2.active = 1
+          AND d2.collection != d.collection
+      )
+  `;
+    const vecTableExists = db
+        .prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='vectors_vec'`)
+        .get();
+    withLazyContentVectorMigration(db, () => {
+        if (vecTableExists) {
+            const hashSeqRows = db.prepare(`
+        SELECT cv.hash, cv.seq
+        FROM content_vectors cv
+        WHERE cv.hash IN (${exclusiveHashesQuery})
+      `).all(collection);
+            const delVec = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
+            for (const row of hashSeqRows) {
+                delVec.run(`${row.hash}_${row.seq}`);
+            }
+        }
+        db.prepare(`
+      DELETE FROM content_vectors
+      WHERE hash IN (${exclusiveHashesQuery})
+    `).run(collection);
+        const remaining = db
+            .prepare(`SELECT COUNT(*) AS n FROM content_vectors`)
+            .get();
+        if (remaining.n === 0) {
+            db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+        }
+    });
 }
 /**
  * Insert a single embedding into both content_vectors and vectors_vec tables.
@@ -2401,16 +2843,37 @@ export function clearAllEmbeddings(db) {
  * vectors_vec uses DELETE + INSERT instead of INSERT OR REPLACE because sqlite-vec's
  * vec0 virtual tables silently ignore the OR REPLACE conflict clause.
  */
-export function insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt) {
+export function insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt, totalChunks = 1, fingerprint = getEmbeddingFingerprint(model)) {
     const hashSeq = `${hash}_${seq}`;
-    // Insert content_vectors first — crash-safe ordering (see getHashesForEmbedding)
-    const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, ?, ?, ?, ?)`);
-    insertContentVectorStmt.run(hash, seq, pos, model, embeddedAt);
-    // vec0 virtual tables don't support OR REPLACE — use DELETE + INSERT
-    const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
-    const insertVecStmt = db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
-    deleteVecStmt.run(hashSeq);
-    insertVecStmt.run(hashSeq, embedding);
+    withLazyContentVectorMigration(db, () => {
+        // Insert content_vectors first — crash-safe ordering (see getHashesForEmbedding)
+        const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embed_fingerprint, total_chunks, embedded_at) VALUES (?, ?, ?, ?, ?, ?, ?)`);
+        insertContentVectorStmt.run(hash, seq, pos, model, fingerprint, totalChunks, embeddedAt);
+        // vec0 virtual tables don't support OR REPLACE — use DELETE + INSERT
+        const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
+        const insertVecStmt = db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
+        deleteVecStmt.run(hashSeq);
+        insertVecStmt.run(hashSeq, embedding);
+    });
+}
+function removeIncompleteEmbeddings(db, expectedChunksByHash, model) {
+    return withLazyContentVectorMigration(db, () => {
+        let removed = 0;
+        const rowsStmt = db.prepare(`SELECT seq FROM content_vectors WHERE hash = ? AND model = ?`);
+        const deleteContentStmt = db.prepare(`DELETE FROM content_vectors WHERE hash = ? AND model = ?`);
+        const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
+        for (const [hash, expectedChunks] of expectedChunksByHash) {
+            const rows = rowsStmt.all(hash, model);
+            if (rows.length === 0 || rows.length === expectedChunks)
+                continue;
+            for (const row of rows) {
+                deleteVecStmt.run(`${hash}_${row.seq}`);
+            }
+            deleteContentStmt.run(hash, model);
+            removed += rows.length;
+        }
+        return removed;
+    });
 }
 // =============================================================================
 // Query expansion
@@ -2422,12 +2885,15 @@ export async function expandQuery(query, model = DEFAULT_QUERY_MODEL, db, intent
     if (cached) {
         try {
             const parsed = JSON.parse(cached);
+            if (!Array.isArray(parsed))
+                return [];
+            const rows = parsed;
             // Migrate old cache format: { type, text } → { type, query }
-            if (parsed.length > 0 && parsed[0].query) {
-                return parsed;
+            if (rows.length > 0 && typeof rows[0]?.query === "string") {
+                return rows.map((r) => ({ type: r.type, query: String(r.query) }));
             }
-            else if (parsed.length > 0 && parsed[0].text) {
-                return parsed.map((r) => ({ type: r.type, query: r.text }));
+            else if (rows.length > 0 && typeof rows[0]?.text === "string") {
+                return rows.map((r) => ({ type: r.type, query: String(r.text) }));
             }
         }
         catch {
@@ -2734,7 +3200,7 @@ export function getDocumentBody(db, doc, fromLine, maxLines) {
     let body = row.body;
     if (fromLine !== undefined || maxLines !== undefined) {
         const lines = body.split('\n');
-        const start = (fromLine || 1) - 1;
+        const start = Math.max(0, (fromLine || 1) - 1);
         const end = maxLines !== undefined ? start + maxLines : lines.length;
         body = lines.slice(start, end).join('\n');
     }
@@ -2842,7 +3308,7 @@ export function findDocuments(db, pattern, options = {}) {
 // =============================================================================
 // Status
 // =============================================================================
-export function getStatus(db) {
+export function getStatus(db, model = DEFAULT_EMBED_MODEL) {
     // DB is source of truth for collections — config provides supplementary metadata
     const dbCollections = db.prepare(`
     SELECT
@@ -2875,7 +3341,7 @@ export function getStatus(db) {
         return new Date(b.lastUpdated).getTime() - new Date(a.lastUpdated).getTime();
     });
     const totalDocs = db.prepare(`SELECT COUNT(*) as c FROM documents WHERE active = 1`).get().c;
-    const needsEmbedding = getHashesNeedingEmbedding(db);
+    const needsEmbedding = getHashesNeedingEmbedding(db, undefined, model);
     const hasVectors = !!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
     return {
         totalDocuments: totalDocs,
@@ -2922,7 +3388,7 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen, in
     const totalLines = body.split('\n').length;
     let searchBody = body;
     let lineOffset = 0;
-    if (chunkPos && chunkPos > 0) {
+    if (chunkPos !== undefined && chunkPos >= 0) {
         // Search within the chunk region, with some padding for context
         // Use provided chunkLen or fall back to max chunk size (covers variable-length chunks)
         const searchLen = chunkLen || CHUNK_SIZE_CHARS;
@@ -2953,6 +3419,22 @@ export function extractSnippet(body, query, maxLen = 500, chunkPos, chunkLen, in
             bestLine = i;
         }
     }
+    if (chunkPos !== undefined && chunkPos >= 0 && bestScore <= 0) {
+        if (chunkPos === 0) {
+            // chunkPos=0 may be the chunk selector's initialization default for queries
+            // where lexical chunk scoring found no winner (e.g. tokens filtered to empty
+            // by the length>2 guard). Retry with full body so the real match isn't missed.
+            return extractSnippet(body, query, maxLen, undefined, undefined, intent);
+        }
+        // For chunkPos > 0 the reranker actively picked this chunk. Tokens failing to
+        // match literally is most likely a tokenizer limitation (quoted phrases, FTS5
+        // syntax, HYDE passages, semantic hits), so anchor on the chunk start rather
+        // than disregarding the reranker's pick.
+        const contextStart = Math.max(0, chunkPos - 100);
+        bestLine = chunkPos > contextStart
+            ? searchBody.slice(0, chunkPos - contextStart).split('\n').length - 1
+            : 0;
+    }
     const start = Math.max(0, bestLine - 1);
     const end = Math.min(lines.length, bestLine + 3);
     const snippetLines = lines.slice(start, end);
@@ -2990,6 +3472,20 @@ export function addLineNumbers(text, startLine = 1) {
     const lines = text.split('\n');
     return lines.map((line, i) => `${startLine + i}: ${line}`).join('\n');
 }
+/**
+ * RRF list weights for hybridQuery.
+ *
+ * Original-query retrieval paths are the primary evidence and get 2x weight:
+ * - original FTS
+ * - original vector search
+ *
+ * Expansion-derived lists (lex/vec/hyde) stay at 1x regardless of list order,
+ * so a lex expansion inserted before original vector search cannot steal the
+ * original vector boost.
+ */
+export function getHybridRrfWeights(rankedListMeta) {
+    return rankedListMeta.map(meta => meta.queryType === "original" ? 2.0 : 1.0);
+}
 /**
  * Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
  *
@@ -3078,7 +3574,8 @@ export async function hybridQuery(store, query, options) {
         }
         // Batch embed all vector queries in a single call
         const llm = getLlm(store);
-        const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, llm.embedModelName));
+        const embedModel = llm.embedModelName;
+        const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, embedModel));
         hooks?.onEmbedStart?.(textsToEmbed.length);
         const embedStart = Date.now();
         const embeddings = await llm.embedBatch(textsToEmbed);
@@ -3088,7 +3585,7 @@ export async function hybridQuery(store, query, options) {
             const embedding = embeddings[i]?.embedding;
             if (!embedding)
                 continue;
-            const vecResults = await store.searchVec(vecQueries[i].text, DEFAULT_EMBED_MODEL, 20, collection, undefined, embedding);
+            const vecResults = await store.searchVec(vecQueries[i].text, embedModel, 20, collection, undefined, embedding);
             if (vecResults.length > 0) {
                 for (const r of vecResults)
                     docidMap.set(r.filepath, r.docid);
@@ -3104,8 +3601,9 @@ export async function hybridQuery(store, query, options) {
             }
         }
     }
-    // Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
-    const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
+    // Step 4: RRF fusion — original-query FTS and vector lists get 2x weight;
+    // expansion-derived lists stay at 1x independent of insertion order.
+    const weights = getHybridRrfWeights(rankedListMeta);
     const fused = reciprocalRankFusion(rankedLists, weights);
     const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
     const candidates = fused.slice(0, candidateLimit);
@@ -3286,10 +3784,11 @@ export async function vectorSearchQuery(store, query, options) {
     const vecExpanded = allExpanded.filter(q => q.type !== 'lex');
     options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
     // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
+    const embedModel = getLlm(store).embedModelName;
     const queryTexts = [query, ...vecExpanded.map(q => q.query)];
     const allResults = new Map();
     for (const q of queryTexts) {
-        const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection);
+        const vecResults = await store.searchVec(q, embedModel, limit, collection);
         for (const r of vecResults) {
             const existing = allResults.get(r.filepath);
             if (!existing || r.score > existing.score) {
@@ -3390,7 +3889,8 @@ export async function structuredSearch(store, searches, options) {
         const vecSearches = searches.filter((s) => s.type === 'vec' || s.type === 'hyde');
         if (vecSearches.length > 0) {
             const llm = getLlm(store);
-            const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, llm.embedModelName));
+            const embedModel = llm.embedModelName;
+            const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, embedModel));
             hooks?.onEmbedStart?.(textsToEmbed.length);
             const embedStart = Date.now();
             const embeddings = await llm.embedBatch(textsToEmbed);
@@ -3400,7 +3900,7 @@ export async function structuredSearch(store, searches, options) {
                 if (!embedding)
                     continue;
                 for (const coll of collectionList) {
-                    const vecResults = await store.searchVec(vecSearches[i].query, DEFAULT_EMBED_MODEL, 20, coll, undefined, embedding);
+                    const vecResults = await store.searchVec(vecSearches[i].query, embedModel, 20, coll, undefined, embedding);
                     if (vecResults.length > 0) {
                         for (const r of vecResults)
                             docidMap.set(r.filepath, r.docid);