npm - @tobilu/qmd - Versions diffs - 2.0.1 → 2.1.0 - Mend

@tobilu/qmd 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/dist/store.js CHANGED Viewed

@@ -26,6 +26,8 @@ export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
 export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
 export const DEFAULT_GLOB = "**/*.md";
 export const DEFAULT_MULTI_GET_MAX_BYTES = 10 * 1024; // 10KB
+export const DEFAULT_EMBED_MAX_DOCS_PER_BATCH = 64;
+export const DEFAULT_EMBED_MAX_BATCH_BYTES = 64 * 1024 * 1024; // 64MB
 // Chunking: 900 tokens per chunk with 15% overlap
 // Increased from 800 to accommodate smart chunking finding natural break points
 export const CHUNK_SIZE_TOKENS = 900;
@@ -161,6 +163,60 @@ export function findBestCutoff(breakPoints, targetCharPos, windowChars = CHUNK_W
     }
     return bestPos;
 }
+/**
+ * Merge two lists of break points (e.g. regex + AST) into one, keeping only the highest-scoring entry for each distinct `pos`.
+ * On equal scores the entry seen first wins (all of `a` is scanned before `b`). Returns a new array sorted by ascending `pos`.
+ */
+export function mergeBreakPoints(a, b) {
+    const seen = new Map(); // pos -> best break point found so far at that position
+    for (const bp of a) {
+        const existing = seen.get(bp.pos);
+        if (!existing || bp.score > existing.score) { // strict ">" so a tie keeps the earlier entry
+            seen.set(bp.pos, bp);
+        }
+    }
+    for (const bp of b) { // same rule: an entry from b replaces one from a only when it scores strictly higher
+        const existing = seen.get(bp.pos);
+        if (!existing || bp.score > existing.score) {
+            seen.set(bp.pos, bp);
+        }
+    }
+    return Array.from(seen.values()).sort((a, b) => a.pos - b.pos); // comparator a/b shadow the parameters; the inputs are not mutated
+}
+/**
+ * Core chunk algorithm that operates on precomputed break points and code fences; returns an array of { text, pos } where pos is the chunk's start offset in `content`.
+ * This is the shared implementation used by both regex-only and AST-aware chunking. Content no longer than `maxChars` is returned as a single chunk at pos 0.
+ */
+export function chunkDocumentWithBreakPoints(content, breakPoints, codeFences, maxChars = CHUNK_SIZE_CHARS, overlapChars = CHUNK_OVERLAP_CHARS, windowChars = CHUNK_WINDOW_CHARS) {
+    if (content.length <= maxChars) { // short document: no splitting needed
+        return [{ text: content, pos: 0 }];
+    }
+    const chunks = [];
+    let charPos = 0; // start offset of the chunk currently being built
+    while (charPos < content.length) {
+        const targetEndPos = Math.min(charPos + maxChars, content.length); // furthest position this chunk may reach
+        let endPos = targetEndPos;
+        if (endPos < content.length) { // not the final chunk: look for a better place to cut
+            const bestCutoff = findBestCutoff(breakPoints, targetEndPos, windowChars, 0.7, codeFences); // NOTE(review): meaning of the 0.7 argument is defined by findBestCutoff; confirm there
+            if (bestCutoff > charPos && bestCutoff <= targetEndPos) { // only accept a cutoff that lies inside the current chunk
+                endPos = bestCutoff;
+            }
+        }
+        if (endPos <= charPos) { // ensure the chunk is non-empty so the loop makes progress
+            endPos = Math.min(charPos + maxChars, content.length);
+        }
+        chunks.push({ text: content.slice(charPos, endPos), pos: charPos });
+        if (endPos >= content.length) { // last chunk emitted: stop without computing an overlap
+            break;
+        }
+        charPos = endPos - overlapChars; // step back so the next chunk overlaps the end of this one
+        const lastChunkPos = chunks.at(-1).pos;
+        if (charPos <= lastChunkPos) { // overlap would not advance past this chunk's start: drop it to avoid an infinite loop
+            charPos = endPos;
+        }
+    }
+    return chunks;
+}
 // Hybrid query: strong BM25 signal detection thresholds
 // Skip expensive LLM expansion when top result is strong AND clearly separated from runner-up
 export const STRONG_SIGNAL_MIN_SCORE = 0.85;
@@ -191,7 +247,8 @@ export function isAbsolutePath(path) {
     if (path.startsWith('/')) {
         // Check if it's a Git Bash style path like /c/ or /c/Users (C-Z only, not A or B)
         // Requires path[2] === '/' to distinguish from Unix paths like /c or /cache
-        if (path.length >= 3 && path[2] === '/') {
+        // Skipped on WSL where /c/ is a valid drvfs mount point, not a drive letter
+        if (!isWSL() && path.length >= 3 && path[2] === '/') {
             const driveLetter = path[1];
             if (driveLetter && /[c-zC-Z]/.test(driveLetter)) {
                 return true;
@@ -213,6 +270,13 @@ export function isAbsolutePath(path) {
 export function normalizePathSeparators(path) {
     return path.replace(/\\/g, '/');
 }
+/**
+ * Detect if running inside WSL (Windows Subsystem for Linux) by checking the WSL_DISTRO_NAME and WSL_INTEROP environment variables.
+ * On WSL, paths like /c/work/... are valid drvfs mount points, not Git Bash paths. Returns true when either variable is set and non-empty.
+ */
+function isWSL() {
+    return !!(process.env.WSL_DISTRO_NAME || process.env.WSL_INTEROP); // "!!" coerces the string/undefined result to a boolean
+}
 /**
  * Get the relative path from a prefix.
  * Returns null if path is not under prefix.
@@ -256,8 +320,9 @@ export function resolve(...paths) {
             windowsDrive = firstPath.slice(0, 2);
             result = firstPath.slice(2);
         }
-        else if (firstPath.startsWith('/') && firstPath.length >= 3 && firstPath[2] === '/') {
+        else if (!isWSL() && firstPath.startsWith('/') && firstPath.length >= 3 && firstPath[2] === '/') {
             // Git Bash style: /c/ -> C: (C-Z drives only, not A or B)
+            // Skipped on WSL where /c/ is a valid drvfs mount point, not a drive letter
             const driveLetter = firstPath[1];
             if (driveLetter && /[c-zC-Z]/.test(driveLetter)) {
                 windowsDrive = driveLetter.toUpperCase() + ':';
@@ -288,8 +353,9 @@ export function resolve(...paths) {
                 windowsDrive = p.slice(0, 2);
                 result = p.slice(2);
             }
-            else if (p.startsWith('/') && p.length >= 3 && p[2] === '/') {
+            else if (!isWSL() && p.startsWith('/') && p.length >= 3 && p[2] === '/') {
                 // Git Bash style (C-Z drives only, not A or B)
+                // Skipped on WSL where /c/ is a valid drvfs mount point, not a drive letter
                 const driveLetter = p[1];
                 if (driveLetter && /[c-zC-Z]/.test(driveLetter)) {
                     windowsDrive = driveLetter.toUpperCase() + ':';
@@ -332,6 +398,10 @@ let _productionMode = false;
 export function enableProductionMode() {
     _productionMode = true;
 }
+/** Reset the module-level production mode flag to false, undoing enableProductionMode() — only for testing. */
+export function _resetProductionModeForTesting() {
+    _productionMode = false;
+}
 export function getDefaultDbPath(indexName = "index") {
     // Always allow override via INDEX_PATH (for testing)
     if (process.env.INDEX_PATH) {
@@ -504,9 +574,10 @@ function initializeDatabase(db) {
         verifySqliteVecLoaded(db);
         _sqliteVecAvailable = true;
     }
-    catch {
+    catch (err) {
         // sqlite-vec is optional — vector search won't work but FTS is fine
         _sqliteVecAvailable = false;
+        console.warn(getErrorMessage(err));
     }
     db.exec("PRAGMA journal_mode = WAL");
     db.exec("PRAGMA foreign_keys = ON");
@@ -777,7 +848,10 @@ function ensureVecTableInternal(db, dimensions) {
         const existingDims = match?.[1] ? parseInt(match[1], 10) : null;
         if (existingDims === dimensions && hasHashSeq && hasCosine)
             return;
-        // Table exists but wrong schema - need to rebuild
+        if (existingDims !== null && existingDims !== dimensions) {
+            throw new Error(`Embedding dimension mismatch: existing vectors are ${existingDims}d but the current model produces ${dimensions}d. ` +
+                `Run 'qmd embed -f' to re-embed with the new model.`);
+        }
         db.exec("DROP TABLE IF EXISTS vectors_vec");
     }
     db.exec(`CREATE VIRTUAL TABLE vectors_vec USING vec0(hash_seq TEXT PRIMARY KEY, embedding float[${dimensions}] distance_metric=cosine)`);
@@ -867,6 +941,67 @@ export async function reindexCollection(store, collectionPath, globPattern, coll
     const orphanedCleaned = cleanupOrphanedContent(db);
     return { indexed, updated, unchanged, removed, orphanedCleaned };
 }
+function validatePositiveIntegerOption(name, value, fallback) { // returns `value` if it is an integer >= 1, `fallback` if it is undefined; `name` is used only in the error message
+    if (value === undefined) // only undefined selects the fallback; null, 0, NaN and strings fall through to the check below
+        return fallback;
+    if (!Number.isInteger(value) || value < 1) { // Number.isInteger does not coerce, so numeric strings and floats are rejected
+        throw new Error(`${name} must be a positive integer`);
+    }
+    return value;
+}
+function resolveEmbedOptions(options) { // resolves the embed batching limits; `options` may be null/undefined (optional chaining below)
+    return {
+        maxDocsPerBatch: validatePositiveIntegerOption("maxDocsPerBatch", options?.maxDocsPerBatch, DEFAULT_EMBED_MAX_DOCS_PER_BATCH), // throws if supplied but not a positive integer
+        maxBatchBytes: validatePositiveIntegerOption("maxBatchBytes", options?.maxBatchBytes, DEFAULT_EMBED_MAX_BATCH_BYTES), // same validation; default comes from the module constant
+    };
+}
+function getPendingEmbeddingDocs(db) { // returns rows { hash, path, bytes }: one per content hash of active documents with no content_vectors row at seq = 0, ordered by the smallest path; bytes is the content length after CAST to BLOB
+    return db.prepare(`
+    SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
+    FROM documents d
+    JOIN content c ON d.hash = c.hash
+    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
+    WHERE d.active = 1 AND v.hash IS NULL
+    GROUP BY d.hash
+    ORDER BY MIN(d.path)
+  `).all();
+}
+function buildEmbeddingBatches(docs, maxDocsPerBatch, maxBatchBytes) { // greedily groups `docs` (in input order) into batches bounded by document count and by summed `bytes`; returns an array of arrays
+    const batches = [];
+    let currentBatch = [];
+    let currentBytes = 0; // running byte total of currentBatch
+    for (const doc of docs) {
+        const docBytes = Math.max(0, doc.bytes); // clamp negative sizes to 0; NOTE(review): an undefined `bytes` would give NaN here
+        const wouldExceedDocs = currentBatch.length >= maxDocsPerBatch; // batch is already full before adding this doc
+        const wouldExceedBytes = currentBatch.length > 0 && (currentBytes + docBytes) > maxBatchBytes; // the length > 0 guard lets a single oversized doc form its own batch
+        if (wouldExceedDocs || wouldExceedBytes) { // flush the current batch and start a new one with this doc
+            batches.push(currentBatch);
+            currentBatch = [];
+            currentBytes = 0;
+        }
+        currentBatch.push(doc);
+        currentBytes += docBytes;
+    }
+    if (currentBatch.length > 0) { // flush the trailing partial batch; empty input yields no batches
+        batches.push(currentBatch);
+    }
+    return batches;
+}
+function getEmbeddingDocsForBatch(db, batch) { // loads the content body for every doc in `batch` with one query; returns the batch entries in their original order, each extended with `body`
+    if (batch.length === 0) // avoid preparing a statement with an empty IN () list
+        return [];
+    const placeholders = batch.map(() => "?").join(","); // one bound parameter per hash; the hashes themselves are never interpolated into the SQL
+    const rows = db.prepare(`
+    SELECT hash, doc as body
+    FROM content
+    WHERE hash IN (${placeholders})
+  `).all(...batch.map(doc => doc.hash));
+    const bodyByHash = new Map(rows.map(row => [row.hash, row.body])); // hash -> body lookup, so result order follows `batch` rather than the query
+    return batch.map((doc) => ({
+        ...doc,
+        body: bodyByHash.get(doc.hash) ?? "", // empty string when no content row (or a null body) came back for this hash
+    }));
+}
 /**
  * Generate vector embeddings for documents that need them.
  * Pure function — no console output, no db lifecycle management.
@@ -876,98 +1011,151 @@ export async function generateEmbeddings(store, options) {
     const db = store.db;
     const model = options?.model ?? DEFAULT_EMBED_MODEL;
     const now = new Date().toISOString();
+    const { maxDocsPerBatch, maxBatchBytes } = resolveEmbedOptions(options);
+    const encoder = new TextEncoder();
     if (options?.force) {
         clearAllEmbeddings(db);
     }
-    const hashesToEmbed = getHashesForEmbedding(db);
-    if (hashesToEmbed.length === 0) {
-        return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
-    }
-    const allChunks = [];
-    for (const item of hashesToEmbed) {
-        const encoder = new TextEncoder();
-        const bodyBytes = encoder.encode(item.body).length;
-        if (bodyBytes === 0)
-            continue;
-        const title = extractTitle(item.body, item.path);
-        const chunks = await chunkDocumentByTokens(item.body);
-        for (let seq = 0; seq < chunks.length; seq++) {
-            allChunks.push({
-                hash: item.hash,
-                title,
-                text: chunks[seq].text,
-                seq,
-                pos: chunks[seq].pos,
-                tokens: chunks[seq].tokens,
-                bytes: encoder.encode(chunks[seq].text).length,
-            });
-        }
-    }
-    if (allChunks.length === 0) {
+    const docsToEmbed = getPendingEmbeddingDocs(db);
+    if (docsToEmbed.length === 0) {
         return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
     }
-    const totalBytes = allChunks.reduce((sum, chk) => sum + chk.bytes, 0);
-    const totalChunks = allChunks.length;
-    const totalDocs = hashesToEmbed.length;
+    const totalBytes = docsToEmbed.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0);
+    const totalDocs = docsToEmbed.length;
     const startTime = Date.now();
     // Use store's LlamaCpp or global singleton, wrapped in a session
     const llm = getLlm(store);
-    const sessionOptions = { maxDuration: 30 * 60 * 1000, name: 'generateEmbeddings' };
+    const embedModelUri = llm.embedModelName;
     // Create a session manager for this llm instance
     const result = await withLLMSessionForLlm(llm, async (session) => {
-        // Get embedding dimensions from first chunk
-        const firstChunk = allChunks[0];
-        const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title);
-        const firstResult = await session.embed(firstText);
-        if (!firstResult) {
-            throw new Error("Failed to get embedding dimensions from first chunk");
-        }
-        store.ensureVecTable(firstResult.embedding.length);
-        let chunksEmbedded = 0, errors = 0, bytesProcessed = 0;
+        let chunksEmbedded = 0;
+        let errors = 0;
+        let bytesProcessed = 0;
+        let totalChunks = 0;
+        let vectorTableInitialized = false;
         const BATCH_SIZE = 32;
-        for (let batchStart = 0; batchStart < allChunks.length; batchStart += BATCH_SIZE) {
-            const batchEnd = Math.min(batchStart + BATCH_SIZE, allChunks.length);
-            const batch = allChunks.slice(batchStart, batchEnd);
-            const texts = batch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title));
-            try {
-                const embeddings = await session.embedBatch(texts);
-                for (let i = 0; i < batch.length; i++) {
-                    const chunk = batch[i];
-                    const embedding = embeddings[i];
-                    if (embedding) {
-                        insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
-                        chunksEmbedded++;
-                    }
-                    else {
-                        errors++;
-                    }
-                    bytesProcessed += chunk.bytes;
+        const batches = buildEmbeddingBatches(docsToEmbed, maxDocsPerBatch, maxBatchBytes);
+        for (const batchMeta of batches) {
+            // Abort early if session has been invalidated
+            if (!session.isValid) {
+                console.warn(`⚠ Session expired — skipping remaining document batches`);
+                break;
+            }
+            const batchDocs = getEmbeddingDocsForBatch(db, batchMeta);
+            const batchChunks = [];
+            const batchBytes = batchMeta.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0);
+            for (const doc of batchDocs) {
+                if (!doc.body.trim())
+                    continue;
+                const title = extractTitle(doc.body, doc.path);
+                const chunks = await chunkDocumentByTokens(doc.body, undefined, undefined, undefined, doc.path, options?.chunkStrategy, session.signal);
+                for (let seq = 0; seq < chunks.length; seq++) {
+                    batchChunks.push({
+                        hash: doc.hash,
+                        title,
+                        text: chunks[seq].text,
+                        seq,
+                        pos: chunks[seq].pos,
+                        tokens: chunks[seq].tokens,
+                        bytes: encoder.encode(chunks[seq].text).length,
+                    });
                 }
             }
-            catch {
-                // Batch failed — try individual embeddings as fallback
-                for (const chunk of batch) {
-                    try {
-                        const text = formatDocForEmbedding(chunk.text, chunk.title);
-                        const result = await session.embed(text);
-                        if (result) {
-                            insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
+            totalChunks += batchChunks.length;
+            if (batchChunks.length === 0) {
+                bytesProcessed += batchBytes;
+                options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
+                continue;
+            }
+            if (!vectorTableInitialized) {
+                const firstChunk = batchChunks[0];
+                const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title, embedModelUri);
+                const firstResult = await session.embed(firstText, { model });
+                if (!firstResult) {
+                    throw new Error("Failed to get embedding dimensions from first chunk");
+                }
+                store.ensureVecTable(firstResult.embedding.length);
+                vectorTableInitialized = true;
+            }
+            const totalBatchChunkBytes = batchChunks.reduce((sum, chunk) => sum + chunk.bytes, 0);
+            let batchChunkBytesProcessed = 0;
+            for (let batchStart = 0; batchStart < batchChunks.length; batchStart += BATCH_SIZE) {
+                // Abort early if session has been invalidated (e.g. max duration exceeded)
+                if (!session.isValid) {
+                    const remaining = batchChunks.length - batchStart;
+                    errors += remaining;
+                    console.warn(`⚠ Session expired — skipping ${remaining} remaining chunks`);
+                    break;
+                }
+                // Abort early if error rate is too high (>80% of processed chunks failed)
+                const processed = chunksEmbedded + errors;
+                if (processed >= BATCH_SIZE && errors > processed * 0.8) {
+                    const remaining = batchChunks.length - batchStart;
+                    errors += remaining;
+                    console.warn(`⚠ Error rate too high (${errors}/${processed}) — aborting embedding`);
+                    break;
+                }
+                const batchEnd = Math.min(batchStart + BATCH_SIZE, batchChunks.length);
+                const chunkBatch = batchChunks.slice(batchStart, batchEnd);
+                const texts = chunkBatch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title, embedModelUri));
+                try {
+                    const embeddings = await session.embedBatch(texts, { model });
+                    for (let i = 0; i < chunkBatch.length; i++) {
+                        const chunk = chunkBatch[i];
+                        const embedding = embeddings[i];
+                        if (embedding) {
+                            insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
                             chunksEmbedded++;
                         }
                         else {
                             errors++;
                         }
+                        batchChunkBytesProcessed += chunk.bytes;
+                    }
+                }
+                catch {
+                    // Batch failed — try individual embeddings as fallback
+                    // But skip if session is already invalid (avoids N doomed retries)
+                    if (!session.isValid) {
+                        errors += chunkBatch.length;
+                        batchChunkBytesProcessed += chunkBatch.reduce((sum, c) => sum + c.bytes, 0);
                     }
-                    catch {
-                        errors++;
+                    else {
+                        for (const chunk of chunkBatch) {
+                            try {
+                                const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
+                                const result = await session.embed(text, { model });
+                                if (result) {
+                                    insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
+                                    chunksEmbedded++;
+                                }
+                                else {
+                                    errors++;
+                                }
+                            }
+                            catch {
+                                errors++;
+                            }
+                            batchChunkBytesProcessed += chunk.bytes;
+                        }
                     }
-                    bytesProcessed += chunk.bytes;
                 }
+                const proportionalBytes = totalBatchChunkBytes === 0
+                    ? batchBytes
+                    : Math.min(batchBytes, Math.round((batchChunkBytesProcessed / totalBatchChunkBytes) * batchBytes));
+                options?.onProgress?.({
+                    chunksEmbedded,
+                    totalChunks,
+                    bytesProcessed: bytesProcessed + proportionalBytes,
+                    totalBytes,
+                    errors,
+                });
             }
+            bytesProcessed += batchBytes;
             options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
         }
         return { chunksEmbedded, errors };
-    }, sessionOptions);
+    }, { maxDuration: 30 * 60 * 1000, name: 'generateEmbeddings' });
     return {
         docsProcessed: totalDocs,
         chunksEmbedded: result.chunksEmbedded,
@@ -1097,7 +1285,7 @@ export function handelize(path) {
             const ext = extMatch ? extMatch[1] : '';
             const nameWithoutExt = ext ? segment.slice(0, -ext.length) : segment;
             const cleanedName = nameWithoutExt
-                .replace(/[^\p{L}\p{N}$]+/gu, '-') // Keep route marker "$", dash-separate other chars
+                .replace(/[^\p{L}\p{N}$]+/gu, '-') // Keep letters, numbers, "$"; dash-separate rest (including dots)
                 .replace(/^-+|-+$/g, ''); // Remove leading/trailing dashes
             return cleanedName + ext;
         }
@@ -1196,11 +1384,20 @@ export function cleanupOrphanedContent(db) {
  * Returns the number of orphaned embedding chunks deleted.
  */
 export function cleanupOrphanedVectors(db) {
-    // Check if vectors_vec table exists
-    const tableExists = db.prepare(`
-    SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'
-  `).get();
-    if (!tableExists) {
+    // sqlite-vec may not be loaded (e.g. Bun's bun:sqlite lacks loadExtension).
+    // The vectors_vec virtual table can appear in sqlite_master from a prior
+    // session, but querying it without the vec0 module loaded will crash (#380).
+    if (!isSqliteVecAvailable()) {
+        return 0;
+    }
+    // The schema entry can exist even when sqlite-vec itself is unavailable
+    // (for example when reopening a DB without vec0 loaded). In that case,
+    // touching the virtual table throws "no such module: vec0" and cleanup
+    // should degrade gracefully like the rest of the vector features.
+    try {
+        db.prepare(`SELECT 1 FROM vectors_vec LIMIT 0`).get();
+    }
+    catch {
         return 0;
     }
     // Count orphaned vectors first
@@ -1346,52 +1543,44 @@ export function getActiveDocumentPaths(db, collectionName) {
     return rows.map(r => r.path);
 }
 export { formatQueryForEmbedding, formatDocForEmbedding };
+/**
+ * Chunk a document using regex-only break point detection.
+ * This is the sync, backward-compatible API used by tests and legacy callers.
+ */
 export function chunkDocument(content, maxChars = CHUNK_SIZE_CHARS, overlapChars = CHUNK_OVERLAP_CHARS, windowChars = CHUNK_WINDOW_CHARS) {
-    if (content.length <= maxChars) {
-        return [{ text: content, pos: 0 }];
-    }
-    // Pre-scan all break points and code fences once
     const breakPoints = scanBreakPoints(content);
     const codeFences = findCodeFences(content);
-    const chunks = [];
-    let charPos = 0;
-    while (charPos < content.length) {
-        // Calculate target end position for this chunk
-        const targetEndPos = Math.min(charPos + maxChars, content.length);
-        let endPos = targetEndPos;
-        // If not at the end, find the best break point
-        if (endPos < content.length) {
-            // Find best cutoff using scored algorithm
-            const bestCutoff = findBestCutoff(breakPoints, targetEndPos, windowChars, 0.7, codeFences);
-            // Only use the cutoff if it's within our current chunk
-            if (bestCutoff > charPos && bestCutoff <= targetEndPos) {
-                endPos = bestCutoff;
-            }
-        }
-        // Ensure we make progress
-        if (endPos <= charPos) {
-            endPos = Math.min(charPos + maxChars, content.length);
-        }
-        chunks.push({ text: content.slice(charPos, endPos), pos: charPos });
-        // Move forward, but overlap with previous chunk
-        // For last chunk, don't overlap (just go to the end)
-        if (endPos >= content.length) {
-            break;
-        }
-        charPos = endPos - overlapChars;
-        const lastChunkPos = chunks.at(-1).pos;
-        if (charPos <= lastChunkPos) {
-            // Prevent infinite loop - move forward at least a bit
-            charPos = endPos;
+    return chunkDocumentWithBreakPoints(content, breakPoints, codeFences, maxChars, overlapChars, windowChars);
+}
+/**
+ * Async AST-aware chunking. Detects language from filepath, computes AST
+ * break points for supported code files, merges with regex break points,
+ * and delegates to the shared chunk algorithm.
+ *
+ * Falls back to regex-only when strategy is "regex", filepath is absent,
+ * or language is unsupported.
+ */
+export async function chunkDocumentAsync(content, maxChars = CHUNK_SIZE_CHARS, overlapChars = CHUNK_OVERLAP_CHARS, windowChars = CHUNK_WINDOW_CHARS, filepath, chunkStrategy = "regex") {
+    const regexPoints = scanBreakPoints(content);
+    const codeFences = findCodeFences(content);
+    let breakPoints = regexPoints;
+    if (chunkStrategy === "auto" && filepath) {
+        const { getASTBreakPoints } = await import("./ast.js");
+        const astPoints = await getASTBreakPoints(content, filepath);
+        if (astPoints.length > 0) {
+            breakPoints = mergeBreakPoints(regexPoints, astPoints);
         }
     }
-    return chunks;
+    return chunkDocumentWithBreakPoints(content, breakPoints, codeFences, maxChars, overlapChars, windowChars);
 }
 /**
  * Chunk a document by actual token count using the LLM tokenizer.
  * More accurate than character-based chunking but requires async.
+ *
+ * When filepath and chunkStrategy are provided, uses AST-aware break points
+ * for supported code files.
  */
-export async function chunkDocumentByTokens(content, maxTokens = CHUNK_SIZE_TOKENS, overlapTokens = CHUNK_OVERLAP_TOKENS, windowTokens = CHUNK_WINDOW_TOKENS) {
+export async function chunkDocumentByTokens(content, maxTokens = CHUNK_SIZE_TOKENS, overlapTokens = CHUNK_OVERLAP_TOKENS, windowTokens = CHUNK_WINDOW_TOKENS, filepath, chunkStrategy = "regex", signal) {
     const llm = getDefaultLlamaCpp();
     // Use moderate chars/token estimate (prose ~4, code ~2, mixed ~3)
     // If chunks exceed limit, they'll be re-split with actual ratio
@@ -1400,10 +1589,14 @@ export async function chunkDocumentByTokens(content, maxTokens = CHUNK_SIZE_TOKE
     const overlapChars = overlapTokens * avgCharsPerToken;
     const windowChars = windowTokens * avgCharsPerToken;
     // Chunk in character space with conservative estimate
-    let charChunks = chunkDocument(content, maxChars, overlapChars, windowChars);
+    // Use AST-aware chunking for the first pass when filepath/strategy provided
+    let charChunks = await chunkDocumentAsync(content, maxChars, overlapChars, windowChars, filepath, chunkStrategy);
     // Tokenize and split any chunks that still exceed limit
     const results = [];
     for (const chunk of charChunks) {
+        // Respect abort signal to avoid runaway tokenization
+        if (signal?.aborted)
+            break;
         const tokens = await llm.tokenize(chunk.text);
         if (tokens.length <= maxTokens) {
             results.push({ text: chunk.text, pos: chunk.pos, tokens: tokens.length });
@@ -1415,6 +1608,8 @@ export async function chunkDocumentByTokens(content, maxTokens = CHUNK_SIZE_TOKE
             const safeMaxChars = Math.floor(maxTokens * actualCharsPerToken * 0.95); // 5% safety margin
             const subChunks = chunkDocument(chunk.text, safeMaxChars, Math.floor(overlapChars * actualCharsPerToken / 2), Math.floor(windowChars * actualCharsPerToken / 2));
             for (const subChunk of subChunks) {
+                if (signal?.aborted)
+                    break;
                 const subTokens = await llm.tokenize(subChunk.text);
                 results.push({
                     text: subChunk.text,
@@ -1523,7 +1718,7 @@ export function matchFilesByGlob(db, pattern) {
   `).all();
     const isMatch = picomatch(pattern);
     return allFiles
-        .filter(f => isMatch(f.virtual_path) || isMatch(f.path))
+        .filter(f => isMatch(f.virtual_path) || isMatch(f.path) || isMatch(f.collection + '/' + f.path))
         .map(f => ({
         filepath: f.virtual_path, // Virtual path for precise lookup
         displayPath: f.path, // Relative path for display
@@ -1874,8 +2069,23 @@ export function getTopLevelPathsWithoutContext(db, collectionName) {
 // =============================================================================
 // FTS Search
 // =============================================================================
-function sanitizeFTS5Term(term) {
-    return term.replace(/[^\p{L}\p{N}']/gu, '').toLowerCase();
+export function sanitizeFTS5Term(term) { // strips every character that is not a Unicode letter, a Unicode number, an apostrophe or an underscore, then lower-cases the result (may return an empty string)
+    return term.replace(/[^\p{L}\p{N}'_]/gu, '').toLowerCase();
+}
+/**
+ * Check if a token is a hyphenated compound word (e.g., multi-agent, DEC-0054, gpt-4).
+ * Returns true if the token starts with a letter/digit and contains a hyphen immediately followed by a letter/digit; all other characters must be letters, digits, apostrophes or hyphens.
+ */
+function isHyphenatedToken(token) {
+    return /^[\p{L}\p{N}][\p{L}\p{N}'-]*-[\p{L}\p{N}][\p{L}\p{N}'-]*$/u.test(token); // anchored at both ends, so a token with a leading "-" never matches
+}
+/**
+ * Sanitize a hyphenated term into an FTS5 phrase by splitting on hyphens
+ * and sanitizing each part with sanitizeFTS5Term; parts that sanitize to an empty string are dropped. Returns the parts joined by single spaces (without
+ * the surrounding quotes) for use inside FTS5 quotes: "multi agent" matches "multi-agent" in porter tokenizer.
+ */
+function sanitizeHyphenatedTerm(term) {
+    return term.split('-').map(t => sanitizeFTS5Term(t)).filter(t => t).join(' '); // result is "" when no part survives sanitizing
 }
 /**
  * Parse lex query syntax into FTS5 query.
@@ -1883,14 +2093,23 @@ function sanitizeFTS5Term(term) {
  * Supports:
  * - Quoted phrases: "exact phrase" → "exact phrase" (exact match)
  * - Negation: -term or -"phrase" → uses FTS5 NOT operator
+ * - Hyphenated tokens: multi-agent, DEC-0054, gpt-4 → treated as phrases
  * - Plain terms: term → "term"* (prefix match)
  *
  * FTS5 NOT is a binary operator: `term1 NOT term2` means "match term1 but not term2".
  * So `-term` only works when there are also positive terms.
  *
+ * Hyphen disambiguation: `-sports` at a word boundary is negation, but `multi-agent`
+ * (where `-` is between word characters) is treated as a hyphenated phrase.
+ * When a leading `-` is followed by what looks like a hyphenated compound word
+ * (e.g., `-multi-agent`), the entire token is treated as a negated phrase.
+ *
  * Examples:
  *   performance -sports     → "performance"* NOT "sports"*
  *   "machine learning"      → "machine learning"
+ *   multi-agent memory      → "multi agent" AND "memory"*
+ *   DEC-0054               → "dec 0054"
+ *   -multi-agent            → NOT "multi agent"
  */
 function buildFTS5Query(query) {
     const positive = [];
@@ -1934,14 +2153,30 @@ function buildFTS5Query(query) {
             while (i < s.length && !/[\s"]/.test(s[i]))
                 i++;
             const term = s.slice(start, i);
-            const sanitized = sanitizeFTS5Term(term);
-            if (sanitized) {
-                const ftsTerm = `"${sanitized}"*`; // Prefix match
-                if (negated) {
-                    negative.push(ftsTerm);
+            // Handle hyphenated tokens: multi-agent, DEC-0054, gpt-4
+            // These get split into phrase queries so FTS5 porter tokenizer matches them.
+            if (isHyphenatedToken(term)) {
+                const sanitized = sanitizeHyphenatedTerm(term);
+                if (sanitized) {
+                    const ftsPhrase = `"${sanitized}"`; // Phrase match (no prefix)
+                    if (negated) {
+                        negative.push(ftsPhrase);
+                    }
+                    else {
+                        positive.push(ftsPhrase);
+                    }
                 }
-                else {
-                    positive.push(ftsTerm);
+            }
+            else {
+                const sanitized = sanitizeFTS5Term(term);
+                if (sanitized) {
+                    const ftsTerm = `"${sanitized}"*`; // Prefix match
+                    if (negated) {
+                        negative.push(ftsTerm);
+                    }
+                    else {
+                        positive.push(ftsTerm);
+                    }
                 }
             }
         }
@@ -1984,26 +2219,42 @@ export function searchFTS(db, query, limit = 20, collectionName) {
     const ftsQuery = buildFTS5Query(query);
     if (!ftsQuery)
         return [];
+    // Use a CTE to force FTS5 to run first, then filter by collection.
+    // Without the CTE, SQLite's query planner combines FTS5 MATCH with the
+    // collection filter in a single WHERE clause, which can cause it to
+    // abandon the FTS5 index and fall back to a full scan — turning an 8ms
+    // query into a 17-second query on large collections.
+    const params = [ftsQuery];
+    // When filtering by collection, fetch extra candidates from the FTS index
+    // since some will be filtered out. Without a collection filter we can
+    // fetch exactly the requested limit.
+    const ftsLimit = collectionName ? limit * 10 : limit;
     let sql = `
+    WITH fts_matches AS (
+      SELECT rowid, bm25(documents_fts, 1.5, 4.0, 1.0) as bm25_score
+      FROM documents_fts
+      WHERE documents_fts MATCH ?
+      ORDER BY bm25_score ASC
+      LIMIT ${ftsLimit}
+    )
     SELECT
       'qmd://' || d.collection || '/' || d.path as filepath,
       d.collection || '/' || d.path as display_path,
       d.title,
       content.doc as body,
       d.hash,
-      bm25(documents_fts, 10.0, 1.0) as bm25_score
-    FROM documents_fts f
-    JOIN documents d ON d.id = f.rowid
+      fm.bm25_score
+    FROM fts_matches fm
+    JOIN documents d ON d.id = fm.rowid
     JOIN content ON content.hash = d.hash
-    WHERE documents_fts MATCH ? AND d.active = 1
+    WHERE d.active = 1
   `;
-    const params = [ftsQuery];
     if (collectionName) {
         sql += ` AND d.collection = ?`;
         params.push(String(collectionName));
     }
     // bm25 lower is better; sort ascending.
-    sql += ` ORDER BY bm25_score ASC LIMIT ?`;
+    sql += ` ORDER BY fm.bm25_score ASC LIMIT ?`;
     params.push(limit);
     const rows = db.prepare(sql).all(...params);
     return rows.map(row => {
@@ -2143,13 +2394,23 @@ export function clearAllEmbeddings(db) {
 /**
  * Insert a single embedding into both content_vectors and vectors_vec tables.
  * The hash_seq key is formatted as "hash_seq" for the vectors_vec table.
+ *
+ * content_vectors is inserted first so that getHashesForEmbedding (which checks
+ * only content_vectors) won't re-select the hash on a crash between the two inserts.
+ *
+ * vectors_vec uses DELETE + INSERT instead of INSERT OR REPLACE because sqlite-vec's
+ * vec0 virtual tables silently ignore the OR REPLACE conflict clause.
+ *
+ * @param db - Database handle; must provide `prepare(sql)` returning a statement with `run(...)`.
+ * @param hash - Stored in content_vectors.hash and used as the first half of the vectors_vec key.
+ * @param seq - Stored in content_vectors.seq and used as the second half of the vectors_vec key.
+ * @param pos - Stored in content_vectors.pos.
+ * @param embedding - Value bound to vectors_vec.embedding.
+ * @param model - Stored in content_vectors.model.
+ * @param embeddedAt - Stored in content_vectors.embedded_at.
  */
 export function insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt) {
+    // vectors_vec key: hash and seq joined by an underscore.
     const hashSeq = `${hash}_${seq}`;
-    const insertVecStmt = db.prepare(`INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
+    // Insert content_vectors first — crash-safe ordering (see getHashesForEmbedding)
     const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, ?, ?, ?, ?)`);
-    insertVecStmt.run(hashSeq, embedding);
     insertContentVectorStmt.run(hash, seq, pos, model, embeddedAt);
+    // vec0 virtual tables don't support OR REPLACE — use DELETE + INSERT
+    // NOTE(review): all three statements are prepared on every call and are not
+    // wrapped in a transaction inside this function — confirm whether callers
+    // are expected to batch these calls in their own transaction.
+    const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
+    const insertVecStmt = db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
+    deleteVecStmt.run(hashSeq);
+    insertVecStmt.run(hashSeq, embedding);
 }
 // =============================================================================
 // Query expansion
@@ -2484,7 +2745,7 @@ export function getDocumentBody(db, doc, fromLine, maxLines) {
  * Returns documents without body by default (use getDocumentBody to load)
  */
 export function findDocuments(db, pattern, options = {}) {
-    const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?');
+    const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?') && !pattern.includes('{');
     const errors = [];
     const maxBytes = options.maxBytes ?? DEFAULT_MULTI_GET_MAX_BYTES;
     const bodyCol = options.includeBody ? `, content.doc as body` : ``;
@@ -2817,7 +3078,7 @@ export async function hybridQuery(store, query, options) {
         }
         // Batch embed all vector queries in a single call
         const llm = getLlm(store);
-        const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text));
+        const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, llm.embedModelName));
         hooks?.onEmbedStart?.(textsToEmbed.length);
         const embedStart = Date.now();
         const embeddings = await llm.embedBatch(textsToEmbed);
@@ -2855,8 +3116,9 @@ export async function hybridQuery(store, query, options) {
     const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
     const intentTerms = intent ? extractIntentTerms(intent) : [];
     const docChunkMap = new Map();
+    const chunkStrategy = options?.chunkStrategy;
     for (const cand of candidates) {
-        const chunks = chunkDocument(cand.body);
+        const chunks = await chunkDocumentAsync(cand.body, undefined, undefined, undefined, cand.file, chunkStrategy);
         if (chunks.length === 0)
             continue;
         // Pick chunk with most keyword overlap (fallback: first chunk)
@@ -3128,7 +3390,7 @@ export async function structuredSearch(store, searches, options) {
         const vecSearches = searches.filter((s) => s.type === 'vec' || s.type === 'hyde');
         if (vecSearches.length > 0) {
             const llm = getLlm(store);
-            const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query));
+            const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, llm.embedModelName));
             hooks?.onEmbedStart?.(textsToEmbed.length);
             const embedStart = Date.now();
             const embeddings = await llm.embedBatch(textsToEmbed);
@@ -3174,8 +3436,9 @@ export async function structuredSearch(store, searches, options) {
     const queryTerms = primaryQuery.toLowerCase().split(/\s+/).filter(t => t.length > 2);
     const intentTerms = intent ? extractIntentTerms(intent) : [];
     const docChunkMap = new Map();
+    const ssChunkStrategy = options?.chunkStrategy;
     for (const cand of candidates) {
-        const chunks = chunkDocument(cand.body);
+        const chunks = await chunkDocumentAsync(cand.body, undefined, undefined, undefined, cand.file, ssChunkStrategy);
         if (chunks.length === 0)
             continue;
         // Pick chunk with most keyword overlap