npm - @tobilu/qmd - Versions diffs - 2.0.1 → 2.1.0 - Mend

@tobilu/qmd 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/dist/cli/qmd.js CHANGED Viewed

@@ -7,8 +7,8 @@ import { dirname, join as pathJoin, relative as relativePath } from "path";
 import { parseArgs } from "util";
 import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync } from "fs";
 import { createInterface } from "readline/promises";
-import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, getHashesForEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, generateEmbeddings, syncConfigToDb, } from "../store.js";
-import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
+import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, generateEmbeddings, syncConfigToDb, } from "../store.js";
+import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
 import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
 import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, } from "../collections.js";
 import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js";
@@ -27,6 +27,13 @@ function getStore() {
         try {
             const config = loadConfig();
             syncConfigToDb(store.db, config);
+            if (config.models) {
+                setDefaultLlamaCpp(new LlamaCpp({
+                    embedModel: config.models.embed,
+                    generateModel: config.models.generate,
+                    rerankModel: config.models.rerank,
+                }));
+            }
         }
         catch {
             // Config may not exist yet — that's fine, DB works without it
@@ -261,6 +268,34 @@ async function showStatus() {
             context: ctx.context
         });
     }
+    // AST chunking status
+    try {
+        const { getASTStatus } = await import("../ast.js");
+        const ast = await getASTStatus();
+        console.log(`\n${c.bold}AST Chunking${c.reset}`);
+        if (ast.available) {
+            const ok = ast.languages.filter(l => l.available).map(l => l.language);
+            const fail = ast.languages.filter(l => !l.available);
+            console.log(`  Status:   ${c.green}active${c.reset}`);
+            console.log(`  Languages: ${ok.join(", ")}`);
+            if (fail.length > 0) {
+                for (const f of fail) {
+                    console.log(`  ${c.yellow}Unavailable: ${f.language} (${f.error})${c.reset}`);
+                }
+            }
+        }
+        else {
+            console.log(`  Status:   ${c.yellow}unavailable${c.reset} (falling back to regex chunking)`);
+            for (const l of ast.languages) {
+                if (l.error)
+                    console.log(`  ${c.dim}${l.language}: ${l.error}${c.reset}`);
+            }
+        }
+    }
+    catch {
+        console.log(`\n${c.bold}AST Chunking${c.reset}`);
+        console.log(`  Status:   ${c.dim}not available${c.reset}`);
+    }
     if (collections.length > 0) {
         console.log(`\n${c.bold}Collections${c.reset}`);
         for (const col of collections) {
@@ -787,7 +822,7 @@ function getDocument(filename, fromLine, maxLines, lineNumbers) {
 function multiGet(pattern, maxLines, maxBytes = DEFAULT_MULTI_GET_MAX_BYTES, format = "cli") {
     const db = getDb();
     // Check if it's a comma-separated list or a glob pattern
-    const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?');
+    const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?') && !pattern.includes('{');
     let files;
     if (isCommaSeparated) {
         // Comma-separated list of files (can be virtual paths or relative paths)
@@ -1367,26 +1402,51 @@ function renderProgressBar(percent, width = 30) {
     const bar = "█".repeat(filled) + "░".repeat(empty);
     return bar;
 }
-async function vectorIndex(model = DEFAULT_EMBED_MODEL, force = false) {
+function parseEmbedBatchOption(name, value) { // Validate an optional embed batch-size option; `name` is used only in the error message.
+    if (value === undefined)
+        return undefined; // Option not supplied: undefined is passed through unchanged.
+    const parsed = Number(value); // Number() coercion, so forms such as "1e3" or "0x10" also count as integers.
+    if (!Number.isInteger(parsed) || parsed < 1) { // Rejects NaN, fractions, zero and negatives ("" coerces to 0).
+        throw new Error(`${name} must be a positive integer`);
+    }
+    return parsed; // The validated value, as a number.
+}
+function parseChunkStrategy(value) { // Validate the --chunk-strategy option; returns "auto", "regex", or undefined.
+    if (value === undefined)
+        return undefined; // Flag not supplied: undefined is passed through unchanged.
+    const s = String(value);
+    if (s === "auto" || s === "regex") // Exact, case-sensitive comparison; the value is not trimmed.
+        return s;
+    throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${s}")`); // Anything else is reported as a usage error.
+}
+async function vectorIndex(model = DEFAULT_EMBED_MODEL_URI, force = false, batchOptions) {
     const storeInstance = getStore();
     const db = storeInstance.db;
     if (force) {
         console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
     }
     // Check if there's work to do before starting
-    const hashesToEmbed = getHashesForEmbedding(db);
-    if (hashesToEmbed.length === 0 && !force) {
+    const hashesToEmbed = getHashesNeedingEmbedding(db);
+    if (hashesToEmbed === 0 && !force) {
         console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
         closeDb();
         return;
     }
     console.log(`${c.dim}Model: ${model}${c.reset}\n`);
+    if (batchOptions?.maxDocsPerBatch !== undefined || batchOptions?.maxBatchBytes !== undefined) {
+        const maxDocsPerBatch = batchOptions.maxDocsPerBatch ?? DEFAULT_EMBED_MAX_DOCS_PER_BATCH;
+        const maxBatchBytes = batchOptions.maxBatchBytes ?? DEFAULT_EMBED_MAX_BATCH_BYTES;
+        console.log(`${c.dim}Batch: ${maxDocsPerBatch} docs / ${formatBytes(maxBatchBytes)}${c.reset}\n`);
+    }
     cursor.hide();
     progress.indeterminate();
     const startTime = Date.now();
     const result = await generateEmbeddings(storeInstance, {
         force,
         model,
+        maxDocsPerBatch: batchOptions?.maxDocsPerBatch,
+        maxBatchBytes: batchOptions?.maxBatchBytes,
+        chunkStrategy: batchOptions?.chunkStrategy,
         onProgress: (info) => {
             if (info.totalBytes === 0)
                 return;
@@ -1513,6 +1573,45 @@ function printEmptySearchResults(format, reason = "no_results") {
     }
     console.log("No results found.");
 }
+const DEFAULT_EDITOR_URI_TEMPLATE = "vscode://file/{path}:{line}:{col}";
+function encodePathForEditorUri(absolutePath) { // Percent-encode a filesystem path for substitution into an editor URI template.
+    return encodeURI(absolutePath) // encodeURI keeps "/" intact but also leaves "?" and "#" unescaped,
+        .replace(/\?/g, "%3F") // so both are escaped by hand: "?" would otherwise start a query string
+        .replace(/#/g, "%23"); // and "#" a fragment in the resulting URI.
+}
+function getEditorUriTemplate() { // Resolve the editor link template: QMD_EDITOR_URI env var, then the config file, then the built-in default.
+    const envTemplate = process.env.QMD_EDITOR_URI?.trim();
+    if (envTemplate) // An empty or whitespace-only env value is falsy after trim and falls through.
+        return envTemplate;
+    try {
+        const config = loadConfig();
+        const configTemplate = (config.editor_uri // First truthy key wins; several spellings of the key are accepted.
+            || config.editor_uri_template
+            || config.editorUri
+            || (typeof config["editor-uri"] === "string" ? config["editor-uri"] : undefined))?.trim();
+        if (configTemplate)
+            return configTemplate;
+    }
+    catch { // Also absorbs the TypeError raised by .trim() when a config value is truthy but not a string.
+        // Ignore config parsing issues and use default template.
+    }
+    return DEFAULT_EDITOR_URI_TEMPLATE;
+}
+export function buildEditorUri(template, absolutePath, line, col) {
+    const safeLine = Number.isFinite(line) && line > 0 ? Math.floor(line) : 1;
+    const safeCol = Number.isFinite(col) && col > 0 ? Math.floor(col) : 1;
+    const encodedPath = encodePathForEditorUri(absolutePath);
+    return template
+        .replace(/\{path\}/g, encodedPath)
+        .replace(/\{line\}/g, String(safeLine))
+        .replace(/\{col\}/g, String(safeCol))
+        .replace(/\{column\}/g, String(safeCol));
+}
+export function termLink(text, url, isTTY = !!process.stdout.isTTY) { // Wrap `text` in an OSC 8 terminal hyperlink pointing at `url`; `isTTY` defaults to stdout's TTY state.
+    if (!isTTY) // Not a terminal: return the text without any escape sequences.
+        return text;
+    return `\x1b]8;;${url}\x07${text}\x1b]8;;\x07`; // ESC ] 8 ; ; URL BEL opens the link and ESC ] 8 ; ; BEL closes it; url and text are inserted unescaped.
+}
 function outputResults(results, query, opts) {
     const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
     if (filtered.length === 0) {
@@ -1555,6 +1654,8 @@ function outputResults(results, query, opts) {
         }
     }
     else if (opts.format === "cli") {
+        const editorUriTemplate = getEditorUriTemplate();
+        const linkDb = getDb();
         for (let i = 0; i < filtered.length; i++) {
             const row = filtered[i];
             if (!row)
@@ -1562,13 +1663,25 @@ function outputResults(results, query, opts) {
             const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
             const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
             // Line 1: filepath with docid
-            const path = toQmdPath(row.displayPath);
+            const virtualPath = row.file.startsWith("qmd://") ? row.file : toQmdPath(row.displayPath);
+            const parsed = parseVirtualPath(virtualPath);
+            const absolutePath = resolveVirtualPath(linkDb, virtualPath);
+            const legacyPath = toQmdPath(row.displayPath);
+            const displayPath = parsed?.path || row.displayPath;
             // Only show :line if we actually found a term match in the snippet body (exclude header line).
             const snippetBody = snippet.split("\n").slice(1).join("\n").toLowerCase();
             const hasMatch = query.toLowerCase().split(/\s+/).some(t => t.length > 0 && snippetBody.includes(t));
             const lineInfo = hasMatch ? `:${line}` : "";
             const docidStr = docid ? ` ${c.dim}#${docid}${c.reset}` : "";
-            console.log(`${c.cyan}${path}${c.dim}${lineInfo}${c.reset}${docidStr}`);
+            if (process.stdout.isTTY && absolutePath && parsed?.path) {
+                const linkLine = hasMatch ? line : 1;
+                const linkTarget = buildEditorUri(editorUriTemplate, absolutePath, linkLine, 1);
+                const clickable = termLink(`${displayPath}${lineInfo}`, linkTarget);
+                console.log(`${c.cyan}${clickable}${c.reset}${docidStr}`);
+            }
+            else {
+                console.log(`${c.cyan}${legacyPath}${c.dim}${lineInfo}${c.reset}${docidStr}`);
+            }
             // Line 2: Title (if available)
             if (row.title) {
                 console.log(`${c.bold}Title: ${row.title}${c.reset}`);
@@ -1867,8 +1980,10 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
                 limit: opts.all ? 500 : (opts.limit || 10),
                 minScore: opts.minScore || 0,
                 candidateLimit: opts.candidateLimit,
+                skipRerank: opts.skipRerank,
                 explain: !!opts.explain,
                 intent,
+                chunkStrategy: opts.chunkStrategy,
                 hooks: {
                     onEmbedStart: (count) => {
                         process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
@@ -1894,8 +2009,10 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
                 limit: opts.all ? 500 : (opts.limit || 10),
                 minScore: opts.minScore || 0,
                 candidateLimit: opts.candidateLimit,
+                skipRerank: opts.skipRerank,
                 explain: !!opts.explain,
                 intent,
+                chunkStrategy: opts.chunkStrategy,
                 hooks: {
                     onStrongSignal: (score) => {
                         process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
@@ -1990,6 +2107,8 @@ function parseCLI() {
             mask: { type: "string" }, // glob pattern
             // Embed options
             force: { type: "boolean", short: "f" },
+            "max-docs-per-batch": { type: "string" },
+            "max-batch-mb": { type: "string" },
             // Update options
             pull: { type: "boolean" }, // git pull before update
             refresh: { type: "boolean" },
@@ -2000,7 +2119,10 @@ function parseCLI() {
             "line-numbers": { type: "boolean" }, // add line numbers to output
             // Query options
             "candidate-limit": { type: "string", short: "C" },
+            "no-rerank": { type: "boolean", default: false },
             intent: { type: "string" },
+            // Chunking options
+            "chunk-strategy": { type: "string" }, // "regex" (default) or "auto" (AST for code files)
             // MCP HTTP transport options
             http: { type: "boolean" },
             daemon: { type: "boolean" },
@@ -2040,8 +2162,10 @@ function parseCLI() {
         collection: values.collection,
         lineNumbers: !!values["line-numbers"],
         candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
+        skipRerank: !!values["no-rerank"],
         explain: !!values.explain,
         intent: values.intent,
+        chunkStrategy: parseChunkStrategy(values["chunk-strategy"]),
     };
     return {
         command: positionals[0] || "",
@@ -2177,6 +2301,7 @@ function showHelp() {
     console.log("  qmd multi-get <pattern>       - Batch fetch via glob or comma-separated list");
     console.log("  qmd skill show/install        - Show or install the packaged QMD skill");
     console.log("  qmd mcp                       - Start the MCP server (stdio transport for AI agents)");
+    console.log("  qmd bench <fixture.json>      - Run search quality benchmarks against a fixture file");
     console.log("");
     console.log("Collections & context:");
     console.log("  qmd collection add/list/remove/rename/show   - Manage indexed folders");
@@ -2187,6 +2312,8 @@ function showHelp() {
     console.log("  qmd status                    - View index + collection health");
     console.log("  qmd update [--pull]           - Re-index collections (optionally git pull first)");
     console.log("  qmd embed [-f]                - Generate/refresh vector embeddings");
+    console.log("    --max-docs-per-batch <n>    - Cap docs loaded into memory per embedding batch");
+    console.log("    --max-batch-mb <n>          - Cap UTF-8 MB loaded into memory per embedding batch");
     console.log("  qmd cleanup                   - Clear caches, vacuum DB");
     console.log("");
     console.log("Query syntax (qmd query):");
@@ -2232,6 +2359,7 @@ function showHelp() {
     console.log("");
     console.log("Global options:");
     console.log("  --index <name>             - Use a named index (default: index)");
+    console.log("  QMD_EDITOR_URI             - Editor link template for clickable TTY search output");
     console.log("");
     console.log("Search options:");
     console.log("  -n <num>                   - Max results (default 5, or 20 for --files/--json)");
@@ -2239,11 +2367,15 @@ function showHelp() {
     console.log("  --min-score <num>          - Minimum similarity score");
     console.log("  --full                     - Output full document instead of snippet");
     console.log("  -C, --candidate-limit <n>  - Max candidates to rerank (default 40, lower = faster)");
+    console.log("  --no-rerank                - Skip LLM reranking (use RRF scores only, much faster on CPU)");
     console.log("  --line-numbers             - Include line numbers in output");
     console.log("  --explain                  - Include retrieval score traces (query --json/CLI)");
     console.log("  --files | --json | --csv | --md | --xml  - Output format");
     console.log("  -c, --collection <name>    - Filter by one or more collections");
     console.log("");
+    console.log("Embed/query options:");
+    console.log("  --chunk-strategy <auto|regex> - Chunking mode (default: regex; auto uses AST for code files)");
+    console.log("");
     console.log("Multi-get options:");
     console.log("  -l <num>                   - Maximum lines per file");
     console.log("  --max-bytes <num>          - Skip files larger than N bytes (default 10240)");
@@ -2533,7 +2665,20 @@ if (isMain) {
             await updateCollections();
             break;
         case "embed":
-            await vectorIndex(DEFAULT_EMBED_MODEL, !!cli.values.force);
+            try {
+                const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
+                const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
+                const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
+                await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
+                    maxDocsPerBatch,
+                    maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
+                    chunkStrategy: embedChunkStrategy,
+                });
+            }
+            catch (error) {
+                console.error(error instanceof Error ? error.message : String(error));
+                process.exit(1);
+            }
             break;
         case "pull": {
             const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh);
@@ -2581,6 +2726,23 @@ if (isMain) {
             }
             await querySearch(cli.query, cli.opts);
             break;
+        case "bench": {
+            const fixturePath = cli.args[0];
+            if (!fixturePath) {
+                console.error("Usage: qmd bench <fixture.json> [--json] [-c collection]");
+                console.error("");
+                console.error("Run search quality benchmarks against a fixture file.");
+                console.error("See src/bench/fixtures/example.json for the fixture format.");
+                process.exit(1);
+            }
+            const { runBenchmark } = await import("../bench/bench.js");
+            const benchCollection = cli.opts.collection;
+            await runBenchmark(fixturePath, {
+                json: !!cli.opts.json,
+                collection: Array.isArray(benchCollection) ? benchCollection[0] : benchCollection,
+            });
+            break;
+        }
         case "mcp": {
             const sub = cli.args[0]; // stop | status | undefined
             // Cache dir for PID/log files — same dir as the index

package/dist/collections.d.ts CHANGED Viewed

@@ -21,12 +21,23 @@ export interface Collection {
     update?: string;
     includeByDefault?: boolean;
 }
+/**
+ * Model configuration for embedding, reranking, and generation
+ */
+export interface ModelsConfig {
+    embed?: string; // Forwarded to the LlamaCpp constructor as `embedModel`; presumably a model URI like the hf: defaults — confirm.
+    rerank?: string; // Forwarded to the LlamaCpp constructor as `rerankModel`.
+    generate?: string; // Forwarded to the LlamaCpp constructor as `generateModel`.
+}
 /**
  * The complete configuration file structure
  */
 export interface CollectionConfig {
     global_context?: string;
+    editor_uri?: string; // Editor link template; the first config key the CLI checks when QMD_EDITOR_URI is unset.
+    editor_uri_template?: string; // Alternate key for the same template, consulted when editor_uri is empty.
     collections: Record<string, Collection>;
+    models?: ModelsConfig; // Optional model overrides handed to LlamaCpp.
 }
 /**
  * Collection with its name (for return values)

package/dist/db.d.ts CHANGED Viewed

@@ -4,6 +4,11 @@
  * Provides a unified Database export that works under both Bun (bun:sqlite)
  * and Node.js (better-sqlite3). The APIs are nearly identical — the main
  * difference is the import path.
+ *
+ * On macOS, Apple's system SQLite is compiled with SQLITE_OMIT_LOAD_EXTENSION,
+ * which prevents loading native extensions like sqlite-vec. When running under
+ * Bun we call Database.setCustomSQLite() to swap in Homebrew's full-featured
+ * SQLite build before creating any database instances.
  */
 export declare const isBun: boolean;
 /**
@@ -29,5 +34,8 @@ export interface Statement {
 }
 /**
  * Load the sqlite-vec extension into a database.
+ *
+ * Throws with platform-specific fix instructions when the extension is
+ * unavailable.
  */
 export declare function loadSqliteVec(db: Database): void;

package/dist/db.js CHANGED Viewed

@@ -4,6 +4,11 @@
  * Provides a unified Database export that works under both Bun (bun:sqlite)
  * and Node.js (better-sqlite3). The APIs are nearly identical — the main
  * difference is the import path.
+ *
+ * On macOS, Apple's system SQLite is compiled with SQLITE_OMIT_LOAD_EXTENSION,
+ * which prevents loading native extensions like sqlite-vec. When running under
+ * Bun we call Database.setCustomSQLite() to swap in Homebrew's full-featured
+ * SQLite build before creating any database instances.
  */
 export const isBun = typeof globalThis.Bun !== "undefined";
 let _Database;
@@ -11,9 +16,35 @@ let _sqliteVecLoad;
 if (isBun) {
     // Dynamic string prevents tsc from resolving bun:sqlite on Node.js builds
     const bunSqlite = "bun:" + "sqlite";
-    _Database = (await import(/* @vite-ignore */ bunSqlite)).Database;
-    const { getLoadablePath } = await import("sqlite-vec");
-    _sqliteVecLoad = (db) => db.loadExtension(getLoadablePath());
+    const BunDatabase = (await import(/* @vite-ignore */ bunSqlite)).Database;
+    // See: https://bun.com/docs/runtime/sqlite#setcustomsqlite
+    if (process.platform === "darwin") {
+        const homebrewPaths = [
+            "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib", // Apple Silicon
+            "/usr/local/opt/sqlite/lib/libsqlite3.dylib", // Intel
+        ];
+        for (const p of homebrewPaths) {
+            try {
+                BunDatabase.setCustomSQLite(p);
+                break;
+            }
+            catch { }
+        }
+    }
+    _Database = BunDatabase;
+    // setCustomSQLite may have silently failed — test that extensions actually work.
+    try {
+        const { getLoadablePath } = await import("sqlite-vec");
+        const vecPath = getLoadablePath();
+        const testDb = new BunDatabase(":memory:");
+        testDb.loadExtension(vecPath);
+        testDb.close();
+        _sqliteVecLoad = (db) => db.loadExtension(vecPath);
+    }
+    catch {
+        // Vector search won't work, but BM25 and other operations are unaffected.
+        _sqliteVecLoad = null;
+    }
 }
 else {
     _Database = (await import("better-sqlite3")).default;
@@ -28,7 +59,17 @@ export function openDatabase(path) {
 }
 /**
  * Load the sqlite-vec extension into a database.
+ *
+ * Throws with platform-specific fix instructions when the extension is
+ * unavailable.
  */
 export function loadSqliteVec(db) {
+    if (!_sqliteVecLoad) { // The loader is set to null at module init when the extension probe under Bun fails.
+        const hint = isBun && process.platform === "darwin" // Only the Bun-on-macOS case gets the Homebrew-specific advice.
+            ? "On macOS with Bun, install Homebrew SQLite: brew install sqlite\n" +
+                "Or install qmd with npm instead: npm install -g @tobilu/qmd"
+            : "Ensure the sqlite-vec native module is installed correctly.";
+        throw new Error(`sqlite-vec extension is unavailable. ${hint}`);
+    }
     _sqliteVecLoad(db);
 }

package/dist/index.d.ts CHANGED Viewed

@@ -16,11 +16,12 @@
  *   const results = await store.search({ query: "how does auth work?" })
  *   await store.close()
  */
-import { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, type Store as InternalStore, type DocumentResult, type DocumentNotFound, type SearchResult, type HybridQueryResult, type HybridQueryOptions, type HybridQueryExplain, type ExpandedQuery, type StructuredSearchOptions, type MultiGetResult, type IndexStatus, type IndexHealthInfo, type SearchHooks, type ReindexProgress, type ReindexResult, type EmbedProgress, type EmbedResult } from "./store.js";
+import { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, type Store as InternalStore, type DocumentResult, type DocumentNotFound, type SearchResult, type HybridQueryResult, type HybridQueryOptions, type HybridQueryExplain, type ExpandedQuery, type StructuredSearchOptions, type MultiGetResult, type IndexStatus, type IndexHealthInfo, type SearchHooks, type ReindexProgress, type ReindexResult, type EmbedProgress, type EmbedResult, type ChunkStrategy } from "./store.js";
 import { type Collection, type CollectionConfig, type NamedCollection, type ContextMap } from "./collections.js";
 export type { DocumentResult, DocumentNotFound, SearchResult, HybridQueryResult, HybridQueryOptions, HybridQueryExplain, ExpandedQuery, StructuredSearchOptions, MultiGetResult, IndexStatus, IndexHealthInfo, SearchHooks, ReindexProgress, ReindexResult, EmbedProgress, EmbedResult, Collection, CollectionConfig, NamedCollection, ContextMap, };
 export type { InternalStore };
 export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
+export type { ChunkStrategy } from "./store.js";
 export { getDefaultDbPath } from "./store.js";
 export { Maintenance } from "./maintenance.js";
 /**
@@ -65,6 +66,8 @@ export interface SearchOptions {
     minScore?: number;
     /** Include explain traces */
     explain?: boolean;
+    /** Chunk strategy: "regex" (default) or "auto" (uses AST for code files) */
+    chunkStrategy?: ChunkStrategy;
 }
 /**
  * Options for searchLex() — BM25 keyword search.
@@ -183,6 +186,9 @@ export interface QMDStore {
     embed(options?: {
         force?: boolean;
         model?: string;
+        maxDocsPerBatch?: number;
+        maxBatchBytes?: number;
+        chunkStrategy?: ChunkStrategy;
         onProgress?: (info: EmbedProgress) => void;
     }): Promise<EmbedResult>;
     /** Get index status (document counts, collections, embedding state) */

package/dist/index.js CHANGED Viewed

@@ -19,7 +19,7 @@
 import { createStore as createStoreInternal, hybridQuery, structuredSearch, extractSnippet, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_MULTI_GET_MAX_BYTES, reindexCollection, generateEmbeddings, listCollections as storeListCollections, syncConfigToDb, getStoreCollections, getStoreCollection, getStoreGlobalContext, getStoreContexts, upsertStoreCollection, deleteStoreCollection, renameStoreCollection, updateStoreContext, removeStoreContext, setStoreGlobalContext, vacuumDatabase, cleanupOrphanedContent, cleanupOrphanedVectors, deleteLLMCache, deleteInactiveDocuments, clearAllEmbeddings, } from "./store.js";
 import { LlamaCpp, } from "./llm.js";
 import { setConfigSource, loadConfig, addCollection as collectionsAddCollection, removeCollection as collectionsRemoveCollection, renameCollection as collectionsRenameCollection, addContext as collectionsAddContext, removeContext as collectionsRemoveContext, setGlobalContext as collectionsSetGlobalContext, } from "./collections.js";
-// Re-export utility functions used by frontends
+// Re-export utility functions and types used by frontends
 export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
 // Re-export getDefaultDbPath for CLI/MCP that need the default database location
 export { getDefaultDbPath } from "./store.js";
@@ -63,21 +63,26 @@ export async function createStore(options) {
     // Track whether we have a YAML config path for write-through
     const hasYamlConfig = !!options.configPath;
     // Sync config into SQLite store_collections
+    let config;
     if (options.configPath) {
         // YAML mode: inject config source for write-through, sync to DB
         setConfigSource({ configPath: options.configPath });
-        const config = loadConfig();
+        config = loadConfig();
         syncConfigToDb(db, config);
     }
     else if (options.config) {
         // Inline config mode: inject config source for mutations, sync to DB
         setConfigSource({ config: options.config });
-        syncConfigToDb(db, options.config);
+        config = options.config;
+        syncConfigToDb(db, config);
     }
     // else: DB-only mode — no external config, use existing store_collections
     // Create a per-store LlamaCpp instance — lazy-loads models on first use,
     // auto-unloads after 5 min inactivity to free VRAM.
     const llm = new LlamaCpp({
+        embedModel: config?.models?.embed,
+        generateModel: config?.models?.generate,
+        rerankModel: config?.models?.rerank,
         inactivityTimeoutMs: 5 * 60 * 1000,
         disposeModelsOnInactivity: true,
     });
@@ -105,6 +110,7 @@ export async function createStore(options) {
                     explain: opts.explain,
                     intent: opts.intent,
                     skipRerank,
+                    chunkStrategy: opts.chunkStrategy,
                 });
             }
             // Simple query string — use hybridQuery (expand + search + rerank)
@@ -115,6 +121,7 @@ export async function createStore(options) {
                 explain: opts.explain,
                 intent: opts.intent,
                 skipRerank,
+                chunkStrategy: opts.chunkStrategy,
             });
         },
         searchLex: async (q, opts) => internal.searchFTS(q, opts?.limit, opts?.collection),
@@ -210,6 +217,9 @@ export async function createStore(options) {
             return generateEmbeddings(internal, {
                 force: embedOpts?.force,
                 model: embedOpts?.model,
+                maxDocsPerBatch: embedOpts?.maxDocsPerBatch,
+                maxBatchBytes: embedOpts?.maxBatchBytes,
+                chunkStrategy: embedOpts?.chunkStrategy,
                 onProgress: embedOpts?.onProgress,
             });
         },

package/dist/llm.d.ts CHANGED Viewed

@@ -105,7 +105,7 @@ export type LLMSessionOptions = {
  */
 export interface ILLMSession {
     embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
-    embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
+    embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]>;
     expandQuery(query: string, options?: {
         context?: string;
         includeLexical?: boolean;
@@ -137,7 +137,7 @@ export type RerankDocument = {
 };
 export declare const LFM2_GENERATE_MODEL = "hf:LiquidAI/LFM2-1.2B-GGUF/LFM2-1.2B-Q4_K_M.gguf";
 export declare const LFM2_INSTRUCT_MODEL = "hf:LiquidAI/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf";
-export declare const DEFAULT_EMBED_MODEL_URI: string;
+export declare const DEFAULT_EMBED_MODEL_URI = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
 export declare const DEFAULT_RERANK_MODEL_URI = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
 export declare const DEFAULT_GENERATE_MODEL_URI = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
 export declare const DEFAULT_MODEL_CACHE_DIR: string;
@@ -232,6 +232,7 @@ export declare class LlamaCpp implements LLM {
     private disposeModelsOnInactivity;
     private disposed;
     constructor(config?: LlamaCppConfig);
+    get embedModelName(): string;
     /**
      * Reset the inactivity timer. Called after each model operation.
      * When timer fires, models are unloaded to free memory (if no active sessions).
@@ -306,6 +307,7 @@ export declare class LlamaCpp implements LLM {
      * - Combined: drops from 11.6 GB (auto, no flash) to 568 MB per context (20×)
      */
     private static readonly RERANK_CONTEXT_SIZE;
+    private static readonly EMBED_CONTEXT_SIZE;
     private ensureRerankContexts;
     /**
      * Tokenize text using the embedding model's tokenizer
@@ -320,12 +322,19 @@ export declare class LlamaCpp implements LLM {
      * Detokenize token IDs back to text
      */
     detokenize(tokens: readonly LlamaToken[]): Promise<string>;
+    /**
+     * Truncate text to fit within the embedding model's context window.
+     * Uses the model's own tokenizer for accurate token counting, then
+     * detokenizes back to text if truncation is needed.
+     * Returns the (possibly truncated) text and whether truncation occurred.
+     */
+    private truncateToContextSize;
     embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
     /**
      * Batch embed multiple texts efficiently
      * Uses Promise.all for parallel embedding - node-llama-cpp handles batching internally
      */
-    embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
+    embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]>;
     generate(prompt: string, options?: GenerateOptions): Promise<GenerateResult | null>;
     modelExists(modelUri: string): Promise<ModelInfo>;
     expandQuery(query: string, options?: {