@tobilu/qmd 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/qmd.js CHANGED
@@ -7,8 +7,8 @@ import { dirname, join as pathJoin, relative as relativePath } from "path";
7
7
  import { parseArgs } from "util";
8
8
  import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync } from "fs";
9
9
  import { createInterface } from "readline/promises";
10
- import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, getHashesForEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, generateEmbeddings, syncConfigToDb, } from "../store.js";
11
- import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
10
+ import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, generateEmbeddings, syncConfigToDb, } from "../store.js";
11
+ import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
12
12
  import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
13
13
  import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, } from "../collections.js";
14
14
  import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js";
@@ -27,6 +27,13 @@ function getStore() {
27
27
  try {
28
28
  const config = loadConfig();
29
29
  syncConfigToDb(store.db, config);
30
+ if (config.models) {
31
+ setDefaultLlamaCpp(new LlamaCpp({
32
+ embedModel: config.models.embed,
33
+ generateModel: config.models.generate,
34
+ rerankModel: config.models.rerank,
35
+ }));
36
+ }
30
37
  }
31
38
  catch {
32
39
  // Config may not exist yet — that's fine, DB works without it
@@ -261,6 +268,34 @@ async function showStatus() {
261
268
  context: ctx.context
262
269
  });
263
270
  }
271
+ // AST chunking status
272
+ try {
273
+ const { getASTStatus } = await import("../ast.js");
274
+ const ast = await getASTStatus();
275
+ console.log(`\n${c.bold}AST Chunking${c.reset}`);
276
+ if (ast.available) {
277
+ const ok = ast.languages.filter(l => l.available).map(l => l.language);
278
+ const fail = ast.languages.filter(l => !l.available);
279
+ console.log(` Status: ${c.green}active${c.reset}`);
280
+ console.log(` Languages: ${ok.join(", ")}`);
281
+ if (fail.length > 0) {
282
+ for (const f of fail) {
283
+ console.log(` ${c.yellow}Unavailable: ${f.language} (${f.error})${c.reset}`);
284
+ }
285
+ }
286
+ }
287
+ else {
288
+ console.log(` Status: ${c.yellow}unavailable${c.reset} (falling back to regex chunking)`);
289
+ for (const l of ast.languages) {
290
+ if (l.error)
291
+ console.log(` ${c.dim}${l.language}: ${l.error}${c.reset}`);
292
+ }
293
+ }
294
+ }
295
+ catch {
296
+ console.log(`\n${c.bold}AST Chunking${c.reset}`);
297
+ console.log(` Status: ${c.dim}not available${c.reset}`);
298
+ }
264
299
  if (collections.length > 0) {
265
300
  console.log(`\n${c.bold}Collections${c.reset}`);
266
301
  for (const col of collections) {
@@ -787,7 +822,7 @@ function getDocument(filename, fromLine, maxLines, lineNumbers) {
787
822
  function multiGet(pattern, maxLines, maxBytes = DEFAULT_MULTI_GET_MAX_BYTES, format = "cli") {
788
823
  const db = getDb();
789
824
  // Check if it's a comma-separated list or a glob pattern
790
- const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?');
825
+ const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?') && !pattern.includes('{');
791
826
  let files;
792
827
  if (isCommaSeparated) {
793
828
  // Comma-separated list of files (can be virtual paths or relative paths)
@@ -1367,26 +1402,51 @@ function renderProgressBar(percent, width = 30) {
1367
1402
  const bar = "█".repeat(filled) + "░".repeat(empty);
1368
1403
  return bar;
1369
1404
  }
1370
- async function vectorIndex(model = DEFAULT_EMBED_MODEL, force = false) {
1405
/**
 * Parse a numeric `qmd embed` batch option supplied on the command line.
 *
 * @param name  Label for the option, used as the prefix of the error message.
 * @param value Raw option value, or `undefined` when the option was omitted.
 * @returns `undefined` when no value was supplied, otherwise the parsed
 *          positive integer.
 * @throws {Error} When the value is not a positive integer that JavaScript
 *                 can represent exactly.
 */
function parseEmbedBatchOption(name, value) {
    if (value === undefined)
        return undefined;
    const parsed = Number(value);
    // isSafeInteger (not isInteger) also rejects inputs above 2^53, which
    // Number() has already rounded to a different value than the user typed.
    if (!Number.isSafeInteger(parsed) || parsed < 1) {
        throw new Error(`${name} must be a positive integer (got "${String(value)}")`);
    }
    return parsed;
}
1414
// Validate the optional --chunk-strategy value.
// Returns undefined when the option is absent, and the string itself when it is
// "auto" or "regex"; any other value raises an Error that echoes the input.
+ function parseChunkStrategy(value) {
1415
+ if (value === undefined)
1416
+ return undefined;
1417
+ const s = String(value);
1418
+ if (s === "auto" || s === "regex")
1419
+ return s;
1420
+ throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${s}")`);
1421
+ }
1422
+ async function vectorIndex(model = DEFAULT_EMBED_MODEL_URI, force = false, batchOptions) {
1371
1423
  const storeInstance = getStore();
1372
1424
  const db = storeInstance.db;
1373
1425
  if (force) {
1374
1426
  console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
1375
1427
  }
1376
1428
  // Check if there's work to do before starting
1377
- const hashesToEmbed = getHashesForEmbedding(db);
1378
- if (hashesToEmbed.length === 0 && !force) {
1429
+ const hashesToEmbed = getHashesNeedingEmbedding(db);
1430
+ if (hashesToEmbed === 0 && !force) {
1379
1431
  console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
1380
1432
  closeDb();
1381
1433
  return;
1382
1434
  }
1383
1435
  console.log(`${c.dim}Model: ${model}${c.reset}\n`);
1436
+ if (batchOptions?.maxDocsPerBatch !== undefined || batchOptions?.maxBatchBytes !== undefined) {
1437
+ const maxDocsPerBatch = batchOptions.maxDocsPerBatch ?? DEFAULT_EMBED_MAX_DOCS_PER_BATCH;
1438
+ const maxBatchBytes = batchOptions.maxBatchBytes ?? DEFAULT_EMBED_MAX_BATCH_BYTES;
1439
+ console.log(`${c.dim}Batch: ${maxDocsPerBatch} docs / ${formatBytes(maxBatchBytes)}${c.reset}\n`);
1440
+ }
1384
1441
  cursor.hide();
1385
1442
  progress.indeterminate();
1386
1443
  const startTime = Date.now();
1387
1444
  const result = await generateEmbeddings(storeInstance, {
1388
1445
  force,
1389
1446
  model,
1447
+ maxDocsPerBatch: batchOptions?.maxDocsPerBatch,
1448
+ maxBatchBytes: batchOptions?.maxBatchBytes,
1449
+ chunkStrategy: batchOptions?.chunkStrategy,
1390
1450
  onProgress: (info) => {
1391
1451
  if (info.totalBytes === 0)
1392
1452
  return;
@@ -1513,6 +1573,45 @@ function printEmptySearchResults(format, reason = "no_results") {
1513
1573
  }
1514
1574
  console.log("No results found.");
1515
1575
  }
1576
+ const DEFAULT_EDITOR_URI_TEMPLATE = "vscode://file/{path}:{line}:{col}";
1577
// Percent-encode a file path for substitution into an editor URI template.
// encodeURI leaves "?" and "#" untouched, so both are escaped explicitly to
// keep them from being read as the query and fragment delimiters.
+ function encodePathForEditorUri(absolutePath) {
1578
+ return encodeURI(absolutePath)
1579
+ .replace(/\?/g, "%3F")
1580
+ .replace(/#/g, "%23");
1581
+ }
1582
+ function getEditorUriTemplate() {
1583
+ const envTemplate = process.env.QMD_EDITOR_URI?.trim();
1584
+ if (envTemplate)
1585
+ return envTemplate;
1586
+ try {
1587
+ const config = loadConfig();
1588
+ const configTemplate = (config.editor_uri
1589
+ || config.editor_uri_template
1590
+ || config.editorUri
1591
+ || (typeof config["editor-uri"] === "string" ? config["editor-uri"] : undefined))?.trim();
1592
+ if (configTemplate)
1593
+ return configTemplate;
1594
+ }
1595
+ catch {
1596
+ // Ignore config parsing issues and use default template.
1597
+ }
1598
+ return DEFAULT_EDITOR_URI_TEMPLATE;
1599
+ }
1600
+ export function buildEditorUri(template, absolutePath, line, col) {
1601
+ const safeLine = Number.isFinite(line) && line > 0 ? Math.floor(line) : 1;
1602
+ const safeCol = Number.isFinite(col) && col > 0 ? Math.floor(col) : 1;
1603
+ const encodedPath = encodePathForEditorUri(absolutePath);
1604
+ return template
1605
+ .replace(/\{path\}/g, encodedPath)
1606
+ .replace(/\{line\}/g, String(safeLine))
1607
+ .replace(/\{col\}/g, String(safeCol))
1608
+ .replace(/\{column\}/g, String(safeCol));
1609
+ }
1610
/**
 * Wrap text in an OSC 8 terminal hyperlink.
 *
 * @param text  Visible label.
 * @param url   Link target.
 * @param isTTY Whether to emit escape sequences; defaults to whether stdout
 *              is a TTY.
 * @returns `text` unchanged when `isTTY` is false, otherwise `text` wrapped in
 *          the OSC 8 open/close sequences pointing at `url`.
 */
export function termLink(text, url, isTTY = !!process.stdout.isTTY) {
    if (!isTTY)
        return text;
    // Percent-encode control characters so a BEL or ESC inside the URL cannot
    // terminate the escape sequence early and leak raw bytes to the terminal.
    const safeUrl = String(url).replace(/[\x00-\x1f\x7f-\x9f]/g, (ch) => encodeURIComponent(ch));
    return `\x1b]8;;${safeUrl}\x07${text}\x1b]8;;\x07`;
}
1516
1615
  function outputResults(results, query, opts) {
1517
1616
  const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
1518
1617
  if (filtered.length === 0) {
@@ -1555,6 +1654,8 @@ function outputResults(results, query, opts) {
1555
1654
  }
1556
1655
  }
1557
1656
  else if (opts.format === "cli") {
1657
+ const editorUriTemplate = getEditorUriTemplate();
1658
+ const linkDb = getDb();
1558
1659
  for (let i = 0; i < filtered.length; i++) {
1559
1660
  const row = filtered[i];
1560
1661
  if (!row)
@@ -1562,13 +1663,25 @@ function outputResults(results, query, opts) {
1562
1663
  const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
1563
1664
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
1564
1665
  // Line 1: filepath with docid
1565
- const path = toQmdPath(row.displayPath);
1666
+ const virtualPath = row.file.startsWith("qmd://") ? row.file : toQmdPath(row.displayPath);
1667
+ const parsed = parseVirtualPath(virtualPath);
1668
+ const absolutePath = resolveVirtualPath(linkDb, virtualPath);
1669
+ const legacyPath = toQmdPath(row.displayPath);
1670
+ const displayPath = parsed?.path || row.displayPath;
1566
1671
  // Only show :line if we actually found a term match in the snippet body (exclude header line).
1567
1672
  const snippetBody = snippet.split("\n").slice(1).join("\n").toLowerCase();
1568
1673
  const hasMatch = query.toLowerCase().split(/\s+/).some(t => t.length > 0 && snippetBody.includes(t));
1569
1674
  const lineInfo = hasMatch ? `:${line}` : "";
1570
1675
  const docidStr = docid ? ` ${c.dim}#${docid}${c.reset}` : "";
1571
- console.log(`${c.cyan}${path}${c.dim}${lineInfo}${c.reset}${docidStr}`);
1676
+ if (process.stdout.isTTY && absolutePath && parsed?.path) {
1677
+ const linkLine = hasMatch ? line : 1;
1678
+ const linkTarget = buildEditorUri(editorUriTemplate, absolutePath, linkLine, 1);
1679
+ const clickable = termLink(`${displayPath}${lineInfo}`, linkTarget);
1680
+ console.log(`${c.cyan}${clickable}${c.reset}${docidStr}`);
1681
+ }
1682
+ else {
1683
+ console.log(`${c.cyan}${legacyPath}${c.dim}${lineInfo}${c.reset}${docidStr}`);
1684
+ }
1572
1685
  // Line 2: Title (if available)
1573
1686
  if (row.title) {
1574
1687
  console.log(`${c.bold}Title: ${row.title}${c.reset}`);
@@ -1867,8 +1980,10 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1867
1980
  limit: opts.all ? 500 : (opts.limit || 10),
1868
1981
  minScore: opts.minScore || 0,
1869
1982
  candidateLimit: opts.candidateLimit,
1983
+ skipRerank: opts.skipRerank,
1870
1984
  explain: !!opts.explain,
1871
1985
  intent,
1986
+ chunkStrategy: opts.chunkStrategy,
1872
1987
  hooks: {
1873
1988
  onEmbedStart: (count) => {
1874
1989
  process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
@@ -1894,8 +2009,10 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1894
2009
  limit: opts.all ? 500 : (opts.limit || 10),
1895
2010
  minScore: opts.minScore || 0,
1896
2011
  candidateLimit: opts.candidateLimit,
2012
+ skipRerank: opts.skipRerank,
1897
2013
  explain: !!opts.explain,
1898
2014
  intent,
2015
+ chunkStrategy: opts.chunkStrategy,
1899
2016
  hooks: {
1900
2017
  onStrongSignal: (score) => {
1901
2018
  process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
@@ -1990,6 +2107,8 @@ function parseCLI() {
1990
2107
  mask: { type: "string" }, // glob pattern
1991
2108
  // Embed options
1992
2109
  force: { type: "boolean", short: "f" },
2110
+ "max-docs-per-batch": { type: "string" },
2111
+ "max-batch-mb": { type: "string" },
1993
2112
  // Update options
1994
2113
  pull: { type: "boolean" }, // git pull before update
1995
2114
  refresh: { type: "boolean" },
@@ -2000,7 +2119,10 @@ function parseCLI() {
2000
2119
  "line-numbers": { type: "boolean" }, // add line numbers to output
2001
2120
  // Query options
2002
2121
  "candidate-limit": { type: "string", short: "C" },
2122
+ "no-rerank": { type: "boolean", default: false },
2003
2123
  intent: { type: "string" },
2124
+ // Chunking options
2125
+ "chunk-strategy": { type: "string" }, // "regex" (default) or "auto" (AST for code files)
2004
2126
  // MCP HTTP transport options
2005
2127
  http: { type: "boolean" },
2006
2128
  daemon: { type: "boolean" },
@@ -2040,8 +2162,10 @@ function parseCLI() {
2040
2162
  collection: values.collection,
2041
2163
  lineNumbers: !!values["line-numbers"],
2042
2164
  candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
2165
+ skipRerank: !!values["no-rerank"],
2043
2166
  explain: !!values.explain,
2044
2167
  intent: values.intent,
2168
+ chunkStrategy: parseChunkStrategy(values["chunk-strategy"]),
2045
2169
  };
2046
2170
  return {
2047
2171
  command: positionals[0] || "",
@@ -2177,6 +2301,7 @@ function showHelp() {
2177
2301
  console.log(" qmd multi-get <pattern> - Batch fetch via glob or comma-separated list");
2178
2302
  console.log(" qmd skill show/install - Show or install the packaged QMD skill");
2179
2303
  console.log(" qmd mcp - Start the MCP server (stdio transport for AI agents)");
2304
+ console.log(" qmd bench <fixture.json> - Run search quality benchmarks against a fixture file");
2180
2305
  console.log("");
2181
2306
  console.log("Collections & context:");
2182
2307
  console.log(" qmd collection add/list/remove/rename/show - Manage indexed folders");
@@ -2187,6 +2312,8 @@ function showHelp() {
2187
2312
  console.log(" qmd status - View index + collection health");
2188
2313
  console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)");
2189
2314
  console.log(" qmd embed [-f] - Generate/refresh vector embeddings");
2315
+ console.log(" --max-docs-per-batch <n> - Cap docs loaded into memory per embedding batch");
2316
+ console.log(" --max-batch-mb <n> - Cap UTF-8 MB loaded into memory per embedding batch");
2190
2317
  console.log(" qmd cleanup - Clear caches, vacuum DB");
2191
2318
  console.log("");
2192
2319
  console.log("Query syntax (qmd query):");
@@ -2232,6 +2359,7 @@ function showHelp() {
2232
2359
  console.log("");
2233
2360
  console.log("Global options:");
2234
2361
  console.log(" --index <name> - Use a named index (default: index)");
2362
+ console.log(" QMD_EDITOR_URI - Editor link template for clickable TTY search output");
2235
2363
  console.log("");
2236
2364
  console.log("Search options:");
2237
2365
  console.log(" -n <num> - Max results (default 5, or 20 for --files/--json)");
@@ -2239,11 +2367,15 @@ function showHelp() {
2239
2367
  console.log(" --min-score <num> - Minimum similarity score");
2240
2368
  console.log(" --full - Output full document instead of snippet");
2241
2369
  console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
2370
+ console.log(" --no-rerank - Skip LLM reranking (use RRF scores only, much faster on CPU)");
2242
2371
  console.log(" --line-numbers - Include line numbers in output");
2243
2372
  console.log(" --explain - Include retrieval score traces (query --json/CLI)");
2244
2373
  console.log(" --files | --json | --csv | --md | --xml - Output format");
2245
2374
  console.log(" -c, --collection <name> - Filter by one or more collections");
2246
2375
  console.log("");
2376
+ console.log("Embed/query options:");
2377
+ console.log(" --chunk-strategy <auto|regex> - Chunking mode (default: regex; auto uses AST for code files)");
2378
+ console.log("");
2247
2379
  console.log("Multi-get options:");
2248
2380
  console.log(" -l <num> - Maximum lines per file");
2249
2381
  console.log(" --max-bytes <num> - Skip files larger than N bytes (default 10240)");
@@ -2533,7 +2665,20 @@ if (isMain) {
2533
2665
  await updateCollections();
2534
2666
  break;
2535
2667
  case "embed":
2536
- await vectorIndex(DEFAULT_EMBED_MODEL, !!cli.values.force);
2668
try {
    // Validation errors are labelled with the flag the user typed, matching
    // the `--chunk-strategy` error raised by parseChunkStrategy.
    const maxDocsPerBatch = parseEmbedBatchOption("--max-docs-per-batch", cli.values["max-docs-per-batch"]);
    const maxBatchMb = parseEmbedBatchOption("--max-batch-mb", cli.values["max-batch-mb"]);
    const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
    await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
        maxDocsPerBatch,
        // The flag is given in MB; convert it to bytes for maxBatchBytes.
        maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
        chunkStrategy: embedChunkStrategy,
    });
}
catch (error) {
    // Print only the message (no stack trace) and exit non-zero.
    console.error(error instanceof Error ? error.message : String(error));
    process.exit(1);
}
2537
2682
  break;
2538
2683
  case "pull": {
2539
2684
  const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh);
@@ -2581,6 +2726,23 @@ if (isMain) {
2581
2726
  }
2582
2727
  await querySearch(cli.query, cli.opts);
2583
2728
  break;
2729
+ case "bench": {
2730
+ const fixturePath = cli.args[0];
2731
+ if (!fixturePath) {
2732
+ console.error("Usage: qmd bench <fixture.json> [--json] [-c collection]");
2733
+ console.error("");
2734
+ console.error("Run search quality benchmarks against a fixture file.");
2735
+ console.error("See src/bench/fixtures/example.json for the fixture format.");
2736
+ process.exit(1);
2737
+ }
2738
+ const { runBenchmark } = await import("../bench/bench.js");
2739
+ const benchCollection = cli.opts.collection;
2740
+ await runBenchmark(fixturePath, {
2741
+ json: !!cli.opts.json,
2742
+ collection: Array.isArray(benchCollection) ? benchCollection[0] : benchCollection,
2743
+ });
2744
+ break;
2745
+ }
2584
2746
  case "mcp": {
2585
2747
  const sub = cli.args[0]; // stop | status | undefined
2586
2748
  // Cache dir for PID/log files — same dir as the index
@@ -21,12 +21,23 @@ export interface Collection {
21
21
  update?: string;
22
22
  includeByDefault?: boolean;
23
23
  }
24
+ /**
25
+ * Model configuration for embedding, reranking, and generation
26
+ */
27
+ export interface ModelsConfig {
28
+ embed?: string;
29
+ rerank?: string;
30
+ generate?: string;
31
+ }
24
32
  /**
25
33
  * The complete configuration file structure
26
34
  */
27
35
  export interface CollectionConfig {
28
36
  global_context?: string;
37
+ editor_uri?: string;
38
+ editor_uri_template?: string;
29
39
  collections: Record<string, Collection>;
40
+ models?: ModelsConfig;
30
41
  }
31
42
  /**
32
43
  * Collection with its name (for return values)
package/dist/db.d.ts CHANGED
@@ -4,6 +4,11 @@
4
4
  * Provides a unified Database export that works under both Bun (bun:sqlite)
5
5
  * and Node.js (better-sqlite3). The APIs are nearly identical — the main
6
6
  * difference is the import path.
7
+ *
8
+ * On macOS, Apple's system SQLite is compiled with SQLITE_OMIT_LOAD_EXTENSION,
9
+ * which prevents loading native extensions like sqlite-vec. When running under
10
+ * Bun we call Database.setCustomSQLite() to swap in Homebrew's full-featured
11
+ * SQLite build before creating any database instances.
7
12
  */
8
13
  export declare const isBun: boolean;
9
14
  /**
@@ -29,5 +34,8 @@ export interface Statement {
29
34
  }
30
35
  /**
31
36
  * Load the sqlite-vec extension into a database.
37
+ *
38
+ * Throws with platform-specific fix instructions when the extension is
39
+ * unavailable.
32
40
  */
33
41
  export declare function loadSqliteVec(db: Database): void;
package/dist/db.js CHANGED
@@ -4,6 +4,11 @@
4
4
  * Provides a unified Database export that works under both Bun (bun:sqlite)
5
5
  * and Node.js (better-sqlite3). The APIs are nearly identical — the main
6
6
  * difference is the import path.
7
+ *
8
+ * On macOS, Apple's system SQLite is compiled with SQLITE_OMIT_LOAD_EXTENSION,
9
+ * which prevents loading native extensions like sqlite-vec. When running under
10
+ * Bun we call Database.setCustomSQLite() to swap in Homebrew's full-featured
11
+ * SQLite build before creating any database instances.
7
12
  */
8
13
  export const isBun = typeof globalThis.Bun !== "undefined";
9
14
  let _Database;
@@ -11,9 +16,35 @@ let _sqliteVecLoad;
11
16
  if (isBun) {
12
17
  // Dynamic string prevents tsc from resolving bun:sqlite on Node.js builds
13
18
  const bunSqlite = "bun:" + "sqlite";
14
- _Database = (await import(/* @vite-ignore */ bunSqlite)).Database;
15
- const { getLoadablePath } = await import("sqlite-vec");
16
- _sqliteVecLoad = (db) => db.loadExtension(getLoadablePath());
19
+ const BunDatabase = (await import(/* @vite-ignore */ bunSqlite)).Database;
20
+ // See: https://bun.com/docs/runtime/sqlite#setcustomsqlite
21
+ if (process.platform === "darwin") {
22
+ const homebrewPaths = [
23
+ "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib", // Apple Silicon
24
+ "/usr/local/opt/sqlite/lib/libsqlite3.dylib", // Intel
25
+ ];
26
+ for (const p of homebrewPaths) {
27
+ try {
28
+ BunDatabase.setCustomSQLite(p);
29
+ break;
30
+ }
31
+ catch { }
32
+ }
33
+ }
34
+ _Database = BunDatabase;
35
+ // setCustomSQLite may have silently failed — test that extensions actually work.
36
+ try {
37
+ const { getLoadablePath } = await import("sqlite-vec");
38
+ const vecPath = getLoadablePath();
39
+ const testDb = new BunDatabase(":memory:");
40
+ testDb.loadExtension(vecPath);
41
+ testDb.close();
42
+ _sqliteVecLoad = (db) => db.loadExtension(vecPath);
43
+ }
44
+ catch {
45
+ // Vector search won't work, but BM25 and other operations are unaffected.
46
+ _sqliteVecLoad = null;
47
+ }
17
48
  }
18
49
  else {
19
50
  _Database = (await import("better-sqlite3")).default;
@@ -28,7 +59,17 @@ export function openDatabase(path) {
28
59
  }
29
60
  /**
30
61
  * Load the sqlite-vec extension into a database.
62
+ *
63
+ * Throws with platform-specific fix instructions when the extension is
64
+ * unavailable.
31
65
  */
32
66
  export function loadSqliteVec(db) {
67
+ if (!_sqliteVecLoad) {
68
+ const hint = isBun && process.platform === "darwin"
69
+ ? "On macOS with Bun, install Homebrew SQLite: brew install sqlite\n" +
70
+ "Or install qmd with npm instead: npm install -g @tobilu/qmd"
71
+ : "Ensure the sqlite-vec native module is installed correctly.";
72
+ throw new Error(`sqlite-vec extension is unavailable. ${hint}`);
73
+ }
33
74
  _sqliteVecLoad(db);
34
75
  }
package/dist/index.d.ts CHANGED
@@ -16,11 +16,12 @@
16
16
  * const results = await store.search({ query: "how does auth work?" })
17
17
  * await store.close()
18
18
  */
19
- import { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, type Store as InternalStore, type DocumentResult, type DocumentNotFound, type SearchResult, type HybridQueryResult, type HybridQueryOptions, type HybridQueryExplain, type ExpandedQuery, type StructuredSearchOptions, type MultiGetResult, type IndexStatus, type IndexHealthInfo, type SearchHooks, type ReindexProgress, type ReindexResult, type EmbedProgress, type EmbedResult } from "./store.js";
19
+ import { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, type Store as InternalStore, type DocumentResult, type DocumentNotFound, type SearchResult, type HybridQueryResult, type HybridQueryOptions, type HybridQueryExplain, type ExpandedQuery, type StructuredSearchOptions, type MultiGetResult, type IndexStatus, type IndexHealthInfo, type SearchHooks, type ReindexProgress, type ReindexResult, type EmbedProgress, type EmbedResult, type ChunkStrategy } from "./store.js";
20
20
  import { type Collection, type CollectionConfig, type NamedCollection, type ContextMap } from "./collections.js";
21
21
  export type { DocumentResult, DocumentNotFound, SearchResult, HybridQueryResult, HybridQueryOptions, HybridQueryExplain, ExpandedQuery, StructuredSearchOptions, MultiGetResult, IndexStatus, IndexHealthInfo, SearchHooks, ReindexProgress, ReindexResult, EmbedProgress, EmbedResult, Collection, CollectionConfig, NamedCollection, ContextMap, };
22
22
  export type { InternalStore };
23
23
  export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
24
+ export type { ChunkStrategy } from "./store.js";
24
25
  export { getDefaultDbPath } from "./store.js";
25
26
  export { Maintenance } from "./maintenance.js";
26
27
  /**
@@ -65,6 +66,8 @@ export interface SearchOptions {
65
66
  minScore?: number;
66
67
  /** Include explain traces */
67
68
  explain?: boolean;
69
+ /** Chunk strategy: "regex" (default) or "auto" (uses AST chunking for code files) */
70
+ chunkStrategy?: ChunkStrategy;
68
71
  }
69
72
  /**
70
73
  * Options for searchLex() — BM25 keyword search.
@@ -183,6 +186,9 @@ export interface QMDStore {
183
186
  embed(options?: {
184
187
  force?: boolean;
185
188
  model?: string;
189
+ maxDocsPerBatch?: number;
190
+ maxBatchBytes?: number;
191
+ chunkStrategy?: ChunkStrategy;
186
192
  onProgress?: (info: EmbedProgress) => void;
187
193
  }): Promise<EmbedResult>;
188
194
  /** Get index status (document counts, collections, embedding state) */
package/dist/index.js CHANGED
@@ -19,7 +19,7 @@
19
19
  import { createStore as createStoreInternal, hybridQuery, structuredSearch, extractSnippet, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_MULTI_GET_MAX_BYTES, reindexCollection, generateEmbeddings, listCollections as storeListCollections, syncConfigToDb, getStoreCollections, getStoreCollection, getStoreGlobalContext, getStoreContexts, upsertStoreCollection, deleteStoreCollection, renameStoreCollection, updateStoreContext, removeStoreContext, setStoreGlobalContext, vacuumDatabase, cleanupOrphanedContent, cleanupOrphanedVectors, deleteLLMCache, deleteInactiveDocuments, clearAllEmbeddings, } from "./store.js";
20
20
  import { LlamaCpp, } from "./llm.js";
21
21
  import { setConfigSource, loadConfig, addCollection as collectionsAddCollection, removeCollection as collectionsRemoveCollection, renameCollection as collectionsRenameCollection, addContext as collectionsAddContext, removeContext as collectionsRemoveContext, setGlobalContext as collectionsSetGlobalContext, } from "./collections.js";
22
- // Re-export utility functions used by frontends
22
+ // Re-export utility functions and types used by frontends
23
23
  export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
24
24
  // Re-export getDefaultDbPath for CLI/MCP that need the default database location
25
25
  export { getDefaultDbPath } from "./store.js";
@@ -63,21 +63,26 @@ export async function createStore(options) {
63
63
  // Track whether we have a YAML config path for write-through
64
64
  const hasYamlConfig = !!options.configPath;
65
65
  // Sync config into SQLite store_collections
66
+ let config;
66
67
  if (options.configPath) {
67
68
  // YAML mode: inject config source for write-through, sync to DB
68
69
  setConfigSource({ configPath: options.configPath });
69
- const config = loadConfig();
70
+ config = loadConfig();
70
71
  syncConfigToDb(db, config);
71
72
  }
72
73
  else if (options.config) {
73
74
  // Inline config mode: inject config source for mutations, sync to DB
74
75
  setConfigSource({ config: options.config });
75
- syncConfigToDb(db, options.config);
76
+ config = options.config;
77
+ syncConfigToDb(db, config);
76
78
  }
77
79
  // else: DB-only mode — no external config, use existing store_collections
78
80
  // Create a per-store LlamaCpp instance — lazy-loads models on first use,
79
81
  // auto-unloads after 5 min inactivity to free VRAM.
80
82
  const llm = new LlamaCpp({
83
+ embedModel: config?.models?.embed,
84
+ generateModel: config?.models?.generate,
85
+ rerankModel: config?.models?.rerank,
81
86
  inactivityTimeoutMs: 5 * 60 * 1000,
82
87
  disposeModelsOnInactivity: true,
83
88
  });
@@ -105,6 +110,7 @@ export async function createStore(options) {
105
110
  explain: opts.explain,
106
111
  intent: opts.intent,
107
112
  skipRerank,
113
+ chunkStrategy: opts.chunkStrategy,
108
114
  });
109
115
  }
110
116
  // Simple query string — use hybridQuery (expand + search + rerank)
@@ -115,6 +121,7 @@ export async function createStore(options) {
115
121
  explain: opts.explain,
116
122
  intent: opts.intent,
117
123
  skipRerank,
124
+ chunkStrategy: opts.chunkStrategy,
118
125
  });
119
126
  },
120
127
  searchLex: async (q, opts) => internal.searchFTS(q, opts?.limit, opts?.collection),
@@ -210,6 +217,9 @@ export async function createStore(options) {
210
217
  return generateEmbeddings(internal, {
211
218
  force: embedOpts?.force,
212
219
  model: embedOpts?.model,
220
+ maxDocsPerBatch: embedOpts?.maxDocsPerBatch,
221
+ maxBatchBytes: embedOpts?.maxBatchBytes,
222
+ chunkStrategy: embedOpts?.chunkStrategy,
213
223
  onProgress: embedOpts?.onProgress,
214
224
  });
215
225
  },
package/dist/llm.d.ts CHANGED
@@ -105,7 +105,7 @@ export type LLMSessionOptions = {
105
105
  */
106
106
  export interface ILLMSession {
107
107
  embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
108
- embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
108
+ embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]>;
109
109
  expandQuery(query: string, options?: {
110
110
  context?: string;
111
111
  includeLexical?: boolean;
@@ -137,7 +137,7 @@ export type RerankDocument = {
137
137
  };
138
138
  export declare const LFM2_GENERATE_MODEL = "hf:LiquidAI/LFM2-1.2B-GGUF/LFM2-1.2B-Q4_K_M.gguf";
139
139
  export declare const LFM2_INSTRUCT_MODEL = "hf:LiquidAI/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf";
140
- export declare const DEFAULT_EMBED_MODEL_URI: string;
140
+ export declare const DEFAULT_EMBED_MODEL_URI = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
141
141
  export declare const DEFAULT_RERANK_MODEL_URI = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
142
142
  export declare const DEFAULT_GENERATE_MODEL_URI = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
143
143
  export declare const DEFAULT_MODEL_CACHE_DIR: string;
@@ -232,6 +232,7 @@ export declare class LlamaCpp implements LLM {
232
232
  private disposeModelsOnInactivity;
233
233
  private disposed;
234
234
  constructor(config?: LlamaCppConfig);
235
+ get embedModelName(): string;
235
236
  /**
236
237
  * Reset the inactivity timer. Called after each model operation.
237
238
  * When timer fires, models are unloaded to free memory (if no active sessions).
@@ -306,6 +307,7 @@ export declare class LlamaCpp implements LLM {
306
307
  * - Combined: drops from 11.6 GB (auto, no flash) to 568 MB per context (20×)
307
308
  */
308
309
  private static readonly RERANK_CONTEXT_SIZE;
310
+ private static readonly EMBED_CONTEXT_SIZE;
309
311
  private ensureRerankContexts;
310
312
  /**
311
313
  * Tokenize text using the embedding model's tokenizer
@@ -320,12 +322,19 @@ export declare class LlamaCpp implements LLM {
320
322
  * Detokenize token IDs back to text
321
323
  */
322
324
  detokenize(tokens: readonly LlamaToken[]): Promise<string>;
325
+ /**
326
+ * Truncate text to fit within the embedding model's context window.
327
+ * Uses the model's own tokenizer for accurate token counting, then
328
+ * detokenizes back to text if truncation is needed.
329
+ * Returns the (possibly truncated) text and whether truncation occurred.
330
+ */
331
+ private truncateToContextSize;
323
332
  embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
324
333
  /**
325
334
  * Batch embed multiple texts efficiently
326
335
  * Uses Promise.all for parallel embedding - node-llama-cpp handles batching internally
327
336
  */
328
- embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
337
+ embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]>;
329
338
  generate(prompt: string, options?: GenerateOptions): Promise<GenerateResult | null>;
330
339
  modelExists(modelUri: string): Promise<ModelInfo>;
331
340
  expandQuery(query: string, options?: {