@tobilu/qmd 2.0.1 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/qmd.js CHANGED
@@ -1,32 +1,41 @@
1
1
  #!/usr/bin/env node
2
- import { openDatabase } from "../db.js";
2
+ import { isBun, openDatabase } from "../db.js";
3
3
  import fastGlob from "fast-glob";
4
4
  import { execSync, spawn as nodeSpawn } from "child_process";
5
5
  import { fileURLToPath } from "url";
6
- import { dirname, join as pathJoin, relative as relativePath } from "path";
6
+ import { basename, dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
7
7
  import { parseArgs } from "util";
8
- import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync } from "fs";
8
+ import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
9
9
  import { createInterface } from "readline/promises";
10
- import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, getHashesForEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, generateEmbeddings, syncConfigToDb, } from "../store.js";
11
- import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
10
+ import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, getEmbeddingFingerprint, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, findOrMigrateLegacyDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_QUERY_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, generateEmbeddings, maybeAdoptLegacyEmbeddingFingerprint, syncConfigToDb, } from "../store.js";
11
+ import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels, inspectGgufFile } from "../llm.js";
12
12
  import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
13
- import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, } from "../collections.js";
14
- import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js";
15
- // Enable production mode - allows using default database path
16
- // Tests must set INDEX_PATH or use createStore() with explicit path
17
- enableProductionMode();
13
+ import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, saveConfig, setConfigSource, findLocalConfigPath, getLocalDbPath, getConfigPath, configExists, } from "../collections.js";
14
+ // NOTE: enableProductionMode() is intentionally NOT called at module scope here.
15
+ // Importing this module for its exports (e.g. buildEditorUri, termLink from
16
+ // test/cli.test.ts) must not flip the global production flag, as that leaks
17
+ // into unrelated tests that rely on the default (development) database path
18
+ // resolution. The flag is flipped inside the CLI's main-module guard below so
19
+ // it only fires when qmd is actually invoked as a script.
18
20
  // =============================================================================
19
21
  // Store/DB lifecycle (no legacy singletons in store.ts)
20
22
  // =============================================================================
21
23
  let store = null;
22
24
  let storeDbPathOverride;
25
+ let currentIndexName = "index";
23
26
  function getStore() {
24
27
  if (!store) {
25
28
  store = createStore(storeDbPathOverride);
26
29
  // Sync YAML config into SQLite store_collections so store.ts reads from DB
27
30
  try {
31
+ const activeModels = ensureModelsConfiguredForCli();
28
32
  const config = loadConfig();
29
33
  syncConfigToDb(store.db, config);
34
+ setDefaultLlamaCpp(new LlamaCpp({
35
+ embedModel: activeModels.embed,
36
+ generateModel: activeModels.generate,
37
+ rerankModel: activeModels.rerank,
38
+ }));
30
39
  }
31
40
  catch {
32
41
  // Config may not exist yet — that's fine, DB works without it
@@ -59,16 +68,18 @@ function closeDb() {
59
68
  function getDbPath() {
60
69
  return store?.dbPath ?? storeDbPathOverride ?? getDefaultDbPath();
61
70
  }
71
+ function getActiveIndexName() {
72
+ return currentIndexName;
73
+ }
62
74
  function setIndexName(name) {
63
75
  let normalizedName = name;
64
76
  // Normalize relative paths to prevent malformed database paths
65
77
  if (name && name.includes('/')) {
66
- const { resolve } = require('path');
67
- const { cwd } = require('process');
68
- const absolutePath = resolve(cwd(), name);
78
+ const absolutePath = pathResolve(process.cwd(), name);
69
79
  // Replace path separators with underscores to create a valid filename
70
80
  normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
71
81
  }
82
+ currentIndexName = normalizedName || "index";
72
83
  storeDbPathOverride = normalizedName ? getDefaultDbPath(normalizedName) : undefined;
73
84
  // Reset open handle so next use opens the new index
74
85
  closeDb();
@@ -94,6 +105,51 @@ const cursor = {
94
105
  hide() { process.stderr.write('\x1b[?25l'); },
95
106
  show() { process.stderr.write('\x1b[?25h'); },
96
107
  };
108
+ async function flushWritable(stream) {
109
+ await new Promise((resolve) => {
110
+ stream.write("", () => resolve());
111
+ });
112
+ }
113
+ function shouldBypassNativeCleanup(options) {
114
+ return ((options.platform ?? process.platform) === "darwin" &&
115
+ options.command === "query" &&
116
+ options.format === "json" &&
117
+ process.env.QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT !== "1");
118
+ }
119
+ function immediateProcessExit(code) {
120
+ const processWithReallyExit = process;
121
+ if (typeof processWithReallyExit.reallyExit === "function") {
122
+ processWithReallyExit.reallyExit(code);
123
+ return;
124
+ }
125
+ process.exit(code);
126
+ }
127
+ /**
128
+ * Finish a successful CLI command after output has been flushed. On macOS JSON
129
+ * query runs, skip normal native teardown and use Node/Bun's immediate exit path:
130
+ * ggml Metal can abort from C++ finalizers after valid JSON has already been
131
+ * produced (#368). This wrapper is only reached after the command completed, so
132
+ * real query failures still exit through the normal error path before this runs.
133
+ */
134
+ export async function finishSuccessfulCliCommand(options) {
135
+ const stderr = options.stderr ?? process.stderr;
136
+ const exit = options.exit ?? ((code) => process.exit(code));
137
+ const immediateExit = options.immediateExit ?? immediateProcessExit;
138
+ await flushWritable(options.stdout ?? process.stdout);
139
+ if (shouldBypassNativeCleanup(options)) {
140
+ await flushWritable(stderr);
141
+ immediateExit(0);
142
+ return;
143
+ }
144
+ try {
145
+ await (options.cleanup ?? disposeDefaultLlamaCpp)();
146
+ }
147
+ catch (error) {
148
+ stderr.write(`QMD Warning: cleanup after successful output failed (${error instanceof Error ? error.message : String(error)}); exiting 0 because command output completed.\n`);
149
+ }
150
+ await flushWritable(stderr);
151
+ exit(0);
152
+ }
97
153
  // Ensure cursor is restored on exit
98
154
  process.on('SIGINT', () => { cursor.show(); process.exit(130); });
99
155
  process.on('SIGTERM', () => { cursor.show(); process.exit(143); });
@@ -126,8 +182,8 @@ function formatETA(seconds) {
126
182
  return `${Math.floor(seconds / 3600)}h ${Math.floor((seconds % 3600) / 60)}m`;
127
183
  }
128
184
  // Check index health and print warnings/tips
129
- function checkIndexHealth(db) {
130
- const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db);
185
+ function checkIndexHealth(db, model = resolveEmbedModelForCli()) {
186
+ const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db, model);
131
187
  // Warn if many docs need embedding
132
188
  if (needsEmbedding > 0) {
133
189
  const pct = Math.round((needsEmbedding / totalDocs) * 100);
@@ -198,6 +254,71 @@ function formatBytes(bytes) {
198
254
  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
199
255
  return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
200
256
  }
257
+ function sameDirectory(a, b) {
258
+ try {
259
+ return realpathSync(a) === realpathSync(b);
260
+ }
261
+ catch {
262
+ return pathResolve(a) === pathResolve(b);
263
+ }
264
+ }
265
+ function initLocalIndex() {
266
+ const cwd = getPwd();
267
+ if (sameDirectory(cwd, homedir())) {
268
+ throw new Error("Refusing to initialize a local index in $HOME. The global index is automatically created; run `qmd collection add <path>` for the global index, or run `qmd init` inside a project folder.");
269
+ }
270
+ const qmdDir = pathJoin(cwd, ".qmd");
271
+ const ymlPath = pathJoin(qmdDir, "index.yml");
272
+ const yamlPath = pathJoin(qmdDir, "index.yaml");
273
+ const configPath = existsSync(yamlPath) ? yamlPath : ymlPath;
274
+ const dbPath = pathJoin(qmdDir, "index.sqlite");
275
+ mkdirSync(qmdDir, { recursive: true });
276
+ setConfigSource({ configPath });
277
+ storeDbPathOverride = dbPath;
278
+ closeDb();
279
+ if (!existsSync(configPath)) {
280
+ saveConfig({
281
+ collections: {},
282
+ models: resolveModels(),
283
+ });
284
+ }
285
+ else {
286
+ ensureModelsConfiguredForCli();
287
+ }
288
+ const localStore = createStore(dbPath);
289
+ syncConfigToDb(localStore.db, loadConfig());
290
+ localStore.close();
291
+ console.log("ready to go with new local index");
292
+ }
293
+ function isForceCpuEnabled() {
294
+ const value = process.env.QMD_FORCE_CPU;
295
+ return !!value && !["false", "off", "none", "disable", "disabled", "0"].includes(value.trim().toLowerCase());
296
+ }
297
+ function configuredGpuModeLabel() {
298
+ return isForceCpuEnabled()
299
+ ? "CPU forced (QMD_FORCE_CPU)"
300
+ : (process.env.QMD_LLAMA_GPU?.trim() || "auto");
301
+ }
302
+ function summarizeDeviceNames(names) {
303
+ const counts = new Map();
304
+ for (const name of names) {
305
+ counts.set(name, (counts.get(name) || 0) + 1);
306
+ }
307
+ return Array.from(counts.entries())
308
+ .map(([name, count]) => count > 1 ? `${count}× ${name}` : name)
309
+ .join(", ");
310
+ }
311
+ function sanitizeDiagnosticMessage(message) {
312
+ const home = homedir();
313
+ return message
314
+ .replaceAll(home, "~")
315
+ .replaceAll(process.cwd(), ".")
316
+ .split("\n")
317
+ .map(line => line.trim())
318
+ .filter(Boolean)
319
+ .slice(0, 3)
320
+ .join("; ");
321
+ }
201
322
  async function showStatus() {
202
323
  const dbPath = getDbPath();
203
324
  const db = getDb();
@@ -215,7 +336,8 @@ async function showStatus() {
215
336
  // Overall stats
216
337
  const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get();
217
338
  const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get();
218
- const needsEmbedding = getHashesNeedingEmbedding(db);
339
+ const statusEmbedModel = resolveEmbedModelForCli();
340
+ const needsEmbedding = getHashesNeedingEmbedding(db, undefined, statusEmbedModel);
219
341
  // Most recent update across all collections
220
342
  const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get();
221
343
  console.log(`${c.bold}QMD Status${c.reset}\n`);
@@ -261,6 +383,34 @@ async function showStatus() {
261
383
  context: ctx.context
262
384
  });
263
385
  }
386
+ // AST chunking status
387
+ try {
388
+ const { getASTStatus } = await import("../ast.js");
389
+ const ast = await getASTStatus();
390
+ console.log(`\n${c.bold}AST Chunking${c.reset}`);
391
+ if (ast.available) {
392
+ const ok = ast.languages.filter(l => l.available).map(l => l.language);
393
+ const fail = ast.languages.filter(l => !l.available);
394
+ console.log(` Status: ${c.green}active${c.reset}`);
395
+ console.log(` Languages: ${ok.join(", ")}`);
396
+ if (fail.length > 0) {
397
+ for (const f of fail) {
398
+ console.log(` ${c.yellow}Unavailable: ${f.language} (${f.error})${c.reset}`);
399
+ }
400
+ }
401
+ }
402
+ else {
403
+ console.log(` Status: ${c.yellow}unavailable${c.reset} (falling back to regex chunking)`);
404
+ for (const l of ast.languages) {
405
+ if (l.error)
406
+ console.log(` ${c.dim}${l.language}: ${l.error}${c.reset}`);
407
+ }
408
+ }
409
+ }
410
+ catch {
411
+ console.log(`\n${c.bold}AST Chunking${c.reset}`);
412
+ console.log(` Status: ${c.dim}not available${c.reset}`);
413
+ }
264
414
  if (collections.length > 0) {
265
415
  console.log(`\n${c.bold}Collections${c.reset}`);
266
416
  for (const col of collections) {
@@ -306,41 +456,11 @@ async function showStatus() {
306
456
  const match = uri.match(/^hf:([^/]+\/[^/]+)\//);
307
457
  return match ? `https://huggingface.co/${match[1]}` : uri;
308
458
  };
459
+ const activeModels = resolveModelsForCli();
309
460
  console.log(`\n${c.bold}Models${c.reset}`);
310
- console.log(` Embedding: ${hfLink(DEFAULT_EMBED_MODEL_URI)}`);
311
- console.log(` Reranking: ${hfLink(DEFAULT_RERANK_MODEL_URI)}`);
312
- console.log(` Generation: ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
313
- }
314
- // Device / GPU info
315
- try {
316
- const llm = getDefaultLlamaCpp();
317
- const device = await llm.getDeviceInfo();
318
- console.log(`\n${c.bold}Device${c.reset}`);
319
- if (device.gpu) {
320
- console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
321
- if (device.gpuDevices.length > 0) {
322
- // Deduplicate and count GPUs
323
- const counts = new Map();
324
- for (const name of device.gpuDevices) {
325
- counts.set(name, (counts.get(name) || 0) + 1);
326
- }
327
- const deviceStr = Array.from(counts.entries())
328
- .map(([name, count]) => count > 1 ? `${count}× ${name}` : name)
329
- .join(', ');
330
- console.log(` Devices: ${deviceStr}`);
331
- }
332
- if (device.vram) {
333
- console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
334
- }
335
- }
336
- else {
337
- console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`);
338
- console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
339
- }
340
- console.log(` CPU: ${device.cpuCores} math cores`);
341
- }
342
- catch {
343
- // Don't fail status if LLM init fails
461
+ console.log(` Embedding: ${hfLink(activeModels.embed)}`);
462
+ console.log(` Reranking: ${hfLink(activeModels.rerank)}`);
463
+ console.log(` Generation: ${hfLink(activeModels.generate)}`);
344
464
  }
345
465
  // Tips section
346
466
  const tips = [];
@@ -624,7 +744,6 @@ function contextRemove(pathArg) {
624
744
  console.log(`${c.green}✓${c.reset} Removed context for: qmd://${detected.collectionName}/${detected.relativePath}`);
625
745
  }
626
746
  function getDocument(filename, fromLine, maxLines, lineNumbers) {
627
- const db = getDb();
628
747
  // Parse :linenum suffix from filename (e.g., "file.md:100")
629
748
  let inputPath = filename;
630
749
  const colonMatch = inputPath.match(/:(\d+)$/);
@@ -635,6 +754,14 @@ function getDocument(filename, fromLine, maxLines, lineNumbers) {
635
754
  inputPath = inputPath.slice(0, -colonMatch[0].length);
636
755
  }
637
756
  }
757
+ if (fromLine !== undefined)
758
+ fromLine = Math.max(1, fromLine);
759
+ const parsedIndexPath = isVirtualPath(inputPath) ? parseVirtualPath(inputPath) : null;
760
+ if (parsedIndexPath?.indexName) {
761
+ setIndexName(parsedIndexPath.indexName);
762
+ setConfigIndexName(parsedIndexPath.indexName);
763
+ }
764
+ const db = getDb();
638
765
  // Handle docid lookup (#abc123, abc123, "#abc123", "abc123", etc.)
639
766
  if (isDocid(inputPath)) {
640
767
  const docidMatch = findDocumentByDocid(db, inputPath);
@@ -787,7 +914,7 @@ function getDocument(filename, fromLine, maxLines, lineNumbers) {
787
914
  function multiGet(pattern, maxLines, maxBytes = DEFAULT_MULTI_GET_MAX_BYTES, format = "cli") {
788
915
  const db = getDb();
789
916
  // Check if it's a comma-separated list or a glob pattern
790
- const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?');
917
+ const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?') && !pattern.includes('{');
791
918
  let files;
792
919
  if (isCommaSeparated) {
793
920
  // Comma-separated list of files (can be virtual paths or relative paths)
@@ -1044,8 +1171,35 @@ function listFiles(pathArg) {
1044
1171
  // Parse the path argument
1045
1172
  let collectionName;
1046
1173
  let pathPrefix = null;
1047
- if (pathArg.startsWith('qmd://')) {
1048
- // Virtual path format: qmd://collection/path
1174
+ const afterScheme = pathArg.startsWith('qmd://') ? pathArg.slice('qmd://'.length) : null;
1175
+ if (afterScheme !== null && afterScheme.startsWith('/')) {
1176
+ // Absolute-path collection: qmd:///Users/foo/bar — normalizeVirtualPath would corrupt
1177
+ // this by stripping all leading slashes, so bypass parseVirtualPath entirely.
1178
+ const normalized = afterScheme.replace(/\/$/, '');
1179
+ const allColls = yamlListCollections();
1180
+ const match = allColls
1181
+ .filter(c => normalized === c.name || normalized.startsWith(c.name + '/'))
1182
+ .sort((a, b) => b.name.length - a.name.length)[0];
1183
+ if (match) {
1184
+ collectionName = match.name;
1185
+ const rest = normalized.slice(match.name.length).replace(/^\//, '');
1186
+ pathPrefix = rest || null;
1187
+ }
1188
+ else {
1189
+ // Preserve the historical qmd:////collection/path alias behavior for normal
1190
+ // collections when no absolute-path collection matches.
1191
+ const parsed = parseVirtualPath(pathArg);
1192
+ if (!parsed) {
1193
+ console.error(`Invalid virtual path: ${pathArg}`);
1194
+ closeDb();
1195
+ process.exit(1);
1196
+ }
1197
+ collectionName = parsed.collectionName;
1198
+ pathPrefix = parsed.path;
1199
+ }
1200
+ }
1201
+ else if (afterScheme !== null) {
1202
+ // Normal virtual path: qmd://collection-name/path
1049
1203
  const parsed = parseVirtualPath(pathArg);
1050
1204
  if (!parsed) {
1051
1205
  console.error(`Invalid virtual path: ${pathArg}`);
@@ -1055,8 +1209,24 @@ function listFiles(pathArg) {
1055
1209
  collectionName = parsed.collectionName;
1056
1210
  pathPrefix = parsed.path;
1057
1211
  }
1212
+ else if (pathArg.startsWith('/')) {
1213
+ // Raw absolute filesystem path — longest-prefix match against collection names
1214
+ const normalized = pathArg.replace(/\/$/, '');
1215
+ const allColls = yamlListCollections();
1216
+ const match = allColls
1217
+ .filter(c => normalized === c.name || normalized.startsWith(c.name + '/'))
1218
+ .sort((a, b) => b.name.length - a.name.length)[0];
1219
+ if (match) {
1220
+ collectionName = match.name;
1221
+ const rest = normalized.slice(match.name.length).replace(/^\//, '');
1222
+ pathPrefix = rest || null;
1223
+ }
1224
+ else {
1225
+ collectionName = normalized;
1226
+ }
1227
+ }
1058
1228
  else {
1059
- // Just collection name or collection/path
1229
+ // Short collection name or name/path
1060
1230
  const parts = pathArg.split('/');
1061
1231
  collectionName = parts[0] || '';
1062
1232
  if (parts.length > 1) {
@@ -1288,7 +1458,7 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
1288
1458
  try {
1289
1459
  content = readFileSync(filepath, "utf-8");
1290
1460
  }
1291
- catch (err) {
1461
+ catch {
1292
1462
  // Skip files that can't be read (e.g. iCloud evicted files returning EAGAIN)
1293
1463
  processed++;
1294
1464
  progress.set((processed / total) * 100);
@@ -1301,8 +1471,8 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
1301
1471
  }
1302
1472
  const hash = await hashContent(content);
1303
1473
  const title = extractTitle(content, relativeFile);
1304
- // Check if document exists in this collection with this path
1305
- const existing = findActiveDocument(db, collectionName, path);
1474
+ // Check if document exists (also migrates legacy lowercase paths)
1475
+ const existing = findOrMigrateLegacyDocument(db, collectionName, path);
1306
1476
  if (existing) {
1307
1477
  if (existing.hash === hash) {
1308
1478
  // Hash unchanged, but check if title needs updating
@@ -1367,42 +1537,109 @@ function renderProgressBar(percent, width = 30) {
1367
1537
  const bar = "█".repeat(filled) + "░".repeat(empty);
1368
1538
  return bar;
1369
1539
  }
1370
- async function vectorIndex(model = DEFAULT_EMBED_MODEL, force = false) {
1540
+ function parseEmbedBatchOption(name, value) {
1541
+ if (value === undefined)
1542
+ return undefined;
1543
+ const parsed = Number(value);
1544
+ if (!Number.isInteger(parsed) || parsed < 1) {
1545
+ throw new Error(`${name} must be a positive integer`);
1546
+ }
1547
+ return parsed;
1548
+ }
1549
+ function parseChunkStrategy(value) {
1550
+ if (value === undefined)
1551
+ return undefined;
1552
+ const s = String(value);
1553
+ if (s === "auto" || s === "regex")
1554
+ return s;
1555
+ throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${s}")`);
1556
+ }
1557
+ function ensureModelsConfiguredForCli() {
1558
+ try {
1559
+ const config = loadConfig();
1560
+ const models = resolveModels(config.models);
1561
+ const current = config.models ?? {};
1562
+ if (current.embed !== models.embed || current.generate !== models.generate || current.rerank !== models.rerank) {
1563
+ saveConfig({
1564
+ ...config,
1565
+ models: {
1566
+ ...current,
1567
+ embed: models.embed,
1568
+ generate: models.generate,
1569
+ rerank: models.rerank,
1570
+ },
1571
+ });
1572
+ }
1573
+ return models;
1574
+ }
1575
+ catch {
1576
+ return resolveModels();
1577
+ }
1578
+ }
1579
+ export function resolveEmbedModelForCli() {
1580
+ return ensureModelsConfiguredForCli().embed;
1581
+ }
1582
+ export function resolveGenerateModelForCli() {
1583
+ return ensureModelsConfiguredForCli().generate;
1584
+ }
1585
+ export function resolveRerankModelForCli() {
1586
+ return ensureModelsConfiguredForCli().rerank;
1587
+ }
1588
+ function resolveModelsForCli() {
1589
+ return ensureModelsConfiguredForCli();
1590
+ }
1591
+ async function vectorIndex(model = resolveEmbedModelForCli(), force = false, batchOptions) {
1371
1592
  const storeInstance = getStore();
1372
1593
  const db = storeInstance.db;
1373
1594
  if (force) {
1374
1595
  console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
1375
1596
  }
1376
1597
  // Check if there's work to do before starting
1377
- const hashesToEmbed = getHashesForEmbedding(db);
1378
- if (hashesToEmbed.length === 0 && !force) {
1598
+ const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection, model);
1599
+ if (hashesToEmbed === 0 && !force) {
1379
1600
  console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
1380
1601
  closeDb();
1381
1602
  return;
1382
1603
  }
1383
- console.log(`${c.dim}Model: ${model}${c.reset}\n`);
1604
+ console.log(`${c.dim}Model: ${shortModelName(model)}${c.reset}\n`);
1605
+ if (batchOptions?.maxDocsPerBatch !== undefined || batchOptions?.maxBatchBytes !== undefined) {
1606
+ const maxDocsPerBatch = batchOptions.maxDocsPerBatch ?? DEFAULT_EMBED_MAX_DOCS_PER_BATCH;
1607
+ const maxBatchBytes = batchOptions.maxBatchBytes ?? DEFAULT_EMBED_MAX_BATCH_BYTES;
1608
+ console.log(`${c.dim}Batch: ${maxDocsPerBatch} docs / ${formatBytes(maxBatchBytes)}${c.reset}\n`);
1609
+ }
1384
1610
  cursor.hide();
1385
1611
  progress.indeterminate();
1386
1612
  const startTime = Date.now();
1387
1613
  const result = await generateEmbeddings(storeInstance, {
1388
1614
  force,
1389
1615
  model,
1616
+ collection: batchOptions?.collection,
1617
+ maxDocsPerBatch: batchOptions?.maxDocsPerBatch,
1618
+ maxBatchBytes: batchOptions?.maxBatchBytes,
1619
+ chunkStrategy: batchOptions?.chunkStrategy,
1390
1620
  onProgress: (info) => {
1391
1621
  if (info.totalBytes === 0)
1392
1622
  return;
1393
- const percent = (info.bytesProcessed / info.totalBytes) * 100;
1623
+ // Progress is measured by input bytes, not by chunks. The final chunk
1624
+ // count is discovered lazily batch-by-batch, so displaying
1625
+ // chunksEmbedded/totalChunks makes the percent look wrong when a few
1626
+ // large documents remain. Show chunks as a count and label the byte
1627
+ // percentage explicitly as input progress.
1628
+ const percent = Math.min(100, (info.bytesProcessed / info.totalBytes) * 100);
1394
1629
  progress.set(percent);
1395
1630
  const elapsed = (Date.now() - startTime) / 1000;
1396
- const bytesPerSec = info.bytesProcessed / elapsed;
1397
- const remainingBytes = info.totalBytes - info.bytesProcessed;
1398
- const etaSec = remainingBytes / bytesPerSec;
1631
+ const bytesPerSec = elapsed > 0 ? info.bytesProcessed / elapsed : 0;
1632
+ const remainingBytes = Math.max(0, info.totalBytes - info.bytesProcessed);
1633
+ const etaSec = bytesPerSec > 0 ? remainingBytes / bytesPerSec : Number.POSITIVE_INFINITY;
1399
1634
  const bar = renderProgressBar(percent);
1400
1635
  const percentStr = percent.toFixed(0).padStart(3);
1401
- const throughput = `${formatBytes(bytesPerSec)}/s`;
1402
- const eta = elapsed > 2 ? formatETA(etaSec) : "...";
1403
- const errStr = info.errors > 0 ? ` ${c.yellow}${info.errors} err${c.reset}` : "";
1636
+ const throughput = bytesPerSec > 0 ? `${formatBytes(bytesPerSec)}/s` : ".../s";
1637
+ const eta = elapsed > 2 && Number.isFinite(etaSec) ? formatETA(etaSec) : "...";
1638
+ const inputStr = `${formatBytes(info.bytesProcessed)}/${formatBytes(info.totalBytes)} input`;
1639
+ const chunkStr = `${formatCount(info.chunksEmbedded)} chunks`;
1640
+ const errStr = info.errors > 0 ? ` ${c.yellow}${formatCount(info.errors)} err${c.reset}` : "";
1404
1641
  if (isTTY)
1405
- process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${info.chunksEmbedded}/${info.totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
1642
+ process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}% input${c.reset} ${c.dim}${chunkStr}${errStr} · ${inputStr} · ${throughput} · ETA ${eta}${c.reset} `);
1406
1643
  },
1407
1644
  });
1408
1645
  progress.clear();
@@ -1415,7 +1652,13 @@ async function vectorIndex(model = DEFAULT_EMBED_MODEL, force = false) {
1415
1652
  console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
1416
1653
  console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${result.chunksEmbedded}${c.reset} chunks from ${c.bold}${result.docsProcessed}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset}`);
1417
1654
  if (result.errors > 0) {
1418
- console.log(`${c.yellow}⚠ ${result.errors} chunks failed${c.reset}`);
1655
+ console.log(`${c.yellow}⚠ ${formatCount(result.errors)} chunks still failed after retries${c.reset}`);
1656
+ for (const failure of (result.failures ?? []).slice(0, 8)) {
1657
+ console.log(` ${c.dim}${failure.path}#${failure.seq} (${failure.attempts} attempts): ${failure.reason}${c.reset}`);
1658
+ }
1659
+ if ((result.failures?.length ?? 0) > 8) {
1660
+ console.log(` ${c.dim}...and ${formatCount((result.failures?.length ?? 0) - 8)} more${c.reset}`);
1661
+ }
1419
1662
  }
1420
1663
  }
1421
1664
  closeDb();
@@ -1513,6 +1756,45 @@ function printEmptySearchResults(format, reason = "no_results") {
1513
1756
  }
1514
1757
  console.log("No results found.");
1515
1758
  }
1759
+ const DEFAULT_EDITOR_URI_TEMPLATE = "vscode://file/{path}:{line}:{col}";
1760
+ function encodePathForEditorUri(absolutePath) {
1761
+ return encodeURI(absolutePath)
1762
+ .replace(/\?/g, "%3F")
1763
+ .replace(/#/g, "%23");
1764
+ }
1765
+ function getEditorUriTemplate() {
1766
+ const envTemplate = process.env.QMD_EDITOR_URI?.trim();
1767
+ if (envTemplate)
1768
+ return envTemplate;
1769
+ try {
1770
+ const config = loadConfig();
1771
+ const configTemplate = (config.editor_uri
1772
+ || config.editor_uri_template
1773
+ || config.editorUri
1774
+ || (typeof config["editor-uri"] === "string" ? config["editor-uri"] : undefined))?.trim();
1775
+ if (configTemplate)
1776
+ return configTemplate;
1777
+ }
1778
+ catch {
1779
+ // Ignore config parsing issues and use default template.
1780
+ }
1781
+ return DEFAULT_EDITOR_URI_TEMPLATE;
1782
+ }
1783
+ export function buildEditorUri(template, absolutePath, line, col) {
1784
+ const safeLine = Number.isFinite(line) && line > 0 ? Math.floor(line) : 1;
1785
+ const safeCol = Number.isFinite(col) && col > 0 ? Math.floor(col) : 1;
1786
+ const encodedPath = encodePathForEditorUri(absolutePath);
1787
+ return template
1788
+ .replace(/\{path\}/g, encodedPath)
1789
+ .replace(/\{line\}/g, String(safeLine))
1790
+ .replace(/\{col\}/g, String(safeCol))
1791
+ .replace(/\{column\}/g, String(safeCol));
1792
+ }
1793
+ export function termLink(text, url, isTTY = !!process.stdout.isTTY) {
1794
+ if (!isTTY)
1795
+ return text;
1796
+ return `\x1b]8;;${url}\x07${text}\x1b]8;;\x07`;
1797
+ }
1516
1798
  function outputResults(results, query, opts) {
1517
1799
  const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
1518
1800
  if (filtered.length === 0) {
@@ -1520,13 +1802,21 @@ function outputResults(results, query, opts) {
1520
1802
  return;
1521
1803
  }
1522
1804
  // Helper to create qmd:// URI from displayPath
1523
- const toQmdPath = (displayPath) => `qmd://${displayPath}`;
1805
+ const toQmdPath = (displayPath) => {
1806
+ const [collectionName, ...segments] = displayPath.split("/");
1807
+ if (!collectionName || segments.length === 0) {
1808
+ return `qmd://${displayPath}`;
1809
+ }
1810
+ const indexName = getActiveIndexName();
1811
+ return buildVirtualPath(collectionName, segments.join("/"), indexName === "index" ? undefined : indexName);
1812
+ };
1524
1813
  if (opts.format === "json") {
1525
1814
  // JSON output for LLM consumption
1526
1815
  const output = filtered.map(row => {
1527
1816
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
1817
+ const snippetInfo = extractSnippet(row.body, query, 300, row.chunkPos, row.chunkLen, opts.intent);
1528
1818
  let body = opts.full ? row.body : undefined;
1529
- let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos, undefined, opts.intent).snippet : undefined;
1819
+ let snippet = !opts.full ? snippetInfo.snippet : undefined;
1530
1820
  if (opts.lineNumbers) {
1531
1821
  if (body)
1532
1822
  body = addLineNumbers(body);
@@ -1537,6 +1827,7 @@ function outputResults(results, query, opts) {
1537
1827
  ...(docid && { docid: `#${docid}` }),
1538
1828
  score: Math.round(row.score * 100) / 100,
1539
1829
  file: toQmdPath(row.displayPath),
1830
+ line: snippetInfo.line,
1540
1831
  title: row.title,
1541
1832
  ...(row.context && { context: row.context }),
1542
1833
  ...(body && { body }),
@@ -1555,20 +1846,34 @@ function outputResults(results, query, opts) {
1555
1846
  }
1556
1847
  }
1557
1848
  else if (opts.format === "cli") {
1849
+ const editorUriTemplate = getEditorUriTemplate();
1850
+ const linkDb = getDb();
1558
1851
  for (let i = 0; i < filtered.length; i++) {
1559
1852
  const row = filtered[i];
1560
1853
  if (!row)
1561
1854
  continue;
1562
- const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
1855
+ const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent);
1563
1856
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
1564
1857
  // Line 1: filepath with docid
1565
- const path = toQmdPath(row.displayPath);
1858
+ const virtualPath = row.file.startsWith("qmd://") ? row.file : toQmdPath(row.displayPath);
1859
+ const parsed = parseVirtualPath(virtualPath);
1860
+ const absolutePath = resolveVirtualPath(linkDb, virtualPath);
1861
+ const legacyPath = toQmdPath(row.displayPath);
1862
+ const displayPath = parsed?.path || row.displayPath;
1566
1863
  // Only show :line if we actually found a term match in the snippet body (exclude header line).
1567
1864
  const snippetBody = snippet.split("\n").slice(1).join("\n").toLowerCase();
1568
1865
  const hasMatch = query.toLowerCase().split(/\s+/).some(t => t.length > 0 && snippetBody.includes(t));
1569
1866
  const lineInfo = hasMatch ? `:${line}` : "";
1570
1867
  const docidStr = docid ? ` ${c.dim}#${docid}${c.reset}` : "";
1571
- console.log(`${c.cyan}${path}${c.dim}${lineInfo}${c.reset}${docidStr}`);
1868
+ if (process.stdout.isTTY && absolutePath && parsed?.path) {
1869
+ const linkLine = hasMatch ? line : 1;
1870
+ const linkTarget = buildEditorUri(editorUriTemplate, absolutePath, linkLine, 1);
1871
+ const clickable = termLink(`${displayPath}${lineInfo}`, linkTarget);
1872
+ console.log(`${c.cyan}${clickable}${c.reset}${docidStr}`);
1873
+ }
1874
+ else {
1875
+ console.log(`${c.cyan}${legacyPath}${c.dim}${lineInfo}${c.reset}${docidStr}`);
1876
+ }
1572
1877
  // Line 2: Title (if available)
1573
1878
  if (row.title) {
1574
1879
  console.log(`${c.bold}Title: ${row.title}${c.reset}`);
@@ -1603,8 +1908,9 @@ function outputResults(results, query, opts) {
1603
1908
  }
1604
1909
  console.log();
1605
1910
  // Snippet with highlighting (diff-style header included)
1606
- let displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
1607
- const highlighted = highlightTerms(displaySnippet, query);
1911
+ const content = opts.full ? row.body : snippet;
1912
+ const displayContent = opts.lineNumbers ? addLineNumbers(content, opts.full ? 1 : line) : content;
1913
+ const highlighted = highlightTerms(displayContent, query);
1608
1914
  console.log(highlighted);
1609
1915
  // Double empty line between results
1610
1916
  if (i < filtered.length - 1)
@@ -1618,7 +1924,7 @@ function outputResults(results, query, opts) {
1618
1924
  continue;
1619
1925
  const heading = row.title || row.displayPath;
1620
1926
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
1621
- let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
1927
+ let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent).snippet;
1622
1928
  if (opts.lineNumbers) {
1623
1929
  content = addLineNumbers(content);
1624
1930
  }
@@ -1632,7 +1938,7 @@ function outputResults(results, query, opts) {
1632
1938
  const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '&quot;')}"` : "";
1633
1939
  const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '&quot;')}"` : "";
1634
1940
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
1635
- let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
1941
+ let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent).snippet;
1636
1942
  if (opts.lineNumbers) {
1637
1943
  content = addLineNumbers(content);
1638
1944
  }
@@ -1643,10 +1949,10 @@ function outputResults(results, query, opts) {
1643
1949
  // CSV format
1644
1950
  console.log("docid,score,file,title,context,line,snippet");
1645
1951
  for (const row of filtered) {
1646
- const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
1952
+ const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent);
1647
1953
  let content = opts.full ? row.body : snippet;
1648
1954
  if (opts.lineNumbers) {
1649
- content = addLineNumbers(content, line);
1955
+ content = addLineNumbers(content, opts.full ? 1 : line);
1650
1956
  }
1651
1957
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
1652
1958
  const snippetText = content || "";
@@ -1867,8 +2173,10 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1867
2173
  limit: opts.all ? 500 : (opts.limit || 10),
1868
2174
  minScore: opts.minScore || 0,
1869
2175
  candidateLimit: opts.candidateLimit,
2176
+ skipRerank: opts.skipRerank,
1870
2177
  explain: !!opts.explain,
1871
2178
  intent,
2179
+ chunkStrategy: opts.chunkStrategy,
1872
2180
  hooks: {
1873
2181
  onEmbedStart: (count) => {
1874
2182
  process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
@@ -1894,8 +2202,10 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1894
2202
  limit: opts.all ? 500 : (opts.limit || 10),
1895
2203
  minScore: opts.minScore || 0,
1896
2204
  candidateLimit: opts.candidateLimit,
2205
+ skipRerank: opts.skipRerank,
1897
2206
  explain: !!opts.explain,
1898
2207
  intent,
2208
+ chunkStrategy: opts.chunkStrategy,
1899
2209
  hooks: {
1900
2210
  onStrongSignal: (score) => {
1901
2211
  process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
@@ -1942,13 +2252,13 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1942
2252
  const displayQuery = structuredQueries
1943
2253
  ? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
1944
2254
  : query;
1945
- // Map to CLI output format — use bestChunk for snippet display
1946
2255
  outputResults(results.map(r => ({
1947
2256
  file: r.file,
1948
2257
  displayPath: r.displayPath,
1949
2258
  title: r.title,
1950
- body: r.bestChunk,
2259
+ body: r.body,
1951
2260
  chunkPos: r.bestChunkPos,
2261
+ chunkLen: r.bestChunk.length,
1952
2262
  score: r.score,
1953
2263
  context: r.context,
1954
2264
  docid: r.docid,
@@ -1990,6 +2300,8 @@ function parseCLI() {
1990
2300
  mask: { type: "string" }, // glob pattern
1991
2301
  // Embed options
1992
2302
  force: { type: "boolean", short: "f" },
2303
+ "max-docs-per-batch": { type: "string" },
2304
+ "max-batch-mb": { type: "string" },
1993
2305
  // Update options
1994
2306
  pull: { type: "boolean" }, // git pull before update
1995
2307
  refresh: { type: "boolean" },
@@ -2000,7 +2312,11 @@ function parseCLI() {
2000
2312
  "line-numbers": { type: "boolean" }, // add line numbers to output
2001
2313
  // Query options
2002
2314
  "candidate-limit": { type: "string", short: "C" },
2315
+ "no-rerank": { type: "boolean", default: false },
2316
+ "no-gpu": { type: "boolean", default: false },
2003
2317
  intent: { type: "string" },
2318
+ // Chunking options
2319
+ "chunk-strategy": { type: "string" }, // "regex" (default) or "auto" (AST for code files)
2004
2320
  // MCP HTTP transport options
2005
2321
  http: { type: "boolean" },
2006
2322
  daemon: { type: "boolean" },
@@ -2009,11 +2325,27 @@ function parseCLI() {
2009
2325
  allowPositionals: true,
2010
2326
  strict: false, // Allow unknown options to pass through
2011
2327
  });
2012
- // Select index name (default: "index")
2328
+ if (values["no-gpu"]) {
2329
+ process.env.QMD_FORCE_CPU = "1";
2330
+ }
2331
+ // Select index name (default: "index"). If no explicit --index is supplied,
2332
+ // a project-local .qmd/index.yaml overrides the global config/cache paths.
2013
2333
  const indexName = values.index;
2014
2334
  if (indexName) {
2015
2335
  setIndexName(indexName);
2016
2336
  setConfigIndexName(indexName);
2337
+ setConfigSource();
2338
+ }
2339
+ else {
2340
+ const localConfigPath = findLocalConfigPath();
2341
+ if (localConfigPath) {
2342
+ setConfigSource({ configPath: localConfigPath });
2343
+ storeDbPathOverride = getLocalDbPath(localConfigPath);
2344
+ closeDb();
2345
+ }
2346
+ else {
2347
+ setConfigSource();
2348
+ }
2017
2349
  }
2018
2350
  // Determine output format
2019
2351
  let format = "cli";
@@ -2040,8 +2372,10 @@ function parseCLI() {
2040
2372
  collection: values.collection,
2041
2373
  lineNumbers: !!values["line-numbers"],
2042
2374
  candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
2375
+ skipRerank: !!values["no-rerank"],
2043
2376
  explain: !!values.explain,
2044
2377
  intent: values.intent,
2378
+ chunkStrategy: parseChunkStrategy(values["chunk-strategy"]),
2045
2379
  };
2046
2380
  return {
2047
2381
  command: positionals[0] || "",
@@ -2079,26 +2413,293 @@ function removePath(path) {
2079
2413
  unlinkSync(path);
2080
2414
  }
2081
2415
  }
2416
/** Name of the directory that holds bundled skills. */
const SKILL_DIR = "skills";

/**
 * Locate the nearest ancestor of this module's directory that contains a
 * `skills` directory.
 *
 * @returns {string|null} That directory, or `null` when `QMD_SKILLS_DIR` is
 *   set (the override makes the lookup unnecessary) or no ancestor qualifies.
 */
function findPackageRoot() {
    if (process.env.QMD_SKILLS_DIR) {
        return null;
    }
    let candidate = dirname(fileURLToPath(import.meta.url));
    for (;;) {
        if (existsSync(resolve(candidate, SKILL_DIR))) {
            return candidate;
        }
        const above = dirname(candidate);
        // dirname() is a fixed point at the filesystem root: nothing left to try.
        if (above === candidate) {
            return null;
        }
        candidate = above;
    }
}
2434
/**
 * List the directories searched for skills.
 *
 * @param {boolean} [_runtimeOnly=false] - Accepted for call-site symmetry; not used here.
 * @returns {string[]} `[QMD_SKILLS_DIR]` when that variable is set; otherwise
 *   the package's `skills` directory if it exists, else an empty array.
 */
function getSkillSearchDirs(_runtimeOnly = false) {
    const override = process.env.QMD_SKILLS_DIR;
    if (override) {
        return [override];
    }
    const packageRoot = findPackageRoot();
    if (!packageRoot) {
        return [];
    }
    const bundledDir = resolve(packageRoot, SKILL_DIR);
    if (!existsSync(bundledDir)) {
        return [];
    }
    return [bundledDir];
}
2444
/**
 * Extract `name`, `description` and `hidden` from the `---` delimited
 * frontmatter at the top of a SKILL.md file.
 *
 * This is a deliberately small line-based reader, not a YAML parser. It
 * understands top-level `name:`, `description:` (with indented continuation
 * lines folded into one space-separated string) and `hidden:` (`true`/`yes`).
 * Matching single or double quotes around `name`/`description` are removed,
 * and a block-scalar indicator (`>`, `|`, optionally followed by `+`/`-`) on
 * the `description:` line is not treated as text.
 *
 * @param {string} content - Full file content.
 * @returns {{name: string, description: string, hidden: boolean} | null}
 *   The parsed fields, or `null` when there is no frontmatter, it is not
 *   terminated, or it has no `name`.
 */
function parseSkillFrontmatter(content) {
    const trimmed = content.trimStart();
    if (!trimmed.startsWith("---"))
        return null;
    const end = trimmed.slice(3).indexOf("\n---");
    if (end < 0)
        return null;
    const frontmatter = trimmed.slice(3, 3 + end);
    // Strip one pair of matching quotes so `name: "qmd"` compares equal to `qmd`.
    const unquote = (value) => {
        if (value.length < 2)
            return value;
        const first = value[0];
        const isQuote = first === '"' || first === "'";
        return isQuote && value[value.length - 1] === first ? value.slice(1, -1) : value;
    };
    let name = "";
    let description = "";
    let hidden = false;
    const lines = frontmatter.split(/\r?\n/);
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (line.startsWith("name:")) {
            name = unquote(line.slice("name:".length).trim());
        }
        else if (line.startsWith("description:")) {
            const inline = line.slice("description:".length).trim();
            // An empty value or a block-scalar indicator means the text lives
            // entirely on the indented lines that follow.
            const parts = inline === "" || /^[>|][+-]?$/.test(inline) ? [] : [inline];
            while (i + 1 < lines.length && /^\s+\S/.test(lines[i + 1])) {
                i++;
                parts.push(lines[i].trim());
            }
            description = unquote(parts.join(" "));
        }
        else if (line.startsWith("hidden:")) {
            const value = line.slice("hidden:".length).trim().toLowerCase();
            hidden = value === "true" || value === "yes";
        }
    }
    if (!name)
        return null;
    return { name, description, hidden };
}
2478
/**
 * Scan every skill search directory for `<entry>/SKILL.md` files with
 * parseable frontmatter.
 *
 * Unreadable directories or files, and skills whose frontmatter does not
 * parse, are skipped.
 *
 * @param {boolean} [runtimeOnly=false] - Forwarded to `getSkillSearchDirs`.
 * @returns {Array<{name: string, description: string, hidden: boolean, dir: string}>}
 *   Discovered skills sorted by name (`localeCompare`).
 */
function discoverSkills(runtimeOnly = false) {
    const found = [];
    for (const searchDir of getSkillSearchDirs(runtimeOnly)) {
        let entryNames;
        try {
            entryNames = readdirSync(searchDir);
        }
        catch {
            continue;
        }
        for (const entryName of entryNames) {
            const skillDir = resolve(searchDir, entryName);
            const manifestPath = resolve(skillDir, "SKILL.md");
            if (!existsSync(manifestPath)) {
                continue;
            }
            let manifestText;
            try {
                manifestText = readFileSync(manifestPath, "utf-8");
            }
            catch {
                continue;
            }
            const metadata = parseSkillFrontmatter(manifestText);
            if (metadata) {
                found.push({ ...metadata, dir: skillDir });
            }
        }
    }
    found.sort((left, right) => left.name.localeCompare(right.name));
    return found;
}
2508
/**
 * Look up a discovered skill by exact name.
 *
 * @param {string} name - Skill name to match.
 * @param {boolean} [runtimeOnly=false] - Forwarded to `discoverSkills`.
 * @returns {object|null} The matching skill record, or `null` if none matches.
 */
function findSkill(name, runtimeOnly = false) {
    for (const candidate of discoverSkills(runtimeOnly)) {
        if (candidate.name === name) {
            return candidate;
        }
    }
    return null;
}
2511
/**
 * Read the SKILL.md file of a skill.
 *
 * @param {{dir: string}} skill - Skill record whose `dir` contains SKILL.md.
 * @returns {string} The file content decoded as UTF-8.
 */
function readSkillContent(skill) {
    const manifestPath = resolve(skill.dir, "SKILL.md");
    return readFileSync(manifestPath, "utf-8");
}
2514
/**
 * Gather the supplementary files of a skill from its `references`,
 * `templates` and `scripts` subdirectories (direct children only).
 *
 * Collection is best-effort: a missing subdirectory, a subdirectory that
 * cannot be listed (for example a regular file with that name) and
 * unreadable entries are all skipped.
 *
 * @param {{dir: string}} skill - Skill record.
 * @returns {Array<{relativePath: string, content: string}>} Files ordered by
 *   subdirectory, then by sorted entry name; `relativePath` is `<subdir>/<file>`.
 */
function collectSkillFiles(skill) {
    const files = [];
    for (const subdirName of ["references", "templates", "scripts"]) {
        const subdir = resolve(skill.dir, subdirName);
        if (!existsSync(subdir))
            continue;
        let entries;
        try {
            entries = readdirSync(subdir).sort();
        }
        catch {
            // Listing can fail even though the path exists (not a directory,
            // no permission); treat it like any other unreadable supplement.
            continue;
        }
        for (const entry of entries) {
            const filePath = resolve(subdir, entry);
            try {
                if (!statSync(filePath).isFile())
                    continue;
                files.push({ relativePath: `${subdirName}/${basename(filePath)}`, content: readFileSync(filePath, "utf-8") });
            }
            catch {
                // Ignore unreadable supplementary files.
            }
        }
    }
    return files;
}
2082
2534
  function showSkill() {
2083
- console.log("QMD Skill (embedded)");
2535
+ const skill = findSkill("qmd");
2536
+ if (!skill) {
2537
+ throw new Error("QMD skill not found. Reinstall qmd or set QMD_SKILLS_DIR.");
2538
+ }
2539
+ console.log("QMD Skill");
2084
2540
  console.log("");
2085
- const content = getEmbeddedQmdSkillContent();
2541
+ const content = readSkillContent(skill);
2086
2542
  process.stdout.write(content.endsWith("\n") ? content : content + "\n");
2087
2543
  }
2088
- function writeEmbeddedSkill(targetDir, force) {
2544
/**
 * Recursively copy the regular files and subdirectories of `sourceDir` into
 * `targetDir`, creating `targetDir` as needed.
 *
 * Entries that `statSync` reports as neither a file nor a directory are skipped.
 *
 * @param {string} sourceDir - Directory to copy from.
 * @param {string} targetDir - Directory to copy into.
 */
function copyDirectoryContents(sourceDir, targetDir) {
    mkdirSync(targetDir, { recursive: true });
    readdirSync(sourceDir).forEach((entryName) => {
        const from = resolve(sourceDir, entryName);
        const to = resolve(targetDir, entryName);
        const info = statSync(from);
        if (info.isDirectory()) {
            copyDirectoryContents(from, to);
            return;
        }
        if (info.isFile()) {
            copyFileSync(from, to);
        }
    });
}
2558
/**
 * Build the SKILL.md text written into an installed skill directory.
 *
 * The installed file is a small bootstrap that tells the agent to load the
 * full instructions via `qmd skill show`.
 *
 * @returns {string} The stub content, terminated by a newline.
 */
function installedSkillStubContent() {
    const stubLines = [
        "---",
        "name: qmd",
        "description: Bootstrap QMD search instructions from the installed qmd CLI. Use when users ask to find notes, retrieve documents, inspect a wiki, or answer from indexed local markdown.",
        "license: MIT",
        "compatibility: Requires qmd CLI. Run `qmd skill show` for version-matched instructions.",
        "allowed-tools: Bash(qmd:*), mcp__qmd__*",
        "---",
        "",
        "# QMD - Query Markdown Documents",
        "",
        "This installed skill is intentionally a small bootstrap so it does not go stale",
        "when the qmd package updates.",
        "",
        "Load the full, version-matched QMD instructions from the CLI:",
        "",
        "!`qmd skill show`",
        "",
        "If your agent does not support bang-command expansion, run:",
        "",
        "```bash",
        "qmd skill show",
        "```",
        "",
        "Then follow those instructions. In short: search first, fetch full sources with",
        "`qmd get` or `qmd multi-get`, and answer from retrieved text rather than snippets.",
    ];
    return stubLines.join("\n") + "\n";
}
2586
/**
 * Install the bundled `qmd` skill into `targetDir`.
 *
 * The skill directory is copied, then its SKILL.md is overwritten with the
 * bootstrap stub from `installedSkillStubContent`.
 *
 * @param {string} targetDir - Destination directory for the skill.
 * @param {boolean} force - Replace an existing install instead of failing.
 * @throws {Error} When `targetDir` exists and `force` is falsy, or when the
 *   bundled skill cannot be found.
 */
function writeSkillInstall(targetDir, force) {
    const alreadyInstalled = pathExists(targetDir);
    if (alreadyInstalled && !force) {
        throw new Error(`Skill already exists: ${targetDir} (use --force to replace it)`);
    }
    // Resolve the source BEFORE touching the destination: if the bundled skill
    // is missing, a --force install must not destroy the existing one.
    const skill = findSkill("qmd");
    if (!skill) {
        throw new Error("QMD skill not found. Reinstall qmd or set QMD_SKILLS_DIR.");
    }
    if (alreadyInstalled) {
        removePath(targetDir);
    }
    copyDirectoryContents(skill.dir, targetDir);
    writeFileSync(resolve(targetDir, "SKILL.md"), installedSkillStubContent(), "utf-8");
}
2600
/**
 * Print a payload to stdout as a single line of compact JSON.
 *
 * @param {unknown} payload - Value to serialize.
 */
function outputSkillsJson(payload) {
    const serialized = JSON.stringify(payload);
    console.log(serialized);
}
2603
/**
 * Dispatch `qmd skills <subcommand>`.
 *
 * Subcommands: `list` (default), `get`, `path`, `help`.
 *
 * @param {string[]} args - Positional arguments; `args[0]` is the subcommand.
 * @param {boolean} jsonMode - Emit structured JSON instead of text.
 * @param {boolean} [fullOption=false] - Same effect as `--full` in `args` (for `get`).
 * @param {boolean} [allOption=false] - Same effect as `--all` in `args` (for `get`).
 * @throws {Error} On an unknown subcommand, an unknown skill name, or `get`
 *   resolving to no skills.
 */
function runSkillsCommand(args, jsonMode, fullOption = false, allOption = false) {
    const subcommand = args[0] ?? "list";
    // Evaluated lazily so `help` and unknown subcommands never scan the disk.
    const listVisibleSkills = () => discoverSkills(true).filter((entry) => !entry.hidden);
    const requireSkill = (skillName) => {
        const match = findSkill(skillName, true);
        if (!match) {
            throw new Error(`Skill not found: ${skillName}`);
        }
        return match;
    };
    const writeTerminated = (text) => {
        process.stdout.write(text.endsWith("\n") ? text : text + "\n");
    };

    if (subcommand === "list") {
        const visible = listVisibleSkills();
        if (jsonMode) {
            outputSkillsJson({ success: true, data: visible.map(({ name, description }) => ({ name, description })) });
            return;
        }
        if (visible.length === 0) {
            console.log("No skills found");
            return;
        }
        const nameWidth = Math.max(...visible.map((entry) => entry.name.length));
        for (const entry of visible) {
            console.log(` ${entry.name.padEnd(nameWidth)} ${entry.description}`);
        }
        return;
    }

    if (subcommand === "get") {
        const includeFiles = fullOption || args.includes("--full");
        const everything = allOption || args.includes("--all");
        const requestedNames = args.slice(1).filter((arg) => arg !== "--full" && arg !== "--all");
        const targets = everything
            ? listVisibleSkills()
            : requestedNames.map((skillName) => requireSkill(skillName));
        if (targets.length === 0) {
            throw new Error("No skill name provided. Usage: qmd skills get <name>");
        }
        if (jsonMode) {
            const data = targets.map((skill) => {
                const record = { name: skill.name, content: readSkillContent(skill) };
                if (includeFiles) {
                    record.files = collectSkillFiles(skill).map((file) => ({ path: file.relativePath, content: file.content }));
                }
                return record;
            });
            outputSkillsJson({ success: true, data });
            return;
        }
        let isFirst = true;
        for (const skill of targets) {
            // Separator between skills, never before the first one.
            if (!isFirst) {
                console.log("\n---\n");
            }
            isFirst = false;
            writeTerminated(readSkillContent(skill));
            if (includeFiles) {
                for (const file of collectSkillFiles(skill)) {
                    console.log(`\n--- ${file.relativePath} ---\n`);
                    writeTerminated(file.content);
                }
            }
        }
        return;
    }

    if (subcommand === "path") {
        const skillName = args[1];
        if (!skillName) {
            const paths = getSkillSearchDirs(true);
            if (jsonMode) {
                outputSkillsJson({ success: true, data: { paths } });
            }
            else {
                for (const searchPath of paths) {
                    console.log(searchPath);
                }
            }
            return;
        }
        const skill = requireSkill(skillName);
        if (jsonMode) {
            outputSkillsJson({ success: true, data: { name: skill.name, path: skill.dir } });
        }
        else {
            console.log(skill.dir);
        }
        return;
    }

    if (subcommand === "help") {
        showSkillsHelp();
        return;
    }

    throw new Error(`Unknown skills subcommand: ${subcommand}`);
}
2690
/** Print usage information for the `qmd skills` command to stdout. */
function showSkillsHelp() {
    const helpLines = [
        "Usage: qmd skills <list|get|path> [options]",
        "",
        "Commands:",
        " list List bundled runtime skills",
        " get <name> Print a bundled runtime skill",
        " get <name> --full Include references/templates/scripts",
        " get --all Print all bundled runtime skills",
        " path [name] Print runtime skill directory path(s)",
        "",
        "Options:",
        " --json Print structured JSON",
    ];
    for (const helpLine of helpLines) {
        console.log(helpLine);
    }
}
2102
2703
  function ensureClaudeSymlink(linkPath, targetDir, force) {
2103
2704
  const parentDir = dirname(linkPath);
2104
2705
  if (pathExists(parentDir)) {
@@ -2148,7 +2749,7 @@ async function shouldCreateClaudeSymlink(linkPath, autoYes) {
2148
2749
  }
2149
2750
  async function installSkill(globalInstall, force, autoYes) {
2150
2751
  const installDir = getSkillInstallDir(globalInstall);
2151
- writeEmbeddedSkill(installDir, force);
2752
+ writeSkillInstall(installDir, force);
2152
2753
  console.log(`✓ Installed QMD skill to ${installDir}`);
2153
2754
  const claudeLinkPath = getClaudeSkillLinkPath(globalInstall);
2154
2755
  if (!(await shouldCreateClaudeSymlink(claudeLinkPath, autoYes))) {
@@ -2175,8 +2776,10 @@ function showHelp() {
2175
2776
  console.log(" qmd vsearch <query> - Vector similarity only");
2176
2777
  console.log(" qmd get <file>[:line] [-l N] - Show a single document, optional line slice");
2177
2778
  console.log(" qmd multi-get <pattern> - Batch fetch via glob or comma-separated list");
2178
- console.log(" qmd skill show/install - Show or install the packaged QMD skill");
2779
+ console.log(" qmd skills list/get/path - List and retrieve bundled runtime skills");
2780
+ console.log(" qmd skill show/install - Show or install the QMD skill");
2179
2781
  console.log(" qmd mcp - Start the MCP server (stdio transport for AI agents)");
2782
+ console.log(" qmd bench <fixture.json> - Run search quality benchmarks against a fixture file");
2180
2783
  console.log("");
2181
2784
  console.log("Collections & context:");
2182
2785
  console.log(" qmd collection add/list/remove/rename/show - Manage indexed folders");
@@ -2184,9 +2787,12 @@ function showHelp() {
2184
2787
  console.log(" qmd ls [collection[/path]] - Inspect indexed files");
2185
2788
  console.log("");
2186
2789
  console.log("Maintenance:");
2790
+ console.log(" qmd init - Create a project-local .qmd index");
2187
2791
  console.log(" qmd status - View index + collection health");
2188
2792
  console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)");
2189
- console.log(" qmd embed [-f] - Generate/refresh vector embeddings");
2793
+ console.log(" qmd embed [-f] [-c <name>] - Generate/refresh vector embeddings");
2794
+ console.log(" --max-docs-per-batch <n> - Cap docs loaded into memory per embedding batch");
2795
+ console.log(" --max-batch-mb <n> - Cap UTF-8 MB loaded into memory per embedding batch");
2190
2796
  console.log(" qmd cleanup - Clear caches, vacuum DB");
2191
2797
  console.log("");
2192
2798
  console.log("Query syntax (qmd query):");
@@ -2225,6 +2831,7 @@ function showHelp() {
2225
2831
  console.log("");
2226
2832
  console.log("AI agents & integrations:");
2227
2833
  console.log(" - Run `qmd mcp` to expose the MCP server (stdio) to agents/IDEs.");
2834
+ console.log(" - Run `qmd skills get qmd --full` for version-matched agent instructions.");
2228
2835
  console.log(" - `qmd skill install` installs the QMD skill into ./.agents/skills/qmd.");
2229
2836
  console.log(" - Use `qmd skill install --global` for ~/.agents/skills/qmd.");
2230
2837
  console.log(" - `qmd --skill` is kept as an alias for `qmd skill show`.");
@@ -2232,6 +2839,7 @@ function showHelp() {
2232
2839
  console.log("");
2233
2840
  console.log("Global options:");
2234
2841
  console.log(" --index <name> - Use a named index (default: index)");
2842
+ console.log(" QMD_EDITOR_URI - Editor link template for clickable TTY search output");
2235
2843
  console.log("");
2236
2844
  console.log("Search options:");
2237
2845
  console.log(" -n <num> - Max results (default 5, or 20 for --files/--json)");
@@ -2239,11 +2847,16 @@ function showHelp() {
2239
2847
  console.log(" --min-score <num> - Minimum similarity score");
2240
2848
  console.log(" --full - Output full document instead of snippet");
2241
2849
  console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
2850
+ console.log(" --no-rerank - Skip LLM reranking (use RRF scores only, much faster on CPU)");
2851
+ console.log(" --no-gpu - Force CPU mode for llama.cpp operations (same as QMD_FORCE_CPU=1)");
2242
2852
  console.log(" --line-numbers - Include line numbers in output");
2243
2853
  console.log(" --explain - Include retrieval score traces (query --json/CLI)");
2244
2854
  console.log(" --files | --json | --csv | --md | --xml - Output format");
2245
2855
  console.log(" -c, --collection <name> - Filter by one or more collections");
2246
2856
  console.log("");
2857
+ console.log("Embed/query options:");
2858
+ console.log(" --chunk-strategy <auto|regex> - Chunking mode (default: regex; auto uses AST for code files)");
2859
+ console.log("");
2247
2860
  console.log("Multi-get options:");
2248
2861
  console.log(" -l <num> - Maximum lines per file");
2249
2862
  console.log(" --max-bytes <num> - Skip files larger than N bytes (default 10240)");
@@ -2251,10 +2864,531 @@ function showHelp() {
2251
2864
  console.log("");
2252
2865
  console.log(`Index: ${getDbPath()}`);
2253
2866
  }
2254
- async function showVersion() {
2867
/**
 * Print a single `qmd doctor` result line to stdout.
 *
 * @param {string} label - Name of the check being reported.
 * @param {boolean} ok - Truthy prints a green check mark, falsy a yellow warning sign.
 * @param {string} details - Text appended after the label.
 */
function doctorCheck(label, ok, details) {
    let status;
    if (ok) {
        status = `${c.green}✓${c.reset}`;
    }
    else {
        status = `${c.yellow}⚠${c.reset}`;
    }
    console.log(`${status} ${label}: ${details}`);
}
2871
/**
 * Format a count using en-US locale conventions (e.g. thousands separators).
 *
 * @param {number} n - Value to format.
 * @returns {string} The locale-formatted representation.
 */
function formatCount(n) {
    const locale = "en-US";
    return n.toLocaleString(locale);
}
2874
/**
 * Produce a compact display name for a model identifier.
 *
 * Identifiers starting with "hf:" are reduced to the text after the last "/"
 * (falling back to the full identifier when that text is empty). Any other
 * identifier longer than 56 characters is cut to 53 characters plus "...".
 *
 * @param {string} model - Model identifier or path.
 * @returns {string} Shortened name for display.
 */
function shortModelName(model) {
    if (!model.startsWith("hf:")) {
        if (model.length <= 56) {
            return model;
        }
        return `${model.slice(0, 53)}...`;
    }
    const lastSegment = model.split("/").pop();
    return lastSegment || model;
}
2880
/**
 * De-duplicate the recommended next steps and drop plain "Run `qmd embed`"
 * recommendations when a forced rebuild is already being recommended.
 *
 * Matching is done on the leading command of each step rather than on any
 * mention of the command. A step that merely mentions `qmd embed --force` as
 * a fallback does not count as a forced-rebuild recommendation, and steps for
 * other commands that mention `qmd embed` in passing (for example the
 * `qmd pull` recommendation) are never removed.
 *
 * @param {string[]} steps - Recommendations collected by the doctor checks.
 * @returns {string[]} Unique steps in first-seen order, with plain embed
 *   steps removed when a step starting with "Run `qmd embed --force`" exists.
 */
function normalizedDoctorNextSteps(steps) {
    const forcePrefix = "Run `qmd embed --force`";
    // The closing backtick keeps this prefix from matching the --force variant.
    const plainPrefix = "Run `qmd embed`";
    const unique = Array.from(new Set(steps));
    const hasForceEmbed = unique.some(step => step.startsWith(forcePrefix));
    if (!hasForceEmbed) {
        return unique;
    }
    return unique.filter(step => !step.startsWith(plainPrefix));
}
2887
/**
 * Shorten a "<hash>_<seq>" identifier for display.
 *
 * With an underscore present, the result is the first 12 characters of the
 * input, an underscore, and everything after the last underscore. Without
 * one, inputs longer than 18 characters are cut to 18 characters plus "...".
 *
 * @param {string} hashSeq - Identifier to shorten.
 * @returns {string} Shortened identifier.
 */
function shortHashSeq(hashSeq) {
    const separatorAt = hashSeq.lastIndexOf("_");
    if (separatorAt >= 0) {
        const head = hashSeq.slice(0, 12);
        const tail = hashSeq.slice(separatorAt + 1);
        return `${head}_${tail}`;
    }
    if (hashSeq.length > 18) {
        return `${hashSeq.slice(0, 18)}...`;
    }
    return hashSeq;
}
2893
/**
 * Reinterpret a stored byte view as 32-bit floats.
 *
 * The bytes covered by the view are first copied into a fresh ArrayBuffer, so
 * the returned array starts at offset 0 of its own buffer regardless of the
 * view's byteOffset.
 *
 * @param {{buffer: ArrayBuffer, byteOffset: number, byteLength: number}} bytes
 *   Typed-array-like view over the stored embedding bytes.
 * @returns {Float32Array} Floats decoded from the copied bytes.
 */
function decodeStoredEmbedding(bytes) {
    const { buffer, byteOffset, byteLength } = bytes;
    const copied = buffer.slice(byteOffset, byteOffset + byteLength);
    return new Float32Array(copied);
}
2896
/**
 * Cosine distance (1 - cosine similarity) between two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} The distance, or Number.POSITIVE_INFINITY when the
 *   lengths differ, the vectors are empty, or either has zero magnitude.
 */
function cosineDistance(a, b) {
    const size = a.length;
    if (size === 0 || size !== b.length) {
        return Number.POSITIVE_INFINITY;
    }
    let dotProduct = 0;
    let squaredA = 0;
    let squaredB = 0;
    let index = 0;
    while (index < size) {
        // Missing entries are treated as 0.
        const left = a[index] ?? 0;
        const right = b[index] ?? 0;
        dotProduct += left * right;
        squaredA += left * left;
        squaredB += right * right;
        index++;
    }
    if (squaredA === 0 || squaredB === 0) {
        return Number.POSITIVE_INFINITY;
    }
    const magnitude = Math.sqrt(squaredA) * Math.sqrt(squaredB);
    return 1 - (dotProduct / magnitude);
}
2913
/**
 * Prepare a model file path for inclusion in diagnostic output by passing it
 * through sanitizeDiagnosticMessage.
 *
 * @param {string} path - Model file path.
 * @returns {string} The sanitized path text.
 */
function formatModelDiagnosticPath(path) {
    const displayPath = sanitizeDiagnosticMessage(path);
    return displayPath;
}
2916
/**
 * Locate a usable local file for a model and collect details about files
 * that were found but failed inspection.
 *
 * For "hf:" identifiers, files in DEFAULT_MODEL_CACHE_DIR whose name contains
 * the identifier's last path segment are inspected in directory order and the
 * first valid one wins. Any other identifier is inspected directly as a path.
 *
 * @param {string} model - "hf:" model identifier or local file path.
 * @returns {{path: string|null, invalid: string[]}} The valid file path (or
 *   null) and one "<path>: <details>" entry per rejected file.
 */
function findCachedModelInspection(model) {
    const invalid = [];
    const describe = (file, inspection) => `${formatModelDiagnosticPath(file)}: ${inspection.details}`;
    if (!model.startsWith("hf:")) {
        const direct = inspectGgufFile(model);
        if (direct.valid) {
            return { path: model, invalid };
        }
        // Only report the file as invalid when it actually exists.
        if (direct.exists) {
            invalid.push(describe(model, direct));
        }
        return { path: null, invalid };
    }
    const filename = model.split("/").pop();
    if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) {
        return { path: null, invalid };
    }
    const matchingFiles = readdirSync(DEFAULT_MODEL_CACHE_DIR, { withFileTypes: true })
        .filter(entry => entry.isFile() && entry.name.includes(filename));
    for (const entry of matchingFiles) {
        const candidate = pathJoin(DEFAULT_MODEL_CACHE_DIR, entry.name);
        const inspection = inspectGgufFile(candidate);
        if (inspection.valid) {
            return { path: candidate, invalid };
        }
        invalid.push(describe(candidate, inspection));
    }
    return { path: null, invalid };
}
2941
/**
 * Sanitize an environment variable value and cap it at 96 characters for
 * display (longer values become the first 93 characters plus "...").
 *
 * @param {string} value - Raw environment variable value.
 * @returns {string} Sanitized, possibly truncated value.
 */
function envValueForDisplay(value) {
    const clean = sanitizeDiagnosticMessage(value);
    if (clean.length <= 96) {
        return clean;
    }
    return `${clean.slice(0, 93)}...`;
}
2945
/**
 * List the recognized environment variables that are currently set to a
 * non-blank value, with a short note on what each one changes.
 *
 * Model variables get a computed note: when the index config defines a
 * different model for the same key, the variable is reported as ignored;
 * otherwise it is reported as selecting the active model.
 *
 * @param {{embed: string, generate: string, rerank: string}} activeModels
 *   Models currently in effect.
 * @param {{embed?: string, generate?: string, rerank?: string}} [configModels={}]
 *   Models declared in the index config.
 * @returns {{name: string, value: string, consequence: string}[]} Overrides
 *   in a fixed reporting order.
 */
function collectEnvironmentOverrides(activeModels, configModels = {}) {
    const activeByKey = {
        embed: activeModels.embed,
        generate: activeModels.generate,
        rerank: activeModels.rerank,
    };
    // Entries with `modelKey` get their consequence computed at report time.
    const specs = [
        { name: "INDEX_PATH", consequence: "overrides the SQLite index path; QMD reads/writes a different database" },
        { name: "QMD_CONFIG_DIR", consequence: "overrides the QMD config directory and takes precedence over XDG_CONFIG_HOME" },
        { name: "XDG_CONFIG_HOME", consequence: "moves QMD config to $XDG_CONFIG_HOME/qmd when QMD_CONFIG_DIR is not set" },
        { name: "XDG_CACHE_HOME", consequence: "moves the default index cache, model cache, and MCP daemon PID files" },
        { name: "QMD_EMBED_MODEL", modelKey: "embed" },
        { name: "QMD_GENERATE_MODEL", modelKey: "generate" },
        { name: "QMD_RERANK_MODEL", modelKey: "rerank" },
        { name: "QMD_FORCE_CPU", consequence: "forces llama.cpp to bypass GPU backends; embeddings/query will be slower but GPU crashes are avoided" },
        { name: "QMD_LLAMA_GPU", consequence: "selects llama.cpp GPU backend (metal/cuda/vulkan) or disables GPU when set to false/off/0" },
        { name: "QMD_DOCTOR_DEVICE_PROBE", consequence: "controls qmd doctor native device probing; 0/off skips GPU probing" },
        { name: "QMD_EMBED_PARALLELISM", consequence: "overrides embedding parallel context count; too high can exhaust RAM/VRAM" },
        { name: "QMD_EXPAND_CONTEXT_SIZE", consequence: "overrides query expansion context size; larger values use more memory" },
        { name: "QMD_RERANK_CONTEXT_SIZE", consequence: "overrides reranker context size; larger values use more memory" },
        { name: "QMD_EMBED_CONTEXT_SIZE", consequence: "overrides embed context size; larger values use more memory" },
        { name: "QMD_EDITOR_URI", consequence: "overrides clickable editor link template in terminal output" },
        { name: "QMD_SKILLS_DIR", consequence: "overrides where qmd skills are discovered from" },
        { name: "QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT", consequence: "disables macOS JSON-query safe exit workaround; may re-expose Metal finalizer crashes" },
        { name: "NO_COLOR", consequence: "disables colored terminal output" },
        { name: "CI", consequence: "disables real LLM operations inside QMD's LlamaCpp wrapper" },
        { name: "HF_ENDPOINT", consequence: "changes Hugging Face download endpoint used when pulling models" },
        { name: "QMD_WRAPPER_CAPTURE", consequence: "test/debug hook for the qmd shell wrapper; should not be set in normal use" },
        { name: "WSL_DISTRO_NAME", consequence: "enables WSL path handling heuristics" },
        { name: "WSL_INTEROP", consequence: "enables WSL path handling heuristics" },
    ];
    const overrides = [];
    for (const spec of specs) {
        const raw = process.env[spec.name]?.trim();
        if (!raw) {
            continue;
        }
        let consequence = spec.consequence;
        if (spec.modelKey !== undefined) {
            const key = spec.modelKey;
            const configured = configModels[key];
            if (configured && configured !== raw) {
                consequence = `set but ignored because index models.${key} is configured as ${configured}`;
            }
            else {
                consequence = `sets the active ${key} model to ${activeByKey[key]}; changes embedding/search semantics and may require \`qmd pull\` plus \`qmd embed\``;
            }
        }
        overrides.push({ name: spec.name, value: envValueForDisplay(raw), consequence });
    }
    return overrides;
}
2988
/**
 * Load the index config and report whether it is usable.
 *
 * Prints an "index config" doctor line and, when no collections are
 * configured or loading fails, appends a recommendation to `nextSteps`.
 *
 * @param {string[]} nextSteps - Recommendation list, appended to in place.
 * @returns {{config: object|null, valid: boolean}} The loaded config with
 *   `valid: true`, or `{ config: null, valid: false }` when loading threw.
 */
function checkDoctorIndexConfig(nextSteps) {
    try {
        const config = loadConfig();
        const total = Object.keys(config.collections ?? {}).length;
        if (total !== 0) {
            const noun = total === 1 ? "collection" : "collections";
            doctorCheck("index config", true, `${formatCount(total)} ${noun} configured`);
        }
        else {
            doctorCheck("index config", false, "no collections configured. Next: `qmd collection add .`");
            nextSteps.push("Run `qmd collection add . --name <name>` from the folder you want to index, or edit .qmd/index.yml manually.");
        }
        return { config, valid: true };
    }
    catch (error) {
        const rawMessage = error instanceof Error ? error.message : String(error);
        const message = sanitizeDiagnosticMessage(rawMessage);
        const configPath = getConfigPath();
        doctorCheck("index config", false, `invalid index.yml at ${configPath}: ${message}. Next: fix the YAML and rerun \`qmd doctor\``);
        nextSteps.push(`Fix invalid YAML in ${configPath}, then rerun \`qmd doctor\`.`);
        return { config: null, valid: false };
    }
}
3009
/**
 * Report which recognized environment variables are set.
 *
 * Prints a passing "environment overrides" line when none are set; otherwise
 * prints a warning line with the count followed by one line per override.
 *
 * @param {object} activeModels - Models currently in effect.
 * @param {object} [configModels={}] - Models declared in the index config.
 */
function checkEnvironmentOverrides(activeModels, configModels = {}) {
    const found = collectEnvironmentOverrides(activeModels, configModels);
    const anySet = found.length !== 0;
    doctorCheck("environment overrides", !anySet, anySet ? `${found.length} set` : "none");
    found.forEach(({ name, value, consequence }) => {
        console.log(` - ${name}=${value}: ${consequence}`);
    });
}
3020
/**
 * Report whether the embedding, generation and reranking models come from
 * the built-in defaults.
 *
 * For each role at most one note is produced, checked in this order: the
 * environment variable is set and equals the active model; the index config
 * names a model other than the default; the environment variable is set but
 * the active model differs from it.
 *
 * @param {{embed: string, generate: string, rerank: string}} activeModels
 *   Models currently in effect.
 * @param {{embed?: string, generate?: string, rerank?: string}} [configModels={}]
 *   Models declared in the index config.
 */
function checkModelDefaults(activeModels, configModels = {}) {
    // [role label, model key, default model, environment variable name]
    const roles = [
        ["embedding", "embed", DEFAULT_EMBED_MODEL, "QMD_EMBED_MODEL"],
        ["generation", "generate", DEFAULT_QUERY_MODEL, "QMD_GENERATE_MODEL"],
        ["reranking", "rerank", DEFAULT_RERANK_MODEL, "QMD_RERANK_MODEL"],
    ];
    const notes = [];
    for (const [role, key, defaultModel, envName] of roles) {
        const active = activeModels[key];
        const configured = configModels[key];
        const envValue = process.env[envName]?.trim();
        if (envValue && active === envValue) {
            notes.push(`${role}: env ${envName}=${active} (default ${defaultModel}; might be ok)`);
            continue;
        }
        if (configured && configured !== defaultModel) {
            notes.push(`${role}: index ${configured} (default ${defaultModel}; might be ok)`);
            continue;
        }
        // Reaching here with envValue set means the active model differs from it.
        if (envValue) {
            notes.push(`${role}: ${envName} is set to ${envValue} but index config uses ${active}`);
        }
    }
    if (notes.length > 0) {
        doctorCheck("model defaults", false, `non-default model configuration: ${notes.join("; ")}`);
        return;
    }
    doctorCheck("model defaults", true, "using QMD codebase defaults");
}
3045
/**
 * Check that every active model has a valid local file and report the result
 * as the "model cache" doctor line.
 *
 * Roles sharing the same model are grouped so each model is inspected once.
 * When files are invalid or missing, a recommendation is appended to
 * `nextSteps`.
 *
 * @param {{embed: string, generate: string, rerank: string}} activeModels
 *   Models currently in effect.
 * @param {string[]} nextSteps - Recommendation list, appended to in place.
 */
function checkModelCache(activeModels, nextSteps) {
    const rolesByModel = new Map();
    const register = (role, model) => {
        const roles = rolesByModel.get(model);
        if (roles) {
            roles.push(role);
        }
        else {
            rolesByModel.set(model, [role]);
        }
    };
    register("embedding", activeModels.embed);
    register("generation", activeModels.generate);
    register("reranking", activeModels.rerank);
    const missing = [];
    const cached = [];
    const invalid = [];
    rolesByModel.forEach((roles, model) => {
        const label = `${roles.join("+")}: ${model}`;
        const inspection = findCachedModelInspection(model);
        for (const detail of inspection.invalid) {
            invalid.push(`${label} (${detail})`);
        }
        (inspection.path ? cached : missing).push(label);
    });
    const hasInvalid = invalid.length > 0;
    if (!hasInvalid && missing.length === 0) {
        const phrase = cached.length === 1 ? "model is" : "models are";
        doctorCheck("model cache", true, `${cached.length} active ${phrase} downloaded and valid GGUF`);
        return;
    }
    const parts = [];
    if (hasInvalid) {
        parts.push(`invalid ${invalid.length}: ${invalid.join("; ")}`);
    }
    if (missing.length > 0) {
        parts.push(`missing ${missing.length}/${rolesByModel.size}: ${missing.join("; ")}`);
    }
    let next;
    let recommendation;
    if (hasInvalid) {
        next = "Next: run `qmd pull --refresh` (or remove the bad cached file)";
        recommendation = "Run `qmd pull --refresh` to replace invalid cached model files, or delete the listed file and rerun `qmd pull`.";
    }
    else {
        next = "Next: run `qmd pull`";
        recommendation = "Run `qmd pull` to download missing embedding/generation/reranking models before `qmd embed` or `qmd query`.";
    }
    doctorCheck("model cache", false, `${parts.join("; ")}. ${next}`);
    nextSteps.push(recommendation);
}
3089
/**
 * Re-embed a few randomly chosen stored chunks and compare each result with
 * the vector stored in `vectors_vec`.
 *
 * A sample counts as a mismatch when its chunk can no longer be produced,
 * the embedding call returns nothing, the stored vector is missing, or the
 * cosine distance between the fresh and stored vectors exceeds 0.0001.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} model - Embedding model recorded on the vectors to sample.
 * @param {string} fingerprint - Embedding fingerprint recorded on the vectors to sample.
 * @param {number} [sampleSize=3] - Maximum number of chunks to sample.
 * @returns {Promise<{ok: boolean, details: string}>} Outcome and a
 *   human-readable summary.
 */
async function checkEmbeddingVectorSamples(db, model, fingerprint, sampleSize = 3) {
    const activeDocs = db.prepare(`SELECT COUNT(*) AS count FROM documents WHERE active = 1`).get().count;
    if (activeDocs === 0) {
        return { ok: true, details: "no active documents indexed" };
    }
    const vecTableExists = db.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
    if (!vecTableExists) {
        return { ok: false, details: "no vector table to test; please run qmd embed again" };
    }
    const samples = db.prepare(`
        SELECT cv.hash, cv.seq, c.doc AS body, MIN(d.path) AS path
        FROM content_vectors cv
        JOIN documents d ON d.hash = cv.hash AND d.active = 1
        JOIN content c ON c.hash = cv.hash
        WHERE cv.model = ? AND cv.embed_fingerprint = ?
        GROUP BY cv.hash, cv.seq, c.doc
        ORDER BY random()
        LIMIT ?
    `).all(model, fingerprint, sampleSize);
    if (samples.length === 0) {
        return { ok: false, details: "no current embedded chunks to test; please run qmd embed again" };
    }
    const threshold = 0.0001;
    const mismatches = [];
    // Prepared on first use and then reused, so the statement is compiled at
    // most once instead of once per sample.
    let storedVectorStatement = null;
    const loadStoredVector = (hashSeq) => {
        if (storedVectorStatement === null) {
            storedVectorStatement = db.prepare(`SELECT embedding FROM vectors_vec WHERE hash_seq = ?`);
        }
        return storedVectorStatement.get(hashSeq);
    };
    await withLLMSession(async (session) => {
        for (const sample of samples) {
            const hashSeq = `${sample.hash}_${sample.seq}`;
            const chunks = await chunkDocumentByTokens(sample.body, undefined, undefined, undefined, sample.path, undefined, session.signal);
            const chunk = chunks[sample.seq];
            if (!chunk) {
                mismatches.push(`${shortHashSeq(hashSeq)}: chunk no longer exists`);
                continue;
            }
            const title = extractTitle(sample.body, sample.path);
            const result = await session.embed(formatDocForEmbedding(chunk.text, title, model), { model });
            if (!result) {
                mismatches.push(`${shortHashSeq(hashSeq)}: embedding failed`);
                continue;
            }
            const stored = loadStoredVector(hashSeq);
            if (!stored) {
                mismatches.push(`${shortHashSeq(hashSeq)}: stored vector missing`);
                continue;
            }
            const distance = cosineDistance(result.embedding, decodeStoredEmbedding(stored.embedding));
            if (distance > threshold) {
                mismatches.push(`${shortHashSeq(hashSeq)}: stored vector distance ${distance.toFixed(6)}`);
            }
        }
    }, { maxDuration: 10 * 60 * 1000, name: "doctorEmbeddingVectorSample" });
    if (mismatches.length > 0) {
        return {
            ok: false,
            // Only the first mismatch is shown to keep the line short.
            details: `${mismatches.length}/${samples.length} sampled chunks differ from stored vectors (${mismatches[0]}). Rebuild with \`qmd embed --force\``,
        };
    }
    return {
        ok: true,
        details: `${samples.length} sampled ${samples.length === 1 ? "chunk" : "chunks"} reproduce stored vectors`,
    };
}
3150
/**
 * Check whether any of the given directories contains a file named exactly
 * `libraryBaseName` or starting with `libraryBaseName` followed by a dot
 * (e.g. a versioned "libfoo.so.1").
 *
 * Empty entries, non-existent directories and directories that cannot be
 * listed are skipped.
 *
 * @param {string} libraryBaseName - Library file name to look for.
 * @param {Iterable<string|undefined>} dirs - Directories to search.
 * @returns {boolean} True when a matching entry is found.
 */
function hasLibraryInDirs(libraryBaseName, dirs) {
    const versionedPrefix = `${libraryBaseName}.`;
    const isMatch = (name) => name === libraryBaseName || name.startsWith(versionedPrefix);
    for (const dir of dirs) {
        if (dir && existsSync(dir)) {
            try {
                if (readdirSync(dir).some(isMatch)) {
                    return true;
                }
            }
            catch { /* ignore unreadable system library dirs */ }
        }
    }
    return false;
}
3164
/**
 * On Linux, detect the situation where NVIDIA driver libraries can be found
 * but CUDA user-space libraries cannot.
 *
 * Directories searched: entries of LD_LIBRARY_PATH and CUDA_PATH, the
 * "lib64" and "targets/x86_64-linux/lib" subdirectories of CUDA_PATH, a fixed
 * list of system locations, and the same two subdirectories of every
 * "/usr/local/cuda-*" directory.
 *
 * @returns {string|null} A diagnostic sentence naming the missing libraries,
 *   or null when not on Linux, no driver library is found, or nothing is
 *   missing.
 */
function linuxCudaRuntimeDiagnostic() {
    if (process.platform !== "linux") {
        return null;
    }
    const candidates = new Set();
    const addCudaRoot = (root) => {
        candidates.add(pathJoin(root, "lib64"));
        candidates.add(pathJoin(root, "targets", "x86_64-linux", "lib"));
    };
    [process.env.LD_LIBRARY_PATH, process.env.CUDA_PATH].forEach((value) => {
        (value ?? "").split(":").filter(Boolean).forEach((part) => candidates.add(part));
    });
    if (process.env.CUDA_PATH) {
        addCudaRoot(process.env.CUDA_PATH);
    }
    const systemDirs = [
        "/usr/lib",
        "/usr/lib64",
        "/usr/lib/x86_64-linux-gnu",
        "/usr/local/cuda/lib64",
        "/usr/local/cuda/targets/x86_64-linux/lib",
    ];
    systemDirs.forEach((dir) => candidates.add(dir));
    try {
        readdirSync("/usr/local")
            .filter((entry) => entry.toLowerCase().startsWith("cuda-"))
            .forEach((entry) => addCudaRoot(pathJoin("/usr/local", entry)));
    }
    catch { /* /usr/local may not be readable in restricted environments */ }
    const searchDirs = Array.from(candidates);
    const driverVisible = hasLibraryInDirs("libcuda.so", searchDirs) || hasLibraryInDirs("libnvidia-ml.so", searchDirs);
    if (!driverVisible) {
        return null;
    }
    const required = [
        ["libcudart.so", "CUDA runtime"],
        ["libcublas.so", "cuBLAS"],
        ["libcublasLt.so", "cuBLASLt"],
    ];
    const missing = [];
    for (const [library, label] of required) {
        if (!hasLibraryInDirs(library, searchDirs)) {
            missing.push(label);
        }
    }
    if (missing.length === 0) {
        return null;
    }
    return `NVIDIA driver libraries are visible, but CUDA user-space libraries are missing from loader paths (${missing.join(", ")})`;
}
3207
/**
 * Report the configured device mode and probe the llama.cpp backend for
 * GPU/CPU details.
 *
 * The probe is skipped when QMD_DOCTOR_DEVICE_PROBE is one of
 * 0/false/off/no/skip. On a TTY a temporary hint is written before probing
 * and blanked out afterwards. Recommendations are appended to `nextSteps`.
 *
 * @param {string[]} nextSteps - Recommendation list, appended to in place.
 * @returns {Promise<void>}
 */
async function runDoctorDeviceChecks(nextSteps) {
    doctorCheck("device mode", true, configuredGpuModeLabel());
    const probeSetting = (process.env.QMD_DOCTOR_DEVICE_PROBE ?? "").trim().toLowerCase();
    const skipValues = new Set(["0", "false", "off", "no", "skip"]);
    if (skipValues.has(probeSetting)) {
        doctorCheck("device probe", false, "skipped by QMD_DOCTOR_DEVICE_PROBE=0. Next: unset it and rerun `qmd doctor` to verify GPU/CPU acceleration");
        nextSteps.push("Unset `QMD_DOCTOR_DEVICE_PROBE` and rerun `qmd doctor` when you want to verify llama.cpp device acceleration.");
        return;
    }
    const crashHint = "Probing native llama backend now. If qmd crashes here, rerun with `QMD_FORCE_CPU=1 qmd doctor` (or `QMD_DOCTOR_DEVICE_PROBE=0 qmd doctor` to skip this probe).";
    // Overwrite the hint with spaces and return the cursor to column 0.
    const eraseHint = () => {
        if (process.stdout.isTTY) {
            process.stdout.write(`\r${" ".repeat(crashHint.length)}\r`);
        }
    };
    if (process.stdout.isTTY) {
        process.stdout.write(`${c.dim}${crashHint}${c.reset}`);
    }
    try {
        const device = await getDefaultLlamaCpp().getDeviceInfo({ allowBuild: false });
        eraseHint();
        if (!device.gpu) {
            const cudaDiagnostic = linuxCudaRuntimeDiagnostic();
            const diagnosticSuffix = cudaDiagnostic ? ` ${cudaDiagnostic}.` : "";
            doctorCheck("device probe", false, `running on CPU (${device.cpuCores} math cores).${diagnosticSuffix} Next: install/configure Metal, CUDA, or Vulkan for faster embeddings, or set QMD_FORCE_CPU=1 to make CPU mode explicit`);
            nextSteps.push(cudaDiagnostic
                ? `${cudaDiagnostic}; install CUDA runtime/cuBLAS libraries or add their directory to LD_LIBRARY_PATH, then rerun \`qmd doctor\`.`
                : "Vector operations are running on CPU; install/configure Metal, CUDA, or Vulkan if embedding/query performance is too slow.");
            return;
        }
        const onMacMetal = device.gpu === "metal" && process.platform === "darwin";
        const gpuLabel = onMacMetal ? "metal (macOS Metal backend)" : String(device.gpu);
        const parts = [`GPU ${gpuLabel}`, `offloading ${device.gpuOffloading ? "enabled" : "disabled"}`];
        if (device.gpuDevices.length > 0) {
            parts.push(`devices: ${summarizeDeviceNames(device.gpuDevices)}`);
        }
        if (device.vram) {
            parts.push(`VRAM ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
        }
        parts.push(`${device.cpuCores} CPU math cores`);
        const summary = parts.join("; ");
        if (device.gpuOffloading) {
            doctorCheck("device probe", device.gpuOffloading, summary);
        }
        else {
            doctorCheck("device probe", device.gpuOffloading, `${summary}. Next: check QMD_LLAMA_GPU and llama.cpp backend support`);
            nextSteps.push("GPU was detected but offloading is disabled; check `QMD_LLAMA_GPU=metal|cuda|vulkan` and rerun `qmd doctor`.");
        }
    }
    catch (error) {
        eraseHint();
        const rawMessage = error instanceof Error ? error.message : String(error);
        const message = sanitizeDiagnosticMessage(rawMessage);
        doctorCheck("device probe", false, `probe failed: ${message}. Next: run with QMD_FORCE_CPU=1 to bypass GPU probing, or set QMD_LLAMA_GPU=metal|cuda|vulkan and retry`);
        nextSteps.push("GPU probe failed; try `QMD_FORCE_CPU=1 qmd doctor` to confirm CPU fallback, then fix GPU drivers/backend if acceleration is expected.");
    }
}
3263
/**
 * Run every `qmd doctor` diagnostic and print the results, followed by a
 * de-duplicated list of recommended next steps.
 *
 * Checks run in order: SQLite runtime, better-sqlite3 package version,
 * sqlite-vec, index config, environment overrides, model defaults, model
 * cache, device probe, legacy fingerprint adoption, embedding freshness,
 * embedding fingerprints, and an embedding vector sample.
 *
 * The database is closed in a `finally` block so it is released even when a
 * step that has no guard of its own throws; the error still propagates to
 * the caller.
 *
 * @returns {Promise<void>}
 */
async function showDoctor() {
    const storeInstance = getStore();
    const errorText = (error) => (error instanceof Error ? error.message : String(error));
    try {
        const db = storeInstance.db;
        const pkg = readPackageJson();
        const activeModels = resolveModelsForCli();
        const embedModel = activeModels.embed;
        const fingerprint = getEmbeddingFingerprint(embedModel);
        const nextSteps = [];
        console.log(`${c.bold}QMD Doctor${c.reset}\n`);
        console.log(`Index: ${getDbPath()}`);
        console.log(`Runtime: ${isBun ? "bun:sqlite" : "better-sqlite3"}`);
        try {
            const row = db.prepare(`SELECT sqlite_version() AS version`).get();
            doctorCheck("SQLite runtime", true, row.version);
        }
        catch (error) {
            doctorCheck("SQLite runtime", false, errorText(error));
        }
        // Reports the version range declared in package.json, not a loaded module version.
        const betterSqliteVersion = pkg.dependencies?.["better-sqlite3"] ?? pkg.devDependencies?.["better-sqlite3"] ?? "not declared";
        doctorCheck("better-sqlite3 package", true, String(betterSqliteVersion));
        try {
            const row = db.prepare(`SELECT vec_version() AS version`).get();
            doctorCheck("sqlite-vec", true, row.version);
        }
        catch (error) {
            doctorCheck("sqlite-vec", false, errorText(error));
        }
        const configCheck = checkDoctorIndexConfig(nextSteps);
        const configModels = configCheck.config?.models ?? {};
        checkEnvironmentOverrides(activeModels, configModels);
        checkModelDefaults(activeModels, configModels);
        checkModelCache(activeModels, nextSteps);
        await runDoctorDeviceChecks(nextSteps);
        try {
            const adoption = await maybeAdoptLegacyEmbeddingFingerprint(storeInstance, embedModel);
            if (adoption.checked || adoption.adopted > 0) {
                doctorCheck("legacy fingerprint adoption", adoption.adopted > 0, adoption.adopted > 0 ? `adopted ${adoption.adopted} legacy chunks; ${adoption.reason}` : adoption.reason);
            }
        }
        catch (error) {
            doctorCheck("legacy fingerprint adoption", false, errorText(error));
        }
        try {
            const pending = getHashesNeedingEmbedding(db, undefined, embedModel);
            doctorCheck("embedding freshness", pending === 0, pending === 0 ? "all active documents match current fingerprint" : `${formatCount(pending)} active documents need embeddings. Next: \`qmd embed\``);
            if (pending > 0) {
                nextSteps.push(`Run \`qmd embed\` to generate ${formatCount(pending)} missing/stale document embeddings.`);
            }
        }
        catch (error) {
            doctorCheck("embedding freshness", false, errorText(error));
        }
        try {
            const rows = db.prepare(`
                SELECT model, embed_fingerprint AS fingerprint, COUNT(DISTINCT hash) AS docs, COUNT(*) AS chunks
                FROM content_vectors
                GROUP BY model, embed_fingerprint
                ORDER BY chunks DESC, model, embed_fingerprint
            `).all();
            const uniqueFingerprints = new Set(rows.map(row => row.fingerprint));
            const offCurrent = rows.filter(row => row.model === embedModel && row.fingerprint !== fingerprint);
            // Healthy means: no vectors at all, or exactly one fingerprint in use and it is the current one.
            const ok = rows.length === 0 || (uniqueFingerprints.size === 1 && rows[0]?.fingerprint === fingerprint && offCurrent.length === 0);
            const currentDocs = rows
                .filter(row => row.model === embedModel && row.fingerprint === fingerprint)
                .reduce((sum, row) => sum + row.docs, 0);
            const otherDocs = rows.reduce((sum, row) => sum + row.docs, 0) - currentDocs;
            const groups = rows.map(row => {
                const label = row.fingerprint === fingerprint ? "current" : (row.fingerprint || "legacy");
                return `${shortModelName(row.model)}:${label} ${formatCount(row.docs)} docs/${formatCount(row.chunks)} chunks`;
            }).join("; ");
            // Rows with an empty fingerprint are excluded from the mixed-fingerprint report.
            const namedFingerprintRows = rows.filter(row => row.fingerprint);
            const namedFingerprints = [...new Set(namedFingerprintRows.map(row => row.fingerprint))];
            if (namedFingerprints.length > 1) {
                const namedGroups = namedFingerprintRows
                    .map(row => `${row.fingerprint}${row.fingerprint === fingerprint ? " (current)" : ""}: ${shortModelName(row.model)} ${formatCount(row.docs)} docs/${formatCount(row.chunks)} chunks`)
                    .join("; ");
                doctorCheck("mixed named embedding fingerprints", false, `content_vectors contains ${namedFingerprints.length} named fingerprints: ${namedGroups}. Next: \`qmd embed\` or \`qmd embed --force\``);
                nextSteps.push("Run `qmd embed` to converge mixed named embedding fingerprints; use `qmd embed --force` if old named fingerprints or vector sample mismatches remain.");
            }
            const details = rows.length === 0
                ? `no vectors yet; current fingerprint ${fingerprint}`
                : ok
                    ? `${formatCount(currentDocs)} docs on current fingerprint (${fingerprint})`
                    : `${formatCount(currentDocs)} docs current, ${formatCount(otherDocs)} docs legacy/stale. ${groups}. Next: \`qmd embed\``;
            doctorCheck("embedding fingerprints", ok, details);
            if (!ok) {
                nextSteps.push("Run `qmd embed` to migrate active documents to the current embedding fingerprint; use `qmd embed --force` if vector samples still fail afterward.");
            }
        }
        catch (error) {
            doctorCheck("embedding fingerprints", false, errorText(error));
        }
        try {
            const vectorSample = await checkEmbeddingVectorSamples(db, embedModel, fingerprint);
            doctorCheck("embedding vector sample", vectorSample.ok, vectorSample.details);
            if (!vectorSample.ok) {
                nextSteps.push("Run `qmd embed --force` to rebuild existing vectors that no longer reproduce under the current embedding pipeline.");
            }
        }
        catch (error) {
            const message = sanitizeDiagnosticMessage(errorText(error));
            doctorCheck("embedding vector sample", false, `${message}; rebuild with \`qmd embed --force\``);
            nextSteps.push("Run `qmd embed --force` to rebuild existing vectors, then rerun `qmd doctor`.");
        }
        const steps = normalizedDoctorNextSteps(nextSteps);
        if (steps.length > 0) {
            console.log(`\n${c.bold}Recommended next step${steps.length === 1 ? "" : "s"}${c.reset}`);
            for (const step of steps) {
                console.log(` - ${step}`);
            }
        }
    }
    finally {
        closeDb();
    }
}
3376
/**
 * Write a one-line pointer to `qmd doctor` on stderr.
 */
function printDoctorHint() {
    const hint = "If qmd still behaves unexpectedly, run 'qmd doctor' for diagnostics.";
    console.error(hint);
}
3379
/**
 * Print an error and the doctor hint to stderr, then terminate the process.
 *
 * @param {unknown} error - Error instances print their message; anything
 *   else is printed via String().
 * @param {number} [code=1] - Process exit code.
 */
function exitWithError(error, code = 1) {
    let text;
    if (error instanceof Error) {
        text = error.message;
    }
    else {
        text = String(error);
    }
    console.error(text);
    printDoctorHint();
    process.exit(code);
}
3384
/**
 * Read and parse the package.json located two directories above this
 * script's directory.
 *
 * @returns {object} The parsed package.json contents.
 */
function readPackageJson() {
    const here = dirname(fileURLToPath(import.meta.url));
    const manifestPath = resolve(here, "..", "..", "package.json");
    const manifestText = readFileSync(manifestPath, "utf-8");
    return JSON.parse(manifestText);
}
3389
+ async function showVersion() {
3390
+ const scriptDir = dirname(fileURLToPath(import.meta.url));
3391
+ const pkg = readPackageJson();
2258
3392
  let commit = "";
2259
3393
  try {
2260
3394
  commit = execSync(`git -C ${scriptDir} rev-parse --short HEAD`, { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
@@ -2273,6 +3407,10 @@ const isMain = argv1 === __filename
2273
3407
  || argv1?.endsWith("/qmd.js")
2274
3408
  || (argv1 != null && realpathSync(argv1) === __filename);
2275
3409
  if (isMain) {
3410
+ // Flip to production mode only when this module is executed as the CLI
3411
+ // entrypoint, not when imported for its exports. Tests must set INDEX_PATH
3412
+ // or use createStore() with an explicit path.
3413
+ enableProductionMode();
2276
3414
  const cli = parseCLI();
2277
3415
  if (cli.values.version) {
2278
3416
  await showVersion();
@@ -2286,8 +3424,8 @@ if (isMain) {
2286
3424
  console.log("Usage: qmd skill <show|install> [options]");
2287
3425
  console.log("");
2288
3426
  console.log("Commands:");
2289
- console.log(" show Print the packaged QMD skill");
2290
- console.log(" install Install into ./.agents/skills/qmd");
3427
+ console.log(" show Print the QMD skill");
3428
+ console.log(" install Install QMD skill into ./.agents/skills/qmd");
2291
3429
  console.log("");
2292
3430
  console.log("Options:");
2293
3431
  console.log(" --global Install into ~/.agents/skills/qmd");
@@ -2522,25 +3660,57 @@ if (isMain) {
2522
3660
  default:
2523
3661
  console.error(`Unknown subcommand: ${subcommand}`);
2524
3662
  console.error("Run 'qmd collection help' for usage");
3663
+ printDoctorHint();
2525
3664
  process.exit(1);
2526
3665
  }
2527
3666
  break;
2528
3667
  }
3668
+ case "init":
3669
+ try {
3670
+ initLocalIndex();
3671
+ }
3672
+ catch (error) {
3673
+ exitWithError(error);
3674
+ }
3675
+ break;
2529
3676
  case "status":
2530
3677
  await showStatus();
2531
3678
  break;
3679
+ case "doctor":
3680
+ await showDoctor();
3681
+ break;
2532
3682
  case "update":
2533
3683
  await updateCollections();
2534
3684
  break;
2535
3685
  case "embed":
2536
- await vectorIndex(DEFAULT_EMBED_MODEL, !!cli.values.force);
3686
+ try {
3687
+ const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
3688
+ const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
3689
+ const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
3690
+ // Validate -c against configured collections before dispatching, so a
3691
+ // typo errors with "Collection not found: X" instead of silently
3692
+ // reporting success because no pending docs match a nonexistent name.
3693
+ // embed operates on a single collection; only the first value is used.
3694
+ const embedValidatedCollections = resolveCollectionFilter(cli.opts.collection, false);
3695
+ const embedCollection = embedValidatedCollections[0];
3696
+ await vectorIndex(resolveEmbedModelForCli(), !!cli.values.force, {
3697
+ maxDocsPerBatch,
3698
+ maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
3699
+ chunkStrategy: embedChunkStrategy,
3700
+ collection: embedCollection,
3701
+ });
3702
+ }
3703
+ catch (error) {
3704
+ exitWithError(error);
3705
+ }
2537
3706
  break;
2538
3707
  case "pull": {
2539
3708
  const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh);
3709
+ const activeModels = resolveModelsForCli();
2540
3710
  const models = [
2541
- DEFAULT_EMBED_MODEL_URI,
2542
- DEFAULT_GENERATE_MODEL_URI,
2543
- DEFAULT_RERANK_MODEL_URI,
3711
+ activeModels.embed,
3712
+ activeModels.generate,
3713
+ activeModels.rerank,
2544
3714
  ];
2545
3715
  console.log(`${c.bold}Pulling models${c.reset}`);
2546
3716
  const results = await pullModels(models, {
@@ -2581,6 +3751,25 @@ if (isMain) {
2581
3751
  }
2582
3752
  await querySearch(cli.query, cli.opts);
2583
3753
  break;
3754
+ case "bench": {
3755
+ const fixturePath = cli.args[0];
3756
+ if (!fixturePath) {
3757
+ console.error("Usage: qmd bench <fixture.json> [--json] [-c collection]");
3758
+ console.error("");
3759
+ console.error("Run search quality benchmarks against a fixture file.");
3760
+ console.error("See src/bench/fixtures/example.json for the fixture format.");
3761
+ process.exit(1);
3762
+ }
3763
+ const { runBenchmark } = await import("../bench/bench.js");
3764
+ const benchCollection = cli.opts.collection;
3765
+ await runBenchmark(fixturePath, {
3766
+ json: !!cli.values.json,
3767
+ collection: Array.isArray(benchCollection) ? benchCollection[0] : benchCollection,
3768
+ dbPath: getDbPath(),
3769
+ configPath: configExists() ? getConfigPath() : undefined,
3770
+ });
3771
+ break;
3772
+ }
2584
3773
  case "mcp": {
2585
3774
  const sub = cli.args[0]; // stop | status | undefined
2586
3775
  // Cache dir for PID/log files — same dir as the index
@@ -2626,9 +3815,10 @@ if (isMain) {
2626
3815
  const logPath = resolve(cacheDir, "mcp.log");
2627
3816
  const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
2628
3817
  const selfPath = fileURLToPath(import.meta.url);
3818
+ const indexArgs = cli.values.index ? ["--index", String(cli.values.index)] : [];
2629
3819
  const spawnArgs = selfPath.endsWith(".ts")
2630
- ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, "mcp", "--http", "--port", String(port)]
2631
- : [selfPath, "mcp", "--http", "--port", String(port)];
3820
+ ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)]
3821
+ : [selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)];
2632
3822
  const child = nodeSpawn(process.execPath, spawnArgs, {
2633
3823
  stdio: ["ignore", logFd, logFd],
2634
3824
  detached: true,
@@ -2646,10 +3836,10 @@ if (isMain) {
2646
3836
  process.removeAllListeners("SIGINT");
2647
3837
  const { startMcpHttpServer } = await import("../mcp/server.js");
2648
3838
  try {
2649
- await startMcpHttpServer(port);
3839
+ await startMcpHttpServer(port, { dbPath: getDbPath() });
2650
3840
  }
2651
3841
  catch (e) {
2652
- if (e?.code === "EADDRINUSE") {
3842
+ if (typeof e === "object" && e !== null && "code" in e && e.code === "EADDRINUSE") {
2653
3843
  console.error(`Port ${port} already in use. Try a different port with --port.`);
2654
3844
  process.exit(1);
2655
3845
  }
@@ -2659,7 +3849,27 @@ if (isMain) {
2659
3849
  else {
2660
3850
  // Default: stdio transport
2661
3851
  const { startMcpServer } = await import("../mcp/server.js");
2662
- await startMcpServer();
3852
+ await startMcpServer({ dbPath: getDbPath() });
3853
+ }
3854
+ break;
3855
+ }
3856
+ case "skills": {
3857
+ try {
3858
+ if (cli.values.help || cli.args[0] === "help") {
3859
+ showSkillsHelp();
3860
+ }
3861
+ else {
3862
+ runSkillsCommand(cli.args, Boolean(cli.values.json), Boolean(cli.values.full), Boolean(cli.values.all));
3863
+ }
3864
+ }
3865
+ catch (error) {
3866
+ if (cli.values.json) {
3867
+ outputSkillsJson({ success: false, error: error instanceof Error ? error.message : String(error) });
3868
+ }
3869
+ else {
3870
+ console.error(error instanceof Error ? error.message : String(error));
3871
+ }
3872
+ process.exit(1);
2663
3873
  }
2664
3874
  break;
2665
3875
  }
@@ -2675,8 +3885,7 @@ if (isMain) {
2675
3885
  await installSkill(Boolean(cli.values.global), Boolean(cli.values.force), Boolean(cli.values.yes));
2676
3886
  }
2677
3887
  catch (error) {
2678
- console.error(error instanceof Error ? error.message : String(error));
2679
- process.exit(1);
3888
+ exitWithError(error);
2680
3889
  }
2681
3890
  break;
2682
3891
  }
@@ -2685,8 +3894,8 @@ if (isMain) {
2685
3894
  console.log("Usage: qmd skill <show|install> [options]");
2686
3895
  console.log("");
2687
3896
  console.log("Commands:");
2688
- console.log(" show Print the packaged QMD skill");
2689
- console.log(" install Install into ./.agents/skills/qmd");
3897
+ console.log(" show Print the QMD skill");
3898
+ console.log(" install Install QMD skill into ./.agents/skills/qmd");
2690
3899
  console.log("");
2691
3900
  console.log("Options:");
2692
3901
  console.log(" --global Install into ~/.agents/skills/qmd");
@@ -2697,6 +3906,7 @@ if (isMain) {
2697
3906
  default:
2698
3907
  console.error(`Unknown subcommand: ${subcommand}`);
2699
3908
  console.error("Run 'qmd skill help' for usage");
3909
+ printDoctorHint();
2700
3910
  process.exit(1);
2701
3911
  }
2702
3912
  break;
@@ -2728,10 +3938,13 @@ if (isMain) {
2728
3938
  default:
2729
3939
  console.error(`Unknown command: ${cli.command}`);
2730
3940
  console.error("Run 'qmd --help' for usage.");
3941
+ printDoctorHint();
2731
3942
  process.exit(1);
2732
3943
  }
2733
3944
  if (cli.command !== "mcp") {
2734
- await disposeDefaultLlamaCpp();
2735
- process.exit(0);
3945
+ await finishSuccessfulCliCommand({
3946
+ command: cli.command,
3947
+ format: cli.opts.format,
3948
+ });
2736
3949
  }
2737
3950
  } // end if (main module)