@tobilu/qmd 2.1.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/qmd.js CHANGED
@@ -1,39 +1,41 @@
1
1
  #!/usr/bin/env node
2
- import { openDatabase } from "../db.js";
2
+ import { isBun, openDatabase } from "../db.js";
3
3
  import fastGlob from "fast-glob";
4
4
  import { execSync, spawn as nodeSpawn } from "child_process";
5
5
  import { fileURLToPath } from "url";
6
- import { dirname, join as pathJoin, relative as relativePath } from "path";
6
+ import { basename, dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
7
7
  import { parseArgs } from "util";
8
- import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync } from "fs";
8
+ import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
9
9
  import { createInterface } from "readline/promises";
10
- import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, generateEmbeddings, syncConfigToDb, } from "../store.js";
11
- import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
10
+ import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, getEmbeddingFingerprint, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, findOrMigrateLegacyDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_QUERY_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, generateEmbeddings, maybeAdoptLegacyEmbeddingFingerprint, syncConfigToDb, } from "../store.js";
11
+ import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels, inspectGgufFile } from "../llm.js";
12
12
  import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
13
- import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, } from "../collections.js";
14
- import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js";
15
- // Enable production mode - allows using default database path
16
- // Tests must set INDEX_PATH or use createStore() with explicit path
17
- enableProductionMode();
13
+ import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, saveConfig, setConfigSource, findLocalConfigPath, getLocalDbPath, getConfigPath, configExists, } from "../collections.js";
14
+ // NOTE: enableProductionMode() is intentionally NOT called at module scope here.
15
+ // Importing this module for its exports (e.g. buildEditorUri, termLink from
16
+ // test/cli.test.ts) must not flip the global production flag, as that leaks
17
+ // into unrelated tests that rely on the default (development) database path
18
+ // resolution. The flag is flipped inside the CLI's main-module guard below so
19
+ // it only fires when qmd is actually invoked as a script.
18
20
  // =============================================================================
19
21
  // Store/DB lifecycle (no legacy singletons in store.ts)
20
22
  // =============================================================================
21
23
  let store = null;
22
24
  let storeDbPathOverride;
25
+ let currentIndexName = "index";
23
26
  function getStore() {
24
27
  if (!store) {
25
28
  store = createStore(storeDbPathOverride);
26
29
  // Sync YAML config into SQLite store_collections so store.ts reads from DB
27
30
  try {
31
+ const activeModels = ensureModelsConfiguredForCli();
28
32
  const config = loadConfig();
29
33
  syncConfigToDb(store.db, config);
30
- if (config.models) {
31
- setDefaultLlamaCpp(new LlamaCpp({
32
- embedModel: config.models.embed,
33
- generateModel: config.models.generate,
34
- rerankModel: config.models.rerank,
35
- }));
36
- }
34
+ setDefaultLlamaCpp(new LlamaCpp({
35
+ embedModel: activeModels.embed,
36
+ generateModel: activeModels.generate,
37
+ rerankModel: activeModels.rerank,
38
+ }));
37
39
  }
38
40
  catch {
39
41
  // Config may not exist yet — that's fine, DB works without it
@@ -66,16 +68,18 @@ function closeDb() {
66
68
  function getDbPath() {
67
69
  return store?.dbPath ?? storeDbPathOverride ?? getDefaultDbPath();
68
70
  }
71
/**
 * Return the name of the index the CLI currently targets.
 *
 * The value is the module-level `currentIndexName`, which starts out as
 * "index" and is updated whenever `setIndexName()` runs.
 *
 * @returns {string} The active index name.
 */
function getActiveIndexName() {
  return currentIndexName;
}
69
74
  function setIndexName(name) {
70
75
  let normalizedName = name;
71
76
  // Normalize relative paths to prevent malformed database paths
72
77
  if (name && name.includes('/')) {
73
- const { resolve } = require('path');
74
- const { cwd } = require('process');
75
- const absolutePath = resolve(cwd(), name);
78
+ const absolutePath = pathResolve(process.cwd(), name);
76
79
  // Replace path separators with underscores to create a valid filename
77
80
  normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
78
81
  }
82
+ currentIndexName = normalizedName || "index";
79
83
  storeDbPathOverride = normalizedName ? getDefaultDbPath(normalizedName) : undefined;
80
84
  // Reset open handle so next use opens the new index
81
85
  closeDb();
@@ -101,6 +105,51 @@ const cursor = {
101
105
  hide() { process.stderr.write('\x1b[?25l'); },
102
106
  show() { process.stderr.write('\x1b[?25h'); },
103
107
  };
108
/**
 * Wait until everything already queued on a writable stream has been processed.
 *
 * An empty chunk is queued behind any pending output and the returned promise
 * settles from that write's completion callback. The callback's error argument
 * is deliberately ignored: this is a best-effort flush used right before the
 * process exits, so a failed write must not turn into a rejection.
 *
 * @param {{ write: Function, destroyed?: boolean, writableEnded?: boolean }} stream
 *   Writable stream (or compatible object) to flush.
 * @returns {Promise<void>} Resolves once the empty write has completed, or
 *   immediately when the stream can no longer accept writes.
 */
async function flushWritable(stream) {
  // A stream that is already ended or destroyed has nothing left to flush, and
  // writing to an ended stream would raise a write-after-end 'error' event.
  if (stream.destroyed || stream.writableEnded) {
    return;
  }
  await new Promise((done) => {
    stream.write("", () => done());
  });
}
113
/**
 * Decide whether normal native teardown should be skipped for this run.
 *
 * True only when all of the following hold: the platform (explicit
 * `options.platform`, else `process.platform`) is "darwin", the command is
 * "query", the output format is "json", and the environment variable
 * QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT is not set to "1".
 *
 * @param {{ platform?: string, command?: string, format?: string }} options
 * @returns {boolean}
 */
function shouldBypassNativeCleanup(options) {
  const platform = options.platform ?? process.platform;
  if (platform !== "darwin") {
    return false;
  }
  if (options.command !== "query") {
    return false;
  }
  if (options.format !== "json") {
    return false;
  }
  return process.env.QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT !== "1";
}
119
/**
 * Terminate the process with the given exit code, preferring the runtime's
 * `process.reallyExit` hook when it exists and falling back to
 * `process.exit` otherwise.
 *
 * @param {number} code Exit status to report.
 * @returns {void}
 */
function immediateProcessExit(code) {
  const hasReallyExit = typeof process.reallyExit === "function";
  if (!hasReallyExit) {
    process.exit(code);
    return;
  }
  process.reallyExit(code);
}
127
/**
 * Wrap up a CLI command that has already produced its output successfully.
 *
 * stdout is flushed first. For macOS JSON query runs (see
 * shouldBypassNativeCleanup) the normal native teardown is skipped and the
 * immediate-exit path is used instead: ggml Metal can abort from C++
 * finalizers after valid JSON has already been produced (#368). In every
 * other case the cleanup hook runs; a cleanup failure is reported on stderr
 * as a warning and the process still exits 0 because the command's output
 * completed. Real command failures never reach this function — they leave
 * through the normal error path beforehand.
 *
 * @param {object} options
 * @param {string} [options.command] Command name that was executed.
 * @param {string} [options.format] Output format that was used.
 * @param {string} [options.platform] Platform override (defaults to process.platform).
 * @param {object} [options.stdout] Stream to flush first (defaults to process.stdout).
 * @param {object} [options.stderr] Stream for warnings (defaults to process.stderr).
 * @param {Function} [options.cleanup] Teardown hook (defaults to disposeDefaultLlamaCpp).
 * @param {Function} [options.exit] Normal exit hook (defaults to process.exit).
 * @param {Function} [options.immediateExit] Immediate exit hook (defaults to immediateProcessExit).
 * @returns {Promise<void>}
 */
export async function finishSuccessfulCliCommand(options) {
  const warningStream = options.stderr ?? process.stderr;
  const normalExit = options.exit ?? ((code) => process.exit(code));
  const hardExit = options.immediateExit ?? immediateProcessExit;

  await flushWritable(options.stdout ?? process.stdout);

  const skipTeardown = shouldBypassNativeCleanup(options);
  if (skipTeardown) {
    await flushWritable(warningStream);
    hardExit(0);
    return;
  }

  try {
    const teardown = options.cleanup ?? disposeDefaultLlamaCpp;
    await teardown();
  }
  catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    warningStream.write(`QMD Warning: cleanup after successful output failed (${reason}); exiting 0 because command output completed.\n`);
  }

  await flushWritable(warningStream);
  normalExit(0);
}
104
153
  // Ensure cursor is restored on exit
105
154
  process.on('SIGINT', () => { cursor.show(); process.exit(130); });
106
155
  process.on('SIGTERM', () => { cursor.show(); process.exit(143); });
@@ -133,8 +182,8 @@ function formatETA(seconds) {
133
182
  return `${Math.floor(seconds / 3600)}h ${Math.floor((seconds % 3600) / 60)}m`;
134
183
  }
135
184
  // Check index health and print warnings/tips
136
- function checkIndexHealth(db) {
137
- const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db);
185
+ function checkIndexHealth(db, model = resolveEmbedModelForCli()) {
186
+ const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db, model);
138
187
  // Warn if many docs need embedding
139
188
  if (needsEmbedding > 0) {
140
189
  const pct = Math.round((needsEmbedding / totalDocs) * 100);
@@ -205,6 +254,71 @@ function formatBytes(bytes) {
205
254
  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
206
255
  return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
207
256
  }
257
/**
 * Tell whether two paths refer to the same directory.
 *
 * Both paths are compared after `realpathSync`. If resolving either one
 * throws, the comparison falls back to the two paths as normalized by
 * `pathResolve`.
 *
 * @param {string} a First path.
 * @param {string} b Second path.
 * @returns {boolean} True when both paths resolve to the same location.
 */
function sameDirectory(a, b) {
  let left;
  let right;
  try {
    left = realpathSync(a);
    right = realpathSync(b);
  }
  catch {
    left = pathResolve(a);
    right = pathResolve(b);
  }
  return left === right;
}
265
/**
 * Create (or re-sync) a project-local index in `<cwd>/.qmd`.
 *
 * Steps, in order:
 *  1. Refuse to run when the working directory is the home directory.
 *  2. Ensure `.qmd/` exists and point the config source at `.qmd/index.yaml`
 *     if that file exists, otherwise at `.qmd/index.yml`.
 *  3. Redirect the CLI's database path to `.qmd/index.sqlite` and drop any
 *     open handle.
 *  4. Write a fresh config (no collections, resolved models) when none exists;
 *     otherwise make sure the existing config has its models filled in.
 *  5. Open the local store, sync the config into it, and close it again.
 *
 * @returns {void}
 * @throws {Error} When invoked from the home directory.
 */
function initLocalIndex() {
  const cwd = getPwd();
  if (sameDirectory(cwd, homedir())) {
    throw new Error("Refusing to initialize a local index in $HOME. The global index is automatically created; run `qmd collection add <path>` for the global index, or run `qmd init` inside a project folder.");
  }
  const qmdDir = pathJoin(cwd, ".qmd");
  const ymlPath = pathJoin(qmdDir, "index.yml");
  const yamlPath = pathJoin(qmdDir, "index.yaml");
  // Prefer an existing .yaml file; otherwise default to .yml.
  const configPath = existsSync(yamlPath) ? yamlPath : ymlPath;
  const dbPath = pathJoin(qmdDir, "index.sqlite");
  mkdirSync(qmdDir, { recursive: true });
  setConfigSource({ configPath });
  storeDbPathOverride = dbPath;
  // Drop any handle opened against a previous database path.
  closeDb();
  if (!existsSync(configPath)) {
    saveConfig({
      collections: {},
      models: resolveModels(),
    });
  }
  else {
    ensureModelsConfiguredForCli();
  }
  const localStore = createStore(dbPath);
  try {
    syncConfigToDb(localStore.db, loadConfig());
  }
  finally {
    // Always release the temporary store, even if loading or syncing the config throws.
    localStore.close();
  }
  console.log("ready to go with new local index");
}
293
/**
 * Report whether the QMD_FORCE_CPU environment variable requests CPU-only mode.
 *
 * The value is trimmed and lower-cased before evaluation. It counts as
 * disabled when it is unset, empty, whitespace-only, or one of
 * "false", "off", "none", "disable", "disabled", "0". Any other value enables
 * CPU-only mode.
 *
 * @returns {boolean} True when CPU-only mode is requested.
 */
function isForceCpuEnabled() {
  const disabledValues = ["false", "off", "none", "disable", "disabled", "0"];
  // Trim before the emptiness check so a blank value behaves like an unset one.
  const setting = process.env.QMD_FORCE_CPU?.trim().toLowerCase();
  if (!setting) {
    return false;
  }
  return !disabledValues.includes(setting);
}
297
/**
 * Build the human-readable label for the configured GPU mode.
 *
 * When CPU-only mode is forced the label says so; otherwise it is the trimmed
 * value of QMD_LLAMA_GPU, or "auto" when that variable is unset or blank.
 *
 * @returns {string} Label describing the configured GPU mode.
 */
function configuredGpuModeLabel() {
  if (isForceCpuEnabled()) {
    return "CPU forced (QMD_FORCE_CPU)";
  }
  const requestedMode = process.env.QMD_LLAMA_GPU?.trim();
  return requestedMode || "auto";
}
302
/**
 * Collapse a list of device names into a short comma-separated summary.
 *
 * Repeated names are merged and prefixed with their count (e.g. "2× GPU"),
 * names that occur once are listed as-is, and first-seen order is kept.
 *
 * @param {Iterable<string>} names Device names, possibly with duplicates.
 * @returns {string} Summary string; empty when there are no names.
 */
function summarizeDeviceNames(names) {
  const tally = new Map();
  for (const deviceName of names) {
    const seenSoFar = tally.has(deviceName) ? tally.get(deviceName) : 0;
    tally.set(deviceName, seenSoFar + 1);
  }
  const labels = [];
  for (const [deviceName, occurrences] of tally) {
    labels.push(occurrences > 1 ? `${occurrences}× ${deviceName}` : deviceName);
  }
  return labels.join(", ");
}
311
/**
 * Shorten a diagnostic message for display.
 *
 * Occurrences of the home directory become "~" and occurrences of the current
 * working directory become ".". The result is then reduced to its first three
 * non-empty, trimmed lines joined with "; ".
 *
 * The longer directory is substituted first: if the home directory were
 * always replaced first, a working directory located under it would never
 * match afterwards. Directories of length 0 or 1 (empty string, filesystem
 * root) are skipped because substituting them would rewrite unrelated text.
 *
 * @param {string} message Raw diagnostic text, possibly multi-line.
 * @returns {string} Compact single-line message.
 */
function sanitizeDiagnosticMessage(message) {
  const substitutions = [
    [homedir(), "~"],
    [process.cwd(), "."],
  ]
    .filter(([dir]) => typeof dir === "string" && dir.length > 1)
    // Stable sort: when both directories are equal, the "~" alias wins.
    .sort(([a], [b]) => b.length - a.length);
  let text = message;
  for (const [dir, alias] of substitutions) {
    text = text.replaceAll(dir, alias);
  }
  return text
    .split("\n")
    .map((line) => line.trim())
    .filter(Boolean)
    .slice(0, 3)
    .join("; ");
}
208
322
  async function showStatus() {
209
323
  const dbPath = getDbPath();
210
324
  const db = getDb();
@@ -222,7 +336,8 @@ async function showStatus() {
222
336
  // Overall stats
223
337
  const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get();
224
338
  const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get();
225
- const needsEmbedding = getHashesNeedingEmbedding(db);
339
+ const statusEmbedModel = resolveEmbedModelForCli();
340
+ const needsEmbedding = getHashesNeedingEmbedding(db, undefined, statusEmbedModel);
226
341
  // Most recent update across all collections
227
342
  const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get();
228
343
  console.log(`${c.bold}QMD Status${c.reset}\n`);
@@ -341,41 +456,11 @@ async function showStatus() {
341
456
  const match = uri.match(/^hf:([^/]+\/[^/]+)\//);
342
457
  return match ? `https://huggingface.co/${match[1]}` : uri;
343
458
  };
459
+ const activeModels = resolveModelsForCli();
344
460
  console.log(`\n${c.bold}Models${c.reset}`);
345
- console.log(` Embedding: ${hfLink(DEFAULT_EMBED_MODEL_URI)}`);
346
- console.log(` Reranking: ${hfLink(DEFAULT_RERANK_MODEL_URI)}`);
347
- console.log(` Generation: ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
348
- }
349
- // Device / GPU info
350
- try {
351
- const llm = getDefaultLlamaCpp();
352
- const device = await llm.getDeviceInfo();
353
- console.log(`\n${c.bold}Device${c.reset}`);
354
- if (device.gpu) {
355
- console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
356
- if (device.gpuDevices.length > 0) {
357
- // Deduplicate and count GPUs
358
- const counts = new Map();
359
- for (const name of device.gpuDevices) {
360
- counts.set(name, (counts.get(name) || 0) + 1);
361
- }
362
- const deviceStr = Array.from(counts.entries())
363
- .map(([name, count]) => count > 1 ? `${count}× ${name}` : name)
364
- .join(', ');
365
- console.log(` Devices: ${deviceStr}`);
366
- }
367
- if (device.vram) {
368
- console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
369
- }
370
- }
371
- else {
372
- console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`);
373
- console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
374
- }
375
- console.log(` CPU: ${device.cpuCores} math cores`);
376
- }
377
- catch {
378
- // Don't fail status if LLM init fails
461
+ console.log(` Embedding: ${hfLink(activeModels.embed)}`);
462
+ console.log(` Reranking: ${hfLink(activeModels.rerank)}`);
463
+ console.log(` Generation: ${hfLink(activeModels.generate)}`);
379
464
  }
380
465
  // Tips section
381
466
  const tips = [];
@@ -659,7 +744,6 @@ function contextRemove(pathArg) {
659
744
  console.log(`${c.green}✓${c.reset} Removed context for: qmd://${detected.collectionName}/${detected.relativePath}`);
660
745
  }
661
746
  function getDocument(filename, fromLine, maxLines, lineNumbers) {
662
- const db = getDb();
663
747
  // Parse :linenum suffix from filename (e.g., "file.md:100")
664
748
  let inputPath = filename;
665
749
  const colonMatch = inputPath.match(/:(\d+)$/);
@@ -670,6 +754,14 @@ function getDocument(filename, fromLine, maxLines, lineNumbers) {
670
754
  inputPath = inputPath.slice(0, -colonMatch[0].length);
671
755
  }
672
756
  }
757
+ if (fromLine !== undefined)
758
+ fromLine = Math.max(1, fromLine);
759
+ const parsedIndexPath = isVirtualPath(inputPath) ? parseVirtualPath(inputPath) : null;
760
+ if (parsedIndexPath?.indexName) {
761
+ setIndexName(parsedIndexPath.indexName);
762
+ setConfigIndexName(parsedIndexPath.indexName);
763
+ }
764
+ const db = getDb();
673
765
  // Handle docid lookup (#abc123, abc123, "#abc123", "abc123", etc.)
674
766
  if (isDocid(inputPath)) {
675
767
  const docidMatch = findDocumentByDocid(db, inputPath);
@@ -1079,8 +1171,35 @@ function listFiles(pathArg) {
1079
1171
  // Parse the path argument
1080
1172
  let collectionName;
1081
1173
  let pathPrefix = null;
1082
- if (pathArg.startsWith('qmd://')) {
1083
- // Virtual path format: qmd://collection/path
1174
+ const afterScheme = pathArg.startsWith('qmd://') ? pathArg.slice('qmd://'.length) : null;
1175
+ if (afterScheme !== null && afterScheme.startsWith('/')) {
1176
+ // Absolute-path collection: qmd:///Users/foo/bar — normalizeVirtualPath would corrupt
1177
+ // this by stripping all leading slashes, so bypass parseVirtualPath entirely.
1178
+ const normalized = afterScheme.replace(/\/$/, '');
1179
+ const allColls = yamlListCollections();
1180
+ const match = allColls
1181
+ .filter(c => normalized === c.name || normalized.startsWith(c.name + '/'))
1182
+ .sort((a, b) => b.name.length - a.name.length)[0];
1183
+ if (match) {
1184
+ collectionName = match.name;
1185
+ const rest = normalized.slice(match.name.length).replace(/^\//, '');
1186
+ pathPrefix = rest || null;
1187
+ }
1188
+ else {
1189
+ // Preserve the historical qmd:////collection/path alias behavior for normal
1190
+ // collections when no absolute-path collection matches.
1191
+ const parsed = parseVirtualPath(pathArg);
1192
+ if (!parsed) {
1193
+ console.error(`Invalid virtual path: ${pathArg}`);
1194
+ closeDb();
1195
+ process.exit(1);
1196
+ }
1197
+ collectionName = parsed.collectionName;
1198
+ pathPrefix = parsed.path;
1199
+ }
1200
+ }
1201
+ else if (afterScheme !== null) {
1202
+ // Normal virtual path: qmd://collection-name/path
1084
1203
  const parsed = parseVirtualPath(pathArg);
1085
1204
  if (!parsed) {
1086
1205
  console.error(`Invalid virtual path: ${pathArg}`);
@@ -1090,8 +1209,24 @@ function listFiles(pathArg) {
1090
1209
  collectionName = parsed.collectionName;
1091
1210
  pathPrefix = parsed.path;
1092
1211
  }
1212
+ else if (pathArg.startsWith('/')) {
1213
+ // Raw absolute filesystem path — longest-prefix match against collection names
1214
+ const normalized = pathArg.replace(/\/$/, '');
1215
+ const allColls = yamlListCollections();
1216
+ const match = allColls
1217
+ .filter(c => normalized === c.name || normalized.startsWith(c.name + '/'))
1218
+ .sort((a, b) => b.name.length - a.name.length)[0];
1219
+ if (match) {
1220
+ collectionName = match.name;
1221
+ const rest = normalized.slice(match.name.length).replace(/^\//, '');
1222
+ pathPrefix = rest || null;
1223
+ }
1224
+ else {
1225
+ collectionName = normalized;
1226
+ }
1227
+ }
1093
1228
  else {
1094
- // Just collection name or collection/path
1229
+ // Short collection name or name/path
1095
1230
  const parts = pathArg.split('/');
1096
1231
  collectionName = parts[0] || '';
1097
1232
  if (parts.length > 1) {
@@ -1323,7 +1458,7 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
1323
1458
  try {
1324
1459
  content = readFileSync(filepath, "utf-8");
1325
1460
  }
1326
- catch (err) {
1461
+ catch {
1327
1462
  // Skip files that can't be read (e.g. iCloud evicted files returning EAGAIN)
1328
1463
  processed++;
1329
1464
  progress.set((processed / total) * 100);
@@ -1336,8 +1471,8 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
1336
1471
  }
1337
1472
  const hash = await hashContent(content);
1338
1473
  const title = extractTitle(content, relativeFile);
1339
- // Check if document exists in this collection with this path
1340
- const existing = findActiveDocument(db, collectionName, path);
1474
+ // Check if document exists (also migrates legacy lowercase paths)
1475
+ const existing = findOrMigrateLegacyDocument(db, collectionName, path);
1341
1476
  if (existing) {
1342
1477
  if (existing.hash === hash) {
1343
1478
  // Hash unchanged, but check if title needs updating
@@ -1419,20 +1554,54 @@ function parseChunkStrategy(value) {
1419
1554
  return s;
1420
1555
  throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${s}")`);
1421
1556
  }
1422
- async function vectorIndex(model = DEFAULT_EMBED_MODEL_URI, force = false, batchOptions) {
1557
/**
 * Resolve the embed/generate/rerank models for the CLI and make sure the
 * saved config records them.
 *
 * The config is loaded and its `models` section is passed through
 * `resolveModels`. If any of the three resolved values differs from what the
 * config currently stores, the config is saved again with those three fields
 * updated (other `models` keys are kept). This is best-effort: if anything in
 * that sequence throws, the result of `resolveModels()` with no arguments is
 * returned instead.
 *
 * @returns {{ embed: string, generate: string, rerank: string }} Active models.
 */
function ensureModelsConfiguredForCli() {
  try {
    const config = loadConfig();
    const resolved = resolveModels(config.models);
    const stored = config.models ?? {};
    const upToDate = stored.embed === resolved.embed &&
      stored.generate === resolved.generate &&
      stored.rerank === resolved.rerank;
    if (!upToDate) {
      const models = {
        ...stored,
        embed: resolved.embed,
        generate: resolved.generate,
        rerank: resolved.rerank,
      };
      saveConfig({ ...config, models });
    }
    return resolved;
  }
  catch {
    return resolveModels();
  }
}
1579
/**
 * Return the embedding model the CLI should use, as resolved by
 * `ensureModelsConfiguredForCli()`.
 *
 * @returns {string} The `embed` entry of the resolved models.
 */
export function resolveEmbedModelForCli() {
  const { embed } = ensureModelsConfiguredForCli();
  return embed;
}
1582
/**
 * Return the generation model the CLI should use, as resolved by
 * `ensureModelsConfiguredForCli()`.
 *
 * @returns {string} The `generate` entry of the resolved models.
 */
export function resolveGenerateModelForCli() {
  const { generate } = ensureModelsConfiguredForCli();
  return generate;
}
1585
/**
 * Return the reranking model the CLI should use, as resolved by
 * `ensureModelsConfiguredForCli()`.
 *
 * @returns {string} The `rerank` entry of the resolved models.
 */
export function resolveRerankModelForCli() {
  const { rerank } = ensureModelsConfiguredForCli();
  return rerank;
}
1588
/**
 * Return the full set of models the CLI should use; a thin alias for
 * `ensureModelsConfiguredForCli()`.
 *
 * @returns {{ embed: string, generate: string, rerank: string }} Resolved models.
 */
function resolveModelsForCli() {
  const models = ensureModelsConfiguredForCli();
  return models;
}
1591
+ async function vectorIndex(model = resolveEmbedModelForCli(), force = false, batchOptions) {
1423
1592
  const storeInstance = getStore();
1424
1593
  const db = storeInstance.db;
1425
1594
  if (force) {
1426
1595
  console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
1427
1596
  }
1428
1597
  // Check if there's work to do before starting
1429
- const hashesToEmbed = getHashesNeedingEmbedding(db);
1598
+ const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection, model);
1430
1599
  if (hashesToEmbed === 0 && !force) {
1431
1600
  console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
1432
1601
  closeDb();
1433
1602
  return;
1434
1603
  }
1435
- console.log(`${c.dim}Model: ${model}${c.reset}\n`);
1604
+ console.log(`${c.dim}Model: ${shortModelName(model)}${c.reset}\n`);
1436
1605
  if (batchOptions?.maxDocsPerBatch !== undefined || batchOptions?.maxBatchBytes !== undefined) {
1437
1606
  const maxDocsPerBatch = batchOptions.maxDocsPerBatch ?? DEFAULT_EMBED_MAX_DOCS_PER_BATCH;
1438
1607
  const maxBatchBytes = batchOptions.maxBatchBytes ?? DEFAULT_EMBED_MAX_BATCH_BYTES;
@@ -1444,25 +1613,33 @@ async function vectorIndex(model = DEFAULT_EMBED_MODEL_URI, force = false, batch
1444
1613
  const result = await generateEmbeddings(storeInstance, {
1445
1614
  force,
1446
1615
  model,
1616
+ collection: batchOptions?.collection,
1447
1617
  maxDocsPerBatch: batchOptions?.maxDocsPerBatch,
1448
1618
  maxBatchBytes: batchOptions?.maxBatchBytes,
1449
1619
  chunkStrategy: batchOptions?.chunkStrategy,
1450
1620
  onProgress: (info) => {
1451
1621
  if (info.totalBytes === 0)
1452
1622
  return;
1453
- const percent = (info.bytesProcessed / info.totalBytes) * 100;
1623
+ // Progress is measured by input bytes, not by chunks. The final chunk
1624
+ // count is discovered lazily batch-by-batch, so displaying
1625
+ // chunksEmbedded/totalChunks makes the percent look wrong when a few
1626
+ // large documents remain. Show chunks as a count and label the byte
1627
+ // percentage explicitly as input progress.
1628
+ const percent = Math.min(100, (info.bytesProcessed / info.totalBytes) * 100);
1454
1629
  progress.set(percent);
1455
1630
  const elapsed = (Date.now() - startTime) / 1000;
1456
- const bytesPerSec = info.bytesProcessed / elapsed;
1457
- const remainingBytes = info.totalBytes - info.bytesProcessed;
1458
- const etaSec = remainingBytes / bytesPerSec;
1631
+ const bytesPerSec = elapsed > 0 ? info.bytesProcessed / elapsed : 0;
1632
+ const remainingBytes = Math.max(0, info.totalBytes - info.bytesProcessed);
1633
+ const etaSec = bytesPerSec > 0 ? remainingBytes / bytesPerSec : Number.POSITIVE_INFINITY;
1459
1634
  const bar = renderProgressBar(percent);
1460
1635
  const percentStr = percent.toFixed(0).padStart(3);
1461
- const throughput = `${formatBytes(bytesPerSec)}/s`;
1462
- const eta = elapsed > 2 ? formatETA(etaSec) : "...";
1463
- const errStr = info.errors > 0 ? ` ${c.yellow}${info.errors} err${c.reset}` : "";
1636
+ const throughput = bytesPerSec > 0 ? `${formatBytes(bytesPerSec)}/s` : ".../s";
1637
+ const eta = elapsed > 2 && Number.isFinite(etaSec) ? formatETA(etaSec) : "...";
1638
+ const inputStr = `${formatBytes(info.bytesProcessed)}/${formatBytes(info.totalBytes)} input`;
1639
+ const chunkStr = `${formatCount(info.chunksEmbedded)} chunks`;
1640
+ const errStr = info.errors > 0 ? ` ${c.yellow}${formatCount(info.errors)} err${c.reset}` : "";
1464
1641
  if (isTTY)
1465
- process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${info.chunksEmbedded}/${info.totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
1642
+ process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}% input${c.reset} ${c.dim}${chunkStr}${errStr} · ${inputStr} · ${throughput} · ETA ${eta}${c.reset} `);
1466
1643
  },
1467
1644
  });
1468
1645
  progress.clear();
@@ -1475,7 +1652,13 @@ async function vectorIndex(model = DEFAULT_EMBED_MODEL_URI, force = false, batch
1475
1652
  console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
1476
1653
  console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${result.chunksEmbedded}${c.reset} chunks from ${c.bold}${result.docsProcessed}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset}`);
1477
1654
  if (result.errors > 0) {
1478
- console.log(`${c.yellow}⚠ ${result.errors} chunks failed${c.reset}`);
1655
+ console.log(`${c.yellow}⚠ ${formatCount(result.errors)} chunks still failed after retries${c.reset}`);
1656
+ for (const failure of (result.failures ?? []).slice(0, 8)) {
1657
+ console.log(` ${c.dim}${failure.path}#${failure.seq} (${failure.attempts} attempts): ${failure.reason}${c.reset}`);
1658
+ }
1659
+ if ((result.failures?.length ?? 0) > 8) {
1660
+ console.log(` ${c.dim}...and ${formatCount((result.failures?.length ?? 0) - 8)} more${c.reset}`);
1661
+ }
1479
1662
  }
1480
1663
  }
1481
1664
  closeDb();
@@ -1619,13 +1802,21 @@ function outputResults(results, query, opts) {
1619
1802
  return;
1620
1803
  }
1621
1804
  // Helper to create qmd:// URI from displayPath
1622
- const toQmdPath = (displayPath) => `qmd://${displayPath}`;
1805
+ const toQmdPath = (displayPath) => {
1806
+ const [collectionName, ...segments] = displayPath.split("/");
1807
+ if (!collectionName || segments.length === 0) {
1808
+ return `qmd://${displayPath}`;
1809
+ }
1810
+ const indexName = getActiveIndexName();
1811
+ return buildVirtualPath(collectionName, segments.join("/"), indexName === "index" ? undefined : indexName);
1812
+ };
1623
1813
  if (opts.format === "json") {
1624
1814
  // JSON output for LLM consumption
1625
1815
  const output = filtered.map(row => {
1626
1816
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
1817
+ const snippetInfo = extractSnippet(row.body, query, 300, row.chunkPos, row.chunkLen, opts.intent);
1627
1818
  let body = opts.full ? row.body : undefined;
1628
- let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos, undefined, opts.intent).snippet : undefined;
1819
+ let snippet = !opts.full ? snippetInfo.snippet : undefined;
1629
1820
  if (opts.lineNumbers) {
1630
1821
  if (body)
1631
1822
  body = addLineNumbers(body);
@@ -1636,6 +1827,7 @@ function outputResults(results, query, opts) {
1636
1827
  ...(docid && { docid: `#${docid}` }),
1637
1828
  score: Math.round(row.score * 100) / 100,
1638
1829
  file: toQmdPath(row.displayPath),
1830
+ line: snippetInfo.line,
1639
1831
  title: row.title,
1640
1832
  ...(row.context && { context: row.context }),
1641
1833
  ...(body && { body }),
@@ -1660,7 +1852,7 @@ function outputResults(results, query, opts) {
1660
1852
  const row = filtered[i];
1661
1853
  if (!row)
1662
1854
  continue;
1663
- const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
1855
+ const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent);
1664
1856
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
1665
1857
  // Line 1: filepath with docid
1666
1858
  const virtualPath = row.file.startsWith("qmd://") ? row.file : toQmdPath(row.displayPath);
@@ -1716,8 +1908,9 @@ function outputResults(results, query, opts) {
1716
1908
  }
1717
1909
  console.log();
1718
1910
  // Snippet with highlighting (diff-style header included)
1719
- let displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
1720
- const highlighted = highlightTerms(displaySnippet, query);
1911
+ const content = opts.full ? row.body : snippet;
1912
+ const displayContent = opts.lineNumbers ? addLineNumbers(content, opts.full ? 1 : line) : content;
1913
+ const highlighted = highlightTerms(displayContent, query);
1721
1914
  console.log(highlighted);
1722
1915
  // Double empty line between results
1723
1916
  if (i < filtered.length - 1)
@@ -1731,7 +1924,7 @@ function outputResults(results, query, opts) {
1731
1924
  continue;
1732
1925
  const heading = row.title || row.displayPath;
1733
1926
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
1734
- let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
1927
+ let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent).snippet;
1735
1928
  if (opts.lineNumbers) {
1736
1929
  content = addLineNumbers(content);
1737
1930
  }
@@ -1745,7 +1938,7 @@ function outputResults(results, query, opts) {
1745
1938
  const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '&quot;')}"` : "";
1746
1939
  const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '&quot;')}"` : "";
1747
1940
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
1748
- let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
1941
+ let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent).snippet;
1749
1942
  if (opts.lineNumbers) {
1750
1943
  content = addLineNumbers(content);
1751
1944
  }
@@ -1756,10 +1949,10 @@ function outputResults(results, query, opts) {
1756
1949
  // CSV format
1757
1950
  console.log("docid,score,file,title,context,line,snippet");
1758
1951
  for (const row of filtered) {
1759
- const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
1952
+ const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent);
1760
1953
  let content = opts.full ? row.body : snippet;
1761
1954
  if (opts.lineNumbers) {
1762
- content = addLineNumbers(content, line);
1955
+ content = addLineNumbers(content, opts.full ? 1 : line);
1763
1956
  }
1764
1957
  const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
1765
1958
  const snippetText = content || "";
@@ -2059,13 +2252,13 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
2059
2252
  const displayQuery = structuredQueries
2060
2253
  ? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
2061
2254
  : query;
2062
- // Map to CLI output format — use bestChunk for snippet display
2063
2255
  outputResults(results.map(r => ({
2064
2256
  file: r.file,
2065
2257
  displayPath: r.displayPath,
2066
2258
  title: r.title,
2067
- body: r.bestChunk,
2259
+ body: r.body,
2068
2260
  chunkPos: r.bestChunkPos,
2261
+ chunkLen: r.bestChunk.length,
2069
2262
  score: r.score,
2070
2263
  context: r.context,
2071
2264
  docid: r.docid,
@@ -2120,6 +2313,7 @@ function parseCLI() {
2120
2313
  // Query options
2121
2314
  "candidate-limit": { type: "string", short: "C" },
2122
2315
  "no-rerank": { type: "boolean", default: false },
2316
+ "no-gpu": { type: "boolean", default: false },
2123
2317
  intent: { type: "string" },
2124
2318
  // Chunking options
2125
2319
  "chunk-strategy": { type: "string" }, // "regex" (default) or "auto" (AST for code files)
@@ -2131,11 +2325,27 @@ function parseCLI() {
2131
2325
  allowPositionals: true,
2132
2326
  strict: false, // Allow unknown options to pass through
2133
2327
  });
2134
- // Select index name (default: "index")
2328
+ if (values["no-gpu"]) {
2329
+ process.env.QMD_FORCE_CPU = "1";
2330
+ }
2331
+ // Select index name (default: "index"). If no explicit --index is supplied,
2332
+ // a project-local .qmd/index.yaml overrides the global config/cache paths.
2135
2333
  const indexName = values.index;
2136
2334
  if (indexName) {
2137
2335
  setIndexName(indexName);
2138
2336
  setConfigIndexName(indexName);
2337
+ setConfigSource();
2338
+ }
2339
+ else {
2340
+ const localConfigPath = findLocalConfigPath();
2341
+ if (localConfigPath) {
2342
+ setConfigSource({ configPath: localConfigPath });
2343
+ storeDbPathOverride = getLocalDbPath(localConfigPath);
2344
+ closeDb();
2345
+ }
2346
+ else {
2347
+ setConfigSource();
2348
+ }
2139
2349
  }
2140
2350
  // Determine output format
2141
2351
  let format = "cli";
@@ -2203,26 +2413,293 @@ function removePath(path) {
2203
2413
  unlinkSync(path);
2204
2414
  }
2205
2415
  }
2416
+ const SKILL_DIR = "skills";
2417
/**
 * Walk upward from this module's directory until a directory containing
 * a `SKILL_DIR` entry is found.
 *
 * @returns {string|null} The first ancestor directory that contains
 *   `SKILL_DIR`, or null when none does or when QMD_SKILLS_DIR is set
 *   (the override makes the packaged location irrelevant).
 */
function findPackageRoot() {
    if (process.env.QMD_SKILLS_DIR) {
        return null;
    }
    let candidate = dirname(fileURLToPath(import.meta.url));
    for (;;) {
        if (existsSync(resolve(candidate, SKILL_DIR))) {
            return candidate;
        }
        const up = dirname(candidate);
        // dirname() of the filesystem root is the root itself: nothing left to try.
        if (up === candidate) {
            return null;
        }
        candidate = up;
    }
}
2434
/**
 * List the directories that are scanned for skills.
 *
 * @param {boolean} [_runtimeOnly=false] Accepted for call-site symmetry; not read here.
 * @returns {string[]} `[QMD_SKILLS_DIR]` when that variable is set (returned
 *   without an existence check); otherwise the package's `SKILL_DIR`
 *   directory if it exists, else an empty array.
 */
function getSkillSearchDirs(_runtimeOnly = false) {
    const override = process.env.QMD_SKILLS_DIR;
    if (override) {
        return [override];
    }
    const packageRoot = findPackageRoot();
    if (!packageRoot) {
        return [];
    }
    const bundled = resolve(packageRoot, SKILL_DIR);
    if (!existsSync(bundled)) {
        return [];
    }
    return [bundled];
}
2444
/**
 * Extract `name`, `description` and `hidden` from the `---` delimited
 * frontmatter at the top of a SKILL.md file.
 *
 * This is a deliberately small line-based reader, not a YAML parser. It
 * understands:
 *   - `key: value` pairs for the three keys above (other keys are ignored);
 *   - a `description:` continued on following indented lines, which are
 *     joined with single spaces;
 *   - values wrapped in one pair of matching single or double quotes
 *     (the quotes are removed);
 *   - a `description:` introduced by a block-scalar indicator (`>`, `|`,
 *     optionally followed by `+` or `-`), which is dropped rather than
 *     leaking into the text.
 *
 * @param {string} content Full text of the skill file.
 * @returns {{name: string, description: string, hidden: boolean}|null}
 *   The parsed fields, or null when there is no frontmatter block or no
 *   non-empty `name`.
 */
function parseSkillFrontmatter(content) {
    const trimmed = content.trimStart();
    if (!trimmed.startsWith("---"))
        return null;
    const end = trimmed.slice(3).indexOf("\n---");
    if (end < 0)
        return null;
    const frontmatter = trimmed.slice(3, 3 + end);
    // Remove one pair of matching surrounding quotes; values that contain the
    // same quote character inside are left untouched to avoid mangling them.
    const unquote = (value) => {
        const quote = value[0];
        const quoted = value.length >= 2
            && (quote === '"' || quote === "'")
            && value.endsWith(quote)
            && !value.slice(1, -1).includes(quote);
        return quoted ? value.slice(1, -1) : value;
    };
    let name = "";
    let description = "";
    let hidden = false;
    const lines = frontmatter.split(/\r?\n/);
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (line.startsWith("name:")) {
            name = unquote(line.slice("name:".length).trim());
        }
        else if (line.startsWith("description:")) {
            const parts = [line.slice("description:".length).trim()];
            // Indented, non-blank lines continue the description.
            while (i + 1 < lines.length && /^\s+\S/.test(lines[i + 1])) {
                i++;
                parts.push(lines[i].trim());
            }
            // `description: >` / `|` only announce a block scalar; they are not text.
            if (/^[>|][+-]?$/.test(parts[0])) {
                parts.shift();
            }
            description = unquote(parts.join(" "));
        }
        else if (line.startsWith("hidden:")) {
            const value = unquote(line.slice("hidden:".length).trim()).toLowerCase();
            hidden = value === "true" || value === "yes";
        }
    }
    if (!name)
        return null;
    return { name, description, hidden };
}
2478
/**
 * Scan every skill search directory for `<entry>/SKILL.md` files with
 * parseable frontmatter.
 *
 * Unreadable directories, entries without a SKILL.md, unreadable files and
 * files whose frontmatter does not parse are skipped silently.
 *
 * @param {boolean} [runtimeOnly=false] Forwarded to getSkillSearchDirs().
 * @returns {Array<{name: string, description: string, hidden: boolean, dir: string}>}
 *   Discovered skills sorted by name with localeCompare.
 */
function discoverSkills(runtimeOnly = false) {
    // Returns the skill record for one directory entry, or null to skip it.
    const loadSkill = (baseDir, entryName) => {
        const skillDir = resolve(baseDir, entryName);
        const manifest = resolve(skillDir, "SKILL.md");
        if (!existsSync(manifest)) {
            return null;
        }
        let text;
        try {
            text = readFileSync(manifest, "utf-8");
        }
        catch {
            return null;
        }
        const meta = parseSkillFrontmatter(text);
        return meta ? { ...meta, dir: skillDir } : null;
    };
    const found = [];
    for (const baseDir of getSkillSearchDirs(runtimeOnly)) {
        let names;
        try {
            names = readdirSync(baseDir);
        }
        catch {
            continue;
        }
        for (const entryName of names) {
            const skill = loadSkill(baseDir, entryName);
            if (skill) {
                found.push(skill);
            }
        }
    }
    found.sort((left, right) => left.name.localeCompare(right.name));
    return found;
}
2508
/**
 * Look up a discovered skill by its exact frontmatter name.
 *
 * @param {string} name Skill name to match.
 * @param {boolean} [runtimeOnly=false] Forwarded to discoverSkills().
 * @returns {object|null} The first matching skill record, or null.
 */
function findSkill(name, runtimeOnly = false) {
    const candidates = discoverSkills(runtimeOnly);
    const match = candidates.find((candidate) => candidate.name === name);
    return match ?? null;
}
2511
/**
 * Read the SKILL.md file of a skill as UTF-8 text.
 *
 * @param {{dir: string}} skill Skill record whose `dir` holds SKILL.md.
 * @returns {string} File contents.
 * @throws Whatever readFileSync throws when the file cannot be read.
 */
function readSkillContent(skill) {
    const manifestPath = resolve(skill.dir, "SKILL.md");
    return readFileSync(manifestPath, "utf-8");
}
2514
/**
 * Gather the supplementary files that ship next to a skill's SKILL.md.
 *
 * Only the immediate regular files of the `references`, `templates` and
 * `scripts` subdirectories are read (no recursion). Collection is
 * best-effort: a subdirectory that is missing, is not a directory, or
 * cannot be listed is skipped, and so is any individual file that cannot
 * be stat'ed or read.
 *
 * @param {{dir: string}} skill Skill record.
 * @returns {Array<{relativePath: string, content: string}>} Files in
 *   subdirectory order, then sorted by entry name; `relativePath` is
 *   `<subdir>/<file>` and `content` is the UTF-8 text.
 */
function collectSkillFiles(skill) {
    const files = [];
    for (const subdirName of ["references", "templates", "scripts"]) {
        const subdir = resolve(skill.dir, subdirName);
        let entries;
        try {
            // Listing inside the try also covers ENOENT / ENOTDIR / EACCES, so a
            // stray file named e.g. "references" cannot abort the whole command.
            entries = readdirSync(subdir).sort();
        }
        catch {
            continue;
        }
        for (const entry of entries) {
            const filePath = resolve(subdir, entry);
            try {
                if (!statSync(filePath).isFile())
                    continue;
                files.push({ relativePath: `${subdirName}/${basename(filePath)}`, content: readFileSync(filePath, "utf-8") });
            }
            catch {
                // Ignore unreadable supplementary files.
            }
        }
    }
    return files;
}
2206
2534
  function showSkill() {
2207
- console.log("QMD Skill (embedded)");
2535
+ const skill = findSkill("qmd");
2536
+ if (!skill) {
2537
+ throw new Error("QMD skill not found. Reinstall qmd or set QMD_SKILLS_DIR.");
2538
+ }
2539
+ console.log("QMD Skill");
2208
2540
  console.log("");
2209
- const content = getEmbeddedQmdSkillContent();
2541
+ const content = readSkillContent(skill);
2210
2542
  process.stdout.write(content.endsWith("\n") ? content : content + "\n");
2211
2543
  }
2212
- function writeEmbeddedSkill(targetDir, force) {
2544
/**
 * Recursively copy the regular files and subdirectories of `sourceDir`
 * into `targetDir`, creating `targetDir` (and parents) as needed.
 *
 * Entries are classified with statSync, so symbolic links are treated as
 * whatever they point to; entries that are neither files nor directories
 * are not copied.
 *
 * @param {string} sourceDir Directory to read from.
 * @param {string} targetDir Directory to write into.
 */
function copyDirectoryContents(sourceDir, targetDir) {
    mkdirSync(targetDir, { recursive: true });
    readdirSync(sourceDir).forEach((childName) => {
        const from = resolve(sourceDir, childName);
        const to = resolve(targetDir, childName);
        const info = statSync(from);
        if (info.isDirectory()) {
            copyDirectoryContents(from, to);
            return;
        }
        if (info.isFile()) {
            copyFileSync(from, to);
        }
    });
}
2558
/**
 * Build the text of the small bootstrap SKILL.md written into an installed
 * skill directory. The stub tells the agent to load the full instructions
 * with `qmd skill show`.
 *
 * @returns {string} Markdown with YAML frontmatter, ending in a newline.
 */
function installedSkillStubContent() {
    const stubLines = [
        "---",
        "name: qmd",
        "description: Bootstrap QMD search instructions from the installed qmd CLI. Use when users ask to find notes, retrieve documents, inspect a wiki, or answer from indexed local markdown.",
        "license: MIT",
        "compatibility: Requires qmd CLI. Run `qmd skill show` for version-matched instructions.",
        "allowed-tools: Bash(qmd:*), mcp__qmd__*",
        "---",
        "",
        "# QMD - Query Markdown Documents",
        "",
        "This installed skill is intentionally a small bootstrap so it does not go stale",
        "when the qmd package updates.",
        "",
        "Load the full, version-matched QMD instructions from the CLI:",
        "",
        "!`qmd skill show`",
        "",
        "If your agent does not support bang-command expansion, run:",
        "",
        "```bash",
        "qmd skill show",
        "```",
        "",
        "Then follow those instructions. In short: search first, fetch full sources with",
        "`qmd get` or `qmd multi-get`, and answer from retrieved text rather than snippets.",
    ];
    return stubLines.join("\n") + "\n";
}
2586
/**
 * Install the bundled "qmd" skill into `targetDir`: copy the skill
 * directory, then overwrite its SKILL.md with the bootstrap stub.
 *
 * The bundled skill is located BEFORE an existing install is removed, so a
 * missing skill (broken package, bad QMD_SKILLS_DIR) can no longer delete
 * the previous install and leave nothing in its place.
 *
 * @param {string} targetDir Destination skill directory.
 * @param {boolean} force Replace `targetDir` when it already exists.
 * @throws {Error} When `targetDir` exists and `force` is falsy, or when the
 *   bundled "qmd" skill cannot be found.
 */
function writeSkillInstall(targetDir, force) {
    const alreadyInstalled = pathExists(targetDir);
    if (alreadyInstalled && !force) {
        throw new Error(`Skill already exists: ${targetDir} (use --force to replace it)`);
    }
    const skill = findSkill("qmd");
    if (!skill) {
        throw new Error("QMD skill not found. Reinstall qmd or set QMD_SKILLS_DIR.");
    }
    // Destructive step last: only clear the old install once a replacement is known to exist.
    if (alreadyInstalled) {
        removePath(targetDir);
    }
    copyDirectoryContents(skill.dir, targetDir);
    writeFileSync(resolve(targetDir, "SKILL.md"), installedSkillStubContent(), "utf-8");
}
2600
/**
 * Print a payload as a single line of compact JSON via console.log.
 *
 * @param {*} payload Value to serialize with JSON.stringify.
 */
function outputSkillsJson(payload) {
    const serialized = JSON.stringify(payload);
    console.log(serialized);
}
2603
/**
 * Dispatch the `qmd skills` subcommands.
 *
 *   list          names and descriptions of non-hidden skills
 *   get <names>   SKILL.md text of the named skills (or all non-hidden
 *                 skills with --all); --full adds supplementary files
 *   path [name]   the search directories, or one skill's directory
 *   help          usage text
 *
 * @param {string[]} args Arguments after `skills`; args[0] is the
 *   subcommand and defaults to "list".
 * @param {boolean} jsonMode Emit `{ success, data }` JSON instead of text.
 * @param {boolean} [fullOption=false] Same as passing `--full` in args.
 * @param {boolean} [allOption=false] Same as passing `--all` in args.
 * @throws {Error} For an unknown subcommand, an unknown skill name, or a
 *   `get` that resolves to no skills.
 */
function runSkillsCommand(args, jsonMode, fullOption = false, allOption = false) {
    const subcommand = args[0] ?? "list";
    const visibleSkills = () => discoverSkills(true).filter((candidate) => !candidate.hidden);
    // Writes text to stdout, adding a final newline only when it is missing.
    const writeWithNewline = (text) => {
        process.stdout.write(text.endsWith("\n") ? text : text + "\n");
    };
    // Resolves a skill by name or throws the user-facing "not found" error.
    const requireSkill = (skillName) => {
        const found = findSkill(skillName, true);
        if (!found) {
            throw new Error(`Skill not found: ${skillName}`);
        }
        return found;
    };

    if (subcommand === "list") {
        const listed = visibleSkills();
        if (jsonMode) {
            outputSkillsJson({ success: true, data: listed.map(({ name, description }) => ({ name, description })) });
        }
        else if (listed.length === 0) {
            console.log("No skills found");
        }
        else {
            const nameWidth = Math.max(...listed.map((item) => item.name.length));
            listed.forEach((item) => {
                console.log(` ${item.name.padEnd(nameWidth)} ${item.description}`);
            });
        }
        return;
    }

    if (subcommand === "get") {
        const full = fullOption || args.includes("--full");
        const getAll = allOption || args.includes("--all");
        const names = args.slice(1).filter((arg) => arg !== "--full" && arg !== "--all");
        const targets = getAll ? visibleSkills() : names.map(requireSkill);
        if (targets.length === 0) {
            throw new Error("No skill name provided. Usage: qmd skills get <name>");
        }
        if (jsonMode) {
            const data = targets.map((target) => {
                const record = { name: target.name, content: readSkillContent(target) };
                if (full) {
                    record.files = collectSkillFiles(target).map((file) => ({ path: file.relativePath, content: file.content }));
                }
                return record;
            });
            outputSkillsJson({ success: true, data });
            return;
        }
        for (const [position, target] of targets.entries()) {
            if (position > 0) {
                console.log("\n---\n");
            }
            writeWithNewline(readSkillContent(target));
            if (!full) {
                continue;
            }
            for (const file of collectSkillFiles(target)) {
                console.log(`\n--- ${file.relativePath} ---\n`);
                writeWithNewline(file.content);
            }
        }
        return;
    }

    if (subcommand === "path") {
        const requested = args[1];
        if (!requested) {
            const paths = getSkillSearchDirs(true);
            if (jsonMode) {
                outputSkillsJson({ success: true, data: { paths } });
            }
            else {
                for (const searchPath of paths) {
                    console.log(searchPath);
                }
            }
            return;
        }
        const located = requireSkill(requested);
        if (jsonMode) {
            outputSkillsJson({ success: true, data: { name: located.name, path: located.dir } });
        }
        else {
            console.log(located.dir);
        }
        return;
    }

    if (subcommand === "help") {
        showSkillsHelp();
        return;
    }

    throw new Error(`Unknown skills subcommand: ${subcommand}`);
}
2690
/**
 * Print usage for the `qmd skills` command group, one console.log call per line.
 */
function showSkillsHelp() {
    const helpLines = [
        "Usage: qmd skills <list|get|path> [options]",
        "",
        "Commands:",
        " list List bundled runtime skills",
        " get <name> Print a bundled runtime skill",
        " get <name> --full Include references/templates/scripts",
        " get --all Print all bundled runtime skills",
        " path [name] Print runtime skill directory path(s)",
        "",
        "Options:",
        " --json Print structured JSON",
    ];
    for (const helpLine of helpLines) {
        console.log(helpLine);
    }
}
2226
2703
  function ensureClaudeSymlink(linkPath, targetDir, force) {
2227
2704
  const parentDir = dirname(linkPath);
2228
2705
  if (pathExists(parentDir)) {
@@ -2272,7 +2749,7 @@ async function shouldCreateClaudeSymlink(linkPath, autoYes) {
2272
2749
  }
2273
2750
  async function installSkill(globalInstall, force, autoYes) {
2274
2751
  const installDir = getSkillInstallDir(globalInstall);
2275
- writeEmbeddedSkill(installDir, force);
2752
+ writeSkillInstall(installDir, force);
2276
2753
  console.log(`✓ Installed QMD skill to ${installDir}`);
2277
2754
  const claudeLinkPath = getClaudeSkillLinkPath(globalInstall);
2278
2755
  if (!(await shouldCreateClaudeSymlink(claudeLinkPath, autoYes))) {
@@ -2299,7 +2776,8 @@ function showHelp() {
2299
2776
  console.log(" qmd vsearch <query> - Vector similarity only");
2300
2777
  console.log(" qmd get <file>[:line] [-l N] - Show a single document, optional line slice");
2301
2778
  console.log(" qmd multi-get <pattern> - Batch fetch via glob or comma-separated list");
2302
- console.log(" qmd skill show/install - Show or install the packaged QMD skill");
2779
+ console.log(" qmd skills list/get/path - List and retrieve bundled runtime skills");
2780
+ console.log(" qmd skill show/install - Show or install the QMD skill");
2303
2781
  console.log(" qmd mcp - Start the MCP server (stdio transport for AI agents)");
2304
2782
  console.log(" qmd bench <fixture.json> - Run search quality benchmarks against a fixture file");
2305
2783
  console.log("");
@@ -2309,9 +2787,10 @@ function showHelp() {
2309
2787
  console.log(" qmd ls [collection[/path]] - Inspect indexed files");
2310
2788
  console.log("");
2311
2789
  console.log("Maintenance:");
2790
+ console.log(" qmd init - Create a project-local .qmd index");
2312
2791
  console.log(" qmd status - View index + collection health");
2313
2792
  console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)");
2314
- console.log(" qmd embed [-f] - Generate/refresh vector embeddings");
2793
+ console.log(" qmd embed [-f] [-c <name>] - Generate/refresh vector embeddings");
2315
2794
  console.log(" --max-docs-per-batch <n> - Cap docs loaded into memory per embedding batch");
2316
2795
  console.log(" --max-batch-mb <n> - Cap UTF-8 MB loaded into memory per embedding batch");
2317
2796
  console.log(" qmd cleanup - Clear caches, vacuum DB");
@@ -2352,6 +2831,7 @@ function showHelp() {
2352
2831
  console.log("");
2353
2832
  console.log("AI agents & integrations:");
2354
2833
  console.log(" - Run `qmd mcp` to expose the MCP server (stdio) to agents/IDEs.");
2834
+ console.log(" - Run `qmd skills get qmd --full` for version-matched agent instructions.");
2355
2835
  console.log(" - `qmd skill install` installs the QMD skill into ./.agents/skills/qmd.");
2356
2836
  console.log(" - Use `qmd skill install --global` for ~/.agents/skills/qmd.");
2357
2837
  console.log(" - `qmd --skill` is kept as an alias for `qmd skill show`.");
@@ -2368,6 +2848,7 @@ function showHelp() {
2368
2848
  console.log(" --full - Output full document instead of snippet");
2369
2849
  console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
2370
2850
  console.log(" --no-rerank - Skip LLM reranking (use RRF scores only, much faster on CPU)");
2851
+ console.log(" --no-gpu - Force CPU mode for llama.cpp operations (same as QMD_FORCE_CPU=1)");
2371
2852
  console.log(" --line-numbers - Include line numbers in output");
2372
2853
  console.log(" --explain - Include retrieval score traces (query --json/CLI)");
2373
2854
  console.log(" --files | --json | --csv | --md | --xml - Output format");
@@ -2383,10 +2864,531 @@ function showHelp() {
2383
2864
  console.log("");
2384
2865
  console.log(`Index: ${getDbPath()}`);
2385
2866
  }
2386
- async function showVersion() {
2867
/**
 * Print one `qmd doctor` result line: a green check mark when `ok`,
 * otherwise a yellow warning sign, followed by `label: details`.
 *
 * @param {string} label Name of the check.
 * @param {boolean} ok Whether the check passed.
 * @param {string} details Human-readable outcome.
 */
function doctorCheck(label, ok, details) {
    let mark;
    if (ok) {
        mark = `${c.green}✓${c.reset}`;
    }
    else {
        mark = `${c.yellow}⚠${c.reset}`;
    }
    console.log(`${mark} ${label}: ${details}`);
}
2871
/**
 * Format a count with en-US digit grouping (e.g. 1234567 -> "1,234,567").
 *
 * @param {number} n Value to format.
 * @returns {string} The value rendered by toLocaleString("en-US").
 */
function formatCount(n) {
    const locale = "en-US";
    return n.toLocaleString(locale);
}
2874
/**
 * Shorten a model identifier for display.
 *
 * `hf:` identifiers are reduced to the text after the last "/" (the whole
 * identifier is kept when that text is empty). Anything else longer than
 * 56 characters is cut to its first 53 characters plus "...".
 *
 * @param {string} model Model URI or path.
 * @returns {string} Display name.
 */
function shortModelName(model) {
    if (!model.startsWith("hf:")) {
        if (model.length <= 56) {
            return model;
        }
        return `${model.slice(0, 53)}...`;
    }
    const lastSegment = model.slice(model.lastIndexOf("/") + 1);
    return lastSegment || model;
}
2880
/**
 * De-duplicate doctor "next step" hints, keeping first-seen order.
 *
 * When any hint mentions `qmd embed --force`, every other hint that
 * mentions `qmd embed` is dropped unless it starts with
 * "Run `qmd embed --force`".
 *
 * @param {string[]} steps Collected hints, possibly with duplicates.
 * @returns {string[]} Filtered, de-duplicated hints.
 */
function normalizedDoctorNextSteps(steps) {
    const distinct = [...new Set(steps)];
    let forceSuggested = false;
    for (const hint of distinct) {
        if (hint.includes("qmd embed --force")) {
            forceSuggested = true;
            break;
        }
    }
    if (!forceSuggested) {
        return distinct;
    }
    return distinct.filter((hint) => {
        if (hint.startsWith("Run `qmd embed --force`")) {
            return true;
        }
        return !hint.includes("qmd embed");
    });
}
2887
/**
 * Abbreviate a `<hash>_<seq>` key for display.
 *
 * With an underscore present, the result is the first 12 characters, an
 * underscore, and the text after the last underscore. Without one, values
 * longer than 18 characters are cut to 18 plus "...".
 *
 * @param {string} hashSeq Key to abbreviate.
 * @returns {string} Shortened key.
 */
function shortHashSeq(hashSeq) {
    const separatorAt = hashSeq.lastIndexOf("_");
    if (separatorAt >= 0) {
        const prefix = hashSeq.slice(0, 12);
        const seq = hashSeq.slice(separatorAt + 1);
        return `${prefix}_${seq}`;
    }
    if (hashSeq.length > 18) {
        return `${hashSeq.slice(0, 18)}...`;
    }
    return hashSeq;
}
2893
/**
 * Reinterpret a byte view as 32-bit floats.
 *
 * The viewed byte range is copied out of the backing buffer first, so the
 * result owns its memory and starts at offset 0 whatever `byteOffset` was.
 *
 * @param {Uint8Array} bytes Byte view (e.g. a Buffer) over the raw floats.
 * @returns {Float32Array} Decoded values.
 */
function decodeStoredEmbedding(bytes) {
    const { buffer, byteOffset, byteLength } = bytes;
    const owned = buffer.slice(byteOffset, byteOffset + byteLength);
    return new Float32Array(owned);
}
2896
/**
 * Cosine distance (1 - cosine similarity) between two numeric vectors.
 *
 * @param {ArrayLike<number>} a First vector.
 * @param {ArrayLike<number>} b Second vector.
 * @returns {number} The distance, or Infinity when the vectors are empty,
 *   differ in length, or either has zero magnitude.
 */
function cosineDistance(a, b) {
    const size = a.length;
    if (size === 0 || size !== b.length) {
        return Number.POSITIVE_INFINITY;
    }
    let dot = 0;
    let sumSqA = 0;
    let sumSqB = 0;
    let k = 0;
    while (k < size) {
        // Missing slots count as 0 rather than poisoning the sums with NaN.
        const x = a[k] ?? 0;
        const y = b[k] ?? 0;
        dot += x * y;
        sumSqA += x * x;
        sumSqB += y * y;
        k += 1;
    }
    const degenerate = sumSqA === 0 || sumSqB === 0;
    if (degenerate) {
        return Number.POSITIVE_INFINITY;
    }
    return 1 - (dot / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB)));
}
2913
/**
 * Prepare a model file path for diagnostic output by passing it through
 * sanitizeDiagnosticMessage().
 *
 * @param {string} path Model file path.
 * @returns {string} Sanitized path text.
 */
function formatModelDiagnosticPath(path) {
    const displayPath = sanitizeDiagnosticMessage(path);
    return displayPath;
}
2916
/**
 * Locate a usable local GGUF file for a model reference.
 *
 * For `hf:` references, every regular file in DEFAULT_MODEL_CACHE_DIR whose
 * name contains the reference's last path segment is inspected, and the
 * first valid one wins. Any other reference is inspected as a file path.
 *
 * @param {string} model `hf:` reference or local path.
 * @returns {{path: string|null, invalid: string[]}} `path` is the valid
 *   file (or null); `invalid` lists "<path>: <details>" for candidates that
 *   were inspected and rejected before a valid one was found.
 */
function findCachedModelInspection(model) {
    const invalid = [];
    if (!model.startsWith("hf:")) {
        const local = inspectGgufFile(model);
        if (local.valid) {
            return { path: model, invalid };
        }
        if (local.exists) {
            invalid.push(`${formatModelDiagnosticPath(model)}: ${local.details}`);
        }
        return { path: null, invalid };
    }
    const filename = model.split("/").pop();
    if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) {
        return { path: null, invalid };
    }
    const cacheEntries = readdirSync(DEFAULT_MODEL_CACHE_DIR, { withFileTypes: true });
    for (const cacheEntry of cacheEntries) {
        const matches = cacheEntry.isFile() && cacheEntry.name.includes(filename);
        if (!matches) {
            continue;
        }
        const candidate = pathJoin(DEFAULT_MODEL_CACHE_DIR, cacheEntry.name);
        const report = inspectGgufFile(candidate);
        if (report.valid) {
            return { path: candidate, invalid };
        }
        invalid.push(`${formatModelDiagnosticPath(candidate)}: ${report.details}`);
    }
    return { path: null, invalid };
}
2941
/**
 * Sanitize an environment variable value for display and cap it at 96
 * characters (longer values become the first 93 characters plus "...").
 *
 * @param {string} value Raw environment value.
 * @returns {string} Sanitized, possibly truncated text.
 */
function envValueForDisplay(value) {
    const clean = sanitizeDiagnosticMessage(value);
    if (clean.length <= 96) {
        return clean;
    }
    return `${clean.slice(0, 93)}...`;
}
2945
/**
 * Report which QMD-relevant environment variables are currently set.
 *
 * A variable counts as set when its trimmed value is non-empty. Model
 * variables get a consequence that depends on whether the index config
 * pins a different model for the same role.
 *
 * @param {{embed: string, generate: string, rerank: string}} activeModels
 *   Models actually in effect.
 * @param {{embed?: string, generate?: string, rerank?: string}} [configModels={}]
 *   Models pinned by the index config.
 * @returns {Array<{name: string, value: string, consequence: string}>}
 *   One entry per set variable, in the fixed order of the table below.
 */
function collectEnvironmentOverrides(activeModels, configModels = {}) {
    // Entries with `modelKey` are model overrides; the rest carry fixed text.
    const table = [
        { name: "INDEX_PATH", consequence: "overrides the SQLite index path; QMD reads/writes a different database" },
        { name: "QMD_CONFIG_DIR", consequence: "overrides the QMD config directory and takes precedence over XDG_CONFIG_HOME" },
        { name: "XDG_CONFIG_HOME", consequence: "moves QMD config to $XDG_CONFIG_HOME/qmd when QMD_CONFIG_DIR is not set" },
        { name: "XDG_CACHE_HOME", consequence: "moves the default index cache, model cache, and MCP daemon PID files" },
        { name: "QMD_EMBED_MODEL", modelKey: "embed" },
        { name: "QMD_GENERATE_MODEL", modelKey: "generate" },
        { name: "QMD_RERANK_MODEL", modelKey: "rerank" },
        { name: "QMD_FORCE_CPU", consequence: "forces llama.cpp to bypass GPU backends; embeddings/query will be slower but GPU crashes are avoided" },
        { name: "QMD_LLAMA_GPU", consequence: "selects llama.cpp GPU backend (metal/cuda/vulkan) or disables GPU when set to false/off/0" },
        { name: "QMD_DOCTOR_DEVICE_PROBE", consequence: "controls qmd doctor native device probing; 0/off skips GPU probing" },
        { name: "QMD_EMBED_PARALLELISM", consequence: "overrides embedding parallel context count; too high can exhaust RAM/VRAM" },
        { name: "QMD_EXPAND_CONTEXT_SIZE", consequence: "overrides query expansion context size; larger values use more memory" },
        { name: "QMD_RERANK_CONTEXT_SIZE", consequence: "overrides reranker context size; larger values use more memory" },
        { name: "QMD_EMBED_CONTEXT_SIZE", consequence: "overrides embed context size; larger values use more memory" },
        { name: "QMD_EDITOR_URI", consequence: "overrides clickable editor link template in terminal output" },
        { name: "QMD_SKILLS_DIR", consequence: "overrides where qmd skills are discovered from" },
        { name: "QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT", consequence: "disables macOS JSON-query safe exit workaround; may re-expose Metal finalizer crashes" },
        { name: "NO_COLOR", consequence: "disables colored terminal output" },
        { name: "CI", consequence: "disables real LLM operations inside QMD's LlamaCpp wrapper" },
        { name: "HF_ENDPOINT", consequence: "changes Hugging Face download endpoint used when pulling models" },
        { name: "QMD_WRAPPER_CAPTURE", consequence: "test/debug hook for the qmd shell wrapper; should not be set in normal use" },
        { name: "WSL_DISTRO_NAME", consequence: "enables WSL path handling heuristics" },
        { name: "WSL_INTEROP", consequence: "enables WSL path handling heuristics" },
    ];
    const overrides = [];
    for (const { name, modelKey, consequence } of table) {
        const isModel = modelKey !== undefined;
        // Read before the env check, as each model row's active value always was.
        const active = isModel ? activeModels[modelKey] : undefined;
        const raw = process.env[name]?.trim();
        if (!raw) {
            continue;
        }
        let effect = consequence;
        if (isModel) {
            const configured = configModels[modelKey];
            if (configured && configured !== raw) {
                effect = `set but ignored because index models.${modelKey} is configured as ${configured}`;
            }
            else {
                effect = `sets the active ${modelKey} model to ${active}; changes embedding/search semantics and may require \`qmd pull\` plus \`qmd embed\``;
            }
        }
        overrides.push({ name, value: envValueForDisplay(raw), consequence: effect });
    }
    return overrides;
}
2988
/**
 * Doctor check: load the index config and report how many collections it
 * defines.
 *
 * @param {string[]} nextSteps Hint list; a follow-up hint is appended when
 *   the config has no collections or fails to load.
 * @returns {{config: object|null, valid: boolean}} The loaded config with
 *   `valid: true`, or `{ config: null, valid: false }` when anything in
 *   the check throws.
 */
function checkDoctorIndexConfig(nextSteps) {
    try {
        const config = loadConfig();
        const total = Object.keys(config.collections ?? {}).length;
        if (total !== 0) {
            const noun = total === 1 ? "collection" : "collections";
            doctorCheck("index config", true, `${formatCount(total)} ${noun} configured`);
        }
        else {
            doctorCheck("index config", false, "no collections configured. Next: `qmd collection add .`");
            nextSteps.push("Run `qmd collection add . --name <name>` from the folder you want to index, or edit .qmd/index.yml manually.");
        }
        return { config, valid: true };
    }
    catch (error) {
        const rawMessage = error instanceof Error ? error.message : String(error);
        const message = sanitizeDiagnosticMessage(rawMessage);
        const configPath = getConfigPath();
        doctorCheck("index config", false, `invalid index.yml at ${configPath}: ${message}. Next: fix the YAML and rerun \`qmd doctor\``);
        nextSteps.push(`Fix invalid YAML in ${configPath}, then rerun \`qmd doctor\`.`);
        return { config: null, valid: false };
    }
}
3009
/**
 * Doctor check: print which environment overrides are in effect.
 *
 * Passes with "none" when nothing is set; otherwise warns with the count
 * and lists each override as `name=value: consequence`.
 *
 * @param {object} activeModels Forwarded to collectEnvironmentOverrides().
 * @param {object} [configModels={}] Forwarded to collectEnvironmentOverrides().
 */
function checkEnvironmentOverrides(activeModels, configModels = {}) {
    const found = collectEnvironmentOverrides(activeModels, configModels);
    const anySet = found.length !== 0;
    if (!anySet) {
        doctorCheck("environment overrides", true, "none");
        return;
    }
    doctorCheck("environment overrides", false, `${found.length} set`);
    found.forEach(({ name, value, consequence }) => {
        console.log(` - ${name}=${value}: ${consequence}`);
    });
}
3020
/**
 * Doctor check: report whether the embedding, generation and reranking
 * models differ from the built-in defaults.
 *
 * For each role the first matching case produces a note:
 *   1. the env variable is set and is the active model;
 *   2. the index config pins a non-default model;
 *   3. the env variable is set but is not the active model.
 *
 * @param {{embed: string, generate: string, rerank: string}} activeModels
 *   Models actually in effect.
 * @param {{embed?: string, generate?: string, rerank?: string}} [configModels={}]
 *   Models pinned by the index config.
 */
function checkModelDefaults(activeModels, configModels = {}) {
    const roles = [
        { role: "embedding", key: "embed", active: activeModels.embed, configured: configModels.embed, defaultModel: DEFAULT_EMBED_MODEL, envName: "QMD_EMBED_MODEL", envValue: process.env.QMD_EMBED_MODEL },
        { role: "generation", key: "generate", active: activeModels.generate, configured: configModels.generate, defaultModel: DEFAULT_QUERY_MODEL, envName: "QMD_GENERATE_MODEL", envValue: process.env.QMD_GENERATE_MODEL },
        { role: "reranking", key: "rerank", active: activeModels.rerank, configured: configModels.rerank, defaultModel: DEFAULT_RERANK_MODEL, envName: "QMD_RERANK_MODEL", envValue: process.env.QMD_RERANK_MODEL },
    ];
    // Returns the note for one role, or null when it is on its default.
    const describe = ({ role, active, configured, defaultModel, envName, envValue }) => {
        const fromEnv = envValue?.trim();
        if (fromEnv && active === fromEnv) {
            return `${role}: env ${envName}=${active} (default ${defaultModel}; might be ok)`;
        }
        if (configured && configured !== defaultModel) {
            return `${role}: index ${configured} (default ${defaultModel}; might be ok)`;
        }
        if (fromEnv && active !== fromEnv) {
            return `${role}: ${envName} is set to ${fromEnv} but index config uses ${active}`;
        }
        return null;
    };
    const notes = roles.map(describe).filter((note) => note !== null);
    if (notes.length === 0) {
        doctorCheck("model defaults", true, "using QMD codebase defaults");
    }
    else {
        doctorCheck("model defaults", false, `non-default model configuration: ${notes.join("; ")}`);
    }
}
3045
/**
 * Doctor check: verify that each distinct active model has a valid GGUF
 * file available locally.
 *
 * Roles that share a model are reported together as `role+role: model`.
 *
 * @param {{embed: string, generate: string, rerank: string}} activeModels
 *   Models actually in effect.
 * @param {string[]} nextSteps Hint list; a `qmd pull` hint is appended when
 *   a model is missing, or a `qmd pull --refresh` hint when a cached file
 *   is invalid.
 */
function checkModelCache(activeModels, nextSteps) {
    const rolesByModel = new Map();
    const assignments = [
        ["embedding", activeModels.embed],
        ["generation", activeModels.generate],
        ["reranking", activeModels.rerank],
    ];
    for (const [role, model] of assignments) {
        const sharedRoles = rolesByModel.get(model) ?? [];
        rolesByModel.set(model, [...sharedRoles, role]);
    }
    const cached = [];
    const missing = [];
    const invalid = [];
    for (const [model, roles] of rolesByModel) {
        const label = `${roles.join("+")}: ${model}`;
        const inspection = findCachedModelInspection(model);
        for (const detail of inspection.invalid) {
            invalid.push(`${label} (${detail})`);
        }
        (inspection.path ? cached : missing).push(label);
    }
    const hasInvalid = invalid.length > 0;
    if (missing.length === 0 && !hasInvalid) {
        doctorCheck("model cache", true, `${cached.length} active ${cached.length === 1 ? "model is" : "models are"} downloaded and valid GGUF`);
        return;
    }
    const parts = [];
    if (hasInvalid) {
        parts.push(`invalid ${invalid.length}: ${invalid.join("; ")}`);
    }
    if (missing.length > 0) {
        parts.push(`missing ${missing.length}/${rolesByModel.size}: ${missing.join("; ")}`);
    }
    const next = hasInvalid
        ? "Next: run `qmd pull --refresh` (or remove the bad cached file)"
        : "Next: run `qmd pull`";
    doctorCheck("model cache", false, `${parts.join("; ")}. ${next}`);
    nextSteps.push(hasInvalid
        ? "Run `qmd pull --refresh` to replace invalid cached model files, or delete the listed file and rerun `qmd pull`."
        : "Run `qmd pull` to download missing embedding/generation/reranking models before `qmd embed` or `qmd query`.");
}
3089
/**
 * Doctor check: re-embed a few randomly chosen stored chunks and compare
 * the fresh vectors with the ones stored in `vectors_vec`.
 *
 * A sample counts as a mismatch when its chunk can no longer be produced,
 * embedding fails, the stored vector is missing, or the cosine distance
 * between fresh and stored vectors exceeds 0.0001.
 *
 * @param {object} db Database handle exposing `prepare(sql).get()/.all()`.
 * @param {string} model Embedding model whose vectors are checked.
 * @param {string} fingerprint Value matched against `embed_fingerprint`.
 * @param {number} [sampleSize=3] Maximum number of chunks to sample.
 * @returns {Promise<{ok: boolean, details: string}>} Check outcome.
 */
async function checkEmbeddingVectorSamples(db, model, fingerprint, sampleSize = 3) {
    const activeDocs = db.prepare(`SELECT COUNT(*) AS count FROM documents WHERE active = 1`).get().count;
    if (activeDocs === 0) {
        return { ok: true, details: "no active documents indexed" };
    }
    const vecTableExists = db.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
    if (!vecTableExists) {
        return { ok: false, details: "no vector table to test; please run qmd embed again" };
    }
    const samples = db.prepare(`
        SELECT cv.hash, cv.seq, c.doc AS body, MIN(d.path) AS path
        FROM content_vectors cv
        JOIN documents d ON d.hash = cv.hash AND d.active = 1
        JOIN content c ON c.hash = cv.hash
        WHERE cv.model = ? AND cv.embed_fingerprint = ?
        GROUP BY cv.hash, cv.seq, c.doc
        ORDER BY random()
        LIMIT ?
    `).all(model, fingerprint, sampleSize);
    if (samples.length === 0) {
        return { ok: false, details: "no current embedded chunks to test; please run qmd embed again" };
    }
    const threshold = 0.0001;
    // Re-embeds one sample; resolves to a mismatch description or null when it matches.
    const verifySample = async (session, sample) => {
        const hashSeq = `${sample.hash}_${sample.seq}`;
        const tag = shortHashSeq(hashSeq);
        const chunks = await chunkDocumentByTokens(sample.body, undefined, undefined, undefined, sample.path, undefined, session.signal);
        const chunk = chunks[sample.seq];
        if (!chunk) {
            return `${tag}: chunk no longer exists`;
        }
        const title = extractTitle(sample.body, sample.path);
        const fresh = await session.embed(formatDocForEmbedding(chunk.text, title, model), { model });
        if (!fresh) {
            return `${tag}: embedding failed`;
        }
        const stored = db.prepare(`SELECT embedding FROM vectors_vec WHERE hash_seq = ?`).get(hashSeq);
        if (!stored) {
            return `${tag}: stored vector missing`;
        }
        const distance = cosineDistance(fresh.embedding, decodeStoredEmbedding(stored.embedding));
        return distance > threshold ? `${tag}: stored vector distance ${distance.toFixed(6)}` : null;
    };
    const mismatches = [];
    await withLLMSession(async (session) => {
        // Sequential on purpose: one sample is fully processed before the next starts.
        for (const sample of samples) {
            const problem = await verifySample(session, sample);
            if (problem !== null) {
                mismatches.push(problem);
            }
        }
    }, { maxDuration: 10 * 60 * 1000, name: "doctorEmbeddingVectorSample" });
    if (mismatches.length === 0) {
        const noun = samples.length === 1 ? "chunk" : "chunks";
        return {
            ok: true,
            details: `${samples.length} sampled ${noun} reproduce stored vectors`,
        };
    }
    return {
        ok: false,
        details: `${mismatches.length}/${samples.length} sampled chunks differ from stored vectors (${mismatches[0]}). Rebuild with \`qmd embed --force\``,
    };
}
3150
/**
 * Check whether any of the given directories contains a shared library
 * named `libraryBaseName`, either exactly or with a dotted suffix
 * (e.g. "libcuda.so" matches "libcuda.so" and "libcuda.so.1").
 *
 * Empty, missing and unreadable directories are skipped.
 *
 * @param {string} libraryBaseName File name to look for.
 * @param {Iterable<string>} dirs Directories to scan (not recursive).
 * @returns {boolean} True on the first match, false when nothing matches.
 */
function hasLibraryInDirs(libraryBaseName, dirs) {
    const versionedPrefix = `${libraryBaseName}.`;
    const isMatch = (fileName) => fileName === libraryBaseName || fileName.startsWith(versionedPrefix);
    for (const dir of dirs) {
        if (!dir || !existsSync(dir)) {
            continue;
        }
        let names;
        try {
            names = readdirSync(dir);
        }
        catch {
            /* ignore unreadable system library dirs */
            continue;
        }
        if (names.some(isMatch)) {
            return true;
        }
    }
    return false;
}
3164
+ function linuxCudaRuntimeDiagnostic() {
3165
+ if (process.platform !== "linux")
3166
+ return null;
3167
+ const dirs = new Set();
3168
+ for (const value of [process.env.LD_LIBRARY_PATH, process.env.CUDA_PATH]) {
3169
+ for (const part of (value ?? "").split(":")) {
3170
+ if (part)
3171
+ dirs.add(part);
3172
+ }
3173
+ }
3174
+ if (process.env.CUDA_PATH) {
3175
+ dirs.add(pathJoin(process.env.CUDA_PATH, "lib64"));
3176
+ dirs.add(pathJoin(process.env.CUDA_PATH, "targets", "x86_64-linux", "lib"));
3177
+ }
3178
+ for (const dir of ["/usr/lib", "/usr/lib64", "/usr/lib/x86_64-linux-gnu", "/usr/local/cuda/lib64", "/usr/local/cuda/targets/x86_64-linux/lib"]) {
3179
+ dirs.add(dir);
3180
+ }
3181
+ try {
3182
+ for (const entry of readdirSync("/usr/local")) {
3183
+ if (!entry.toLowerCase().startsWith("cuda-"))
3184
+ continue;
3185
+ const cudaRoot = pathJoin("/usr/local", entry);
3186
+ dirs.add(pathJoin(cudaRoot, "lib64"));
3187
+ dirs.add(pathJoin(cudaRoot, "targets", "x86_64-linux", "lib"));
3188
+ }
3189
+ }
3190
+ catch { /* /usr/local may not be readable in restricted environments */ }
3191
+ const searchDirs = [...dirs];
3192
+ const hasDriver = hasLibraryInDirs("libcuda.so", searchDirs) || hasLibraryInDirs("libnvidia-ml.so", searchDirs);
3193
+ if (!hasDriver)
3194
+ return null;
3195
+ const cudaLibraries = [
3196
+ ["libcudart.so", "CUDA runtime"],
3197
+ ["libcublas.so", "cuBLAS"],
3198
+ ["libcublasLt.so", "cuBLASLt"],
3199
+ ];
3200
+ const missing = cudaLibraries
3201
+ .filter(([library]) => !hasLibraryInDirs(library, searchDirs))
3202
+ .map(([, label]) => label);
3203
+ if (missing.length === 0)
3204
+ return null;
3205
+ return `NVIDIA driver libraries are visible, but CUDA user-space libraries are missing from loader paths (${missing.join(", ")})`;
3206
+ }
3207
+ async function runDoctorDeviceChecks(nextSteps) {
3208
+ const mode = configuredGpuModeLabel();
3209
+ doctorCheck("device mode", true, mode);
3210
+ const skipProbe = ["0", "false", "off", "no", "skip"].includes((process.env.QMD_DOCTOR_DEVICE_PROBE ?? "").trim().toLowerCase());
3211
+ if (skipProbe) {
3212
+ doctorCheck("device probe", false, "skipped by QMD_DOCTOR_DEVICE_PROBE=0. Next: unset it and rerun `qmd doctor` to verify GPU/CPU acceleration");
3213
+ nextSteps.push("Unset `QMD_DOCTOR_DEVICE_PROBE` and rerun `qmd doctor` when you want to verify llama.cpp device acceleration.");
3214
+ return;
3215
+ }
3216
+ const crashHint = "Probing native llama backend now. If qmd crashes here, rerun with `QMD_FORCE_CPU=1 qmd doctor` (or `QMD_DOCTOR_DEVICE_PROBE=0 qmd doctor` to skip this probe).";
3217
+ if (process.stdout.isTTY) {
3218
+ process.stdout.write(`${c.dim}${crashHint}${c.reset}`);
3219
+ }
3220
+ try {
3221
+ const device = await getDefaultLlamaCpp().getDeviceInfo({ allowBuild: false });
3222
+ if (process.stdout.isTTY) {
3223
+ process.stdout.write(`\r${" ".repeat(crashHint.length)}\r`);
3224
+ }
3225
+ if (device.gpu) {
3226
+ const gpuLabel = device.gpu === "metal" && process.platform === "darwin"
3227
+ ? "metal (macOS Metal backend)"
3228
+ : String(device.gpu);
3229
+ const parts = [`GPU ${gpuLabel}`, `offloading ${device.gpuOffloading ? "enabled" : "disabled"}`];
3230
+ if (device.gpuDevices.length > 0)
3231
+ parts.push(`devices: ${summarizeDeviceNames(device.gpuDevices)}`);
3232
+ if (device.vram)
3233
+ parts.push(`VRAM ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
3234
+ parts.push(`${device.cpuCores} CPU math cores`);
3235
+ doctorCheck("device probe", device.gpuOffloading, device.gpuOffloading
3236
+ ? parts.join("; ")
3237
+ : `${parts.join("; ")}. Next: check QMD_LLAMA_GPU and llama.cpp backend support`);
3238
+ if (!device.gpuOffloading) {
3239
+ nextSteps.push("GPU was detected but offloading is disabled; check `QMD_LLAMA_GPU=metal|cuda|vulkan` and rerun `qmd doctor`.");
3240
+ }
3241
+ }
3242
+ else {
3243
+ const cudaDiagnostic = linuxCudaRuntimeDiagnostic();
3244
+ const diagnosticSuffix = cudaDiagnostic ? ` ${cudaDiagnostic}.` : "";
3245
+ doctorCheck("device probe", false, `running on CPU (${device.cpuCores} math cores).${diagnosticSuffix} Next: install/configure Metal, CUDA, or Vulkan for faster embeddings, or set QMD_FORCE_CPU=1 to make CPU mode explicit`);
3246
+ if (cudaDiagnostic) {
3247
+ nextSteps.push(`${cudaDiagnostic}; install CUDA runtime/cuBLAS libraries or add their directory to LD_LIBRARY_PATH, then rerun \`qmd doctor\`.`);
3248
+ }
3249
+ else {
3250
+ nextSteps.push("Vector operations are running on CPU; install/configure Metal, CUDA, or Vulkan if embedding/query performance is too slow.");
3251
+ }
3252
+ }
3253
+ }
3254
+ catch (error) {
3255
+ if (process.stdout.isTTY) {
3256
+ process.stdout.write(`\r${" ".repeat(crashHint.length)}\r`);
3257
+ }
3258
+ const message = error instanceof Error ? sanitizeDiagnosticMessage(error.message) : sanitizeDiagnosticMessage(String(error));
3259
+ doctorCheck("device probe", false, `probe failed: ${message}. Next: run with QMD_FORCE_CPU=1 to bypass GPU probing, or set QMD_LLAMA_GPU=metal|cuda|vulkan and retry`);
3260
+ nextSteps.push("GPU probe failed; try `QMD_FORCE_CPU=1 qmd doctor` to confirm CPU fallback, then fix GPU drivers/backend if acceleration is expected.");
3261
+ }
3262
+ }
3263
+ async function showDoctor() {
3264
+ const storeInstance = getStore();
3265
+ const db = storeInstance.db;
3266
+ const pkg = readPackageJson();
3267
+ const activeModels = resolveModelsForCli();
3268
+ const embedModel = activeModels.embed;
3269
+ const fingerprint = getEmbeddingFingerprint(embedModel);
3270
+ const nextSteps = [];
3271
+ console.log(`${c.bold}QMD Doctor${c.reset}\n`);
3272
+ console.log(`Index: ${getDbPath()}`);
3273
+ console.log(`Runtime: ${isBun ? "bun:sqlite" : "better-sqlite3"}`);
3274
+ try {
3275
+ const row = db.prepare(`SELECT sqlite_version() AS version`).get();
3276
+ doctorCheck("SQLite runtime", true, row.version);
3277
+ }
3278
+ catch (error) {
3279
+ doctorCheck("SQLite runtime", false, error instanceof Error ? error.message : String(error));
3280
+ }
3281
+ const betterSqliteVersion = pkg.dependencies?.["better-sqlite3"] ?? pkg.devDependencies?.["better-sqlite3"] ?? "not declared";
3282
+ doctorCheck("better-sqlite3 package", true, String(betterSqliteVersion));
3283
+ try {
3284
+ const row = db.prepare(`SELECT vec_version() AS version`).get();
3285
+ doctorCheck("sqlite-vec", true, row.version);
3286
+ }
3287
+ catch (error) {
3288
+ doctorCheck("sqlite-vec", false, error instanceof Error ? error.message : String(error));
3289
+ }
3290
+ const configCheck = checkDoctorIndexConfig(nextSteps);
3291
+ const configModels = configCheck.config?.models ?? {};
3292
+ checkEnvironmentOverrides(activeModels, configModels);
3293
+ checkModelDefaults(activeModels, configModels);
3294
+ checkModelCache(activeModels, nextSteps);
3295
+ await runDoctorDeviceChecks(nextSteps);
3296
+ try {
3297
+ const adoption = await maybeAdoptLegacyEmbeddingFingerprint(storeInstance, embedModel);
3298
+ if (adoption.checked || adoption.adopted > 0) {
3299
+ doctorCheck("legacy fingerprint adoption", adoption.adopted > 0, adoption.adopted > 0 ? `adopted ${adoption.adopted} legacy chunks; ${adoption.reason}` : adoption.reason);
3300
+ }
3301
+ }
3302
+ catch (error) {
3303
+ doctorCheck("legacy fingerprint adoption", false, error instanceof Error ? error.message : String(error));
3304
+ }
3305
+ try {
3306
+ const pending = getHashesNeedingEmbedding(db, undefined, embedModel);
3307
+ doctorCheck("embedding freshness", pending === 0, pending === 0 ? "all active documents match current fingerprint" : `${formatCount(pending)} active documents need embeddings. Next: \`qmd embed\``);
3308
+ if (pending > 0) {
3309
+ nextSteps.push(`Run \`qmd embed\` to generate ${formatCount(pending)} missing/stale document embeddings.`);
3310
+ }
3311
+ }
3312
+ catch (error) {
3313
+ doctorCheck("embedding freshness", false, error instanceof Error ? error.message : String(error));
3314
+ }
3315
+ try {
3316
+ const rows = db.prepare(`
3317
+ SELECT model, embed_fingerprint AS fingerprint, COUNT(DISTINCT hash) AS docs, COUNT(*) AS chunks
3318
+ FROM content_vectors
3319
+ GROUP BY model, embed_fingerprint
3320
+ ORDER BY chunks DESC, model, embed_fingerprint
3321
+ `).all();
3322
+ const uniqueFingerprints = new Set(rows.map(row => row.fingerprint));
3323
+ const offCurrent = rows.filter(row => row.model === embedModel && row.fingerprint !== fingerprint);
3324
+ const ok = rows.length === 0 || (uniqueFingerprints.size === 1 && rows[0]?.fingerprint === fingerprint && offCurrent.length === 0);
3325
+ const currentDocs = rows
3326
+ .filter(row => row.model === embedModel && row.fingerprint === fingerprint)
3327
+ .reduce((sum, row) => sum + row.docs, 0);
3328
+ const otherDocs = rows.reduce((sum, row) => sum + row.docs, 0) - currentDocs;
3329
+ const groups = rows.map(row => {
3330
+ const label = row.fingerprint === fingerprint ? "current" : (row.fingerprint || "legacy");
3331
+ return `${shortModelName(row.model)}:${label} ${formatCount(row.docs)} docs/${formatCount(row.chunks)} chunks`;
3332
+ }).join("; ");
3333
+ const namedFingerprintRows = rows.filter(row => row.fingerprint);
3334
+ const namedFingerprints = [...new Set(namedFingerprintRows.map(row => row.fingerprint))];
3335
+ if (namedFingerprints.length > 1) {
3336
+ const namedGroups = namedFingerprintRows
3337
+ .map(row => `${row.fingerprint}${row.fingerprint === fingerprint ? " (current)" : ""}: ${shortModelName(row.model)} ${formatCount(row.docs)} docs/${formatCount(row.chunks)} chunks`)
3338
+ .join("; ");
3339
+ doctorCheck("mixed named embedding fingerprints", false, `content_vectors contains ${namedFingerprints.length} named fingerprints: ${namedGroups}. Next: \`qmd embed\` or \`qmd embed --force\``);
3340
+ nextSteps.push("Run `qmd embed` to converge mixed named embedding fingerprints; use `qmd embed --force` if old named fingerprints or vector sample mismatches remain.");
3341
+ }
3342
+ const details = rows.length === 0
3343
+ ? `no vectors yet; current fingerprint ${fingerprint}`
3344
+ : ok
3345
+ ? `${formatCount(currentDocs)} docs on current fingerprint (${fingerprint})`
3346
+ : `${formatCount(currentDocs)} docs current, ${formatCount(otherDocs)} docs legacy/stale. ${groups}. Next: \`qmd embed\``;
3347
+ doctorCheck("embedding fingerprints", ok, details);
3348
+ if (!ok) {
3349
+ nextSteps.push("Run `qmd embed` to migrate active documents to the current embedding fingerprint; use `qmd embed --force` if vector samples still fail afterward.");
3350
+ }
3351
+ }
3352
+ catch (error) {
3353
+ doctorCheck("embedding fingerprints", false, error instanceof Error ? error.message : String(error));
3354
+ }
3355
+ try {
3356
+ const vectorSample = await checkEmbeddingVectorSamples(db, embedModel, fingerprint);
3357
+ doctorCheck("embedding vector sample", vectorSample.ok, vectorSample.details);
3358
+ if (!vectorSample.ok) {
3359
+ nextSteps.push("Run `qmd embed --force` to rebuild existing vectors that no longer reproduce under the current embedding pipeline.");
3360
+ }
3361
+ }
3362
+ catch (error) {
3363
+ const message = error instanceof Error ? sanitizeDiagnosticMessage(error.message) : sanitizeDiagnosticMessage(String(error));
3364
+ doctorCheck("embedding vector sample", false, `${message}; rebuild with \`qmd embed --force\``);
3365
+ nextSteps.push("Run `qmd embed --force` to rebuild existing vectors, then rerun `qmd doctor`.");
3366
+ }
3367
+ const steps = normalizedDoctorNextSteps(nextSteps);
3368
+ if (steps.length > 0) {
3369
+ console.log(`\n${c.bold}Recommended next step${steps.length === 1 ? "" : "s"}${c.reset}`);
3370
+ for (const step of steps) {
3371
+ console.log(` - ${step}`);
3372
+ }
3373
+ }
3374
+ closeDb();
3375
+ }
3376
+ function printDoctorHint() {
3377
+ console.error("If qmd still behaves unexpectedly, run 'qmd doctor' for diagnostics.");
3378
+ }
3379
+ function exitWithError(error, code = 1) {
3380
+ console.error(error instanceof Error ? error.message : String(error));
3381
+ printDoctorHint();
3382
+ process.exit(code);
3383
+ }
3384
+ function readPackageJson() {
2387
3385
  const scriptDir = dirname(fileURLToPath(import.meta.url));
2388
3386
  const pkgPath = resolve(scriptDir, "..", "..", "package.json");
2389
- const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
3387
+ return JSON.parse(readFileSync(pkgPath, "utf-8"));
3388
+ }
3389
+ async function showVersion() {
3390
+ const scriptDir = dirname(fileURLToPath(import.meta.url));
3391
+ const pkg = readPackageJson();
2390
3392
  let commit = "";
2391
3393
  try {
2392
3394
  commit = execSync(`git -C ${scriptDir} rev-parse --short HEAD`, { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
@@ -2405,6 +3407,10 @@ const isMain = argv1 === __filename
2405
3407
  || argv1?.endsWith("/qmd.js")
2406
3408
  || (argv1 != null && realpathSync(argv1) === __filename);
2407
3409
  if (isMain) {
3410
+ // Flip to production mode only when this module is executed as the CLI
3411
+ // entrypoint, not when imported for its exports. Tests must set INDEX_PATH
3412
+ // or use createStore() with an explicit path.
3413
+ enableProductionMode();
2408
3414
  const cli = parseCLI();
2409
3415
  if (cli.values.version) {
2410
3416
  await showVersion();
@@ -2418,8 +3424,8 @@ if (isMain) {
2418
3424
  console.log("Usage: qmd skill <show|install> [options]");
2419
3425
  console.log("");
2420
3426
  console.log("Commands:");
2421
- console.log(" show Print the packaged QMD skill");
2422
- console.log(" install Install into ./.agents/skills/qmd");
3427
+ console.log(" show Print the QMD skill");
3428
+ console.log(" install Install QMD skill into ./.agents/skills/qmd");
2423
3429
  console.log("");
2424
3430
  console.log("Options:");
2425
3431
  console.log(" --global Install into ~/.agents/skills/qmd");
@@ -2654,13 +3660,25 @@ if (isMain) {
2654
3660
  default:
2655
3661
  console.error(`Unknown subcommand: ${subcommand}`);
2656
3662
  console.error("Run 'qmd collection help' for usage");
3663
+ printDoctorHint();
2657
3664
  process.exit(1);
2658
3665
  }
2659
3666
  break;
2660
3667
  }
3668
+ case "init":
3669
+ try {
3670
+ initLocalIndex();
3671
+ }
3672
+ catch (error) {
3673
+ exitWithError(error);
3674
+ }
3675
+ break;
2661
3676
  case "status":
2662
3677
  await showStatus();
2663
3678
  break;
3679
+ case "doctor":
3680
+ await showDoctor();
3681
+ break;
2664
3682
  case "update":
2665
3683
  await updateCollections();
2666
3684
  break;
@@ -2669,23 +3687,30 @@ if (isMain) {
2669
3687
  const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
2670
3688
  const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
2671
3689
  const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
2672
- await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
3690
+ // Validate -c against configured collections before dispatching, so a
3691
+ // typo errors with "Collection not found: X" instead of silently
3692
+ // reporting success because no pending docs match a nonexistent name.
3693
+ // embed operates on a single collection; only the first value is used.
3694
+ const embedValidatedCollections = resolveCollectionFilter(cli.opts.collection, false);
3695
+ const embedCollection = embedValidatedCollections[0];
3696
+ await vectorIndex(resolveEmbedModelForCli(), !!cli.values.force, {
2673
3697
  maxDocsPerBatch,
2674
3698
  maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
2675
3699
  chunkStrategy: embedChunkStrategy,
3700
+ collection: embedCollection,
2676
3701
  });
2677
3702
  }
2678
3703
  catch (error) {
2679
- console.error(error instanceof Error ? error.message : String(error));
2680
- process.exit(1);
3704
+ exitWithError(error);
2681
3705
  }
2682
3706
  break;
2683
3707
  case "pull": {
2684
3708
  const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh);
3709
+ const activeModels = resolveModelsForCli();
2685
3710
  const models = [
2686
- DEFAULT_EMBED_MODEL_URI,
2687
- DEFAULT_GENERATE_MODEL_URI,
2688
- DEFAULT_RERANK_MODEL_URI,
3711
+ activeModels.embed,
3712
+ activeModels.generate,
3713
+ activeModels.rerank,
2689
3714
  ];
2690
3715
  console.log(`${c.bold}Pulling models${c.reset}`);
2691
3716
  const results = await pullModels(models, {
@@ -2738,8 +3763,10 @@ if (isMain) {
2738
3763
  const { runBenchmark } = await import("../bench/bench.js");
2739
3764
  const benchCollection = cli.opts.collection;
2740
3765
  await runBenchmark(fixturePath, {
2741
- json: !!cli.opts.json,
3766
+ json: !!cli.values.json,
2742
3767
  collection: Array.isArray(benchCollection) ? benchCollection[0] : benchCollection,
3768
+ dbPath: getDbPath(),
3769
+ configPath: configExists() ? getConfigPath() : undefined,
2743
3770
  });
2744
3771
  break;
2745
3772
  }
@@ -2788,9 +3815,10 @@ if (isMain) {
2788
3815
  const logPath = resolve(cacheDir, "mcp.log");
2789
3816
  const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
2790
3817
  const selfPath = fileURLToPath(import.meta.url);
3818
+ const indexArgs = cli.values.index ? ["--index", String(cli.values.index)] : [];
2791
3819
  const spawnArgs = selfPath.endsWith(".ts")
2792
- ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, "mcp", "--http", "--port", String(port)]
2793
- : [selfPath, "mcp", "--http", "--port", String(port)];
3820
+ ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)]
3821
+ : [selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)];
2794
3822
  const child = nodeSpawn(process.execPath, spawnArgs, {
2795
3823
  stdio: ["ignore", logFd, logFd],
2796
3824
  detached: true,
@@ -2808,10 +3836,10 @@ if (isMain) {
2808
3836
  process.removeAllListeners("SIGINT");
2809
3837
  const { startMcpHttpServer } = await import("../mcp/server.js");
2810
3838
  try {
2811
- await startMcpHttpServer(port);
3839
+ await startMcpHttpServer(port, { dbPath: getDbPath() });
2812
3840
  }
2813
3841
  catch (e) {
2814
- if (e?.code === "EADDRINUSE") {
3842
+ if (typeof e === "object" && e !== null && "code" in e && e.code === "EADDRINUSE") {
2815
3843
  console.error(`Port ${port} already in use. Try a different port with --port.`);
2816
3844
  process.exit(1);
2817
3845
  }
@@ -2821,7 +3849,27 @@ if (isMain) {
2821
3849
  else {
2822
3850
  // Default: stdio transport
2823
3851
  const { startMcpServer } = await import("../mcp/server.js");
2824
- await startMcpServer();
3852
+ await startMcpServer({ dbPath: getDbPath() });
3853
+ }
3854
+ break;
3855
+ }
3856
+ case "skills": {
3857
+ try {
3858
+ if (cli.values.help || cli.args[0] === "help") {
3859
+ showSkillsHelp();
3860
+ }
3861
+ else {
3862
+ runSkillsCommand(cli.args, Boolean(cli.values.json), Boolean(cli.values.full), Boolean(cli.values.all));
3863
+ }
3864
+ }
3865
+ catch (error) {
3866
+ if (cli.values.json) {
3867
+ outputSkillsJson({ success: false, error: error instanceof Error ? error.message : String(error) });
3868
+ }
3869
+ else {
3870
+ console.error(error instanceof Error ? error.message : String(error));
3871
+ }
3872
+ process.exit(1);
2825
3873
  }
2826
3874
  break;
2827
3875
  }
@@ -2837,8 +3885,7 @@ if (isMain) {
2837
3885
  await installSkill(Boolean(cli.values.global), Boolean(cli.values.force), Boolean(cli.values.yes));
2838
3886
  }
2839
3887
  catch (error) {
2840
- console.error(error instanceof Error ? error.message : String(error));
2841
- process.exit(1);
3888
+ exitWithError(error);
2842
3889
  }
2843
3890
  break;
2844
3891
  }
@@ -2847,8 +3894,8 @@ if (isMain) {
2847
3894
  console.log("Usage: qmd skill <show|install> [options]");
2848
3895
  console.log("");
2849
3896
  console.log("Commands:");
2850
- console.log(" show Print the packaged QMD skill");
2851
- console.log(" install Install into ./.agents/skills/qmd");
3897
+ console.log(" show Print the QMD skill");
3898
+ console.log(" install Install QMD skill into ./.agents/skills/qmd");
2852
3899
  console.log("");
2853
3900
  console.log("Options:");
2854
3901
  console.log(" --global Install into ~/.agents/skills/qmd");
@@ -2859,6 +3906,7 @@ if (isMain) {
2859
3906
  default:
2860
3907
  console.error(`Unknown subcommand: ${subcommand}`);
2861
3908
  console.error("Run 'qmd skill help' for usage");
3909
+ printDoctorHint();
2862
3910
  process.exit(1);
2863
3911
  }
2864
3912
  break;
@@ -2890,10 +3938,13 @@ if (isMain) {
2890
3938
  default:
2891
3939
  console.error(`Unknown command: ${cli.command}`);
2892
3940
  console.error("Run 'qmd --help' for usage.");
3941
+ printDoctorHint();
2893
3942
  process.exit(1);
2894
3943
  }
2895
3944
  if (cli.command !== "mcp") {
2896
- await disposeDefaultLlamaCpp();
2897
- process.exit(0);
3945
+ await finishSuccessfulCliCommand({
3946
+ command: cli.command,
3947
+ format: cli.opts.format,
3948
+ });
2898
3949
  }
2899
3950
  } // end if (main module)