@joycodetech/qmd-ja 2.5.3-ja.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/CHANGELOG.md +821 -0
  2. package/LICENSE +21 -0
  3. package/README.md +1143 -0
  4. package/bin/qmd-ja +162 -0
  5. package/dist/ast.d.ts +65 -0
  6. package/dist/ast.js +334 -0
  7. package/dist/bench/bench.d.ts +23 -0
  8. package/dist/bench/bench.js +280 -0
  9. package/dist/bench/score.d.ts +33 -0
  10. package/dist/bench/score.js +88 -0
  11. package/dist/bench/types.d.ts +80 -0
  12. package/dist/bench/types.js +8 -0
  13. package/dist/cli/formatter.d.ts +120 -0
  14. package/dist/cli/formatter.js +355 -0
  15. package/dist/cli/qmd.d.ts +43 -0
  16. package/dist/cli/qmd.js +4179 -0
  17. package/dist/collections.d.ts +166 -0
  18. package/dist/collections.js +410 -0
  19. package/dist/db.d.ts +44 -0
  20. package/dist/db.js +75 -0
  21. package/dist/index.d.ts +230 -0
  22. package/dist/index.js +242 -0
  23. package/dist/llm.d.ts +500 -0
  24. package/dist/llm.js +1615 -0
  25. package/dist/maintenance.d.ts +23 -0
  26. package/dist/maintenance.js +37 -0
  27. package/dist/mcp/server.d.ts +24 -0
  28. package/dist/mcp/server.js +702 -0
  29. package/dist/paths.d.ts +1 -0
  30. package/dist/paths.js +4 -0
  31. package/dist/store.d.ts +1002 -0
  32. package/dist/store.js +4208 -0
  33. package/models/vaporetto-bccwj.model +0 -0
  34. package/package.json +130 -0
  35. package/scripts/build.mjs +30 -0
  36. package/scripts/check-package-grammars.mjs +29 -0
  37. package/scripts/package-smoke.mjs +65 -0
  38. package/scripts/test-all.mjs +38 -0
  39. package/skills/qmd/SKILL.md +295 -0
  40. package/skills/qmd/references/mcp-setup.md +102 -0
  41. package/skills/release/SKILL.md +139 -0
  42. package/skills/release/scripts/install-hooks.sh +38 -0
  43. package/vendor/vaporetto-node-wasm/LICENSE +22 -0
  44. package/vendor/vaporetto-node-wasm/package.json +11 -0
  45. package/vendor/vaporetto-node-wasm/vaporetto_node_wasm.d.ts +19 -0
  46. package/vendor/vaporetto-node-wasm/vaporetto_node_wasm.js +202 -0
  47. package/vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm +0 -0
  48. package/vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm.d.ts +13 -0
@@ -0,0 +1,4179 @@
1
+ import { isBun, openDatabase } from "../db.js";
2
+ import fastGlob from "fast-glob";
3
+ import { execSync, spawn as nodeSpawn } from "child_process";
4
+ import { fileURLToPath } from "url";
5
+ import { basename, dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
6
+ import { parseArgs } from "util";
7
+ import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
8
+ import { createInterface } from "readline/promises";
9
+ import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, getEmbeddingFingerprint, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, findOrMigrateLegacyDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_QUERY_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, initializeKuromojiTokenizer, initializeVaporettoTokenizer, FTS_CJK_NORMALIZED_VERSION, resolveVaporettoModelPath, generateEmbeddings, maybeAdoptLegacyEmbeddingFingerprint, syncConfigToDb, } from "../store.js";
10
+ import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels, inspectGgufFile, isDarwinMetalMitigationActive } from "../llm.js";
11
+ import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
12
+ import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, saveConfig, setConfigSource, findLocalConfigPath, getLocalDbPath, getConfigPath, configExists, } from "../collections.js";
13
+ // NOTE: enableProductionMode() is intentionally NOT called at module scope here.
14
+ // Importing this module for its exports (e.g. buildEditorUri, termLink from
15
+ // test/cli.test.ts) must not flip the global production flag, as that leaks
16
+ // into unrelated tests that rely on the default (development) database path
17
+ // resolution. The flag is flipped inside the CLI's main-module guard below so
18
+ // it only fires when qmd is actually invoked as a script.
19
+ // =============================================================================
20
+ // Store/DB lifecycle (no legacy singletons in store.ts)
21
+ // =============================================================================
22
// Shared store handle. Stays null until getStore() creates it; closeDb() resets it to null.
let store = null;
// Explicit database path handed to createStore(); left undefined until setIndexName()
// or initLocalIndex() assigns one.
let storeDbPathOverride = undefined;
// Name of the index currently selected through setIndexName().
let currentIndexName = "index";
25
/**
 * Return the shared store, creating it on first use.
 *
 * On creation the YAML config is synced into the store's database and a
 * LlamaCpp instance built from the configured models is installed as the
 * default. That setup is best-effort: any error it raises is swallowed so the
 * store stays usable when no config exists yet.
 */
function getStore() {
    if (store) {
        return store;
    }
    store = createStore(storeDbPathOverride);
    try {
        const models = ensureModelsConfiguredForCli();
        // Sync YAML config into SQLite store_collections so store.ts reads from DB
        syncConfigToDb(store.db, loadConfig());
        const llama = new LlamaCpp({
            embedModel: models.embed,
            generateModel: models.generate,
            rerankModel: models.rerank,
        });
        setDefaultLlamaCpp(llama);
    }
    catch {
        // Config may not exist yet — the DB works without it.
    }
    return store;
}
45
/** Return the database handle of the shared store (opening the store if needed). */
function getDb() {
    const { db } = getStore();
    return db;
}
48
/**
 * Push the YAML config into SQLite again after a CLI mutation
 * (add/remove/rename collection, context changes).
 *
 * The stored `config_hash` row is deleted first so syncConfigToDb() cannot
 * skip the sync. Failures are ignored because the config may not exist.
 */
function resyncConfig() {
    const activeStore = getStore();
    try {
        const freshConfig = loadConfig();
        const dropHash = activeStore.db.prepare(`DELETE FROM store_config WHERE key = 'config_hash'`);
        dropHash.run();
        syncConfigToDb(activeStore.db, freshConfig);
    }
    catch {
        // Config may not exist — that's fine
    }
}
61
/** Close the shared store if one is open and forget the handle. */
function closeDb() {
    if (!store) {
        return;
    }
    store.close();
    store = null;
}
67
/**
 * Resolve the database path in use: the open store's path, else the
 * configured override, else the default path.
 */
function getDbPath() {
    const openPath = store?.dbPath;
    if (openPath !== null && openPath !== undefined) {
        return openPath;
    }
    if (storeDbPathOverride !== null && storeDbPathOverride !== undefined) {
        return storeDbPathOverride;
    }
    return getDefaultDbPath();
}
70
/** Return the index name last selected with setIndexName() ("index" by default). */
function getActiveIndexName() {
    const activeName = currentIndexName;
    return activeName;
}
73
/**
 * Select the index to operate on.
 *
 * A name containing '/' is treated as a path: it is resolved against the
 * current working directory and flattened into a single filename by replacing
 * every '/' with '_' (dropping one leading '_'). An empty name selects the
 * default "index" and clears the database path override. Any open store is
 * closed so the next access opens the newly selected index.
 */
function setIndexName(name) {
    const looksLikePath = Boolean(name) && name.includes('/');
    // Normalize relative paths to prevent malformed database paths
    const normalizedName = looksLikePath
        ? pathResolve(process.cwd(), name).replace(/\//g, '_').replace(/^_/, '')
        : name;
    if (normalizedName) {
        currentIndexName = normalizedName;
        storeDbPathOverride = getDefaultDbPath(normalizedName);
    }
    else {
        currentIndexName = "index";
        storeDbPathOverride = undefined;
    }
    // Reset open handle so next use opens the new index
    closeDb();
}
86
/**
 * Ensure the vector table exists with the given dimensions on the active store.
 * The `_db` argument is ignored: the store owns the database handle.
 */
function ensureVecTable(_db, dimensions) {
    const activeStore = getStore();
    activeStore.ensureVecTable(dimensions);
}
90
// Terminal colors: enabled only when NO_COLOR is unset/empty and stdout is a TTY.
const useColor = !process.env.NO_COLOR && process.stdout.isTTY;
// Each entry is the ANSI escape sequence when colors are enabled, otherwise "".
const c = Object.fromEntries(Object.entries({
    reset: "\x1b[0m",
    dim: "\x1b[2m",
    bold: "\x1b[1m",
    cyan: "\x1b[36m",
    yellow: "\x1b[33m",
    green: "\x1b[32m",
    magenta: "\x1b[35m",
    blue: "\x1b[34m",
}).map(([colorName, sequence]) => [colorName, useColor ? sequence : ""]));
102
// Terminal cursor control: writes the hide/show escape sequences to stderr.
const cursor = {
    hide: () => {
        process.stderr.write('\x1b[?25l');
    },
    show: () => {
        process.stderr.write('\x1b[?25h');
    },
};
107
/**
 * Wait until everything already queued on `stream` has been handed off, by
 * writing an empty chunk and resolving from its write callback.
 */
async function flushWritable(stream) {
    const drained = new Promise((done) => {
        stream.write("", () => {
            done();
        });
    });
    await drained;
}
112
+ /**
113
+ * Finish a successful CLI command after output has been flushed.
114
+ *
115
+ * We deliberately do NOT call `process.exit(0)`. `process.exit()` skips
116
+ * Node's `beforeExit` event, and node-llama-cpp registers a `beforeExit` hook
117
+ * that auto-disposes its native handles. On darwin, without that hook firing,
118
+ * libggml-metal's static `ggml_metal_device` destructor asserts on a
119
+ * non-empty residency-set collection during `__cxa_finalize_ranges` and
120
+ * dumps a multi-kB backtrace (upstream ggml-org/llama.cpp#22593, fix open as
121
+ * PR #22595). Empirically, even with explicit `disposeDefaultLlamaCpp()` the
122
+ * direct `process.exit(0)` path still trips the assertion — letting the
123
+ * event loop drain naturally is what actually clears the rsets.
124
+ *
125
+ * So: set `process.exitCode = 0` and return. The main module finishes, the
126
+ * event loop drains, `beforeExit` fires, native resources tear down in
127
+ * order, and the process exits cleanly. The `GGML_METAL_NO_RESIDENCY=1` env
128
+ * var that `bin/qmd` exports is a defense-in-depth safety net for paths
129
+ * that still call `process.exit()` after loading the native binding
130
+ * (signal handlers, error paths, `bun test`).
131
+ *
132
+ * If the caller passes an explicit `exit` for testability, we honor it —
133
+ * the lifecycle tests verify the legacy flush → cleanup → exit ordering.
134
+ * Production callers must not pass `exit`.
135
+ */
136
export async function finishSuccessfulCliCommand(options) {
    const errStream = options.stderr ?? process.stderr;
    const outStream = options.stdout ?? process.stdout;
    // Command output must be fully flushed before any teardown starts.
    await flushWritable(outStream);
    try {
        const cleanup = options.cleanup ?? disposeDefaultLlamaCpp;
        await cleanup();
    }
    catch (error) {
        // A cleanup failure is reported but never turns a successful command into a failure.
        const reason = error instanceof Error ? error.message : String(error);
        errStream.write(`QMD Warning: cleanup after successful output failed (${reason}); exiting 0 because command output completed.\n`);
    }
    await flushWritable(errStream);
    if (!options.exit) {
        // Production path: record success and let the event loop drain on its own.
        process.exitCode = 0;
        return;
    }
    // Test-only path: the injected exit callback receives the success code.
    options.exit(0);
}
152
// Ensure the cursor is restored when the process is interrupted or terminated;
// the exit status is the one paired with each signal below.
for (const [signalName, exitStatus] of [['SIGINT', 130], ['SIGTERM', 143]]) {
    process.on(signalName, () => {
        cursor.show();
        process.exit(exitStatus);
    });
}
155
// Terminal progress reporting through OSC 9;4 escape sequences on stderr.
// Every method is a no-op unless stderr is a TTY.
const isTTY = process.stderr.isTTY;
const progress = {
    // Report a progress value (rounded to a whole number).
    set(percent) {
        if (!isTTY) {
            return;
        }
        process.stderr.write(`\x1b]9;4;1;${Math.round(percent)}\x07`);
    },
    // Emit the state-0 sequence.
    clear() {
        if (!isTTY) {
            return;
        }
        process.stderr.write(`\x1b]9;4;0\x07`);
    },
    // Emit the state-3 sequence.
    indeterminate() {
        if (!isTTY) {
            return;
        }
        process.stderr.write(`\x1b]9;4;3\x07`);
    },
    // Emit the state-2 sequence.
    error() {
        if (!isTTY) {
            return;
        }
        process.stderr.write(`\x1b]9;4;2\x07`);
    },
};
175
/**
 * Format a duration in seconds as a short human-readable ETA.
 *
 * The value is rounded to whole seconds once, before it is split into units,
 * so a fractional input can never yield an out-of-range component such as
 * "1m 60s" or "59m 60s"; the carry moves into the next unit instead.
 *
 * @param {number} seconds Remaining time in seconds (may be fractional).
 * @returns {string} "Ns" below one minute, "Mm Ss" below one hour, else "Hh Mm".
 */
function formatETA(seconds) {
    const totalSeconds = Math.round(seconds);
    if (totalSeconds < 60) {
        return `${totalSeconds}s`;
    }
    if (totalSeconds < 3600) {
        return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
    }
    return `${Math.floor(totalSeconds / 3600)}h ${Math.floor((totalSeconds % 3600) / 60)}m`;
}
183
/**
 * Print index-health hints to stderr.
 *
 * Writes a warning when at least 10% of documents lack embeddings, a dimmer
 * tip when fewer do, and a separate tip when the index has not been updated
 * for 14 days or more.
 */
function checkIndexHealth(db, model = resolveEmbedModelForCli()) {
    const health = getIndexHealth(db, model);
    const { needsEmbedding, totalDocs, daysStale } = health;
    if (needsEmbedding > 0) {
        const pct = Math.round((needsEmbedding / totalDocs) * 100);
        const notice = pct >= 10
            ? `${c.yellow}Warning: ${needsEmbedding} documents (${pct}%) need embeddings. Run 'qmd embed' for better results.${c.reset}\n`
            : `${c.dim}Tip: ${needsEmbedding} documents need embeddings. Run 'qmd embed' to index them.${c.reset}\n`;
        process.stderr.write(notice);
    }
    // Two weeks without an update counts as stale.
    const isStale = daysStale !== null && daysStale >= 14;
    if (isStale) {
        process.stderr.write(`${c.dim}Tip: Index last updated ${daysStale} days ago. Run 'qmd update' to refresh.${c.reset}\n`);
    }
}
201
/**
 * Compute a short display path for a document that is not already taken.
 *
 * Starts from "<parent>/<file>" (or just the file when that is all there is)
 * and prepends one more parent segment at a time until the candidate is absent
 * from `existingPaths`. Paths under the collection are expressed relative to
 * it, prefixed with the collection directory's own name. If every candidate
 * is taken, the original `filepath` is returned.
 */
function computeDisplayPath(filepath, collectionPath, existingPaths) {
    const collectionRoot = collectionPath.replace(/\/$/, '');
    const collectionLabel = collectionRoot.split('/').pop() || '';
    const insideCollection = filepath.startsWith(collectionRoot + '/');
    const scopedPath = insideCollection
        ? collectionLabel + filepath.slice(collectionRoot.length)
        : filepath;
    const segments = scopedPath.split('/').filter(Boolean);
    // Begin with the last two segments when available, then widen leftwards.
    let start = segments.length - Math.min(2, segments.length);
    while (start >= 0) {
        const candidate = segments.slice(start).join('/');
        if (!existingPaths.has(candidate)) {
            return candidate;
        }
        start -= 1;
    }
    // Absolute fallback: use full path (should be unique)
    return filepath;
}
229
/**
 * Describe how long ago `date` was, using the largest fitting unit among
 * seconds, minutes, hours and days (e.g. "5s ago", "3d ago").
 */
function formatTimeAgo(date) {
    let amount = Math.floor((Date.now() - date.getTime()) / 1000);
    // Each entry: how many of this unit make up the next one, and the unit's suffix.
    const steps = [[60, 's'], [60, 'm'], [24, 'h']];
    for (const [unitsPerNext, suffix] of steps) {
        if (amount < unitsPerNext) {
            return `${amount}${suffix} ago`;
        }
        amount = Math.floor(amount / unitsPerNext);
    }
    return `${amount}d ago`;
}
242
/** Format a millisecond duration: "<n>ms" below one second, otherwise seconds with one decimal. */
function formatMs(ms) {
    return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
}
247
/** Format a byte count using binary units (B, KB, MB, GB) with one decimal above bytes. */
function formatBytes(bytes) {
    const KIB = 1024;
    const MIB = KIB * 1024;
    const GIB = MIB * 1024;
    if (bytes < KIB) {
        return `${bytes} B`;
    }
    const scaled = (divisor, unit) => `${(bytes / divisor).toFixed(1)} ${unit}`;
    if (bytes < MIB) {
        return scaled(KIB, "KB");
    }
    if (bytes < GIB) {
        return scaled(MIB, "MB");
    }
    return scaled(GIB, "GB");
}
256
/**
 * Report whether two paths refer to the same location.
 *
 * Compares the real (symlink-resolved) paths; if either cannot be resolved,
 * falls back to comparing the lexically resolved absolute paths.
 */
function sameDirectory(a, b) {
    let realA;
    let realB;
    try {
        realA = realpathSync(a);
        realB = realpathSync(b);
    }
    catch {
        return pathResolve(a) === pathResolve(b);
    }
    return realA === realB;
}
264
/**
 * Create a project-local index under `<cwd>/.qmd`.
 *
 * Refuses to run in the home directory. Points the config source and the
 * database override at the local `.qmd` folder, writes a fresh config (empty
 * collections plus the resolved models) when none exists, and syncs the
 * config into a newly created local database.
 *
 * @throws {Error} When the current directory is the user's home directory.
 */
function initLocalIndex() {
    const cwd = getPwd();
    if (sameDirectory(cwd, homedir())) {
        throw new Error("Refusing to initialize a local index in $HOME. The global index is automatically created; run `qmd collection add <path>` for the global index, or run `qmd init` inside a project folder.");
    }
    const qmdDir = pathJoin(cwd, ".qmd");
    const [ymlPath, yamlPath] = ["index.yml", "index.yaml"].map((fileName) => pathJoin(qmdDir, fileName));
    // An existing index.yaml wins; otherwise index.yml is the config file.
    const configPath = existsSync(yamlPath) ? yamlPath : ymlPath;
    const dbPath = pathJoin(qmdDir, "index.sqlite");
    mkdirSync(qmdDir, { recursive: true });
    setConfigSource({ configPath });
    storeDbPathOverride = dbPath;
    closeDb();
    if (existsSync(configPath)) {
        ensureModelsConfiguredForCli();
    }
    else {
        saveConfig({
            collections: {},
            models: resolveModels(),
        });
    }
    // Use a dedicated store handle so the shared one stays closed.
    const localStore = createStore(dbPath);
    syncConfigToDb(localStore.db, loadConfig());
    localStore.close();
    console.log("ready to go with new local index");
}
292
/**
 * Report whether QMD_FORCE_CPU is set to something other than one of the
 * recognised "off" spellings (compared trimmed and case-insensitively).
 */
function isForceCpuEnabled() {
    const raw = process.env.QMD_FORCE_CPU;
    if (!raw) {
        return false;
    }
    const offSpellings = new Set(["false", "off", "none", "disable", "disabled", "0"]);
    return !offSpellings.has(raw.trim().toLowerCase());
}
296
/**
 * Describe the configured GPU mode: a fixed label when CPU is forced,
 * otherwise the trimmed QMD_LLAMA_GPU value, or "auto" when that is unset or empty.
 */
function configuredGpuModeLabel() {
    if (isForceCpuEnabled()) {
        return "CPU forced (QMD_FORCE_CPU)";
    }
    const requested = process.env.QMD_LLAMA_GPU?.trim();
    return requested || "auto";
}
301
/**
 * Collapse a list of device names into a comma-separated summary, prefixing
 * repeated names with their count (e.g. "2× GPU A, GPU B"). Names keep their
 * first-seen order.
 */
function summarizeDeviceNames(names) {
    const tally = new Map();
    for (const deviceName of names) {
        tally.set(deviceName, (tally.get(deviceName) ?? 0) + 1);
    }
    const labels = [];
    for (const [deviceName, seen] of tally) {
        labels.push(seen > 1 ? `${seen}× ${deviceName}` : deviceName);
    }
    return labels.join(", ");
}
310
/**
 * Shorten a diagnostic message for display: the home directory becomes "~",
 * the current working directory becomes ".", and only the first three
 * non-empty (trimmed) lines are kept, joined with "; ".
 */
function sanitizeDiagnosticMessage(message) {
    const home = homedir();
    const redacted = message.replaceAll(home, "~").replaceAll(process.cwd(), ".");
    const kept = [];
    for (const rawLine of redacted.split("\n")) {
        const line = rawLine.trim();
        if (line) {
            kept.push(line);
        }
        if (kept.length === 3) {
            break;
        }
    }
    return kept.join("; ");
}
321
/**
 * Print the `qmd status` report to stdout: index location and size, MCP daemon
 * liveness, document/vector counts, AST chunking availability, per-collection
 * details with their contexts, active models and usage tips. Closes the
 * shared store when done.
 *
 * MCP liveness is read from `<cache>/qmd/mcp.pid`. The PID is parsed as a
 * base-10 positive integer and probed with signal 0. An EPERM failure means
 * the process exists but belongs to another user, so it still counts as
 * running. Only a dead or invalid PID causes the PID file to be removed, and a
 * failure to remove it is ignored rather than aborting the report.
 */
async function showStatus() {
    const dbPath = getDbPath();
    const db = getDb();
    // Index size (stays 0 when the file cannot be stat'ed)
    let indexSize = 0;
    try {
        indexSize = statSync(dbPath).size;
    }
    catch { }
    // Collections info (from YAML + database stats)
    const collections = listCollections(db);
    // Overall stats
    const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get();
    const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get();
    const statusEmbedModel = resolveEmbedModelForCli();
    const needsEmbedding = getHashesNeedingEmbedding(db, undefined, statusEmbedModel);
    // Most recent update across all collections
    const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get();
    console.log(`${c.bold}QMD Status${c.reset}\n`);
    console.log(`Index: ${dbPath}`);
    console.log(`Size: ${formatBytes(indexSize)}`);
    // MCP daemon status (check PID file liveness)
    const mcpCacheDir = process.env.XDG_CACHE_HOME
        ? resolve(process.env.XDG_CACHE_HOME, "qmd")
        : resolve(homedir(), ".cache", "qmd");
    const mcpPidPath = resolve(mcpCacheDir, "mcp.pid");
    if (existsSync(mcpPidPath)) {
        const mcpPid = Number.parseInt(readFileSync(mcpPidPath, "utf-8").trim(), 10);
        let mcpAlive = false;
        // PID 0 and negative values address process groups, never a single daemon.
        if (Number.isInteger(mcpPid) && mcpPid > 0) {
            try {
                process.kill(mcpPid, 0);
                mcpAlive = true;
            }
            catch (error) {
                // EPERM: the process exists but we may not signal it — still alive.
                mcpAlive = error?.code === "EPERM";
            }
        }
        if (mcpAlive) {
            console.log(`MCP: ${c.green}running${c.reset} (PID ${mcpPid})`);
        }
        else {
            try {
                // Stale PID file cleaned up silently
                unlinkSync(mcpPidPath);
            }
            catch {
                // Already removed or not removable; status output must not fail over it.
            }
        }
    }
    console.log("");
    console.log(`${c.bold}Documents${c.reset}`);
    console.log(` Total: ${totalDocs.count} files indexed`);
    console.log(` Vectors: ${vectorCount.count} embedded`);
    if (needsEmbedding > 0) {
        console.log(` ${c.yellow}Pending: ${needsEmbedding} need embedding${c.reset} (run 'qmd embed')`);
    }
    if (mostRecent.latest) {
        const lastUpdate = new Date(mostRecent.latest);
        console.log(` Updated: ${formatTimeAgo(lastUpdate)}`);
    }
    // Get all contexts grouped by collection (from YAML)
    const contextsByCollection = new Map();
    for (const ctx of listAllContexts()) {
        if (!contextsByCollection.has(ctx.collection)) {
            contextsByCollection.set(ctx.collection, []);
        }
        contextsByCollection.get(ctx.collection).push({
            path_prefix: ctx.path,
            context: ctx.context
        });
    }
    // AST chunking status
    try {
        const { getASTStatus } = await import("../ast.js");
        const ast = await getASTStatus();
        console.log(`\n${c.bold}AST Chunking${c.reset}`);
        if (ast.available) {
            const ok = ast.languages.filter(l => l.available).map(l => l.language);
            const fail = ast.languages.filter(l => !l.available);
            console.log(` Status: ${c.green}active${c.reset}`);
            console.log(` Languages: ${ok.join(", ")}`);
            for (const f of fail) {
                console.log(` ${c.yellow}Unavailable: ${f.language} (${f.error})${c.reset}`);
            }
        }
        else {
            console.log(` Status: ${c.yellow}unavailable${c.reset} (falling back to regex chunking)`);
            for (const l of ast.languages) {
                if (l.error)
                    console.log(` ${c.dim}${l.language}: ${l.error}${c.reset}`);
            }
        }
    }
    catch {
        console.log(`\n${c.bold}AST Chunking${c.reset}`);
        console.log(` Status: ${c.dim}not available${c.reset}`);
    }
    if (collections.length > 0) {
        console.log(`\n${c.bold}Collections${c.reset}`);
        for (const col of collections) {
            const lastMod = col.last_modified ? formatTimeAgo(new Date(col.last_modified)) : "never";
            const contexts = contextsByCollection.get(col.name) || [];
            console.log(` ${c.cyan}${col.name}${c.reset} ${c.dim}(qmd://${col.name}/)${c.reset}`);
            console.log(` ${c.dim}Pattern:${c.reset} ${col.glob_pattern}`);
            console.log(` ${c.dim}Files:${c.reset} ${col.active_count} (updated ${lastMod})`);
            if (contexts.length > 0) {
                console.log(` ${c.dim}Contexts:${c.reset} ${contexts.length}`);
                for (const ctx of contexts) {
                    // Handle both empty string and '/' as root context
                    const pathDisplay = (ctx.path_prefix === '' || ctx.path_prefix === '/') ? '/' : `/${ctx.path_prefix}`;
                    // Long context text is cut to 57 characters plus an ellipsis.
                    const contextPreview = ctx.context.length > 60
                        ? ctx.context.substring(0, 57) + '...'
                        : ctx.context;
                    console.log(` ${c.dim}${pathDisplay}:${c.reset} ${contextPreview}`);
                }
            }
        }
        // Show examples of virtual paths, using the first collection's name
        const firstCollection = collections[0];
        console.log(`\n${c.bold}Examples${c.reset}`);
        console.log(` ${c.dim}# List files in a collection${c.reset}`);
        if (firstCollection) {
            console.log(` qmd ls ${firstCollection.name}`);
        }
        console.log(` ${c.dim}# Get a document${c.reset}`);
        if (firstCollection) {
            console.log(` qmd get qmd://${firstCollection.name}/path/to/file.md`);
        }
        console.log(` ${c.dim}# Search within a collection${c.reset}`);
        if (firstCollection) {
            console.log(` qmd search "query" -c ${firstCollection.name}`);
        }
    }
    else {
        console.log(`\n${c.dim}No collections. Run 'qmd collection add .' to index markdown files.${c.reset}`);
    }
    // Models
    // hf:org/repo/file.gguf → https://huggingface.co/org/repo
    const hfLink = (uri) => {
        const match = uri.match(/^hf:([^/]+\/[^/]+)\//);
        return match ? `https://huggingface.co/${match[1]}` : uri;
    };
    const activeModels = resolveModelsForCli();
    console.log(`\n${c.bold}Models${c.reset}`);
    console.log(` Embedding: ${hfLink(activeModels.embed)}`);
    console.log(` Reranking: ${hfLink(activeModels.rerank)}`);
    console.log(` Generation: ${hfLink(activeModels.generate)}`);
    // Tips section
    const tips = [];
    // Check for collections without context
    const collectionsWithoutContext = collections.filter(col => {
        const contexts = contextsByCollection.get(col.name) || [];
        return contexts.length === 0;
    });
    if (collectionsWithoutContext.length > 0) {
        const names = collectionsWithoutContext.map(col => col.name).slice(0, 3).join(', ');
        const more = collectionsWithoutContext.length > 3 ? ` +${collectionsWithoutContext.length - 3} more` : '';
        tips.push(`Add context to collections for better search results: ${names}${more}`);
        tips.push(` ${c.dim}qmd context add qmd://<name>/ "What this collection contains"${c.reset}`);
        tips.push(` ${c.dim}qmd context add qmd://<name>/meeting-notes "Weekly team meeting notes"${c.reset}`);
    }
    // Check for collections without update commands (only suggested when there are several collections)
    const collectionsWithoutUpdate = collections.filter(col => {
        const yamlCol = getCollectionFromYaml(col.name);
        return !yamlCol?.update;
    });
    if (collectionsWithoutUpdate.length > 0 && collections.length > 1) {
        const names = collectionsWithoutUpdate.map(col => col.name).slice(0, 3).join(', ');
        const more = collectionsWithoutUpdate.length > 3 ? ` +${collectionsWithoutUpdate.length - 3} more` : '';
        tips.push(`Add update commands to keep collections fresh: ${names}${more}`);
        tips.push(` ${c.dim}qmd collection update-cmd <name> 'git stash && git pull --rebase --ff-only && git stash pop'${c.reset}`);
    }
    if (tips.length > 0) {
        console.log(`\n${c.bold}Tips${c.reset}`);
        for (const tip of tips) {
            console.log(` ${tip}`);
        }
    }
    closeDb();
}
497
/**
 * Re-index every configured collection.
 *
 * For each collection: runs its YAML `update` command (if any) through
 * `bash -c` in the collection directory, echoing the command's stdout and
 * stderr; a failing command terminates the process with the command's exit
 * code (1 when it could not be spawned). The collection is then re-indexed
 * with progress reporting, and a summary is printed. Finishes by closing the
 * store and hinting at `qmd embed` when hashes still lack vectors.
 */
async function updateCollections() {
    await initializeKuromojiTokenizer(); // kuromoji: morphological analysis for CJK FTS
    const db = getDb();
    const storeInstance = getStore();
    // Collections are defined in YAML; no duplicate cleanup needed.
    // Reset the store's cache (clearCache) before re-indexing.
    clearCache(db);
    const collections = listCollections(db);
    if (collections.length === 0) {
        console.log(`${c.dim}No collections found. Run 'qmd collection add .' to index markdown files.${c.reset}`);
        closeDb();
        return;
    }
    console.log(`${c.bold}Updating ${collections.length} collection(s)...${c.reset}\n`);
    // Prefix every line of captured command output before echoing it.
    const indentLines = (text) => text.trim().split('\n').map(l => ` ${l}`).join('\n');
    for (const [position, col] of collections.entries()) {
        if (!col) {
            continue;
        }
        console.log(`${c.cyan}[${position + 1}/${collections.length}]${c.reset} ${c.bold}${col.name}${c.reset} ${c.dim}(${col.glob_pattern})${c.reset}`);
        // Execute custom update command if specified in YAML
        const yamlCol = getCollectionFromYaml(col.name);
        if (yamlCol?.update) {
            console.log(`${c.dim} Running update command: ${yamlCol.update}${c.reset}`);
            try {
                const child = nodeSpawn("bash", ["-c", yamlCol.update], {
                    cwd: col.pwd,
                    stdio: ["ignore", "pipe", "pipe"],
                });
                // Collect both streams and settle once the child has closed.
                const [output, errorOutput, exitCode] = await new Promise((settle, fail) => {
                    let capturedOut = "";
                    let capturedErr = "";
                    child.stdout?.on("data", (chunk) => { capturedOut += chunk.toString(); });
                    child.stderr?.on("data", (chunk) => { capturedErr += chunk.toString(); });
                    child.on("error", fail);
                    child.on("close", (code) => settle([capturedOut, capturedErr, code ?? 1]));
                });
                if (output.trim()) {
                    console.log(indentLines(output));
                }
                if (errorOutput.trim()) {
                    console.log(indentLines(errorOutput));
                }
                if (exitCode !== 0) {
                    console.log(`${c.yellow}✗ Update command failed with exit code ${exitCode}${c.reset}`);
                    process.exit(exitCode);
                }
            }
            catch (err) {
                console.log(`${c.yellow}✗ Update command failed: ${err}${c.reset}`);
                process.exit(1);
            }
        }
        const startTime = Date.now();
        console.log(`Collection: ${col.pwd} (${col.glob_pattern})`);
        progress.indeterminate();
        const reportProgress = (info) => {
            progress.set((info.current / info.total) * 100);
            const elapsedSeconds = (Date.now() - startTime) / 1000;
            const filesPerSecond = info.current / elapsedSeconds;
            const remaining = (info.total - info.current) / filesPerSecond;
            // The estimate is only shown once more than two files have been processed.
            const eta = info.current > 2 ? ` ETA: ${formatETA(remaining)}` : "";
            if (isTTY) {
                process.stderr.write(`\rIndexing: ${info.current}/${info.total}${eta} `);
            }
        };
        const result = await reindexCollection(storeInstance, col.pwd, col.glob_pattern, col.name, {
            ignorePatterns: yamlCol?.ignore,
            onProgress: reportProgress,
        });
        progress.clear();
        console.log(`\nIndexed: ${result.indexed} new, ${result.updated} updated, ${result.unchanged} unchanged, ${result.removed} removed`);
        if (result.orphanedCleaned > 0) {
            console.log(`Cleaned up ${result.orphanedCleaned} orphaned content hash(es)`);
        }
        console.log("");
    }
    // Check if any documents need embedding (show once at end)
    const needsEmbedding = getHashesNeedingEmbedding(db);
    closeDb();
    console.log(`${c.green}✓ All collections updated.${c.reset}`);
    if (needsEmbedding > 0) {
        console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
    }
}
579
/**
 * Detect which YAML-configured collection (if any) contains a filesystem path.
 *
 * The path is resolved with getRealPath() and compared against every
 * collection root; when several roots contain it, the longest root wins.
 *
 * @param db Unused by this function.
 * @param fsPath Filesystem path to look up.
 * @returns `{ collectionName, relativePath }`, where `relativePath` is the path
 *   below the collection root ('' for the root itself), or null when no
 *   collection contains the path.
 */
function detectCollectionFromPath(db, fsPath) {
    const realPath = getRealPath(fsPath);
    let bestMatch = null;
    for (const coll of yamlListCollections()) {
        const containsPath = realPath === coll.path || realPath.startsWith(coll.path + '/');
        if (!containsPath) {
            continue;
        }
        // Prefer the most specific (longest) collection root.
        if (bestMatch === null || coll.path.length > bestMatch.path.length) {
            bestMatch = { name: coll.name, path: coll.path };
        }
    }
    if (bestMatch === null) {
        return null;
    }
    // The match guarantees realPath is either the root itself or strictly below it.
    const pathBelowRoot = realPath === bestMatch.path
        ? ''
        : realPath.slice(bestMatch.path.length + 1);
    return {
        collectionName: bestMatch.name,
        relativePath: pathBelowRoot
    };
}
611
/**
 * Attach context text to a location and re-sync the config into SQLite.
 *
 * `pathArg` may be:
 *  - "/" for the global context,
 *  - a `qmd://collection/path` virtual path,
 *  - a filesystem path (absolute, `~/…`, relative, or empty for the current
 *    directory), which is mapped onto the collection that contains it.
 *
 * Exits the process with status 1 when the virtual path is invalid, the named
 * collection does not exist, or the filesystem path is in no collection.
 */
async function contextAdd(pathArg, contextText) {
    const db = getDb();
    // Shared tail for both "added" outcomes.
    const reportAdded = (displayPath) => {
        console.log(`${c.green}✓${c.reset} Added context for: ${displayPath}`);
        console.log(`${c.dim}Context: ${contextText}${c.reset}`);
        closeDb();
    };
    // Handle "/" as global context (applies to all collections)
    if (pathArg === '/') {
        setGlobalContext(contextText);
        resyncConfig();
        console.log(`${c.green}✓${c.reset} Set global context`);
        console.log(`${c.dim}Context: ${contextText}${c.reset}`);
        closeDb();
        return;
    }
    // Resolve path - defaults to current directory if not provided
    let fsPath = pathArg || '.';
    const isCurrentDir = fsPath === '.' || fsPath === './';
    if (isCurrentDir) {
        fsPath = getPwd();
    }
    else if (fsPath.startsWith('~/')) {
        fsPath = homedir() + fsPath.slice(1);
    }
    else if (!fsPath.startsWith('/') && !fsPath.startsWith('qmd://')) {
        fsPath = resolve(getPwd(), fsPath);
    }
    // Handle virtual paths (qmd://collection/path)
    if (isVirtualPath(fsPath)) {
        const parsed = parseVirtualPath(fsPath);
        if (!parsed) {
            console.error(`${c.yellow}Invalid virtual path: ${fsPath}${c.reset}`);
            process.exit(1);
        }
        const coll = getCollectionFromYaml(parsed.collectionName);
        if (!coll) {
            console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`);
            process.exit(1);
        }
        yamlAddContext(parsed.collectionName, parsed.path, contextText);
        resyncConfig();
        reportAdded(parsed.path
            ? `qmd://${parsed.collectionName}/${parsed.path}`
            : `qmd://${parsed.collectionName}/ (collection root)`);
        return;
    }
    // Detect collection from filesystem path
    const detected = detectCollectionFromPath(db, fsPath);
    if (!detected) {
        console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
        console.error(`${c.dim}Run 'qmd status' to see indexed collections${c.reset}`);
        process.exit(1);
    }
    yamlAddContext(detected.collectionName, detected.relativePath, contextText);
    resyncConfig();
    reportAdded(detected.relativePath
        ? `qmd://${detected.collectionName}/${detected.relativePath}`
        : `qmd://${detected.collectionName}/`);
}
669
/**
 * Print every configured context, grouped under its collection name.
 *
 * Entries are printed in the order listAllContexts() returns them; a
 * collection heading is emitted each time the collection changes from the
 * previous entry. Entries with an empty path are labelled as the root.
 * The database is opened on entry and closed before returning.
 */
function contextList() {
    getDb();
    const entries = listAllContexts();
    if (entries.length === 0) {
        console.log(`${c.dim}No contexts configured. Use 'qmd context add' to add one.${c.reset}`);
        closeDb();
        return;
    }
    console.log(`\n${c.bold}Configured Contexts${c.reset}\n`);
    let currentCollection = '';
    entries.forEach((entry) => {
        // Emit the heading only when moving on to a different collection.
        if (entry.collection !== currentCollection) {
            currentCollection = entry.collection;
            console.log(`${c.cyan}${entry.collection}${c.reset}`);
        }
        const label = entry.path ? ` ${entry.path}` : ' / (root)';
        console.log(`${label}`);
        console.log(` ${c.dim}${entry.context}${c.reset}`);
    });
    closeDb();
}
690
/**
 * Remove a previously configured context.
 *
 * `pathArg` is "/" for the global context, a virtual path (qmd://…), or a
 * filesystem path that is mapped onto a collection with
 * detectCollectionFromPath(). Prints a confirmation on success; on any
 * failure (invalid virtual path, unknown collection, path outside every
 * collection, or no context stored at that location) prints an error and
 * exits the process with status 1.
 *
 * @param {string} pathArg - Location whose context should be removed.
 */
function contextRemove(pathArg) {
    // Global context.
    if (pathArg === '/') {
        setGlobalContext(undefined);
        // Resync so SQLite store_config is updated
        getStore();
        resyncConfig();
        closeDb();
        console.log(`${c.green}✓${c.reset} Removed global context`);
        return;
    }
    // Virtual path: collection and path come straight from the argument.
    if (isVirtualPath(pathArg)) {
        const virtual = parseVirtualPath(pathArg);
        if (!virtual) {
            console.error(`${c.yellow}Invalid virtual path: ${pathArg}${c.reset}`);
            process.exit(1);
        }
        const collection = getCollectionFromYaml(virtual.collectionName);
        if (!collection) {
            console.error(`${c.yellow}Collection not found: ${virtual.collectionName}${c.reset}`);
            process.exit(1);
        }
        if (!yamlRemoveContext(collection.name, virtual.path)) {
            console.error(`${c.yellow}No context found for: ${pathArg}${c.reset}`);
            process.exit(1);
        }
        console.log(`${c.green}✓${c.reset} Removed context for: ${pathArg}`);
        return;
    }
    // Filesystem path: normalise, then find the owning collection.
    let target = pathArg;
    if (target === '.' || target === './') {
        target = getPwd();
    }
    else if (target.startsWith('~/')) {
        target = homedir() + target.slice(1);
    }
    else if (!target.startsWith('/')) {
        target = resolve(getPwd(), target);
    }
    const db = getDb();
    const owner = detectCollectionFromPath(db, target);
    closeDb();
    if (!owner) {
        console.error(`${c.yellow}Path is not in any indexed collection: ${target}${c.reset}`);
        process.exit(1);
    }
    const removed = yamlRemoveContext(owner.collectionName, owner.relativePath);
    const location = `qmd://${owner.collectionName}/${owner.relativePath}`;
    if (!removed) {
        console.error(`${c.yellow}No context found for: ${location}${c.reset}`);
        process.exit(1);
    }
    console.log(`${c.green}✓${c.reset} Removed context for: ${location}`);
}
746
/**
 * Render an absolute filesystem path for human display under --full-path.
 *
 * If the path is the current working directory or a subpath of it, return a
 * "./"-prefixed relative path so it is unambiguously a filesystem path (not a
 * bare collection-relative string that could be confused for a `qmd://`
 * fragment). Otherwise return the absolute realpath so symlinks resolve
 * consistently.
 *
 * This function always returns a string. When realpathSync() throws for
 * either argument (for example because the path does not exist), that
 * argument is used as given instead of its realpath.
 *
 * @param {string} absolutePath - Absolute path to render.
 * @param {string} [cwd=process.cwd()] - Directory the result is made relative to.
 * @returns {string} "./", a "./"-prefixed relative path, or an absolute path.
 */
function renderFullPath(absolutePath, cwd = process.cwd()) {
    // Best-effort realpath: fall back to the input when it cannot be resolved.
    const realOrSelf = (candidate) => {
        try {
            return realpathSync(candidate);
        }
        catch {
            return candidate;
        }
    };
    const real = realOrSelf(absolutePath);
    const cwdReal = realOrSelf(cwd);
    if (real === cwdReal) {
        return "./";
    }
    // Build the containment prefix without doubling the separator, so a cwd of
    // "/" (or one passed with a trailing slash) still matches its children.
    const cwdPrefix = cwdReal.endsWith("/") ? cwdReal : `${cwdReal}/`;
    if (real.startsWith(cwdPrefix)) {
        const rel = relativePath(cwdReal, real);
        // Reject only results that actually climb out of cwd (".." or "../…");
        // an entry whose name merely starts with two dots is still inside it.
        const escapesCwd = rel === ".." || rel.startsWith("../");
        if (rel && !escapesCwd) {
            return `./${rel}`;
        }
    }
    return real;
}
779
/**
 * Look up a single indexed document and print it to stdout.
 *
 * `filename` may carry a line suffix: "file.md:100" (start line) or
 * "file.md:100:40" (start line and line count). Values passed explicitly in
 * `fromLine` / `maxLines` take precedence over the suffix. The start line is
 * clamped to a minimum of 1.
 *
 * Lookup order:
 *   1. docid (via isDocid / findDocumentByDocid), which yields a file path;
 *   2. virtual path: exact collection + path, then a path-suffix match
 *      within that collection;
 *   3. "collection/path" shorthand when the first segment names a collection
 *      that has active documents (exact, then suffix);
 *   4. filesystem path: resolved, mapped to a collection, exact match;
 *   5. last resort: any active document whose path ends with the final
 *      segment of the input.
 *
 * Output is a header line, an optional "Folder Context:" line, a "---"
 * separator and the (optionally sliced and numbered) body. When nothing
 * matches, an error is printed and the process exits with status 1.
 *
 * @param {string} filename - Path, virtual path or docid, optionally with a line suffix.
 * @param {number} [fromLine] - 1-based first line to print.
 * @param {number} [maxLines] - Maximum number of lines to print.
 * @param {boolean} lineNumbers - Whether to pass the body through addLineNumbers().
 * @param {boolean} [fullPath=false] - Show the on-disk path as the header when the file exists.
 */
function getDocument(filename, fromLine, maxLines, lineNumbers, fullPath = false) {
    // Peel a trailing ":from[:count]" off the input. The digits are anchored
    // to the end of the string, so the "://" of a virtual path never matches.
    let inputPath = filename;
    const rangeSuffix = inputPath.match(/:(\d+):(\d+)$/);
    if (rangeSuffix) {
        const [suffix, startDigits, countDigits] = rangeSuffix;
        if (fromLine === undefined) {
            fromLine = parseInt(startDigits, 10);
        }
        if (maxLines === undefined) {
            maxLines = parseInt(countDigits, 10);
        }
        inputPath = inputPath.slice(0, -suffix.length);
    }
    else {
        const lineSuffix = inputPath.match(/:(\d+)$/);
        if (lineSuffix && lineSuffix[1]) {
            if (fromLine === undefined) {
                fromLine = parseInt(lineSuffix[1], 10);
            }
            inputPath = inputPath.slice(0, -lineSuffix[0].length);
        }
    }
    if (fromLine !== undefined) {
        fromLine = Math.max(1, fromLine);
    }
    // A virtual path may name an index; select it before opening the database.
    const indexHint = isVirtualPath(inputPath) ? parseVirtualPath(inputPath) : null;
    if (indexHint?.indexName) {
        setIndexName(indexHint.indexName);
        setConfigIndexName(indexHint.indexName);
    }
    const db = getDb();
    // Report a fatal lookup problem: message, close the database, exit 1.
    const abort = (message) => {
        console.error(message);
        closeDb();
        process.exit(1);
    };
    // All lookups return the same three columns and differ only in the filter.
    const selectDoc = (filter, ...args) => db.prepare(`
        SELECT d.collection as collectionName, d.path, content.doc as body
        FROM documents d
        JOIN content ON content.hash = d.hash
        WHERE ${filter}
    `).get(...args);
    const exactIn = (collection, path) => selectDoc(`d.collection = ? AND d.path = ? AND d.active = 1`, collection, path);
    const suffixIn = (collection, tail) => selectDoc(`d.collection = ? AND d.path LIKE ? AND d.active = 1
        LIMIT 1`, collection, `%${tail}`);
    const suffixAnywhere = (tail) => selectDoc(`d.path LIKE ? AND d.active = 1
        LIMIT 1`, `%${tail}`);
    // Handle docid lookup (#abc123, abc123, "#abc123", "abc123", etc.)
    if (isDocid(inputPath)) {
        const hit = findDocumentByDocid(db, inputPath);
        if (!hit) {
            abort(`Document not found: ${filename}`);
        }
        else {
            inputPath = hit.filepath;
        }
    }
    let doc = null;
    // Written by each lookup branch below; not read again in this function.
    let virtualPath;
    if (isVirtualPath(inputPath)) {
        // qmd://collection/path
        const virtual = parseVirtualPath(inputPath);
        if (!virtual) {
            abort(`Invalid virtual path: ${inputPath}`);
        }
        doc = exactIn(virtual.collectionName, virtual.path);
        if (!doc) {
            doc = suffixIn(virtual.collectionName, virtual.path);
        }
        virtualPath = inputPath;
    }
    else {
        // "collection/path" shorthand is tried before treating the input as a
        // filesystem path, but only for inputs without a "/" or "~" prefix.
        const looksRelative = !inputPath.startsWith('/') && !inputPath.startsWith('~');
        const segments = looksRelative ? inputPath.split('/') : [];
        if (segments.length >= 2) {
            const candidateCollection = segments[0];
            const candidatePath = segments.slice(1).join('/');
            const collectionKnown = candidateCollection ? db.prepare(`
                SELECT 1 FROM documents WHERE collection = ? AND active = 1 LIMIT 1
            `).get(candidateCollection) : null;
            if (collectionKnown) {
                doc = exactIn(candidateCollection || "", candidatePath || "");
                if (!doc) {
                    doc = suffixIn(candidateCollection || "", candidatePath);
                }
                if (doc) {
                    virtualPath = buildVirtualPath(doc.collectionName, doc.path);
                }
            }
        }
        // Not resolved as collection/path: treat the input as a filesystem path.
        if (!doc) {
            let diskPath = inputPath;
            if (diskPath.startsWith('~/')) {
                diskPath = homedir() + diskPath.slice(1);
            }
            else if (!diskPath.startsWith('/')) {
                diskPath = resolve(getPwd(), diskPath);
            }
            diskPath = getRealPath(diskPath);
            const owner = detectCollectionFromPath(db, diskPath);
            if (owner) {
                doc = exactIn(owner.collectionName, owner.relativePath);
            }
            if (!doc) {
                // Fall back to matching on the last path component only.
                const baseName = inputPath.split('/').pop() || inputPath;
                doc = suffixAnywhere(baseName);
            }
            virtualPath = doc ? buildVirtualPath(doc.collectionName, doc.path) : inputPath;
        }
    }
    if (!doc) {
        abort(`Document not found: ${filename}`);
    }
    const context = getContextForPath(db, doc.collectionName, doc.path);
    // The docid is the first 6 characters of the content hash, so callers can
    // cite what they retrieved.
    const hashRow = db.prepare(`
        SELECT d.hash as hash
        FROM documents d
        WHERE d.collection = ? AND d.path = ? AND d.active = 1
    `).get(doc.collectionName, doc.path);
    const docid = hashRow?.hash ? hashRow.hash.slice(0, 6) : undefined;
    const canonicalPath = buildVirtualPath(doc.collectionName, doc.path);
    // Default header is the qmd:// path plus docid; --full-path swaps in the
    // on-disk path, but only when that file actually exists.
    let header = docid ? `${canonicalPath} #${docid}` : canonicalPath;
    if (fullPath) {
        const onDisk = resolveVirtualPath(db, canonicalPath);
        if (onDisk && existsSync(onDisk)) {
            header = renderFullPath(onDisk);
        }
    }
    let output = doc.body;
    const startLine = fromLine || 1;
    const wantsSlice = fromLine !== undefined || maxLines !== undefined;
    if (wantsSlice) {
        const allLines = output.split('\n');
        const first = startLine - 1; // 0-based
        const last = maxLines !== undefined ? first + maxLines : allLines.length;
        output = allLines.slice(first, last).join('\n');
    }
    if (lineNumbers) {
        output = addLineNumbers(output, startLine);
    }
    console.log(header);
    if (context) {
        console.log(`Folder Context: ${context}`);
    }
    console.log("---\n");
    console.log(output);
    closeDb();
}
989
/**
 * Multi-get: fetch multiple documents by glob pattern or comma-separated list
 * and print them in the requested format.
 *
 * A pattern containing "," and none of "*", "?", "{" is treated as a list of
 * names; each name is resolved as a virtual path (exact match) or as a
 * document path (exact, then suffix match). Names that resolve to nothing are
 * reported on stderr and skipped. Any other pattern is handed to
 * matchFilesByGlob(); if that matches nothing the process exits with status 1.
 *
 * Documents whose stored length exceeds `maxBytes` are listed as skipped with
 * a reason instead of a body. When `maxLines` truncates a body, a
 * "[... truncated N more lines]" notice is appended after line numbering, so
 * the notice itself is never numbered as if it were document content.
 *
 * @param {string} pattern - Glob pattern or comma-separated list of names.
 * @param {number} [maxLines] - Maximum number of lines kept per document.
 * @param {number} [maxBytes=DEFAULT_MULTI_GET_MAX_BYTES] - Size limit per document.
 * @param {string} [format="cli"] - "json", "csv", "files", "md", "xml"; anything else prints the CLI layout.
 * @param {boolean} [lineNumbers=true] - Whether bodies are passed through addLineNumbers().
 * @param {boolean} [fullPath=false] - Identify documents by on-disk path (when it exists) instead of qmd:// path + docid.
 */
function multiGet(pattern, maxLines, maxBytes = DEFAULT_MULTI_GET_MAX_BYTES, format = "cli", lineNumbers = true, fullPath = false) {
    const db = getDb();
    // Columns shared by every lookup used for the comma-separated form.
    const selectFile = (filter, ...args) => db.prepare(`
        SELECT
            'qmd://' || d.collection || '/' || d.path as virtual_path,
            LENGTH(content.doc) as body_length,
            d.collection,
            d.path
        FROM documents d
        JOIN content ON content.hash = d.hash
        WHERE ${filter}
    `).get(...args);
    const hasGlobSyntax = pattern.includes('*') || pattern.includes('?') || pattern.includes('{');
    const isCommaSeparated = pattern.includes(',') && !hasGlobSyntax;
    let files;
    if (isCommaSeparated) {
        // Comma-separated list of files (can be virtual paths or relative paths)
        files = [];
        const names = pattern.split(',').map((s) => s.trim()).filter(Boolean);
        for (const name of names) {
            let row = null;
            if (isVirtualPath(name)) {
                const parsed = parseVirtualPath(name);
                if (parsed) {
                    row = selectFile(`d.collection = ? AND d.path = ? AND d.active = 1`, parsed.collectionName, parsed.path);
                }
            }
            else {
                // Exact path first, then fall back to a suffix match.
                row = selectFile(`d.path = ? AND d.active = 1
        LIMIT 1`, name);
                if (!row) {
                    row = selectFile(`d.path LIKE ? AND d.active = 1
        LIMIT 1`, `%${name}`);
                }
            }
            if (!row) {
                console.error(`File not found: ${name}`);
                continue;
            }
            files.push({
                filepath: row.virtual_path,
                displayPath: row.virtual_path,
                bodyLength: row.body_length,
                collection: row.collection,
                path: row.path
            });
        }
    }
    else {
        // Glob pattern - matchFilesByGlob returns virtual paths; collection
        // and path are derived from them in the loop below.
        files = matchFilesByGlob(db, pattern).map((f) => ({
            ...f,
            collection: undefined,
            path: undefined
        }));
        if (files.length === 0) {
            console.error(`No files matched pattern: ${pattern}`);
            closeDb();
            process.exit(1);
        }
    }
    // Collect results for structured output
    const results = [];
    for (const file of files) {
        let collection = file.collection;
        let path = file.path;
        if (!collection || !path) {
            const parsed = parseVirtualPath(file.filepath);
            if (parsed) {
                collection = parsed.collectionName;
                path = parsed.path;
            }
        }
        const located = Boolean(collection && path);
        const context = located ? getContextForPath(db, collection, path) : null;
        // docid = first 6 chars of the content hash, so every entry can be cited.
        const docidRow = located ? db.prepare(`
            SELECT d.hash as hash
            FROM documents d
            WHERE d.collection = ? AND d.path = ? AND d.active = 1
        `).get(collection, path) : null;
        const docid = docidRow?.hash ? docidRow.hash.slice(0, 6) : undefined;
        // --full-path: resolve the on-disk path when it exists (else fall back).
        let fsPath;
        if (fullPath) {
            const resolved = resolveVirtualPath(db, file.filepath);
            if (resolved && existsSync(resolved)) {
                fsPath = renderFullPath(resolved);
            }
        }
        const fallbackTitle = file.displayPath.split('/').pop() || file.displayPath;
        // Oversized documents are reported but not loaded.
        if (file.bodyLength > maxBytes) {
            results.push({
                file: file.filepath,
                displayPath: file.displayPath,
                fsPath,
                docid,
                title: fallbackTitle,
                body: "",
                context,
                skipped: true,
                skipReason: `File too large (${Math.round(file.bodyLength / 1024)}KB > ${Math.round(maxBytes / 1024)}KB). Use 'qmd get ${file.displayPath}' to retrieve.`,
            });
            continue;
        }
        if (!collection || !path) {
            continue;
        }
        const doc = db.prepare(`
            SELECT content.doc as body, d.title
            FROM documents d
            JOIN content ON content.hash = d.hash
            WHERE d.collection = ? AND d.path = ? AND d.active = 1
        `).get(collection, path);
        if (!doc) {
            continue;
        }
        let body = doc.body;
        let truncationNotice = "";
        if (maxLines !== undefined) {
            const lines = body.split('\n');
            body = lines.slice(0, maxLines).join('\n');
            if (lines.length > maxLines) {
                truncationNotice = `\n\n[... truncated ${lines.length - maxLines} more lines]`;
            }
        }
        // Number only real document lines; the truncation notice is appended
        // afterwards so it is not presented as a numbered line of the file.
        if (lineNumbers) {
            body = addLineNumbers(body);
        }
        body += truncationNotice;
        results.push({
            file: file.filepath,
            displayPath: file.displayPath,
            fsPath,
            docid,
            title: doc.title || fallbackTitle,
            body,
            context,
            skipped: false,
        });
    }
    closeDb();
    // --full-path replaces the qmd:// path + docid with the on-disk path (when
    // it resolved). Per result: pick the identifier and whether to show the docid.
    const showsDiskPath = (r) => Boolean(fullPath && r.fsPath);
    const identOf = (r) => (showsDiskPath(r) ? r.fsPath : r.displayPath);
    const docidOf = (r) => (showsDiskPath(r) ? undefined : r.docid);
    switch (format) {
        case "json": {
            const output = results.map((r) => {
                const docidVal = docidOf(r);
                return {
                    file: identOf(r),
                    ...(docidVal && { docid: `#${docidVal}` }),
                    title: r.title,
                    ...(r.context && { context: r.context }),
                    ...(r.skipped ? { skipped: true, reason: r.skipReason } : { body: r.body }),
                };
            });
            console.log(JSON.stringify(output, null, 2));
            break;
        }
        case "csv": {
            // Quote fields containing a separator, a quote or any line-break
            // character (CR as well as LF), doubling embedded quotes.
            const escapeField = (val) => {
                if (val === null || val === undefined) {
                    return "";
                }
                const str = String(val);
                if (/[",\r\n]/.test(str)) {
                    return `"${str.replace(/"/g, '""')}"`;
                }
                return str;
            };
            console.log("docid,file,title,context,skipped,body");
            for (const r of results) {
                const docidVal = docidOf(r);
                const row = [
                    docidVal ? `#${docidVal}` : "",
                    identOf(r),
                    r.title,
                    r.context,
                    r.skipped ? "true" : "false",
                    r.skipped ? r.skipReason : r.body,
                ];
                console.log(row.map(escapeField).join(","));
            }
            break;
        }
        case "files": {
            for (const r of results) {
                const docidVal = docidOf(r);
                const id = docidVal ? `#${docidVal} ` : "";
                const ctx = r.context ? `,"${r.context.replace(/"/g, '""')}"` : "";
                const status = r.skipped ? "[SKIPPED]" : "";
                console.log(`${id}${identOf(r)}${ctx}${status ? `,${status}` : ""}`);
            }
            break;
        }
        case "md": {
            for (const r of results) {
                const docidVal = docidOf(r);
                console.log(`## ${identOf(r)}\n`);
                if (docidVal) {
                    console.log(`**docid:** \`#${docidVal}\`\n`);
                }
                if (r.title && r.title !== r.displayPath) {
                    console.log(`**Title:** ${r.title}\n`);
                }
                if (r.context) {
                    console.log(`**Context:** ${r.context}\n`);
                }
                if (r.skipped) {
                    console.log(`> ${r.skipReason}\n`);
                }
                else {
                    console.log("```");
                    console.log(r.body);
                    console.log("```\n");
                }
            }
            break;
        }
        case "xml": {
            console.log('<?xml version="1.0" encoding="UTF-8"?>');
            console.log("<documents>");
            for (const r of results) {
                const docidVal = docidOf(r);
                const docidAttr = docidVal ? ` docid="#${docidVal}"` : "";
                console.log(` <document${docidAttr}>`);
                console.log(` <file>${escapeXml(identOf(r))}</file>`);
                console.log(` <title>${escapeXml(r.title)}</title>`);
                if (r.context) {
                    console.log(` <context>${escapeXml(r.context)}</context>`);
                }
                if (r.skipped) {
                    console.log(` <skipped>true</skipped>`);
                    console.log(` <reason>${escapeXml(r.skipReason || "")}</reason>`);
                }
                else {
                    console.log(` <body>${escapeXml(r.body)}</body>`);
                }
                console.log(" </document>");
            }
            console.log("</documents>");
            break;
        }
        default: {
            // CLI format (default)
            for (const r of results) {
                const docidVal = docidOf(r);
                const id = docidVal ? ` #${docidVal}` : "";
                console.log(`\n${'='.repeat(60)}`);
                console.log(`File: ${identOf(r)}${id}`);
                console.log(`${'='.repeat(60)}\n`);
                if (r.skipped) {
                    console.log(`[SKIPPED: ${r.skipReason}]`);
                    continue;
                }
                if (r.context) {
                    console.log(`Folder Context: ${r.context}\n---\n`);
                }
                console.log(r.body);
            }
        }
    }
}
1259
/**
 * List files in the virtual file tree.
 *
 * With no argument, prints every collection from the YAML config together
 * with its count of active documents. Otherwise `pathArg` selects a
 * collection and an optional path prefix, in one of these forms:
 *   - qmd:///abs/path…      longest-prefix match against collection names,
 *                           falling back to parseVirtualPath() when none match
 *   - qmd://collection/path parsed with parseVirtualPath()
 *   - /abs/path…            longest-prefix match against collection names;
 *                           when none match the whole path is used as the name
 *   - collection[/path]     first segment is the collection name
 *
 * Matching files are printed one per line with size, modification time and
 * virtual path. An invalid virtual path or unknown collection prints an error
 * and exits the process with status 1.
 *
 * @param {string} [pathArg] - Collection or path to list; omit to list collections.
 */
function listFiles(pathArg) {
    const db = getDb();
    if (!pathArg) {
        // No argument - list all collections
        const yamlCollections = yamlListCollections();
        if (yamlCollections.length === 0) {
            console.log("No collections found. Run 'qmd collection add .' to index files.");
            closeDb();
            return;
        }
        const summaries = yamlCollections.map((coll) => {
            const stats = db.prepare(`
                SELECT COUNT(*) as file_count
                FROM documents d
                WHERE d.collection = ? AND d.active = 1
            `).get(coll.name);
            return {
                name: coll.name,
                file_count: stats?.file_count || 0
            };
        });
        console.log(`${c.bold}Collections:${c.reset}\n`);
        for (const summary of summaries) {
            console.log(` ${c.dim}qmd://${c.reset}${c.cyan}${summary.name}/${c.reset} ${c.dim}(${summary.file_count} files)${c.reset}`);
        }
        closeDb();
        return;
    }
    let collectionName;
    let pathPrefix = null;
    // Resolve collection + prefix through parseVirtualPath(); exits on failure.
    const applyVirtualPath = () => {
        const parsed = parseVirtualPath(pathArg);
        if (!parsed) {
            console.error(`Invalid virtual path: ${pathArg}`);
            closeDb();
            process.exit(1);
        }
        collectionName = parsed.collectionName;
        pathPrefix = parsed.path;
    };
    // Find the collection with the longest name that equals `normalized` or is
    // a directory prefix of it. Returns true when one was found and applied.
    const applyLongestNameMatch = (normalized) => {
        let best;
        for (const candidate of yamlListCollections()) {
            const covers = normalized === candidate.name || normalized.startsWith(candidate.name + '/');
            if (covers && (best === undefined || candidate.name.length > best.name.length)) {
                best = candidate;
            }
        }
        if (!best) {
            return false;
        }
        collectionName = best.name;
        const rest = normalized.slice(best.name.length).replace(/^\//, '');
        pathPrefix = rest || null;
        return true;
    };
    const afterScheme = pathArg.startsWith('qmd://') ? pathArg.slice('qmd://'.length) : null;
    if (afterScheme !== null && afterScheme.startsWith('/')) {
        // Absolute-path collection: qmd:///Users/foo/bar — normalizeVirtualPath would corrupt
        // this by stripping all leading slashes, so bypass parseVirtualPath entirely.
        if (!applyLongestNameMatch(afterScheme.replace(/\/$/, ''))) {
            // Preserve the historical qmd:////collection/path alias behavior for normal
            // collections when no absolute-path collection matches.
            applyVirtualPath();
        }
    }
    else if (afterScheme !== null) {
        // Normal virtual path: qmd://collection-name/path
        applyVirtualPath();
    }
    else if (pathArg.startsWith('/')) {
        // Raw absolute filesystem path — longest-prefix match against collection names
        const normalized = pathArg.replace(/\/$/, '');
        if (!applyLongestNameMatch(normalized)) {
            collectionName = normalized;
        }
    }
    else {
        // Short collection name or name/path
        const parts = pathArg.split('/');
        collectionName = parts[0] || '';
        if (parts.length > 1) {
            pathPrefix = parts.slice(1).join('/');
        }
    }
    const coll = getCollectionFromYaml(collectionName);
    if (!coll) {
        console.error(`Collection not found: ${collectionName}`);
        console.error(`Run 'qmd ls' to see available collections.`);
        closeDb();
        process.exit(1);
    }
    // List files in the collection with size and modification time
    let query;
    let params;
    if (pathPrefix) {
        query = `
            SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size
            FROM documents d
            JOIN content ct ON d.hash = ct.hash
            WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1
            ORDER BY d.path
        `;
        params = [coll.name, `${pathPrefix}%`];
    }
    else {
        query = `
            SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size
            FROM documents d
            JOIN content ct ON d.hash = ct.hash
            WHERE d.collection = ? AND d.active = 1
            ORDER BY d.path
        `;
        params = [coll.name];
    }
    const files = db.prepare(query).all(...params);
    if (files.length === 0) {
        if (pathPrefix) {
            console.log(`No files found under qmd://${collectionName}/${pathPrefix}`);
        }
        else {
            console.log(`No files found in collection: ${collectionName}`);
        }
        closeDb();
        return;
    }
    // Format each size once and fold to the widest label. A reduce is used
    // instead of Math.max(...array) because spreading one argument per file
    // can exceed the engine's argument limit on very large collections.
    const sizeLabels = files.map((f) => formatBytes(f.size));
    const maxSize = sizeLabels.reduce((widest, label) => Math.max(widest, label.length), 0);
    // Output in ls -l style
    files.forEach((file, index) => {
        const sizeStr = sizeLabels[index].padStart(maxSize);
        const timeStr = formatLsTime(new Date(file.modified_at));
        // Dim the qmd:// prefix, highlight the filename
        console.log(`${sizeStr} ${timeStr} ${c.dim}qmd://${collectionName}/${c.reset}${c.cyan}${file.path}${c.reset}`);
    });
    closeDb();
}
1410
/**
 * Format date/time like `ls -l`.
 *
 * Produces "<Mon> <day> <year>" for dates more than 6 * 30 days before the
 * current time, and "<Mon> <day> <HH>:<MM>" otherwise. The day is
 * space-padded to two characters; hours and minutes are zero-padded. All
 * fields are read in local time.
 *
 * @param {Date} date - Timestamp to format.
 * @returns {string} The formatted timestamp.
 */
function formatLsTime(date) {
    const MONTH_LABELS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    const SIX_MONTHS_MS = 6 * 30 * 24 * 60 * 60 * 1000;
    const zeroPad = (value) => value.toString().padStart(2, '0');
    const cutoff = new Date(new Date().getTime() - SIX_MONTHS_MS);
    const monthLabel = MONTH_LABELS[date.getMonth()];
    const dayLabel = date.getDate().toString().padStart(2, ' ');
    // Old entries show the year; recent ones show the time of day.
    const tail = date < cutoff
        ? date.getFullYear()
        : `${zeroPad(date.getHours())}:${zeroPad(date.getMinutes())}`;
    return `${monthLabel} ${dayLabel} ${tail}`;
}
1428
+ // Collection management commands
1429
/**
 * Print a summary of every collection returned by listCollections(): name,
 * virtual root, glob pattern, ignore list (when the YAML entry has one),
 * active file count and time since the last modification. Collections whose
 * YAML entry sets `includeByDefault: false` are tagged "[excluded]".
 * The database is opened on entry and closed before returning.
 */
function collectionList() {
    const db = getDb();
    const collections = listCollections(db);
    if (collections.length === 0) {
        console.log("No collections found. Run 'qmd collection add .' to create one.");
        closeDb();
        return;
    }
    console.log(`${c.bold}Collections (${collections.length}):${c.reset}\n`);
    collections.forEach((entry) => {
        // With no recorded modification time, the current time is used.
        const updatedAt = entry.last_modified ? new Date(entry.last_modified) : new Date();
        const timeAgo = formatTimeAgo(updatedAt);
        // The YAML entry carries includeByDefault and the ignore list.
        const yamlEntry = getCollectionFromYaml(entry.name);
        const excludeTag = yamlEntry?.includeByDefault === false
            ? ` ${c.yellow}[excluded]${c.reset}`
            : '';
        console.log(`${c.cyan}${entry.name}${c.reset} ${c.dim}(qmd://${entry.name}/)${c.reset}${excludeTag}`);
        console.log(` ${c.dim}Pattern:${c.reset} ${entry.glob_pattern}`);
        if (yamlEntry?.ignore?.length) {
            console.log(` ${c.dim}Ignore:${c.reset} ${yamlEntry.ignore.join(', ')}`);
        }
        console.log(` ${c.dim}Files:${c.reset} ${entry.active_count}`);
        console.log(` ${c.dim}Updated:${c.reset} ${timeAgo}`);
        console.log();
    });
    closeDb();
}
1456
/**
 * Register a new collection and index its files.
 *
 * When `name` is not given, the last non-empty segment of `pwd` is used, or
 * "root" if there is none. The call is rejected (error message, exit status
 * 1) when a collection with that name already exists or when another
 * collection already covers the same path and pattern. Otherwise the
 * collection is added to the YAML config, resyncConfig() is called, and
 * indexFiles() is awaited with the new collection's ignore list.
 *
 * @param {string} pwd - Directory the collection is rooted at.
 * @param {string} globPattern - Glob pattern selecting files.
 * @param {string} [name] - Collection name; derived from `pwd` when omitted.
 */
async function collectionAdd(pwd, globPattern, name) {
    let collName = name;
    if (!collName) {
        // Derive the name from the last path segment of pwd.
        const segments = pwd.split('/').filter(Boolean);
        collName = segments[segments.length - 1] || 'root';
    }
    // Reject a duplicate name.
    if (getCollectionFromYaml(collName)) {
        console.error(`${c.yellow}Collection '${collName}' already exists.${c.reset}`);
        console.error(`Use a different name with --name <name>`);
        process.exit(1);
    }
    // Reject a duplicate path + pattern pair, whatever it is called.
    const duplicate = yamlListCollections().find((entry) => entry.path === pwd && entry.pattern === globPattern);
    if (duplicate) {
        console.error(`${c.yellow}A collection already exists for this path and pattern:${c.reset}`);
        console.error(` Name: ${duplicate.name} (qmd://${duplicate.name}/)`);
        console.error(` Pattern: ${globPattern}`);
        console.error(`\nUse 'qmd update' to re-index it, or remove it first with 'qmd collection remove ${duplicate.name}'`);
        process.exit(1);
    }
    // Add to YAML config + sync to SQLite
    const collectionsModule = await import("../collections.js");
    collectionsModule.addCollection(collName, pwd, globPattern);
    resyncConfig();
    console.log(`Creating collection '${collName}'...`);
    const created = getCollectionFromYaml(collName);
    await indexFiles(pwd, globPattern, collName, false, created?.ignore);
    console.log(`${c.green}✓${c.reset} Collection '${collName}' created successfully`);
}
1490
/**
 * Remove a collection from the index database and from the YAML config,
 * then print a summary of what was deleted.
 *
 * Exits the process with status 1 when `name` is not a configured collection.
 *
 * @param {string} name - Name of the collection to remove.
 */
function collectionRemove(name) {
    // The YAML config decides whether the collection exists.
    if (!getCollectionFromYaml(name)) {
        console.error(`${c.yellow}Collection not found: ${name}${c.reset}`);
        console.error(`Run 'qmd collection list' to see available collections.`);
        process.exit(1);
    }
    const database = getDb();
    const outcome = removeCollection(database, name);
    // Keep the YAML config in step with the database.
    yamlRemoveCollectionFn(name);
    closeDb();
    console.log(`${c.green}✓${c.reset} Removed collection '${name}'`);
    console.log(` Deleted ${outcome.deletedDocs} documents`);
    const orphanCount = outcome.cleanedHashes;
    if (orphanCount > 0) {
        console.log(` Cleaned up ${orphanCount} orphaned content hashes`);
    }
}
1509
/**
 * Rename a collection in the index database and in the YAML config.
 *
 * Exits the process with status 1 when `oldName` is not configured or when
 * `newName` is already taken.
 *
 * @param {string} oldName - Current collection name.
 * @param {string} newName - Desired collection name.
 */
function collectionRename(oldName, newName) {
    // The source collection must exist in the YAML config.
    const source = getCollectionFromYaml(oldName);
    if (!source) {
        console.error(`${c.yellow}Collection not found: ${oldName}${c.reset}`);
        console.error(`Run 'qmd collection list' to see available collections.`);
        process.exit(1);
    }
    // The target name must still be free.
    const clash = getCollectionFromYaml(newName);
    if (clash) {
        console.error(`${c.yellow}Collection name already exists: ${newName}${c.reset}`);
        console.error(`Choose a different name or remove the existing collection first.`);
        process.exit(1);
    }
    const database = getDb();
    renameCollection(database, oldName, newName);
    // Mirror the rename in the YAML config.
    yamlRenameCollectionFn(oldName, newName);
    closeDb();
    const fromUri = `${c.cyan}qmd://${oldName}/${c.reset}`;
    const toUri = `${c.cyan}qmd://${newName}/${c.reset}`;
    console.log(`${c.green}✓${c.reset} Renamed collection '${oldName}' to '${newName}'`);
    console.log(` Virtual paths updated: ${fromUri} → ${toUri}`);
}
1532
/**
 * Scan a directory for files matching a glob and synchronise them into the
 * given collection: new files are inserted, changed files updated, and
 * documents whose path was not seen in this scan are deactivated.
 *
 * @param {string} pwd - Directory to scan; falls back to getPwd() when falsy.
 * @param {string} [globPattern=DEFAULT_GLOB] - Glob selecting files to index.
 * @param {string} collectionName - Target collection (required).
 * @param {boolean} [suppressEmbedNotice=false] - Skip the "Run 'qmd embed'" hint.
 * @param {string[]} [ignorePatterns] - Extra glob patterns to ignore.
 * @returns {Promise<void>}
 * @throws {Error} When `collectionName` is missing.
 */
async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppressEmbedNotice = false, ignorePatterns) {
    const db = getDb();
    const resolvedPwd = pwd || getPwd();
    const now = new Date().toISOString();
    const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"];
    // Clear Ollama cache on index
    clearCache(db);
    // Collection name must be provided (from YAML)
    if (!collectionName) {
        throw new Error("Collection name is required. Collections must be defined in ~/.config/qmd/index.yml");
    }
    console.log(`Collection: ${resolvedPwd} (${globPattern})`);
    progress.indeterminate();
    const allIgnore = [
        ...excludeDirs.map(d => `**/${d}/**`),
        ...(ignorePatterns || []),
    ];
    const allFiles = await fastGlob(globPattern, {
        cwd: resolvedPwd,
        onlyFiles: true,
        followSymbolicLinks: false,
        dot: false,
        ignore: allIgnore,
    });
    // Filter hidden files/folders (dot: false handles top-level but not nested)
    const files = allFiles.filter(file => !file.split("/").some(part => part.startsWith(".")));
    const total = files.length;
    if (total === 0) {
        progress.clear();
        console.log("No files found matching pattern.");
        // Continue so the deactivation pass can mark previously indexed docs as inactive.
    }
    let indexed = 0;
    let updated = 0;
    let unchanged = 0;
    let processed = 0;
    const seenPaths = new Set();
    const startTime = Date.now();
    // Single place that counts a file as handled and refreshes the progress
    // indicator + status line, so skipped files (unreadable or empty) move the
    // display exactly like indexed ones.
    const markProcessed = () => {
        processed++;
        progress.set((processed / total) * 100);
        const elapsed = (Date.now() - startTime) / 1000;
        const rate = processed / elapsed;
        const remaining = (total - processed) / rate;
        const eta = processed > 2 ? ` ETA: ${formatETA(remaining)}` : "";
        if (isTTY)
            process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
    };
    for (const relativeFile of files) {
        const filepath = getRealPath(resolve(resolvedPwd, relativeFile));
        // Store the literal relative path — handelize() is NOT applied at index time.
        const path = relativeFile.replace(/\\/g, '/');
        // Recorded before reading: a file that exists but is skipped below is
        // still "seen" and therefore not deactivated.
        seenPaths.add(path);
        let content;
        try {
            content = readFileSync(filepath, "utf-8");
        }
        catch {
            // Skip files that can't be read (e.g. iCloud evicted files returning EAGAIN)
            markProcessed();
            continue;
        }
        // Skip empty files - nothing useful to index
        if (!content.trim()) {
            markProcessed();
            continue;
        }
        const hash = await hashContent(content);
        const title = extractTitle(content, relativeFile);
        // Check if document exists (also migrates legacy lowercase paths)
        const existing = findOrMigrateLegacyDocument(db, collectionName, path);
        if (existing) {
            if (existing.hash === hash) {
                // Hash unchanged, but check if title needs updating
                if (existing.title !== title) {
                    updateDocumentTitle(db, existing.id, title, now);
                    updated++;
                }
                else {
                    unchanged++;
                }
            }
            else {
                // Content changed - insert new content hash and update document
                insertContent(db, hash, content, now);
                const stat = statSync(filepath);
                updateDocument(db, existing.id, title, hash, stat ? new Date(stat.mtime).toISOString() : now);
                updated++;
            }
        }
        else {
            // New document - insert content and document
            indexed++;
            insertContent(db, hash, content, now);
            const stat = statSync(filepath);
            insertDocument(db, collectionName, path, title, hash, stat ? new Date(stat.birthtime).toISOString() : now, stat ? new Date(stat.mtime).toISOString() : now);
        }
        markProcessed();
    }
    // Deactivate documents in this collection that no longer exist
    const allActive = getActiveDocumentPaths(db, collectionName);
    let removed = 0;
    for (const path of allActive) {
        if (!seenPaths.has(path)) {
            deactivateDocument(db, collectionName, path);
            removed++;
        }
    }
    // Clean up orphaned content hashes (content not referenced by any document)
    const orphanedContent = cleanupOrphanedContent(db);
    // Check if vector index needs updating
    const needsEmbedding = getHashesNeedingEmbedding(db);
    progress.clear();
    console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`);
    if (orphanedContent > 0) {
        console.log(`Cleaned up ${orphanedContent} orphaned content hash(es)`);
    }
    if (needsEmbedding > 0 && !suppressEmbedNotice) {
        console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
    }
    closeDb();
}
1653
/**
 * Render a fixed-width text progress bar.
 *
 * The percentage is clamped to the 0–100 range (NaN counts as 0) so that an
 * out-of-range value can never produce a negative repeat count, which would
 * make String.prototype.repeat throw a RangeError.
 *
 * @param {number} percent - Completion percentage.
 * @param {number} [width=30] - Total number of bar cells.
 * @returns {string} `width` cells: filled blocks followed by light-shade blocks.
 */
function renderProgressBar(percent, width = 30) {
    const rawRatio = percent / 100;
    const ratio = Number.isNaN(rawRatio) ? 0 : Math.min(1, Math.max(0, rawRatio));
    const filled = Math.round(ratio * width);
    const empty = width - filled;
    return "█".repeat(filled) + "░".repeat(empty);
}
1659
/**
 * Parse a numeric batch option supplied on the command line.
 *
 * @param {string} name - Option name, used in the error message.
 * @param {*} value - Raw option value; `undefined` means "not supplied".
 * @returns {number|undefined} The parsed integer, or `undefined` when absent.
 * @throws {Error} When the value is not an integer >= 1.
 */
function parseEmbedBatchOption(name, value) {
    if (value !== undefined) {
        const numeric = Number(value);
        const isPositiveInteger = Number.isInteger(numeric) && numeric >= 1;
        if (isPositiveInteger) {
            return numeric;
        }
        throw new Error(`${name} must be a positive integer`);
    }
    return undefined;
}
1668
/**
 * Validate the --chunk-strategy option.
 *
 * @param {*} value - Raw option value; `undefined` means "not supplied".
 * @returns {"auto"|"regex"|undefined} The accepted strategy, or `undefined`.
 * @throws {Error} When the value is neither "auto" nor "regex".
 */
function parseChunkStrategy(value) {
    if (value === undefined) {
        return undefined;
    }
    const strategy = String(value);
    switch (strategy) {
        case "auto":
        case "regex":
            return strategy;
        default:
            throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${strategy}")`);
    }
}
1676
/**
 * Resolve the embed/generate/rerank models for the CLI and write them back
 * to the config when the stored values differ from the resolved ones.
 *
 * Best-effort: any failure while loading, resolving or saving falls back to
 * resolveModels() called without arguments.
 *
 * @returns {object} The resolved models (reads `.embed`, `.generate`, `.rerank`).
 */
function ensureModelsConfiguredForCli() {
    try {
        const config = loadConfig();
        const resolved = resolveModels(config.models);
        const stored = config.models ?? {};
        const inSync = stored.embed === resolved.embed
            && stored.generate === resolved.generate
            && stored.rerank === resolved.rerank;
        if (!inSync) {
            // Persist the resolved names while keeping any other stored model keys.
            const models = {
                ...stored,
                embed: resolved.embed,
                generate: resolved.generate,
                rerank: resolved.rerank,
            };
            saveConfig({ ...config, models });
        }
        return resolved;
    }
    catch {
        return resolveModels();
    }
}
1698
/** Return the embed model from ensureModelsConfiguredForCli(). */
export function resolveEmbedModelForCli() {
    const { embed } = ensureModelsConfiguredForCli();
    return embed;
}
1701
/** Return the generate model from ensureModelsConfiguredForCli(). */
export function resolveGenerateModelForCli() {
    const { generate } = ensureModelsConfiguredForCli();
    return generate;
}
1704
/** Return the rerank model from ensureModelsConfiguredForCli(). */
export function resolveRerankModelForCli() {
    const { rerank } = ensureModelsConfiguredForCli();
    return rerank;
}
1707
/** Return the full resolved model set from ensureModelsConfiguredForCli(). */
function resolveModelsForCli() {
    const models = ensureModelsConfiguredForCli();
    return models;
}
1710
/**
 * Generate embeddings for content hashes that still need them and report
 * progress on stderr.
 *
 * @param {string} [model] - Embed model; defaults to resolveEmbedModelForCli().
 * @param {boolean} [force=false] - Run even when nothing is reported as pending.
 * @param {object} [batchOptions] - Optional `collection`, `maxDocsPerBatch`,
 *   `maxBatchBytes` and `chunkStrategy`, forwarded to generateEmbeddings().
 * @returns {Promise<void>}
 */
async function vectorIndex(model = resolveEmbedModelForCli(), force = false, batchOptions) {
    const storeInstance = getStore();
    const db = storeInstance.db;
    if (force) {
        console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
    }
    // Check if there's work to do before starting
    const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection, model);
    if (hashesToEmbed === 0 && !force) {
        console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
        closeDb();
        return;
    }
    console.log(`${c.dim}Model: ${shortModelName(model)}${c.reset}\n`);
    if (batchOptions?.maxDocsPerBatch !== undefined || batchOptions?.maxBatchBytes !== undefined) {
        const maxDocsPerBatch = batchOptions.maxDocsPerBatch ?? DEFAULT_EMBED_MAX_DOCS_PER_BATCH;
        const maxBatchBytes = batchOptions.maxBatchBytes ?? DEFAULT_EMBED_MAX_BATCH_BYTES;
        console.log(`${c.dim}Batch: ${maxDocsPerBatch} docs / ${formatBytes(maxBatchBytes)}${c.reset}\n`);
    }
    cursor.hide();
    progress.indeterminate();
    const startTime = Date.now();
    let result;
    try {
        result = await generateEmbeddings(storeInstance, {
            force,
            model,
            collection: batchOptions?.collection,
            maxDocsPerBatch: batchOptions?.maxDocsPerBatch,
            maxBatchBytes: batchOptions?.maxBatchBytes,
            chunkStrategy: batchOptions?.chunkStrategy,
            onProgress: (info) => {
                if (info.totalBytes === 0)
                    return;
                // Progress is measured by input bytes, not by chunks. The final chunk
                // count is discovered lazily batch-by-batch, so displaying
                // chunksEmbedded/totalChunks makes the percent look wrong when a few
                // large documents remain. Show chunks as a count and label the byte
                // percentage explicitly as input progress.
                const percent = Math.min(100, (info.bytesProcessed / info.totalBytes) * 100);
                progress.set(percent);
                const elapsed = (Date.now() - startTime) / 1000;
                const bytesPerSec = elapsed > 0 ? info.bytesProcessed / elapsed : 0;
                const remainingBytes = Math.max(0, info.totalBytes - info.bytesProcessed);
                const etaSec = bytesPerSec > 0 ? remainingBytes / bytesPerSec : Number.POSITIVE_INFINITY;
                const bar = renderProgressBar(percent);
                const percentStr = percent.toFixed(0).padStart(3);
                const throughput = bytesPerSec > 0 ? `${formatBytes(bytesPerSec)}/s` : ".../s";
                const eta = elapsed > 2 && Number.isFinite(etaSec) ? formatETA(etaSec) : "...";
                const inputStr = `${formatBytes(info.bytesProcessed)}/${formatBytes(info.totalBytes)} input`;
                const chunkStr = `${formatCount(info.chunksEmbedded)} chunks`;
                const errStr = info.errors > 0 ? ` ${c.yellow}${formatCount(info.errors)} err${c.reset}` : "";
                if (isTTY)
                    process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}% input${c.reset} ${c.dim}${chunkStr}${errStr} · ${inputStr} · ${throughput} · ETA ${eta}${c.reset} `);
            },
        });
    }
    finally {
        // Restore the terminal even when embedding fails; otherwise the cursor
        // stays hidden after the error propagates.
        progress.clear();
        cursor.show();
    }
    const totalTimeSec = result.durationMs / 1000;
    if (result.chunksEmbedded === 0 && result.docsProcessed === 0) {
        console.log(`${c.green}✓ No non-empty documents to embed.${c.reset}`);
    }
    else {
        console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
        console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${result.chunksEmbedded}${c.reset} chunks from ${c.bold}${result.docsProcessed}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset}`);
        if (result.errors > 0) {
            console.log(`${c.yellow}⚠ ${formatCount(result.errors)} chunks still failed after retries${c.reset}`);
            // Only the first 8 failures are listed; the rest are summarised.
            for (const failure of (result.failures ?? []).slice(0, 8)) {
                console.log(` ${c.dim}${failure.path}#${failure.seq} (${failure.attempts} attempts): ${failure.reason}${c.reset}`);
            }
            if ((result.failures?.length ?? 0) > 8) {
                console.log(` ${c.dim}...and ${formatCount((result.failures?.length ?? 0) - 8)} more${c.reset}`);
            }
        }
    }
    closeDb();
}
1785
/**
 * Sanitize a single term for FTS5: drop everything except letters, digits,
 * combining marks, underscores and apostrophes (kept for contractions such
 * as "don't").
 *
 * Unicode property escapes are used instead of `\w`, which only matches
 * ASCII word characters and would therefore erase Japanese (and any other
 * non-ASCII) terms completely.
 *
 * @param {string} term - Raw whitespace-delimited term.
 * @returns {string} The sanitized term (possibly empty).
 */
function sanitizeFTS5Term(term) {
    return term.replace(/[^\p{L}\p{N}\p{M}_']/gu, '').trim();
}
/**
 * Build an FTS5 query: phrase-aware with fallback to individual terms.
 *
 * @param {string} query - Free-text user query.
 * @returns {string} An FTS5 MATCH expression, or "" when no term of at least
 *   two characters survives sanitization.
 */
function buildFTS5Query(query) {
    // Sanitize the full query for phrase matching (same character policy as
    // sanitizeFTS5Term, but whitespace is preserved).
    const sanitizedQuery = query.replace(/[^\p{L}\p{N}\p{M}_\s']/gu, '').trim();
    const terms = query
        .split(/\s+/)
        .map(sanitizeFTS5Term)
        .filter(term => term.length >= 2); // Skip single chars and empty
    if (terms.length === 0)
        return "";
    if (terms.length === 1)
        return `"${terms[0].replace(/"/g, '""')}"`;
    // Strategy: exact phrase OR proximity match OR individual terms
    // Exact phrase matches rank highest, then close proximity, then any term
    const phrase = `"${sanitizedQuery.replace(/"/g, '""')}"`;
    const quotedTerms = terms.map(t => `"${t.replace(/"/g, '""')}"`);
    // FTS5 NEAR syntax: NEAR(term1 term2, distance)
    const nearPhrase = `NEAR(${quotedTerms.join(' ')}, 10)`;
    const orTerms = quotedTerms.join(' OR ');
    // Exact phrase > proximity > any term
    return `(${phrase}) OR (${nearPhrase}) OR (${orTerms})`;
}
1812
/**
 * Map a BM25 score onto the 0–1 range with a logistic curve (higher = better).
 *
 * Only the magnitude of the score is used, so the sign does not matter. The
 * curve is centred on a magnitude of 5 (which maps to exactly 0.5) with a
 * scale of 3.
 *
 * @param {number} score - Raw BM25 score.
 * @returns {number} Normalized relevance between 0 and 1.
 */
function normalizeBM25(score) {
    const magnitude = Math.abs(score);
    const shifted = (magnitude - 5) / 3;
    return 1 / (1 + Math.exp(-shifted));
}
1821
/**
 * Highlight query terms in text (terms shorter than 3 characters are skipped).
 * Returns the text unchanged when colour output is disabled.
 *
 * All terms are matched in one pass with a single alternation. Replacing
 * term by term would re-scan text that already contains inserted escape
 * codes, which double-wraps duplicate terms and stops a longer term from
 * matching once a shorter one inside it has been wrapped.
 *
 * @param {string} text - Text to decorate.
 * @param {string} query - Whitespace-separated query terms.
 * @returns {string} The text with case-insensitive matches wrapped in colour codes.
 */
function highlightTerms(text, query) {
    if (!useColor)
        return text;
    const terms = [...new Set(query.toLowerCase().split(/\s+/).filter(t => t.length >= 3))];
    if (terms.length === 0)
        return text;
    // Longest first so that "testing" wins over "test" at the same position.
    const alternation = terms
        .sort((a, b) => b.length - a.length)
        .map(term => term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
        .join('|');
    const regex = new RegExp(alternation, 'gi');
    // A replacer function keeps `$` in the colour codes or the match literal.
    return text.replace(regex, match => `${c.yellow}${c.bold}${match}${c.reset}`);
}
1833
/**
 * Format a 0–1 score as a right-aligned percentage, tinted by value when
 * colour output is enabled: green from 0.7, yellow from 0.4, dim below.
 *
 * @param {number} score - Score between 0 and 1.
 * @returns {string} e.g. " 42%" (optionally wrapped in colour codes).
 */
function formatScore(score) {
    const label = `${(score * 100).toFixed(0).padStart(3)}%`;
    if (!useColor) {
        return label;
    }
    let tint = c.dim;
    if (score >= 0.7) {
        tint = c.green;
    }
    else if (score >= 0.4) {
        tint = c.yellow;
    }
    return `${tint}${label}${c.reset}`;
}
1844
/**
 * Format a number for --explain output with four decimal places.
 *
 * @param {number} value - Number to format.
 * @returns {string} Fixed-point representation, e.g. "0.1235".
 */
function formatExplainNumber(value) {
    const decimals = 4;
    return value.toFixed(decimals);
}
1847
/**
 * Shorten a directory path for display by replacing the home directory
 * prefix with `~` (used for context paths, not documents).
 *
 * The prefix only counts when it ends on a path boundary, so a sibling such
 * as `/home/bobby` is not rewritten relative to `/home/bob`.
 *
 * @param {string} dirpath - Path to shorten.
 * @returns {string} `~`-relative path when under the home directory,
 *   otherwise `dirpath` unchanged.
 */
function shortPath(dirpath) {
    const home = homedir();
    if (!home || !dirpath.startsWith(home)) {
        return dirpath;
    }
    const rest = dirpath.slice(home.length);
    const homeEndsWithSeparator = home.endsWith('/') || home.endsWith('\\');
    const atBoundary = rest === '' || rest.startsWith('/') || rest.startsWith('\\') || homeEndsWithSeparator;
    return atBoundary ? `~${rest}` : dirpath;
}
1855
/**
 * Emit format-safe empty output for search commands.
 *
 * Structured formats get a valid empty document (json, csv header, xml),
 * "md" and "files" print nothing, and every other format prints a
 * human-readable message chosen by `reason`.
 *
 * @param {string} format - Output format name.
 * @param {string} [reason="no_results"] - "min_score" selects the threshold message.
 */
function printEmptySearchResults(format, reason = "no_results") {
    switch (format) {
        case "json":
            console.log("[]");
            return;
        case "csv":
            console.log("docid,score,file,title,context,line,snippet");
            return;
        case "xml":
            console.log("<results></results>");
            return;
        case "md":
        case "files":
            return;
        default:
            break;
    }
    const message = reason === "min_score"
        ? "No results found above minimum score threshold."
        : "No results found.";
    console.log(message);
}
1878
/** Editor link template used when neither the environment nor the config provides one. */
const DEFAULT_EDITOR_URI_TEMPLATE = "vscode://file/{path}:{line}:{col}";
/**
 * Percent-encode an absolute path for use inside an editor URI.
 * encodeURI() leaves `?` and `#` untouched, so they are encoded explicitly to
 * stop them being read as the start of a query string or fragment.
 *
 * @param {string} absolutePath - Filesystem path.
 * @returns {string} The encoded path.
 */
function encodePathForEditorUri(absolutePath) {
    return encodeURI(absolutePath)
        .replace(/\?/g, "%3F")
        .replace(/#/g, "%23");
}
/**
 * Pick the editor URI template: the QMD_EDITOR_URI environment variable
 * first, then the config keys `editor_uri`, `editor_uri_template`,
 * `editorUri` and `editor-uri`, then DEFAULT_EDITOR_URI_TEMPLATE.
 *
 * @returns {string} The template to pass to buildEditorUri().
 */
function getEditorUriTemplate() {
    const envTemplate = process.env.QMD_EDITOR_URI?.trim();
    if (envTemplate)
        return envTemplate;
    try {
        const config = loadConfig();
        const configTemplate = (config.editor_uri
            || config.editor_uri_template
            || config.editorUri
            || (typeof config["editor-uri"] === "string" ? config["editor-uri"] : undefined))?.trim();
        if (configTemplate)
            return configTemplate;
    }
    catch {
        // Ignore config parsing issues and use default template.
    }
    return DEFAULT_EDITOR_URI_TEMPLATE;
}
/**
 * Expand an editor URI template.
 *
 * Supported placeholders: `{path}`, `{line}`, `{col}` and `{column}`. Line
 * and column fall back to 1 when they are not finite positive numbers.
 *
 * @param {string} template - Template containing the placeholders.
 * @param {string} absolutePath - File path to link to.
 * @param {number} line - 1-based line number.
 * @param {number} col - 1-based column number.
 * @returns {string} The expanded URI.
 */
export function buildEditorUri(template, absolutePath, line, col) {
    const safeLine = Number.isFinite(line) && line > 0 ? Math.floor(line) : 1;
    const safeCol = Number.isFinite(col) && col > 0 ? Math.floor(col) : 1;
    const values = {
        path: encodePathForEditorUri(absolutePath),
        line: String(safeLine),
        col: String(safeCol),
        column: String(safeCol),
    };
    // A replacer function is required: encodeURI() keeps `$`, and with a
    // replacement *string* a path containing `$&` or `$$` would be expanded
    // as a replacement pattern instead of being inserted literally.
    return template.replace(/\{(path|line|col|column)\}/g, (_match, key) => values[key]);
}
1912
/**
 * Wrap text in an OSC 8 terminal hyperlink.
 *
 * @param {string} text - Visible label.
 * @param {string} url - Link target.
 * @param {boolean} [isTTY] - Whether to emit the escape sequence; defaults to
 *   whether stdout is a TTY.
 * @returns {string} The hyperlinked text, or `text` unchanged when not a TTY.
 */
export function termLink(text, url, isTTY = Boolean(process.stdout.isTTY)) {
    if (isTTY) {
        const open = `\x1b]8;;${url}\x07`;
        const close = `\x1b]8;;\x07`;
        return `${open}${text}${close}`;
    }
    return text;
}
1917
/**
 * Print search results in the requested output format.
 *
 * Results below `opts.minScore` are dropped and the remainder is capped at
 * `opts.limit`. Supported formats: "json", "files", "cli", "md", "xml"; any
 * other value produces CSV.
 *
 * @param {object[]} results - Rows read as `score`, `displayPath`, `body`,
 *   `title`, `context`, `hash`, `docid`, `chunkPos`, `chunkLen`, `explain`.
 * @param {string} query - Original query, used for snippets and highlighting.
 * @param {object} opts - Reads `format`, `minScore`, `limit`, `full`,
 *   `lineNumbers`, `fullPath`, `explain` and `intent`.
 */
function outputResults(results, query, opts) {
    const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
    if (filtered.length === 0) {
        printEmptySearchResults(opts.format, "min_score");
        return;
    }
    // Escape a value for use inside a double-quoted XML attribute. `&`, `<`
    // and `>` must be escaped as well as `"`, otherwise a title such as
    // "R&D <draft>" yields malformed XML.
    const escapeXmlAttr = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');
    // Helper to create qmd:// URI from displayPath
    const toQmdPath = (displayPath) => {
        const [collectionName, ...segments] = displayPath.split("/");
        if (!collectionName || segments.length === 0) {
            return `qmd://${displayPath}`;
        }
        const indexName = getActiveIndexName();
        return buildVirtualPath(collectionName, segments.join("/"), indexName === "index" ? undefined : indexName);
    };
    // Helper to pick the visible path for a result. With --full-path we swap
    // the qmd:// URI for the file's on-disk path via renderFullPath() (./-
    // prefixed relative when under $PWD, absolute realpath otherwise). Falls
    // back to qmd:// if the file is no longer resolvable on disk.
    const linkDbForPaths = opts.fullPath ? getDb() : null;
    const displayPathFor = (row) => {
        // Always rebuild from displayPath so the active index name is included
        // as ?index=… for non-default indexes. row.file may not carry it.
        const qmdUri = toQmdPath(row.displayPath);
        if (!opts.fullPath || !linkDbForPaths)
            return qmdUri;
        const absolute = resolveVirtualPath(linkDbForPaths, qmdUri);
        if (!absolute || !existsSync(absolute))
            return qmdUri;
        return renderFullPath(absolute);
    };
    if (opts.format === "json") {
        // JSON output for LLM consumption
        const output = filtered.map(row => {
            const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
            const snippetInfo = extractSnippet(row.body, query, 300, row.chunkPos, row.chunkLen, opts.intent);
            let body = opts.full ? row.body : undefined;
            let snippet = !opts.full ? snippetInfo.snippet : undefined;
            if (opts.lineNumbers) {
                if (body)
                    body = addLineNumbers(body);
                if (snippet)
                    snippet = addLineNumbers(snippet);
            }
            // With --full-path, omit docid (the on-disk path is the identifier).
            return {
                ...(docid && !opts.fullPath && { docid: `#${docid}` }),
                score: Math.round(row.score * 100) / 100,
                file: displayPathFor(row),
                line: snippetInfo.line,
                title: row.title,
                ...(row.context && { context: row.context }),
                ...(body && { body }),
                ...(snippet && { snippet }),
                ...(opts.explain && row.explain && { explain: row.explain }),
            };
        });
        console.log(JSON.stringify(output, null, 2));
    }
    else if (opts.format === "files") {
        // Simple docid,score,filepath,context output
        for (const row of filtered) {
            const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
            const ctx = row.context ? `,"${row.context.replace(/"/g, '""')}"` : "";
            if (opts.fullPath) {
                // --full-path: drop the docid, the on-disk path is the identifier.
                console.log(`${row.score.toFixed(2)},${displayPathFor(row)}${ctx}`);
            }
            else {
                console.log(`#${docid},${row.score.toFixed(2)},${displayPathFor(row)}${ctx}`);
            }
        }
    }
    else if (opts.format === "cli") {
        const editorUriTemplate = getEditorUriTemplate();
        const linkDb = getDb();
        for (let i = 0; i < filtered.length; i++) {
            const row = filtered[i];
            if (!row)
                continue;
            const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent);
            const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
            // Line 1: filepath with docid
            // Default: show the full qmd:// URI so the user can see which collection
            // a hit lives in and can pipe the same string straight back into
            // `qmd get`. A bare collection-relative path like `sources/foo.md` is
            // ambiguous: it's not a real filesystem path, not a URI, and not a
            // shell-friendly identifier on its own.
            // With --full-path the visible label is the file's on-disk path
            // ($PWD-relative when in a subfolder; absolute realpath otherwise),
            // and the docid is omitted because the path is the identifier.
            const virtualPath = toQmdPath(row.displayPath);
            const parsed = parseVirtualPath(virtualPath);
            const absolutePath = resolveVirtualPath(linkDb, virtualPath);
            const visiblePath = displayPathFor(row);
            // Only show :line if we actually found a term match in the snippet body (exclude header line).
            const snippetBody = snippet.split("\n").slice(1).join("\n").toLowerCase();
            const hasMatch = query.toLowerCase().split(/\s+/).some(t => t.length > 0 && snippetBody.includes(t));
            const lineInfo = hasMatch ? `:${line}` : "";
            const docidStr = (docid && !opts.fullPath) ? ` ${c.dim}#${docid}${c.reset}` : "";
            if (process.stdout.isTTY && absolutePath && parsed?.path) {
                const linkLine = hasMatch ? line : 1;
                const linkTarget = buildEditorUri(editorUriTemplate, absolutePath, linkLine, 1);
                const clickable = termLink(`${visiblePath}${lineInfo}`, linkTarget);
                console.log(`${c.cyan}${clickable}${c.reset}${docidStr}`);
            }
            else {
                console.log(`${c.cyan}${visiblePath}${c.dim}${lineInfo}${c.reset}${docidStr}`);
            }
            // Line 2: Title (if available)
            if (row.title) {
                console.log(`${c.bold}Title: ${row.title}${c.reset}`);
            }
            // Line 3: Context (if available)
            if (row.context) {
                console.log(`${c.dim}Context: ${row.context}${c.reset}`);
            }
            // Line 4: Score
            const score = formatScore(row.score);
            console.log(`Score: ${c.bold}${score}${c.reset}`);
            if (opts.explain && row.explain) {
                const explain = row.explain;
                const ftsScores = explain.ftsScores.length > 0
                    ? explain.ftsScores.map(formatExplainNumber).join(", ")
                    : "none";
                const vecScores = explain.vectorScores.length > 0
                    ? explain.vectorScores.map(formatExplainNumber).join(", ")
                    : "none";
                // Top three RRF contributions, largest first.
                const contribSummary = explain.rrf.contributions
                    .slice()
                    .sort((a, b) => b.rrfContribution - a.rrfContribution)
                    .slice(0, 3)
                    .map(contrib => `${contrib.source}/${contrib.queryType}#${contrib.rank}:${formatExplainNumber(contrib.rrfContribution)}`)
                    .join(" | ");
                console.log(`${c.dim}Explain: fts=[${ftsScores}] vec=[${vecScores}]${c.reset}`);
                console.log(`${c.dim} RRF: total=${formatExplainNumber(explain.rrf.totalScore)} base=${formatExplainNumber(explain.rrf.baseScore)} bonus=${formatExplainNumber(explain.rrf.topRankBonus)} rank=${explain.rrf.rank}${c.reset}`);
                console.log(`${c.dim} Blend: ${Math.round(explain.rrf.weight * 100)}%*${formatExplainNumber(explain.rrf.positionScore)} + ${Math.round((1 - explain.rrf.weight) * 100)}%*${formatExplainNumber(explain.rerankScore)} = ${formatExplainNumber(explain.blendedScore)}${c.reset}`);
                if (contribSummary.length > 0) {
                    console.log(`${c.dim} Top RRF contributions: ${contribSummary}${c.reset}`);
                }
            }
            console.log();
            // Snippet with highlighting (diff-style header included)
            const content = opts.full ? row.body : snippet;
            const displayContent = opts.lineNumbers ? addLineNumbers(content, opts.full ? 1 : line) : content;
            const highlighted = highlightTerms(displayContent, query);
            console.log(highlighted);
            // Double empty line between results
            if (i < filtered.length - 1)
                console.log('\n');
        }
    }
    else if (opts.format === "md") {
        for (let i = 0; i < filtered.length; i++) {
            const row = filtered[i];
            if (!row)
                continue;
            const visiblePath = displayPathFor(row);
            const heading = row.title || visiblePath;
            const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
            let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent).snippet;
            if (opts.lineNumbers) {
                content = addLineNumbers(content);
            }
            const fileLine = `**file:** \`${visiblePath}\`\n`;
            // With --full-path the on-disk path is the identifier; drop the docid line.
            const docidLine = (docid && !opts.fullPath) ? `**docid:** \`#${docid}\`\n` : "";
            const contextLine = row.context ? `**context:** ${row.context}\n` : "";
            console.log(`---\n# ${heading}\n${fileLine}${docidLine}${contextLine}\n${content}\n`);
        }
    }
    else if (opts.format === "xml") {
        for (const row of filtered) {
            const titleAttr = row.title ? ` title="${escapeXmlAttr(row.title)}"` : "";
            const contextAttr = row.context ? ` context="${escapeXmlAttr(row.context)}"` : "";
            const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
            let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent).snippet;
            if (opts.lineNumbers) {
                content = addLineNumbers(content);
            }
            const docidAttr = opts.fullPath ? "" : ` docid="#${docid}"`;
            // The element body is emitted verbatim, as before; only attribute
            // values are escaped.
            console.log(`<file${docidAttr} name="${escapeXmlAttr(displayPathFor(row))}"${titleAttr}${contextAttr}>\n${content}\n</file>\n`);
        }
    }
    else {
        // CSV format
        const csvHeader = opts.fullPath
            ? "score,file,title,context,line,snippet"
            : "docid,score,file,title,context,line,snippet";
        console.log(csvHeader);
        for (const row of filtered) {
            const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent);
            let content = opts.full ? row.body : snippet;
            if (opts.lineNumbers) {
                content = addLineNumbers(content, opts.full ? 1 : line);
            }
            const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
            const snippetText = content || "";
            const path = escapeCSV(displayPathFor(row));
            const tail = `${path},${escapeCSV(row.title || "")},${escapeCSV(row.context || "")},${line},${escapeCSV(snippetText)}`;
            if (opts.fullPath) {
                console.log(`${row.score.toFixed(4)},${tail}`);
            }
            else {
                console.log(`#${docid},${row.score.toFixed(4)},${tail}`);
            }
        }
    }
}
2126
/**
 * Resolve the -c collection filter, which may be a single string, an array
 * of strings, or absent.
 *
 * Prints an error, closes the database and exits with status 1 on the first
 * name that is not a configured collection.
 *
 * @param {string|string[]|undefined} raw - Value(s) supplied on the command line.
 * @param {boolean} [useDefaults=false] - When no filter was given, return the
 *   default collection names instead of an empty list.
 * @returns {string[]} Validated collection names.
 */
function resolveCollectionFilter(raw, useDefaults = false) {
    if (!raw) {
        // No explicit filter: either the configured defaults or "no filter".
        return useDefaults ? getDefaultCollectionNames() : [];
    }
    const requested = Array.isArray(raw) ? raw : [raw];
    const validated = [];
    for (let i = 0; i < requested.length; i++) {
        const candidate = requested[i];
        if (!getCollectionFromYaml(candidate)) {
            console.error(`Collection not found: ${candidate}`);
            closeDb();
            process.exit(1);
        }
        validated.push(candidate);
    }
    return validated;
}
2148
/**
 * Post-filter results so that only files from the given collections remain.
 *
 * With zero or one collection name the input array is returned as-is, with
 * no filtering applied here.
 *
 * @param {object[]} results - Rows carrying a `filepath` or `file` qmd:// URI.
 * @param {string[]} collectionNames - Collections to keep.
 * @returns {object[]} Rows whose path starts with `qmd://<collection>/`.
 */
function filterByCollections(results, collectionNames) {
    if (collectionNames.length <= 1) {
        return results;
    }
    const allowedPrefixes = collectionNames.map(name => `qmd://${name}/`);
    const belongsToAllowed = (result) => {
        const location = result.filepath || result.file || '';
        for (const prefix of allowedPrefixes) {
            if (location.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    };
    return results.filter(result => belongsToAllowed(result));
}
2158
/**
 * Parse a multi-line "query document" into typed searches.
 *
 * Each non-blank line must start with `lex:`, `vec:`, `hyde:` or `intent:`
 * (case-insensitive). A single plain line, or a single `expand:` line, is not
 * a structured query and yields `null`.
 *
 * @param {string} query - Raw query text, possibly spanning several lines.
 * @returns {{searches: {type: string, query: string, line: number}[], intent: (string|undefined)}|null}
 *   The parsed document, or `null` when the query should be handled as a
 *   plain / expand query.
 * @throws {Error} On a malformed document (mixed expand, duplicate or empty
 *   intent, empty typed line, missing prefix, intent without any search).
 */
function parseStructuredQuery(query) {
    const typedPrefix = /^(lex|vec|hyde):\s*/i;
    const expandPrefix = /^expand:\s*/i;
    const intentPrefix = /^intent:\s*/i;
    // Keep original 1-based line numbers for error messages, dropping blanks.
    const lines = [];
    query.split('\n').forEach((raw, idx) => {
        const trimmed = raw.trim();
        if (trimmed.length > 0) {
            lines.push({ raw, trimmed, number: idx + 1 });
        }
    });
    if (lines.length === 0) {
        return null;
    }
    const isSingleLine = lines.length === 1;
    const searches = [];
    let intent;
    for (const { trimmed, number } of lines) {
        if (expandPrefix.test(trimmed)) {
            if (!isSingleLine) {
                throw new Error(`Line ${number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
            }
            if (!trimmed.replace(expandPrefix, '').trim()) {
                throw new Error('expand: query must include text.');
            }
            // A standalone expand query is handled by the caller.
            return null;
        }
        if (intentPrefix.test(trimmed)) {
            if (intent !== undefined) {
                throw new Error(`Line ${number}: only one intent: line is allowed per query document.`);
            }
            const intentText = trimmed.replace(intentPrefix, '').trim();
            if (!intentText) {
                throw new Error(`Line ${number}: intent: must include text.`);
            }
            intent = intentText;
            continue;
        }
        const prefixMatch = trimmed.match(typedPrefix);
        if (prefixMatch) {
            const type = prefixMatch[1].toLowerCase();
            const body = trimmed.slice(prefixMatch[0].length).trim();
            if (!body) {
                throw new Error(`Line ${number} (${type}:) must include text.`);
            }
            if (/\r|\n/.test(body)) {
                throw new Error(`Line ${number} (${type}:) contains a newline. Keep each query on a single line.`);
            }
            searches.push({ type, query: body, line: number });
            continue;
        }
        if (isSingleLine) {
            // Single plain line -> implicit expand
            return null;
        }
        throw new Error(`Line ${number} is missing a lex:/vec:/hyde:/intent: prefix. Each line in a query document must start with one.`);
    }
    // intent: alone is not a valid query — at least one search is required.
    if (intent && searches.length === 0) {
        throw new Error('intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.');
    }
    if (searches.length === 0) {
        return null;
    }
    return { searches, intent };
}
2219
/**
 * Run a full-text search and print the results.
 *
 * @param {string} query - Search text handed to `searchFTS`.
 * @param {object} opts - CLI options; reads `collection`, `all`, `limit` and
 *   `format`, and is forwarded unchanged to `outputResults`.
 * @returns {void}
 */
function search(query, opts) {
    const db = getDb();
    // Validate the collection filter (multiple -c flags allowed); `true` asks
    // for the default collections when none were specified.
    const collectionNames = resolveCollectionFilter(opts.collection, true);
    let onlyCollection;
    if (collectionNames.length === 1) {
        onlyCollection = collectionNames[0];
    }
    // --all uses a very large limit; otherwise over-fetch and let outputResults trim.
    let fetchLimit = 100000;
    if (!opts.all) {
        fetchLimit = Math.max(50, opts.limit * 2);
    }
    const hits = filterByCollections(searchFTS(db, query, fetchLimit, onlyCollection), collectionNames);
    // Attach per-file context to every hit before the database is closed.
    const rows = hits.map((hit) => {
        const { filepath, displayPath, title, score, hash, docid } = hit;
        return {
            file: filepath,
            displayPath,
            title,
            body: hit.body || "",
            score,
            context: getContextForFile(db, filepath),
            hash,
            docid,
        };
    });
    closeDb();
    if (rows.length > 0) {
        outputResults(rows, query, opts);
        return;
    }
    printEmptySearchResults(opts.format);
}
2246
/**
 * Write the original query and its expansions to stderr as a small tree
 * (CLI progress feedback).
 *
 * @param {string} originalQuery - The query as entered.
 * @param {Array<{ type: string, query: string }>} expanded - Expanded queries.
 * @returns {void}
 */
function logExpansionTree(originalQuery, expanded) {
    const MAX_PREVIEW = 72;
    const branches = [`${c.dim}├─ ${originalQuery}${c.reset}`];
    for (const item of expanded) {
        const flattened = item.query.replace(/\n/g, ' ');
        const preview = flattened.length > MAX_PREVIEW
            ? flattened.substring(0, 69) + '...'
            : flattened;
        branches.push(`${c.dim}├─ ${item.type}: ${preview}${c.reset}`);
    }
    // The list always holds at least the original query; close the tree on the last entry.
    const lastIndex = branches.length - 1;
    branches[lastIndex] = branches[lastIndex].replace('├─', '└─');
    branches.forEach((branch) => {
        process.stderr.write(branch + '\n');
    });
}
2262
/**
 * Run a vector-similarity search inside an LLM session and print the results.
 *
 * @param {string} query - Search text handed to `vectorSearchQuery`.
 * @param {object} opts - CLI options; reads `collection`, `all`, `limit`,
 *   `minScore`, `intent` and `format`.
 * @param {string} [_model] - Unused here; kept for signature compatibility.
 * @returns {Promise<void>}
 */
async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
    const store = getStore();
    // Validate the collection filter (multiple -c flags allowed); `true` asks
    // for the default collections when none were specified.
    const collectionNames = resolveCollectionFilter(opts.collection, true);
    const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
    // Built once: the prefix list is the same for every result being filtered.
    const collectionPrefixes = collectionNames.map(n => `qmd://${n}/`);
    checkIndexHealth(store.db);
    await withLLMSession(async () => {
        let results = await vectorSearchQuery(store, query, {
            collection: singleCollection,
            limit: opts.all ? 500 : (opts.limit || 10),
            // A falsy minScore (including 0) falls back to 0.3.
            minScore: opts.minScore || 0.3,
            intent: opts.intent,
            hooks: {
                onExpand: (original, expanded) => {
                    logExpansionTree(original, expanded);
                    process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
                },
            },
        });
        // Post-filter for multi-collection: only a single collection can be
        // passed to the query itself, so several are narrowed here by path prefix.
        if (collectionNames.length > 1) {
            results = results.filter(r => collectionPrefixes.some(p => r.file.startsWith(p)));
        }
        closeDb();
        if (results.length === 0) {
            printEmptySearchResults(opts.format);
            return;
        }
        // The limit is overridden with the result count so everything found is printed.
        outputResults(results.map(r => ({
            file: r.file,
            displayPath: r.displayPath,
            title: r.title,
            body: r.body,
            score: r.score,
            context: r.context,
            docid: r.docid,
        })), query, { ...opts, limit: results.length });
    }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
}
2305
/**
 * Run the `query` command: a structured search when the query parses as a
 * typed document (lex:/vec:/hyde:/intent: lines), otherwise a hybrid search
 * with automatic expansion. Progress is written to stderr, results are printed
 * via `outputResults`.
 *
 * @param {string} query - Raw query text or query document.
 * @param {object} opts - CLI options; reads `collection`, `intent`, `all`,
 *   `limit`, `minScore`, `candidateLimit`, `skipRerank`, `explain`,
 *   `chunkStrategy` and `format`.
 * @param {string} [_embedModel] - Unused here; kept for signature compatibility.
 * @param {string} [_rerankModel] - Unused here; kept for signature compatibility.
 * @returns {Promise<void>}
 * @throws {Error} Propagates validation errors from `parseStructuredQuery`.
 */
async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rerankModel = DEFAULT_RERANK_MODEL) {
    const store = getStore();
    // Validate the collection filter (multiple -c flags allowed); `true` asks
    // for the default collections when none were specified.
    const collectionNames = resolveCollectionFilter(opts.collection, true);
    const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
    // Built once: the prefix list is the same for every result being filtered.
    const collectionPrefixes = collectionNames.map(n => `qmd://${n}/`);
    checkIndexHealth(store.db);
    // Check for structured query syntax (lex:/vec:/hyde:/intent: prefixes)
    const parsed = parseStructuredQuery(query);
    // Intent can come from --intent flag or from intent: line in query document
    const intent = opts.intent || parsed?.intent;
    // Options common to both search paths, defined once so they cannot drift apart.
    const sharedOptions = {
        limit: opts.all ? 500 : (opts.limit || 10),
        minScore: opts.minScore || 0,
        candidateLimit: opts.candidateLimit,
        skipRerank: opts.skipRerank,
        explain: !!opts.explain,
        intent,
        chunkStrategy: opts.chunkStrategy,
    };
    // Embedding/reranking progress hooks used by both search paths. The
    // "start" hooks leave the line open; the "done" hooks append the timing.
    const pipelineHooks = {
        onEmbedStart: (count) => {
            process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
        },
        onEmbedDone: (ms) => {
            process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
        },
        onRerankStart: (chunkCount) => {
            process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
            progress.indeterminate();
        },
        onRerankDone: (ms) => {
            progress.clear();
            process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
        },
    };
    await withLLMSession(async () => {
        let results;
        if (parsed) {
            const structuredQueries = parsed.searches;
            // Structured search — user provided their own query expansions
            const typeLabels = structuredQueries.map(s => s.type).join('+');
            process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`);
            if (intent) {
                process.stderr.write(`${c.dim}├─ intent: ${intent}${c.reset}\n`);
            }
            // Log each sub-query
            for (const s of structuredQueries) {
                let preview = s.query.replace(/\n/g, ' ');
                if (preview.length > 72)
                    preview = preview.substring(0, 69) + '...';
                process.stderr.write(`${c.dim}├─ ${s.type}: ${preview}${c.reset}\n`);
            }
            process.stderr.write(`${c.dim}└─ Searching...${c.reset}\n`);
            results = await structuredSearch(store, structuredQueries, {
                collections: singleCollection ? [singleCollection] : undefined,
                ...sharedOptions,
                hooks: pipelineHooks,
            });
        }
        else {
            // Standard hybrid query with automatic expansion
            results = await hybridQuery(store, query, {
                collection: singleCollection,
                ...sharedOptions,
                hooks: {
                    onStrongSignal: (score) => {
                        process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
                    },
                    onExpandStart: () => {
                        process.stderr.write(`${c.dim}Expanding query...${c.reset}`);
                    },
                    onExpand: (original, expanded, ms) => {
                        process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
                        logExpansionTree(original, expanded);
                        process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
                    },
                    ...pipelineHooks,
                },
            });
        }
        // Post-filter for multi-collection: only a single collection is passed
        // to the search itself, so several are narrowed here by path prefix.
        if (collectionNames.length > 1) {
            results = results.filter(r => collectionPrefixes.some(p => r.file.startsWith(p)));
        }
        closeDb();
        if (results.length === 0) {
            printEmptySearchResults(opts.format);
            return;
        }
        // Use first lex/vec query for output context, or original query
        const structuredQueries = parsed?.searches;
        const displayQuery = structuredQueries
            ? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
            : query;
        // The limit is overridden with the result count so everything found is printed.
        outputResults(results.map(r => ({
            file: r.file,
            displayPath: r.displayPath,
            title: r.title,
            body: r.body,
            chunkPos: r.bestChunkPos,
            chunkLen: r.bestChunk.length,
            score: r.score,
            context: r.context,
            docid: r.docid,
            explain: r.explain,
        })), displayQuery, { ...opts, limit: results.length });
    }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
}
2432
/**
 * Parse CLI arguments using util.parseArgs and derive the normalized options.
 *
 * Side effects: sets `QMD_FORCE_CPU=1` when `--no-gpu` is given, selects the
 * index/config source (explicit `--index`, else a project-local config if one
 * is found, else the default), and exits the process with status 1 on an
 * unknown `--format` value.
 *
 * @returns {{ command: string, args: string[], query: string, opts: object, values: object }}
 *   `command` is the first positional (or ""), `args` the remaining
 *   positionals, `query` those positionals joined by spaces, `opts` the
 *   normalized search options and `values` the raw parsed flags.
 */
function parseCLI() {
    const { values, positionals } = parseArgs({
        args: process.argv.slice(2), // Skip node and script path
        options: {
            // Global options
            index: {
                type: "string",
            },
            context: {
                type: "string",
            },
            help: { type: "boolean", short: "h" },
            version: { type: "boolean", short: "v" },
            skill: { type: "boolean" },
            global: { type: "boolean" },
            yes: { type: "boolean" },
            // Search options
            n: { type: "string" },
            "min-score": { type: "string" },
            all: { type: "boolean" },
            full: { type: "boolean" },
            format: { type: "string" }, // preferred: --format cli|json|csv|md|xml|files
            // Legacy boolean format aliases. Kept working for back-compat but
            // omitted from the documented help; prefer `--format <kind>`.
            csv: { type: "boolean" },
            md: { type: "boolean" },
            xml: { type: "boolean" },
            files: { type: "boolean" },
            json: { type: "boolean" },
            explain: { type: "boolean" },
            collection: { type: "string", short: "c", multiple: true }, // Filter by collection(s)
            // Collection options
            name: { type: "string" }, // collection name
            mask: { type: "string" }, // glob pattern
            // Embed options
            force: { type: "boolean", short: "f" },
            "max-docs-per-batch": { type: "string" },
            "max-batch-mb": { type: "string" },
            // Update options
            pull: { type: "boolean" }, // git pull before update
            refresh: { type: "boolean" },
            // Get options
            l: { type: "string" }, // max lines
            from: { type: "string" }, // start line
            "max-bytes": { type: "string" }, // max bytes for multi-get
            "line-numbers": { type: "boolean" }, // add line numbers to output (search; default on for get/multi-get)
            "no-line-numbers": { type: "boolean" }, // disable line numbers for get/multi-get
            "full-path": { type: "boolean" }, // show on-disk paths instead of qmd:// (get/multi-get/search/query)
            // Query options
            "candidate-limit": { type: "string", short: "C" },
            "no-rerank": { type: "boolean", default: false },
            "no-gpu": { type: "boolean", default: false },
            intent: { type: "string" },
            // Chunking options
            "chunk-strategy": { type: "string" }, // "regex" (default) or "auto" (AST for code files)
            // MCP HTTP transport options
            http: { type: "boolean" },
            daemon: { type: "boolean" },
            port: { type: "string" },
        },
        allowPositionals: true,
        strict: false, // Allow unknown options to pass through
    });
    if (values["no-gpu"]) {
        process.env.QMD_FORCE_CPU = "1";
    }
    // Select index name (default: "index"). If no explicit --index is supplied,
    // a project-local .qmd/index.yaml overrides the global config/cache paths.
    const indexName = values.index;
    if (indexName) {
        setIndexName(indexName);
        setConfigIndexName(indexName);
        setConfigSource();
    }
    else {
        const localConfigPath = findLocalConfigPath();
        if (localConfigPath) {
            setConfigSource({ configPath: localConfigPath });
            storeDbPathOverride = getLocalDbPath(localConfigPath);
            closeDb();
        }
        else {
            setConfigSource();
        }
    }
    // Determine output format. Prefer --format <kind>; fall back to the
    // legacy boolean aliases (--csv/--md/--xml/--files/--json) which remain
    // wired up for back-compat but are no longer documented.
    let format = "cli";
    const rawFormat = typeof values.format === "string" ? values.format.toLowerCase().trim() : "";
    const VALID_FORMATS = ["cli", "json", "csv", "md", "xml", "files"];
    if (rawFormat) {
        if (VALID_FORMATS.includes(rawFormat)) {
            format = rawFormat;
        }
        else {
            console.error(`Unknown --format value: ${values.format}`);
            console.error(`Valid: ${VALID_FORMATS.join(", ")}`);
            process.exit(1);
        }
    }
    else if (values.csv)
        format = "csv";
    else if (values.md)
        format = "md";
    else if (values.xml)
        format = "xml";
    else if (values.files)
        format = "files";
    else if (values.json)
        format = "json";
    // Default limit: 20 for --files/--json, 5 otherwise
    // --all means return all results (use very large limit)
    const defaultLimit = (format === "files" || format === "json") ? 20 : 5;
    const isAll = !!values.all;
    // A non-numeric --candidate-limit parses to NaN; treat it like an absent
    // flag instead of handing NaN to the search pipeline.
    const parsedCandidateLimit = values["candidate-limit"]
        ? Number.parseInt(String(values["candidate-limit"]), 10)
        : undefined;
    const candidateLimit = Number.isNaN(parsedCandidateLimit) ? undefined : parsedCandidateLimit;
    const opts = {
        format,
        full: !!values.full,
        // An unparsable or zero -n falls back to the default limit.
        limit: isAll ? 100000 : (values.n ? parseInt(String(values.n), 10) || defaultLimit : defaultLimit),
        minScore: values["min-score"] ? parseFloat(String(values["min-score"])) || 0 : 0,
        all: isAll,
        collection: values.collection,
        lineNumbers: !!values["line-numbers"],
        candidateLimit,
        skipRerank: !!values["no-rerank"],
        explain: !!values.explain,
        intent: values.intent,
        chunkStrategy: parseChunkStrategy(values["chunk-strategy"]),
        fullPath: !!values["full-path"],
    };
    return {
        command: positionals[0] || "",
        args: positionals.slice(1),
        query: positionals.slice(1).join(" "),
        opts,
        values,
    };
}
2571
/**
 * Resolve the directory the QMD skill is installed into.
 *
 * @param {boolean} globalInstall - Truthy for the home directory, otherwise
 *   the directory returned by `getPwd()`.
 * @returns {string} Absolute path ending in `.agents/skills/qmd`.
 */
function getSkillInstallDir(globalInstall) {
    const base = globalInstall ? homedir() : getPwd();
    return resolve(base, ".agents", "skills", "qmd");
}
2576
/**
 * Resolve the path of the Claude skill link for QMD.
 *
 * @param {boolean} globalInstall - Truthy for the home directory, otherwise
 *   the directory returned by `getPwd()`.
 * @returns {string} Absolute path ending in `.claude/skills/qmd`.
 */
function getClaudeSkillLinkPath(globalInstall) {
    const base = globalInstall ? homedir() : getPwd();
    return resolve(base, ".claude", "skills", "qmd");
}
2581
/**
 * Report whether something exists at `path`. Uses `lstatSync`, so a symlink
 * counts as existing even when its target is missing.
 *
 * @param {string} path - Path to probe.
 * @returns {boolean} `true` when `lstatSync` succeeds, `false` when it throws.
 */
function pathExists(path) {
    let found = true;
    try {
        lstatSync(path);
    }
    catch {
        found = false;
    }
    return found;
}
2590
/**
 * Delete whatever is at `path`: real directories are removed recursively,
 * anything else (files, symlinks) is unlinked.
 *
 * @param {string} path - Existing path to remove.
 * @returns {void}
 * @throws {Error} When `lstatSync` fails, e.g. the path does not exist.
 */
function removePath(path) {
    const info = lstatSync(path);
    const isRealDirectory = info.isDirectory() && !info.isSymbolicLink();
    if (!isRealDirectory) {
        unlinkSync(path);
        return;
    }
    rmSync(path, { recursive: true, force: true });
}
2599
// Name of the directory that holds the bundled skills.
const SKILL_DIR = "skills";
/**
 * Find the closest ancestor of this module's directory (the directory itself
 * included) that contains a `skills` entry.
 *
 * @returns {string|null} That directory, or `null` when `QMD_SKILLS_DIR` is
 *   set or no ancestor up to the filesystem root qualifies.
 */
function findPackageRoot() {
    if (process.env.QMD_SKILLS_DIR) {
        return null;
    }
    let candidate = dirname(fileURLToPath(import.meta.url));
    for (;;) {
        if (existsSync(resolve(candidate, SKILL_DIR))) {
            return candidate;
        }
        const above = dirname(candidate);
        // dirname() of the filesystem root is the root itself: stop there.
        if (above === candidate) {
            return null;
        }
        candidate = above;
    }
}
2617
/**
 * List the directories searched for skills.
 *
 * @param {boolean} [_runtimeOnly=false] - Accepted but not used.
 * @returns {string[]} `[QMD_SKILLS_DIR]` when that variable is set; otherwise
 *   the package's `skills` directory if it exists, else an empty list.
 */
function getSkillSearchDirs(_runtimeOnly = false) {
    const override = process.env.QMD_SKILLS_DIR;
    if (override) {
        return [override];
    }
    const packageRoot = findPackageRoot();
    if (!packageRoot) {
        return [];
    }
    const bundled = resolve(packageRoot, SKILL_DIR);
    if (!existsSync(bundled)) {
        return [];
    }
    return [bundled];
}
2627
/**
 * Extract `name`, `description` and `hidden` from the leading `---` block of
 * a SKILL.md file. This is a deliberately small line-based reader, not a full
 * YAML parser.
 *
 * Handled beyond plain `key: value` lines:
 * - indented continuation lines after `description:` are joined with spaces;
 * - a block-scalar indicator after `description:` (`>`, `|`, optionally with
 *   chomping/indent markers) is dropped rather than kept as text;
 * - a value wrapped in matching single or double quotes is unquoted.
 *
 * @param {string} content - Full file content.
 * @returns {{ name: string, description: string, hidden: boolean } | null}
 *   The metadata, or `null` when there is no closed frontmatter block or no
 *   `name:` entry.
 */
function parseSkillFrontmatter(content) {
    const trimmed = content.trimStart();
    if (!trimmed.startsWith("---"))
        return null;
    const end = trimmed.slice(3).indexOf("\n---");
    if (end < 0)
        return null;
    const frontmatter = trimmed.slice(3, 3 + end);
    // Strip one pair of matching surrounding quotes, if present.
    const unquote = (value) => {
        const first = value[0];
        const isQuoted = value.length >= 2 && (first === '"' || first === "'") && value.endsWith(first);
        return isQuoted ? value.slice(1, -1) : value;
    };
    // Matches a YAML block-scalar header such as ">", "|", ">-", "|+", "|2-".
    const blockScalarHeader = /^[>|](?:[+-]?[1-9]?|[1-9][+-]?)$/;
    let name = "";
    let description = "";
    let hidden = false;
    const lines = frontmatter.split(/\r?\n/);
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (line.startsWith("name:")) {
            name = unquote(line.slice("name:".length).trim());
        }
        else if (line.startsWith("description:")) {
            const head = line.slice("description:".length).trim();
            // Skip an empty head or a block-scalar header so neither a leading
            // space nor a stray ">" ends up in the description.
            const parts = head && !blockScalarHeader.test(head) ? [head] : [];
            // Consume the indented lines that continue the description.
            while (i + 1 < lines.length && /^\s+\S/.test(lines[i + 1])) {
                i++;
                parts.push(lines[i].trim());
            }
            description = unquote(parts.join(" "));
        }
        else if (line.startsWith("hidden:")) {
            const value = unquote(line.slice("hidden:".length).trim()).toLowerCase();
            hidden = value === "true" || value === "yes";
        }
    }
    if (!name)
        return null;
    return { name, description, hidden };
}
2661
/**
 * Scan every skill search directory for sub-directories containing a
 * `SKILL.md` with parsable frontmatter.
 *
 * @param {boolean} [runtimeOnly=false] - Forwarded to `getSkillSearchDirs`.
 * @returns {Array<{ name: string, description: string, hidden: boolean, dir: string }>}
 *   Discovered skills sorted by name; unreadable directories and files are
 *   skipped silently.
 */
function discoverSkills(runtimeOnly = false) {
    // Directory listing that yields nothing when the directory cannot be read.
    const listEntries = (dir) => {
        try {
            return readdirSync(dir);
        }
        catch {
            return [];
        }
    };
    // Load one candidate skill directory; null when it is not a usable skill.
    const loadSkill = (skillDir) => {
        const manifestPath = resolve(skillDir, "SKILL.md");
        if (!existsSync(manifestPath)) {
            return null;
        }
        let text;
        try {
            text = readFileSync(manifestPath, "utf-8");
        }
        catch {
            return null;
        }
        const meta = parseSkillFrontmatter(text);
        return meta ? { ...meta, dir: skillDir } : null;
    };
    return getSkillSearchDirs(runtimeOnly)
        .flatMap((dir) => listEntries(dir).map((entry) => loadSkill(resolve(dir, entry))))
        .filter((skill) => skill !== null)
        .sort((left, right) => left.name.localeCompare(right.name));
}
2691
/**
 * Look up a discovered skill by exact name.
 *
 * @param {string} name - Skill name to match.
 * @param {boolean} [runtimeOnly=false] - Forwarded to `discoverSkills`.
 * @returns {object|null} The first matching skill, or `null` when none matches.
 */
function findSkill(name, runtimeOnly = false) {
    for (const candidate of discoverSkills(runtimeOnly)) {
        if (candidate.name === name) {
            return candidate;
        }
    }
    return null;
}
2694
/**
 * Read the `SKILL.md` file of a skill.
 *
 * @param {{ dir: string }} skill - Skill whose directory holds `SKILL.md`.
 * @returns {string} The file content decoded as UTF-8.
 */
function readSkillContent(skill) {
    const manifestPath = resolve(skill.dir, "SKILL.md");
    return readFileSync(manifestPath, "utf-8");
}
2697
/**
 * Gather the supplementary files of a skill from its `references`,
 * `templates` and `scripts` sub-directories (top level only, sorted by name
 * within each sub-directory).
 *
 * @param {{ dir: string }} skill - Skill to inspect.
 * @returns {Array<{ relativePath: string, content: string }>} One entry per
 *   regular file; entries that cannot be stat'ed or read are left out.
 */
function collectSkillFiles(skill) {
    const collected = [];
    ["references", "templates", "scripts"].forEach((folderName) => {
        const folder = resolve(skill.dir, folderName);
        if (!existsSync(folder)) {
            return;
        }
        const names = readdirSync(folder).sort();
        names.forEach((name) => {
            const fullPath = resolve(folder, name);
            try {
                if (statSync(fullPath).isFile()) {
                    const content = readFileSync(fullPath, "utf-8");
                    collected.push({ relativePath: `${folderName}/${basename(fullPath)}`, content });
                }
            }
            catch {
                // Ignore unreadable supplementary files.
            }
        });
    });
    return collected;
}
2717
/**
 * Print the bundled `qmd` skill to stdout under a short heading.
 *
 * @returns {void}
 * @throws {Error} When the `qmd` skill cannot be found.
 */
function showSkill() {
    const qmdSkill = findSkill("qmd");
    if (!qmdSkill) {
        throw new Error("QMD skill not found. Reinstall qmd or set QMD_SKILLS_DIR.");
    }
    console.log("QMD Skill");
    console.log("");
    let text = readSkillContent(qmdSkill);
    // Make sure the output ends with exactly the newline the file may lack.
    if (!text.endsWith("\n")) {
        text = text + "\n";
    }
    process.stdout.write(text);
}
2727
/**
 * Recursively copy the files and sub-directories of `sourceDir` into
 * `targetDir`, creating `targetDir` (and missing parents) first. Entries that
 * are neither a file nor a directory according to `statSync` are skipped.
 *
 * @param {string} sourceDir - Directory to copy from.
 * @param {string} targetDir - Directory to copy into.
 * @returns {void}
 */
function copyDirectoryContents(sourceDir, targetDir) {
    mkdirSync(targetDir, { recursive: true });
    readdirSync(sourceDir).forEach((entryName) => {
        const from = resolve(sourceDir, entryName);
        const to = resolve(targetDir, entryName);
        const info = statSync(from);
        if (info.isDirectory()) {
            copyDirectoryContents(from, to);
            return;
        }
        if (info.isFile()) {
            copyFileSync(from, to);
        }
    });
}
2741
/**
 * Build the text of the small bootstrap `SKILL.md` written into an installed
 * skill directory. The stub tells the agent to load the full instructions by
 * running `qmd skill show`.
 *
 * @returns {string} The stub file content, ending with a newline.
 */
function installedSkillStubContent() {
    const frontmatter = [
        "---",
        "name: qmd",
        "description: Bootstrap QMD search instructions from the installed qmd CLI. Use when users ask to find notes, retrieve documents, inspect a wiki, or answer from indexed local markdown.",
        "license: MIT",
        "compatibility: Requires qmd CLI. Run `qmd skill show` for version-matched instructions.",
        "allowed-tools: Bash(qmd:*), mcp__qmd__*",
        "---",
    ];
    const body = [
        "",
        "# QMD - Query Markdown Documents",
        "",
        "This installed skill is intentionally a small bootstrap so it does not go stale",
        "when the qmd package updates.",
        "",
        "Load the full, version-matched QMD instructions from the CLI:",
        "",
        "!`qmd skill show`",
        "",
        "If your agent does not support bang-command expansion, run:",
        "",
        "```bash",
        "qmd skill show",
        "```",
        "",
        "Then follow those instructions. In short: search first, fetch full sources with",
        "`qmd get` or `qmd multi-get`, and answer from retrieved text rather than snippets.",
        // Trailing empty entry yields the final newline.
        "",
    ];
    return [...frontmatter, ...body].join("\n");
}
2769
/**
 * Install the bundled `qmd` skill into `targetDir`: copy the skill directory,
 * then overwrite its `SKILL.md` with the bootstrap stub.
 *
 * The bundled skill is located before anything is deleted, so a missing skill
 * no longer wipes an existing installation when `force` is set.
 *
 * @param {string} targetDir - Destination directory for the skill.
 * @param {boolean} force - Replace an existing `targetDir` instead of failing.
 * @returns {void}
 * @throws {Error} When `targetDir` exists and `force` is falsy, or when the
 *   `qmd` skill cannot be found.
 */
function writeSkillInstall(targetDir, force) {
    const targetExists = pathExists(targetDir);
    if (targetExists && !force) {
        throw new Error(`Skill already exists: ${targetDir} (use --force to replace it)`);
    }
    // Validate the source first; removal below is destructive.
    const skill = findSkill("qmd");
    if (!skill) {
        throw new Error("QMD skill not found. Reinstall qmd or set QMD_SKILLS_DIR.");
    }
    if (targetExists) {
        removePath(targetDir);
    }
    copyDirectoryContents(skill.dir, targetDir);
    // The copied SKILL.md is replaced by the stub that defers to `qmd skill show`.
    writeFileSync(resolve(targetDir, "SKILL.md"), installedSkillStubContent(), "utf-8");
}
2783
/**
 * Print a payload to stdout as a single line of compact JSON.
 *
 * @param {*} payload - Value to serialize with `JSON.stringify`.
 * @returns {void}
 */
function outputSkillsJson(payload) {
    const serialized = JSON.stringify(payload);
    console.log(serialized);
}
2786
/**
 * Dispatch the `skills` subcommands: `list`, `get`, `path` and `help`.
 *
 * @param {string[]} args - Arguments after `skills`; `args[0]` is the
 *   subcommand and defaults to `list` when absent.
 * @param {boolean} jsonMode - Emit structured JSON instead of plain text.
 * @param {boolean} [fullOption=false] - `get`: also include supplementary
 *   files (may be given as a literal `--full` in `args` instead).
 * @param {boolean} [allOption=false] - `get`: output every non-hidden skill
 *   (may be given as a literal `--all` in `args` instead).
 * @returns {void}
 * @throws {Error} For an unknown subcommand, an unknown skill name, or a
 *   `get` that resolves to no skills.
 */
function runSkillsCommand(args, jsonMode, fullOption = false, allOption = false) {
    const subcommand = args[0] ?? "list";
    const visibleSkills = () => discoverSkills(true).filter((skill) => !skill.hidden);
    // Resolve a skill by name or fail with the not-found message.
    const requireSkill = (name) => {
        const match = findSkill(name, true);
        if (!match) {
            throw new Error(`Skill not found: ${name}`);
        }
        return match;
    };
    // Write text to stdout, appending a newline only when it is missing.
    const writeWithNewline = (text) => {
        process.stdout.write(text.endsWith("\n") ? text : text + "\n");
    };

    if (subcommand === "list") {
        const skills = visibleSkills();
        if (jsonMode) {
            outputSkillsJson({ success: true, data: skills.map(({ name, description }) => ({ name, description })) });
            return;
        }
        if (skills.length === 0) {
            console.log("No skills found");
            return;
        }
        // Pad names so the descriptions line up.
        const maxName = Math.max(...skills.map((skill) => skill.name.length));
        skills.forEach((skill) => {
            console.log(` ${skill.name.padEnd(maxName)} ${skill.description}`);
        });
        return;
    }

    if (subcommand === "get") {
        const full = fullOption || args.includes("--full");
        const getAll = allOption || args.includes("--all");
        const names = args.slice(1).filter((arg) => arg !== "--full" && arg !== "--all");
        // With --all the explicit names are ignored.
        const targets = getAll ? visibleSkills() : names.map(requireSkill);
        if (targets.length === 0) {
            throw new Error("No skill name provided. Usage: qmd skills get <name>");
        }
        if (jsonMode) {
            const data = targets.map((skill) => {
                const extra = full
                    ? { files: collectSkillFiles(skill).map((file) => ({ path: file.relativePath, content: file.content })) }
                    : {};
                return { name: skill.name, content: readSkillContent(skill), ...extra };
            });
            outputSkillsJson({ success: true, data });
            return;
        }
        targets.forEach((skill, position) => {
            // Separator between consecutive skills.
            if (position > 0) {
                console.log("\n---\n");
            }
            writeWithNewline(readSkillContent(skill));
            if (!full) {
                return;
            }
            for (const file of collectSkillFiles(skill)) {
                console.log(`\n--- ${file.relativePath} ---\n`);
                writeWithNewline(file.content);
            }
        });
        return;
    }

    if (subcommand === "path") {
        const name = args[1];
        if (!name) {
            // No name: report the search directories themselves.
            const paths = getSkillSearchDirs(true);
            if (jsonMode) {
                outputSkillsJson({ success: true, data: { paths } });
            }
            else {
                paths.forEach((path) => console.log(path));
            }
            return;
        }
        const skill = requireSkill(name);
        if (jsonMode) {
            outputSkillsJson({ success: true, data: { name: skill.name, path: skill.dir } });
        }
        else {
            console.log(skill.dir);
        }
        return;
    }

    if (subcommand === "help") {
        showSkillsHelp();
        return;
    }

    throw new Error(`Unknown skills subcommand: ${subcommand}`);
}
2873
/**
 * Print the usage text for the `skills` command to stdout, one
 * `console.log` call per line.
 *
 * @returns {void}
 */
function showSkillsHelp() {
    const helpLines = [
        "Usage: qmd skills <list|get|path> [options]",
        "",
        "Commands:",
        " list List bundled runtime skills",
        " get <name> Print a bundled runtime skill",
        " get <name> --full Include references/templates/scripts",
        " get --all Print all bundled runtime skills",
        " path [name] Print runtime skill directory path(s)",
        "",
        "Options:",
        " --json Print structured JSON",
    ];
    for (const helpLine of helpLines) {
        console.log(helpLine);
    }
}
2886
/**
 * Make `linkPath` a directory symlink pointing (relatively) at `targetDir`.
 *
 * @param {string} linkPath - Where the symlink should live.
 * @param {string} targetDir - Installed skill directory to link to.
 * @param {boolean} force - Replace a conflicting entry at `linkPath`.
 * @returns {boolean} `false` when the link's parent already resolves to the
 *   same directory as the target's parent (no link needed); `true` when the
 *   correct link already existed or was created.
 * @throws {Error} When `linkPath` exists with different content and `force`
 *   is falsy.
 */
function ensureClaudeSymlink(linkPath, targetDir, force) {
    const linkParent = dirname(linkPath);
    if (pathExists(linkParent)) {
        const realTargetParent = realpathSync(dirname(targetDir));
        const realLinkParent = realpathSync(linkParent);
        // If .claude/skills already resolves to the same directory as .agents/skills,
        // the skill is already visible to Claude and creating qmd -> qmd would loop.
        if (realTargetParent === realLinkParent) {
            return false;
        }
    }
    const linkTarget = relativePath(linkParent, targetDir) || ".";
    mkdirSync(linkParent, { recursive: true });
    if (pathExists(linkPath)) {
        const alreadyLinked = lstatSync(linkPath).isSymbolicLink() && readlinkSync(linkPath) === linkTarget;
        if (alreadyLinked) {
            return true;
        }
        if (!force) {
            throw new Error(`Claude skill path already exists: ${linkPath} (use --force to replace it)`);
        }
        removePath(linkPath);
    }
    symlinkSync(linkTarget, linkPath, "dir");
    return true;
}
2912
/**
 * Decide whether the Claude symlink should be created.
 *
 * @param {string} linkPath - Link location, shown in the prompt and the tip.
 * @param {boolean} autoYes - Skip the prompt and answer yes.
 * @returns {Promise<boolean>} `true` when `autoYes` is set or the user answers
 *   `y`/`yes`; `false` otherwise, including when stdin or stdout is not a TTY
 *   (a manual-setup tip is printed in that case).
 */
async function shouldCreateClaudeSymlink(linkPath, autoYes) {
    if (autoYes) {
        return true;
    }
    const interactive = process.stdin.isTTY && process.stdout.isTTY;
    if (!interactive) {
        console.log(`Tip: create a Claude symlink manually at ${linkPath}`);
        return false;
    }
    const prompt = createInterface({ input: process.stdin, output: process.stdout });
    try {
        const reply = (await prompt.question(`Create a symlink in ${linkPath}? [y/N] `)).trim().toLowerCase();
        return ["y", "yes"].includes(reply);
    }
    finally {
        // Always release the terminal, even if the prompt fails.
        prompt.close();
    }
}
2933
/**
 * Install the QMD skill and, if wanted, link it for Claude.
 *
 * @param {boolean} globalInstall - Install under the home directory instead
 *   of the current project.
 * @param {boolean} force - Replace an existing install or conflicting link.
 * @param {boolean} autoYes - Create the Claude link without prompting.
 * @returns {Promise<void>}
 */
async function installSkill(globalInstall, force, autoYes) {
    const installDir = getSkillInstallDir(globalInstall);
    writeSkillInstall(installDir, force);
    console.log(`✓ Installed QMD skill to ${installDir}`);
    const claudeLinkPath = getClaudeSkillLinkPath(globalInstall);
    const wantsLink = await shouldCreateClaudeSymlink(claudeLinkPath, autoYes);
    if (!wantsLink) {
        return;
    }
    // false means Claude's skills directory already resolves to the install location.
    const message = ensureClaudeSymlink(claudeLinkPath, installDir, force)
        ? `✓ Linked Claude skill at ${claudeLinkPath}`
        : `✓ Claude already sees the skill via ${dirname(claudeLinkPath)}`;
    console.log(message);
}
2949
+ function showHelp() {
2950
+ console.log("qmd — Quick Markdown Search");
2951
+ console.log("");
2952
+ console.log("Usage:");
2953
+ console.log(" qmd <command> [options]");
2954
+ console.log("");
2955
+ console.log("Primary commands:");
2956
+ console.log(" qmd query <query> - Hybrid search with auto expansion + reranking (recommended)");
2957
+ console.log(" qmd query 'lex:..\\nvec:...' - Structured query document (you provide lex/vec/hyde lines)");
2958
+ console.log(" qmd search <query> - Full-text BM25 keywords (no LLM)");
2959
+ console.log(" qmd vsearch <query> - Vector similarity only");
2960
+ console.log(" qmd get <file>[:from[:count]] - Show a document (line-numbered; #docid in header)");
2961
+ console.log(" qmd multi-get <pattern> - Batch fetch via glob or comma-separated list");
2962
+ console.log(" qmd skills list/get/path - List and retrieve bundled runtime skills");
2963
+ console.log(" qmd skill show/install - Show or install the QMD skill");
2964
+ console.log(" qmd mcp - Start the MCP server (stdio transport for AI agents)");
2965
+ console.log(" qmd bench <fixture.json> - Run search quality benchmarks against a fixture file");
2966
+ console.log("");
2967
+ console.log("Collections & context:");
2968
+ console.log(" qmd collection add/list/remove/rename/show - Manage indexed folders");
2969
+ console.log(" qmd context add/list/rm - Attach human-written summaries");
2970
+ console.log(" qmd ls [collection[/path]] - Inspect indexed files");
2971
+ console.log("");
2972
+ console.log("Maintenance:");
2973
+ console.log(" qmd init - Create a project-local .qmd index");
2974
+ console.log(" qmd status - View index + collection health");
2975
+ console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)");
2976
+ console.log(" qmd embed [-f] [-c <name>] - Generate/refresh vector embeddings");
2977
+ console.log(" --max-docs-per-batch <n> - Cap docs loaded into memory per embedding batch");
2978
+ console.log(" --max-batch-mb <n> - Cap UTF-8 MB loaded into memory per embedding batch");
2979
+ console.log(" qmd cleanup - Clear caches, vacuum DB");
2980
+ console.log("");
2981
+ console.log("Query syntax (qmd query):");
2982
+ console.log(" QMD queries are either a single expand query (no prefix) or a multi-line");
2983
+ console.log(" document where every line is typed with lex:, vec:, or hyde:. This grammar");
2984
+ console.log(" matches the docs in docs/SYNTAX.md and is enforced in the CLI.");
2985
+ console.log("");
2986
+ const grammar = [
2987
+ `query = expand_query | query_document ;`,
2988
+ `expand_query = text | explicit_expand ;`,
2989
+ `explicit_expand= "expand:" text ;`,
2990
+ `query_document = [ intent_line ] { typed_line } ;`,
2991
+ `intent_line = "intent:" text newline ;`,
2992
+ `typed_line = type ":" text newline ;`,
2993
+ `type = "lex" | "vec" | "hyde" ;`,
2994
+ `text = quoted_phrase | plain_text ;`,
2995
+ `quoted_phrase = '"' { character } '"' ;`,
2996
+ `plain_text = { character } ;`,
2997
+ `newline = "\\n" ;`,
2998
+ ];
2999
+ console.log(" Grammar:");
3000
+ for (const line of grammar) {
3001
+ console.log(` ${line}`);
3002
+ }
3003
+ console.log("");
3004
+ console.log(" Examples:");
3005
+ console.log(" qmd query \"how does auth work\" # single-line → implicit expand");
3006
+ console.log(" qmd query $'lex: CAP theorem\\nvec: consistency' # typed query document");
3007
+ console.log(" qmd query $'lex: \"exact matches\" sports -baseball' # phrase + negation lex search");
3008
+ console.log(" qmd query $'hyde: Hypothetical answer text' # hyde-only document");
3009
+ console.log("");
3010
+ console.log(" Constraints:");
3011
+ console.log(" - Standalone expand queries cannot mix with typed lines.");
3012
+ console.log(" - Query documents allow only lex:, vec:, or hyde: prefixes.");
3013
+ console.log(" - Each typed line must be single-line text with balanced quotes.");
3014
+ console.log("");
3015
+ console.log("AI agents & integrations:");
3016
+ console.log(" - Run `qmd mcp` to expose the MCP server (stdio) to agents/IDEs.");
3017
+ console.log(" - Run `qmd skills get qmd --full` for version-matched agent instructions.");
3018
+ console.log(" - `qmd skill install` installs the QMD skill into ./.agents/skills/qmd.");
3019
+ console.log(" - Use `qmd skill install --global` for ~/.agents/skills/qmd.");
3020
+ console.log(" - `qmd --skill` is kept as an alias for `qmd skill show`.");
3021
+ console.log(" - Advanced: `qmd mcp --http ...` and `qmd mcp --http --daemon` are optional for custom transports.");
3022
+ console.log("");
3023
+ console.log("Global options:");
3024
+ console.log(" --index <name> - Use a named index (default: index)");
3025
+ console.log(" QMD_EDITOR_URI - Editor link template for clickable TTY search output");
3026
+ console.log("");
3027
+ console.log("Search options:");
3028
+ console.log(" -n <num> - Max results (default 5, or 20 for --format files|json)");
3029
+ console.log(" --all - Return all matches (pair with --min-score)");
3030
+ console.log(" --min-score <num> - Minimum similarity score");
3031
+ console.log(" --full - Output full document instead of snippet");
3032
+ console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
3033
+ console.log(" --no-rerank - Skip LLM reranking (use RRF scores only, much faster on CPU)");
3034
+ console.log(" --no-gpu - Force CPU mode for llama.cpp operations (same as QMD_FORCE_CPU=1)");
3035
+ console.log(" --line-numbers - Include line numbers (search; get/multi-get are on by default)");
3036
+ console.log(" --no-line-numbers - Disable line numbers for get/multi-get");
3037
+ console.log(" --full-path - Show on-disk paths instead of qmd:// + docid (get/multi-get/search/query)");
3038
+ console.log(" Paths are ./-prefixed when under $PWD, absolute otherwise");
3039
+ console.log(" --explain - Include retrieval score traces (query, CLI/--format json)");
3040
+ console.log(" --format <kind> - Output format: cli (default) | json | csv | md | xml | files");
3041
+ console.log(" -c, --collection <name> - Filter by one or more collections");
3042
+ console.log("");
3043
+ console.log("Embed/query options:");
3044
+ console.log(" --chunk-strategy <auto|regex> - Chunking mode (default: regex; auto uses AST for code files)");
3045
+ console.log("");
3046
+ console.log("Multi-get options:");
3047
+ console.log(" -l <num> - Maximum lines per file");
3048
+ console.log(" --max-bytes <num> - Skip files larger than N bytes (default 10240)");
3049
+ console.log(" --format <kind> - Same formats as search");
3050
+ console.log("");
3051
+ console.log(`Index: ${getDbPath()}`);
3052
+ }
3053
/**
 * Print a single doctor result line in the form "<mark> <label>: <details>".
 *
 * @param {string} label - Name of the check being reported.
 * @param {*} ok - Truthy prints a green check mark, falsy a yellow warning sign.
 * @param {string} details - Free-form text shown after the label.
 */
function doctorCheck(label, ok, details) {
    let mark;
    if (ok) {
        mark = `${c.green}✓${c.reset}`;
    }
    else {
        mark = `${c.yellow}⚠${c.reset}`;
    }
    console.log(`${mark} ${label}: ${details}`);
}
3057
/**
 * Format a count using en-US digit grouping (e.g. 1234567 -> "1,234,567").
 *
 * @param {number} n - Value to format.
 * @returns {string} The locale-formatted value.
 */
function formatCount(n) {
    const locale = "en-US";
    return n.toLocaleString(locale);
}
3060
/**
 * Produce a compact display name for a model identifier.
 *
 * Identifiers starting with "hf:" are reduced to the text after the last "/"
 * (falling back to the full identifier when that text is empty). Anything
 * else is returned unchanged unless it is longer than 56 characters, in which
 * case it is cut to 53 characters plus "...".
 *
 * @param {string} model - Model identifier or path.
 * @returns {string} Shortened display name.
 */
function shortModelName(model) {
    const maxLength = 56;
    if (model.startsWith("hf:")) {
        const segments = model.split("/");
        const lastSegment = segments[segments.length - 1];
        return lastSegment || model;
    }
    if (model.length <= maxLength) {
        return model;
    }
    return `${model.slice(0, maxLength - 3)}...`;
}
3066
/**
 * De-duplicate doctor recommendations and collapse redundant embed advice.
 *
 * When a dedicated "Run `qmd embed --force` ..." step is present, every other
 * step that mentions `qmd embed` is dropped, because the forced rebuild
 * supersedes it. Steps that only mention `--force` as a fallback (for example
 * "Run `qmd embed` to migrate ...; use `qmd embed --force` if ...") do NOT
 * count as a dedicated force step. Previously they triggered the filter and
 * were then removed by it, leaving the user with no embed recommendation.
 *
 * @param {string[]} steps - Recommendations in the order they were collected.
 * @returns {string[]} Unique recommendations, original order preserved.
 */
function normalizedDoctorNextSteps(steps) {
    const unique = Array.from(new Set(steps));
    const isForceStep = (step) => step.startsWith("Run `qmd embed --force`");
    if (!unique.some(isForceStep))
        return unique;
    return unique.filter(step => !step.includes("qmd embed") || isForceStep(step));
}
3073
/**
 * Shorten a "<hash>_<seq>" identifier for display.
 *
 * With an underscore present the result is at most the first 12 characters of
 * the part before the last underscore, followed by "_" and everything after
 * it. The prefix is clamped to the underscore position so that short
 * identifiers such as "abc_1" are returned as-is instead of "abc_1_1".
 * Without an underscore the value is truncated to 18 characters plus "..."
 * when longer than 18.
 *
 * @param {string} hashSeq - Identifier to shorten.
 * @returns {string} Shortened identifier.
 */
function shortHashSeq(hashSeq) {
    const idx = hashSeq.lastIndexOf("_");
    if (idx < 0)
        return hashSeq.length > 18 ? `${hashSeq.slice(0, 18)}...` : hashSeq;
    // Never let the prefix run past the separator, or the suffix is duplicated.
    const prefix = hashSeq.slice(0, Math.min(12, idx));
    return `${prefix}_${hashSeq.slice(idx + 1)}`;
}
3079
/**
 * Reinterpret a byte view as 32-bit floats.
 *
 * The bytes covered by the view are copied out of the underlying buffer
 * first, so the result does not depend on the view's offset within it.
 *
 * @param {Uint8Array} bytes - Byte view (anything exposing buffer/byteOffset/byteLength).
 * @returns {Float32Array} Floats backed by a fresh copy of those bytes.
 */
function decodeStoredEmbedding(bytes) {
    const start = bytes.byteOffset;
    const end = bytes.byteOffset + bytes.byteLength;
    const copy = bytes.buffer.slice(start, end);
    return new Float32Array(copy);
}
3082
/**
 * Cosine distance (1 - cosine similarity) between two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} The distance, or positive infinity when the vectors are
 *   empty, differ in length, or either has zero magnitude.
 */
function cosineDistance(a, b) {
    const size = a.length;
    if (size === 0 || size !== b.length) {
        return Number.POSITIVE_INFINITY;
    }
    let dotProduct = 0;
    let squaredA = 0;
    let squaredB = 0;
    for (let index = 0; index < size; index += 1) {
        // Missing entries count as 0.
        const left = a[index] ?? 0;
        const right = b[index] ?? 0;
        dotProduct += left * right;
        squaredA += left * left;
        squaredB += right * right;
    }
    if (squaredA === 0 || squaredB === 0) {
        return Number.POSITIVE_INFINITY;
    }
    const magnitude = Math.sqrt(squaredA) * Math.sqrt(squaredB);
    return 1 - (dotProduct / magnitude);
}
3099
/**
 * Prepare a model file path for inclusion in diagnostic output by passing it
 * through sanitizeDiagnosticMessage.
 *
 * @param {string} path - Model file path.
 * @returns {string} Whatever sanitizeDiagnosticMessage returns for the path.
 */
function formatModelDiagnosticPath(path) {
    const sanitized = sanitizeDiagnosticMessage(path);
    return sanitized;
}
3102
/**
 * Locate a usable GGUF file for a model and collect reasons for any rejected
 * candidates.
 *
 * For "hf:" identifiers, files in DEFAULT_MODEL_CACHE_DIR whose name contains
 * the identifier's last path segment are inspected in directory order; the
 * first valid one wins. Any other identifier is treated as a file path and
 * inspected directly.
 *
 * @param {string} model - "hf:" identifier or file path.
 * @returns {{ path: string | null, invalid: string[] }} The valid file path
 *   (or null) plus "<path>: <details>" entries for files that exist but
 *   failed inspection.
 */
function findCachedModelInspection(model) {
    const invalid = [];
    const describe = (file, report) => `${formatModelDiagnosticPath(file)}: ${report.details}`;
    if (!model.startsWith("hf:")) {
        const report = inspectGgufFile(model);
        if (report.valid) {
            return { path: model, invalid };
        }
        // A missing local file is simply "not found", not "invalid".
        if (report.exists) {
            invalid.push(describe(model, report));
        }
        return { path: null, invalid };
    }
    const filename = model.split("/").pop();
    if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) {
        return { path: null, invalid };
    }
    for (const entry of readdirSync(DEFAULT_MODEL_CACHE_DIR, { withFileTypes: true })) {
        if (entry.isFile() && entry.name.includes(filename)) {
            const candidate = pathJoin(DEFAULT_MODEL_CACHE_DIR, entry.name);
            const report = inspectGgufFile(candidate);
            if (report.valid) {
                return { path: candidate, invalid };
            }
            invalid.push(describe(candidate, report));
        }
    }
    return { path: null, invalid };
}
3127
/**
 * Sanitize an environment variable value and cap it at 96 characters for
 * display (longer values become the first 93 characters plus "...").
 *
 * @param {string} value - Raw environment value.
 * @returns {string} Sanitized, possibly truncated value.
 */
function envValueForDisplay(value) {
    const limit = 96;
    const cleaned = sanitizeDiagnosticMessage(value);
    if (cleaned.length <= limit) {
        return cleaned;
    }
    return `${cleaned.slice(0, limit - 3)}...`;
}
3131
/**
 * List the QMD-relevant environment variables that are currently set to a
 * non-blank value, each with a display value and a description of its effect.
 *
 * @param {{ embed: string, generate: string, rerank: string }} activeModels -
 *   Models currently in effect, used in the model-override descriptions.
 * @param {object} [configModels={}] - Models configured in the index config,
 *   keyed by "embed" / "generate" / "rerank".
 * @returns {{ name: string, value: string, consequence: string }[]} Overrides
 *   in a fixed reporting order.
 */
function collectEnvironmentOverrides(activeModels, configModels = {}) {
    const found = [];
    // Trimmed value of an env var, or undefined/"" when unset or blank.
    const readEnv = (name) => process.env[name]?.trim();
    const record = (name, raw, consequence) => {
        found.push({ name, value: envValueForDisplay(raw), consequence });
    };
    const plain = (name, consequence) => {
        const raw = readEnv(name);
        if (raw) {
            record(name, raw, consequence);
        }
    };
    const modelOverride = (name, key, active) => {
        const raw = readEnv(name);
        if (!raw) {
            return;
        }
        const configured = configModels[key];
        let consequence;
        if (configured && configured !== raw) {
            consequence = `set but ignored because index models.${key} is configured as ${configured}`;
        }
        else {
            consequence = `sets the active ${key} model to ${active}; changes embedding/search semantics and may require \`qmd pull\` plus \`qmd embed\``;
        }
        record(name, raw, consequence);
    };
    plain("INDEX_PATH", "overrides the SQLite index path; QMD reads/writes a different database");
    plain("QMD_CONFIG_DIR", "overrides the QMD config directory and takes precedence over XDG_CONFIG_HOME");
    plain("XDG_CONFIG_HOME", "moves QMD config to $XDG_CONFIG_HOME/qmd when QMD_CONFIG_DIR is not set");
    plain("XDG_CACHE_HOME", "moves the default index cache, model cache, and MCP daemon PID files");
    modelOverride("QMD_EMBED_MODEL", "embed", activeModels.embed);
    modelOverride("QMD_GENERATE_MODEL", "generate", activeModels.generate);
    modelOverride("QMD_RERANK_MODEL", "rerank", activeModels.rerank);
    plain("QMD_FORCE_CPU", "forces llama.cpp to bypass GPU backends; embeddings/query will be slower but GPU crashes are avoided");
    plain("QMD_LLAMA_GPU", "selects llama.cpp GPU backend (metal/cuda/vulkan) or disables GPU when set to false/off/0");
    plain("QMD_DOCTOR_DEVICE_PROBE", "controls qmd doctor native device probing; 0/off skips GPU probing");
    plain("QMD_EMBED_PARALLELISM", "overrides embedding parallel context count; too high can exhaust RAM/VRAM");
    plain("QMD_EXPAND_CONTEXT_SIZE", "overrides query expansion context size; larger values use more memory");
    plain("QMD_RERANK_CONTEXT_SIZE", "overrides reranker context size; larger values use more memory");
    plain("QMD_EMBED_CONTEXT_SIZE", "overrides embed context size; larger values use more memory");
    plain("QMD_EDITOR_URI", "overrides clickable editor link template in terminal output");
    plain("QMD_SKILLS_DIR", "overrides where qmd skills are discovered from");
    plain("QMD_METAL_KEEP_RESIDENCY", "opts back into libggml-metal residency sets on darwin; restores ~0ms perf wins for long-lived processes but re-exposes the static-destructor backtrace dump at process exit (ggml-org/llama.cpp#22593)");
    plain("GGML_METAL_NO_RESIDENCY", "set automatically by the launcher on darwin to disable Metal residency sets (avoids ggml-org/llama.cpp#22593); override via QMD_METAL_KEEP_RESIDENCY=1");
    plain("NO_COLOR", "disables colored terminal output");
    plain("CI", "disables real LLM operations inside QMD's LlamaCpp wrapper");
    plain("HF_ENDPOINT", "changes Hugging Face download endpoint used when pulling models");
    plain("QMD_WRAPPER_CAPTURE", "test/debug hook for the qmd shell wrapper; should not be set in normal use");
    plain("WSL_DISTRO_NAME", "enables WSL path handling heuristics");
    plain("WSL_INTEROP", "enables WSL path handling heuristics");
    return found;
}
3175
/**
 * Load the index config and report how many collections it defines.
 *
 * @param {string[]} nextSteps - Receives a recommendation when no collection
 *   is configured or when loading the config throws.
 * @returns {{ config: object | null, valid: boolean }} The loaded config with
 *   valid=true, or config=null with valid=false when loading threw.
 */
function checkDoctorIndexConfig(nextSteps) {
    try {
        const config = loadConfig();
        const total = Object.keys(config.collections ?? {}).length;
        if (total > 0) {
            const noun = total === 1 ? "collection" : "collections";
            doctorCheck("index config", true, `${formatCount(total)} ${noun} configured`);
        }
        else {
            doctorCheck("index config", false, "no collections configured. Next: `qmd collection add .`");
            nextSteps.push("Run `qmd collection add . --name <name>` from the folder you want to index, or edit .qmd/index.yml manually.");
        }
        return { config, valid: true };
    }
    catch (error) {
        const rawMessage = error instanceof Error ? error.message : String(error);
        const message = sanitizeDiagnosticMessage(rawMessage);
        const configPath = getConfigPath();
        doctorCheck("index config", false, `invalid index.yml at ${configPath}: ${message}. Next: fix the YAML and rerun \`qmd doctor\``);
        nextSteps.push(`Fix invalid YAML in ${configPath}, then rerun \`qmd doctor\`.`);
        return { config: null, valid: false };
    }
}
3196
/**
 * Report which environment overrides are set: a passing "none" line when
 * there are none, otherwise a warning with the count followed by one
 * "NAME=value: consequence" line per override.
 *
 * @param {object} activeModels - Forwarded to collectEnvironmentOverrides.
 * @param {object} [configModels={}] - Forwarded to collectEnvironmentOverrides.
 */
function checkEnvironmentOverrides(activeModels, configModels = {}) {
    const active = collectEnvironmentOverrides(activeModels, configModels);
    const total = active.length;
    if (total === 0) {
        doctorCheck("environment overrides", true, "none");
        return;
    }
    doctorCheck("environment overrides", false, `${total} set`);
    active.forEach(({ name, value, consequence }) => {
        console.log(` - ${name}=${value}: ${consequence}`);
    });
}
3207
/**
 * Report whether the embedding, generation and reranking models in effect are
 * the built-in defaults, and explain where any deviation comes from.
 *
 * For each role the first matching rule produces a note:
 *  1. the env var is set and is the active model -> noted only when that model
 *     differs from the default (an env var that merely repeats the default is
 *     not a "non-default model configuration");
 *  2. the index config names a non-default model;
 *  3. the env var is set but a different model is active.
 *
 * @param {{ embed: string, generate: string, rerank: string }} activeModels -
 *   Models currently in effect.
 * @param {object} [configModels={}] - Models from the index config, keyed by
 *   "embed" / "generate" / "rerank".
 */
function checkModelDefaults(activeModels, configModels = {}) {
    const checks = [
        { role: "embedding", key: "embed", active: activeModels.embed, configured: configModels.embed, defaultModel: DEFAULT_EMBED_MODEL, envName: "QMD_EMBED_MODEL", envValue: process.env.QMD_EMBED_MODEL },
        { role: "generation", key: "generate", active: activeModels.generate, configured: configModels.generate, defaultModel: DEFAULT_QUERY_MODEL, envName: "QMD_GENERATE_MODEL", envValue: process.env.QMD_GENERATE_MODEL },
        { role: "reranking", key: "rerank", active: activeModels.rerank, configured: configModels.rerank, defaultModel: DEFAULT_RERANK_MODEL, envName: "QMD_RERANK_MODEL", envValue: process.env.QMD_RERANK_MODEL },
    ];
    const notes = [];
    for (const check of checks) {
        const envValue = check.envValue?.trim();
        if (envValue && check.active === envValue) {
            // The env var decides the active model; stay quiet when it simply
            // names the default, and do not fall through to the other rules.
            if (check.active !== check.defaultModel) {
                notes.push(`${check.role}: env ${check.envName}=${check.active} (default ${check.defaultModel}; might be ok)`);
            }
        }
        else if (check.configured && check.configured !== check.defaultModel) {
            notes.push(`${check.role}: index ${check.configured} (default ${check.defaultModel}; might be ok)`);
        }
        else if (envValue && check.active !== envValue) {
            notes.push(`${check.role}: ${check.envName} is set to ${envValue} but index config uses ${check.active}`);
        }
    }
    if (notes.length === 0) {
        doctorCheck("model defaults", true, "using QMD codebase defaults");
        return;
    }
    doctorCheck("model defaults", false, `non-default model configuration: ${notes.join("; ")}`);
}
3232
/**
 * Check that every active model has a valid cached GGUF file and report the
 * result, grouping roles that share the same model.
 *
 * @param {{ embed: string, generate: string, rerank: string }} activeModels -
 *   Models currently in effect.
 * @param {string[]} nextSteps - Receives one recommendation when any model is
 *   missing or any cached file is invalid.
 */
function checkModelCache(activeModels, nextSteps) {
    // Group roles by model so a model shared by several roles is checked once.
    const rolesByModel = new Map();
    const assignments = [
        ["embedding", activeModels.embed],
        ["generation", activeModels.generate],
        ["reranking", activeModels.rerank],
    ];
    for (const [role, model] of assignments) {
        const known = rolesByModel.get(model) ?? [];
        rolesByModel.set(model, [...known, role]);
    }
    const cached = [];
    const missing = [];
    const invalid = [];
    for (const [model, roles] of rolesByModel) {
        const label = `${roles.join("+")}: ${model}`;
        const inspection = findCachedModelInspection(model);
        for (const detail of inspection.invalid) {
            invalid.push(`${label} (${detail})`);
        }
        (inspection.path ? cached : missing).push(label);
    }
    const hasInvalid = invalid.length > 0;
    const hasMissing = missing.length > 0;
    if (!hasMissing && !hasInvalid) {
        const phrase = cached.length === 1 ? "model is" : "models are";
        doctorCheck("model cache", true, `${cached.length} active ${phrase} downloaded and valid GGUF`);
        return;
    }
    const parts = [];
    if (hasInvalid) {
        parts.push(`invalid ${invalid.length}: ${invalid.join("; ")}`);
    }
    if (hasMissing) {
        parts.push(`missing ${missing.length}/${rolesByModel.size}: ${missing.join("; ")}`);
    }
    let next;
    let recommendation;
    if (hasInvalid) {
        next = "Next: run `qmd pull --refresh` (or remove the bad cached file)";
        recommendation = "Run `qmd pull --refresh` to replace invalid cached model files, or delete the listed file and rerun `qmd pull`.";
    }
    else {
        next = "Next: run `qmd pull`";
        recommendation = "Run `qmd pull` to download missing embedding/generation/reranking models before `qmd embed` or `qmd query`.";
    }
    doctorCheck("model cache", false, `${parts.join("; ")}. ${next}`);
    nextSteps.push(recommendation);
}
3276
/**
 * Re-embed a few randomly chosen stored chunks and compare each result with
 * the vector stored for it.
 *
 * Returns early (without starting an LLM session) when there are no active
 * documents, when the vectors_vec table is absent, or when no chunk matches
 * the given model and fingerprint.
 *
 * @param {object} db - Database handle exposing prepare(sql).get()/.all().
 * @param {string} model - Embedding model to sample and re-embed with.
 * @param {string} fingerprint - Embedding fingerprint to sample.
 * @param {number} [sampleSize=3] - Maximum number of chunks to test.
 * @returns {Promise<{ ok: boolean, details: string }>} Outcome and summary.
 */
async function checkEmbeddingVectorSamples(db, model, fingerprint, sampleSize = 3) {
    const { count: activeDocs } = db.prepare(`SELECT COUNT(*) AS count FROM documents WHERE active = 1`).get();
    if (activeDocs === 0) {
        return { ok: true, details: "no active documents indexed" };
    }
    const vecTableExists = db.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
    if (!vecTableExists) {
        return { ok: false, details: "no vector table to test; please run qmd embed again" };
    }
    const samples = db.prepare(`
        SELECT cv.hash, cv.seq, c.doc AS body, MIN(d.path) AS path
        FROM content_vectors cv
        JOIN documents d ON d.hash = cv.hash AND d.active = 1
        JOIN content c ON c.hash = cv.hash
        WHERE cv.model = ? AND cv.embed_fingerprint = ?
        GROUP BY cv.hash, cv.seq, c.doc
        ORDER BY random()
        LIMIT ?
        `).all(model, fingerprint, sampleSize);
    if (samples.length === 0) {
        return { ok: false, details: "no current embedded chunks to test; please run qmd embed again" };
    }
    // Largest cosine distance still treated as "reproduces the stored vector".
    const threshold = 0.0001;
    // Returns a problem description for one sample, or null when it matches.
    const findMismatch = async (sample, session) => {
        const hashSeq = `${sample.hash}_${sample.seq}`;
        const chunks = await chunkDocumentByTokens(sample.body, undefined, undefined, undefined, sample.path, undefined, session.signal);
        const chunk = chunks[sample.seq];
        if (!chunk) {
            return `${shortHashSeq(hashSeq)}: chunk no longer exists`;
        }
        const title = extractTitle(sample.body, sample.path);
        const result = await session.embed(formatDocForEmbedding(chunk.text, title, model), { model });
        if (!result) {
            return `${shortHashSeq(hashSeq)}: embedding failed`;
        }
        const stored = db.prepare(`SELECT embedding FROM vectors_vec WHERE hash_seq = ?`).get(hashSeq);
        if (!stored) {
            return `${shortHashSeq(hashSeq)}: stored vector missing`;
        }
        const distance = cosineDistance(result.embedding, decodeStoredEmbedding(stored.embedding));
        if (distance > threshold) {
            return `${shortHashSeq(hashSeq)}: stored vector distance ${distance.toFixed(6)}`;
        }
        return null;
    };
    const mismatches = [];
    await withLLMSession(async (session) => {
        // Samples are processed one at a time, in order.
        for (const sample of samples) {
            const problem = await findMismatch(sample, session);
            if (problem !== null) {
                mismatches.push(problem);
            }
        }
    }, { maxDuration: 10 * 60 * 1000, name: "doctorEmbeddingVectorSample" });
    if (mismatches.length === 0) {
        const noun = samples.length === 1 ? "chunk" : "chunks";
        return {
            ok: true,
            details: `${samples.length} sampled ${noun} reproduce stored vectors`,
        };
    }
    return {
        ok: false,
        details: `${mismatches.length}/${samples.length} sampled chunks differ from stored vectors (${mismatches[0]}). Rebuild with \`qmd embed --force\``,
    };
}
3337
/**
 * Tell whether any of the given directories contains a file named exactly
 * `libraryBaseName` or starting with `libraryBaseName.` (e.g. "libfoo.so.12").
 *
 * Empty entries, directories that do not exist and directories that cannot
 * be listed are skipped.
 *
 * @param {string} libraryBaseName - Library file name without version suffix.
 * @param {Iterable<string>} dirs - Directories to look in.
 * @returns {boolean} True as soon as a matching entry is found.
 */
function hasLibraryInDirs(libraryBaseName, dirs) {
    const versionedPrefix = `${libraryBaseName}.`;
    const matches = (entry) => entry === libraryBaseName || entry.startsWith(versionedPrefix);
    for (const dir of dirs) {
        if (dir && existsSync(dir)) {
            try {
                if (readdirSync(dir).some(matches)) {
                    return true;
                }
            }
            catch { /* ignore unreadable system library dirs */ }
        }
    }
    return false;
}
3351
/**
 * On Linux, detect the situation where NVIDIA driver libraries are present
 * but CUDA user-space libraries are not.
 *
 * Directories searched: entries of LD_LIBRARY_PATH and CUDA_PATH, the lib64
 * and targets/x86_64-linux/lib directories under CUDA_PATH, a fixed list of
 * system locations, and the same two subdirectories of every /usr/local/cuda-*
 * installation.
 *
 * @returns {string | null} A message naming the missing libraries, or null
 *   when not on Linux, when no driver library is found, or when nothing is
 *   missing.
 */
function linuxCudaRuntimeDiagnostic() {
    if (process.platform !== "linux") {
        return null;
    }
    const { LD_LIBRARY_PATH: loaderPath, CUDA_PATH: cudaPath } = process.env;
    const dirs = new Set();
    const addCudaRoot = (root) => {
        dirs.add(pathJoin(root, "lib64"));
        dirs.add(pathJoin(root, "targets", "x86_64-linux", "lib"));
    };
    [loaderPath, cudaPath]
        .flatMap(value => (value ?? "").split(":"))
        .filter(part => part)
        .forEach(part => dirs.add(part));
    if (cudaPath) {
        addCudaRoot(cudaPath);
    }
    dirs.add("/usr/lib");
    dirs.add("/usr/lib64");
    dirs.add("/usr/lib/x86_64-linux-gnu");
    dirs.add("/usr/local/cuda/lib64");
    dirs.add("/usr/local/cuda/targets/x86_64-linux/lib");
    try {
        for (const entry of readdirSync("/usr/local")) {
            if (entry.toLowerCase().startsWith("cuda-")) {
                addCudaRoot(pathJoin("/usr/local", entry));
            }
        }
    }
    catch { /* /usr/local may not be readable in restricted environments */ }
    const searchDirs = [...dirs];
    const isVisible = (library) => hasLibraryInDirs(library, searchDirs);
    // Without a driver library there is nothing CUDA-specific to diagnose.
    if (!isVisible("libcuda.so") && !isVisible("libnvidia-ml.so")) {
        return null;
    }
    const missing = [];
    for (const [library, label] of [
        ["libcudart.so", "CUDA runtime"],
        ["libcublas.so", "cuBLAS"],
        ["libcublasLt.so", "cuBLASLt"],
    ]) {
        if (!isVisible(library)) {
            missing.push(label);
        }
    }
    if (missing.length === 0) {
        return null;
    }
    return `NVIDIA driver libraries are visible, but CUDA user-space libraries are missing from loader paths (${missing.join(", ")})`;
}
3394
/**
 * Report the configured device mode and, unless disabled through
 * QMD_DOCTOR_DEVICE_PROBE, probe the llama.cpp backend for GPU/CPU details.
 *
 * @param {string[]} nextSteps - Receives recommendations for a skipped probe,
 *   disabled offloading, active Metal residency sets, CPU-only operation or a
 *   failed probe.
 * @returns {Promise<void>}
 */
async function runDoctorDeviceChecks(nextSteps) {
    doctorCheck("device mode", true, configuredGpuModeLabel());
    const probeSetting = (process.env.QMD_DOCTOR_DEVICE_PROBE ?? "").trim().toLowerCase();
    if (["0", "false", "off", "no", "skip"].includes(probeSetting)) {
        doctorCheck("device probe", false, "skipped by QMD_DOCTOR_DEVICE_PROBE=0. Next: unset it and rerun `qmd doctor` to verify GPU/CPU acceleration");
        nextSteps.push("Unset `QMD_DOCTOR_DEVICE_PROBE` and rerun `qmd doctor` when you want to verify llama.cpp device acceleration.");
        return;
    }
    const crashHint = "Probing native llama backend now. If qmd crashes here, rerun with `QMD_FORCE_CPU=1 qmd doctor` (or `QMD_DOCTOR_DEVICE_PROBE=0 qmd doctor` to skip this probe).";
    // The hint is only shown on a TTY, written without a newline so it can be
    // blanked out again once the probe returns or fails.
    const eraseCrashHint = () => {
        if (process.stdout.isTTY) {
            process.stdout.write(`\r${" ".repeat(crashHint.length)}\r`);
        }
    };
    if (process.stdout.isTTY) {
        process.stdout.write(`${c.dim}${crashHint}${c.reset}`);
    }
    try {
        const device = await getDefaultLlamaCpp().getDeviceInfo({ allowBuild: false });
        eraseCrashHint();
        if (!device.gpu) {
            const cudaDiagnostic = linuxCudaRuntimeDiagnostic();
            const diagnosticSuffix = cudaDiagnostic ? ` ${cudaDiagnostic}.` : "";
            doctorCheck("device probe", false, `running on CPU (${device.cpuCores} math cores).${diagnosticSuffix} Next: install/configure Metal, CUDA, or Vulkan for faster embeddings, or set QMD_FORCE_CPU=1 to make CPU mode explicit`);
            nextSteps.push(cudaDiagnostic
                ? `${cudaDiagnostic}; install CUDA runtime/cuBLAS libraries or add their directory to LD_LIBRARY_PATH, then rerun \`qmd doctor\`.`
                : "Vector operations are running on CPU; install/configure Metal, CUDA, or Vulkan if embedding/query performance is too slow.");
            return;
        }
        const onDarwinMetal = device.gpu === "metal" && process.platform === "darwin";
        const gpuLabel = onDarwinMetal ? "metal (macOS Metal backend)" : String(device.gpu);
        const parts = [`GPU ${gpuLabel}`, `offloading ${device.gpuOffloading ? "enabled" : "disabled"}`];
        if (device.gpuDevices.length > 0) {
            parts.push(`devices: ${summarizeDeviceNames(device.gpuDevices)}`);
        }
        if (device.vram) {
            parts.push(`VRAM ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
        }
        parts.push(`${device.cpuCores} CPU math cores`);
        const summary = parts.join("; ");
        if (device.gpuOffloading) {
            doctorCheck("device probe", device.gpuOffloading, summary);
        }
        else {
            doctorCheck("device probe", device.gpuOffloading, `${summary}. Next: check QMD_LLAMA_GPU and llama.cpp backend support`);
            nextSteps.push("GPU was detected but offloading is disabled; check `QMD_LLAMA_GPU=metal|cuda|vulkan` and rerun `qmd doctor`.");
        }
        // Surface the darwin residency-set mitigation. libggml-metal's
        // process-static device dtor asserts on un-expired residency sets
        // during libc exit() (ggml-org/llama.cpp#22593), producing a giant
        // stderr backtrace after correct output. The bin/qmd launcher exports
        // GGML_METAL_NO_RESIDENCY=1 on darwin to skip the assertion entirely.
        // No measurable perf cost on short-lived CLI calls.
        if (onDarwinMetal) {
            if (isDarwinMetalMitigationActive()) {
                doctorCheck("darwin metal residency", true, "GGML_METAL_NO_RESIDENCY=1 set by launcher; clean process exit (avoids ggml-org/llama.cpp#22593). Opt back in with QMD_METAL_KEEP_RESIDENCY=1 if you run long-lived qmd processes.");
            }
            else {
                doctorCheck("darwin metal residency", false, "residency sets active (QMD_METAL_KEEP_RESIDENCY=1 or launcher bypassed); llama-using commands may dump a libggml-metal backtrace at exit (ggml-org/llama.cpp#22593) even when output succeeded.");
                nextSteps.push("Unset `QMD_METAL_KEEP_RESIDENCY` so the launcher can disable Metal residency sets; without this, query/vsearch/embed dump a stack trace at exit even on success.");
            }
        }
    }
    catch (error) {
        eraseCrashHint();
        const rawMessage = error instanceof Error ? error.message : String(error);
        const message = sanitizeDiagnosticMessage(rawMessage);
        doctorCheck("device probe", false, `probe failed: ${message}. Next: run with QMD_FORCE_CPU=1 to bypass GPU probing, or set QMD_LLAMA_GPU=metal|cuda|vulkan and retry`);
        nextSteps.push("GPU probe failed; try `QMD_FORCE_CPU=1 qmd doctor` to confirm CPU fallback, then fix GPU drivers/backend if acceleration is expected.");
    }
}
3465
/**
 * Run every `qmd doctor` diagnostic in order and print the results, followed
 * by a de-duplicated list of recommended next steps.
 *
 * Checks, in order: SQLite runtime, better-sqlite3 package version,
 * sqlite-vec, CJK tokenizer, index config, environment overrides, model
 * defaults, model cache, device probe, legacy fingerprint adoption, embedding
 * freshness, embedding fingerprints and a stored-vector sample.
 *
 * The database is closed in a `finally` block so that it is released even
 * when a step that is not individually guarded throws.
 *
 * @returns {Promise<void>}
 */
async function showDoctor() {
    const storeInstance = getStore();
    try {
        const db = storeInstance.db;
        const pkg = readPackageJson();
        const activeModels = resolveModelsForCli();
        const embedModel = activeModels.embed;
        const fingerprint = getEmbeddingFingerprint(embedModel);
        const nextSteps = [];
        console.log(`${c.bold}QMD Doctor${c.reset}\n`);
        console.log(`Index: ${getDbPath()}`);
        console.log(`Runtime: ${isBun ? "bun:sqlite" : "better-sqlite3"}`);
        try {
            const row = db.prepare(`SELECT sqlite_version() AS version`).get();
            doctorCheck("SQLite runtime", true, row.version);
        }
        catch (error) {
            doctorCheck("SQLite runtime", false, error instanceof Error ? error.message : String(error));
        }
        const betterSqliteVersion = pkg.dependencies?.["better-sqlite3"] ?? pkg.devDependencies?.["better-sqlite3"] ?? "not declared";
        doctorCheck("better-sqlite3 package", true, String(betterSqliteVersion));
        try {
            const row = db.prepare(`SELECT vec_version() AS version`).get();
            doctorCheck("sqlite-vec", true, row.version);
        }
        catch (error) {
            doctorCheck("sqlite-vec", false, error instanceof Error ? error.message : String(error));
        }
        // CJK tokenizer check (qmd-ja: Vaporetto WASM)
        try {
            await initializeVaporettoTokenizer();
            const modelPath = resolveVaporettoModelPath();
            // Accept both separators so only the file name is shown on Windows too.
            const modelName = modelPath.split(/[\\/]/).pop() || modelPath;
            let ftsLabel = "not indexed yet";
            try {
                const row = db.prepare(`SELECT value FROM store_config WHERE key = 'fts_cjk_normalized_version'`).get();
                if (row?.value === FTS_CJK_NORMALIZED_VERSION) {
                    ftsLabel = `v${FTS_CJK_NORMALIZED_VERSION} (current)`;
                }
                else if (row?.value) {
                    ftsLabel = `v${row.value} -> v${FTS_CJK_NORMALIZED_VERSION} (stale, run qmd-ja update)`;
                }
            }
            catch { /* ignore */ }
            doctorCheck("CJK tokenizer", true, `Vaporetto WASM — model: ${modelName}, FTS index: ${ftsLabel}`);
        }
        catch (error) {
            doctorCheck("CJK tokenizer", false, `Vaporetto WASM failed: ${error instanceof Error ? error.message : String(error)}`);
        }
        const configCheck = checkDoctorIndexConfig(nextSteps);
        const configModels = configCheck.config?.models ?? {};
        checkEnvironmentOverrides(activeModels, configModels);
        checkModelDefaults(activeModels, configModels);
        checkModelCache(activeModels, nextSteps);
        await runDoctorDeviceChecks(nextSteps);
        try {
            const adoption = await maybeAdoptLegacyEmbeddingFingerprint(storeInstance, embedModel);
            if (adoption.checked || adoption.adopted > 0) {
                doctorCheck("legacy fingerprint adoption", adoption.adopted > 0, adoption.adopted > 0 ? `adopted ${adoption.adopted} legacy chunks; ${adoption.reason}` : adoption.reason);
            }
        }
        catch (error) {
            doctorCheck("legacy fingerprint adoption", false, error instanceof Error ? error.message : String(error));
        }
        try {
            const pending = getHashesNeedingEmbedding(db, undefined, embedModel);
            doctorCheck("embedding freshness", pending === 0, pending === 0 ? "all active documents match current fingerprint" : `${formatCount(pending)} active documents need embeddings. Next: \`qmd embed\``);
            if (pending > 0) {
                nextSteps.push(`Run \`qmd embed\` to generate ${formatCount(pending)} missing/stale document embeddings.`);
            }
        }
        catch (error) {
            doctorCheck("embedding freshness", false, error instanceof Error ? error.message : String(error));
        }
        try {
            // One row per (model, fingerprint) pair present in content_vectors.
            const rows = db.prepare(`
                SELECT model, embed_fingerprint AS fingerprint, COUNT(DISTINCT hash) AS docs, COUNT(*) AS chunks
                FROM content_vectors
                GROUP BY model, embed_fingerprint
                ORDER BY chunks DESC, model, embed_fingerprint
                `).all();
            const uniqueFingerprints = new Set(rows.map(row => row.fingerprint));
            const offCurrent = rows.filter(row => row.model === embedModel && row.fingerprint !== fingerprint);
            const ok = rows.length === 0 || (uniqueFingerprints.size === 1 && rows[0]?.fingerprint === fingerprint && offCurrent.length === 0);
            const currentDocs = rows
                .filter(row => row.model === embedModel && row.fingerprint === fingerprint)
                .reduce((sum, row) => sum + row.docs, 0);
            const otherDocs = rows.reduce((sum, row) => sum + row.docs, 0) - currentDocs;
            const groups = rows.map(row => {
                const label = row.fingerprint === fingerprint ? "current" : (row.fingerprint || "legacy");
                return `${shortModelName(row.model)}:${label} ${formatCount(row.docs)} docs/${formatCount(row.chunks)} chunks`;
            }).join("; ");
            // Rows with an empty fingerprint are left out of the "named" check.
            const namedFingerprintRows = rows.filter(row => row.fingerprint);
            const namedFingerprints = [...new Set(namedFingerprintRows.map(row => row.fingerprint))];
            if (namedFingerprints.length > 1) {
                const namedGroups = namedFingerprintRows
                    .map(row => `${row.fingerprint}${row.fingerprint === fingerprint ? " (current)" : ""}: ${shortModelName(row.model)} ${formatCount(row.docs)} docs/${formatCount(row.chunks)} chunks`)
                    .join("; ");
                doctorCheck("mixed named embedding fingerprints", false, `content_vectors contains ${namedFingerprints.length} named fingerprints: ${namedGroups}. Next: \`qmd embed\` or \`qmd embed --force\``);
                nextSteps.push("Run `qmd embed` to converge mixed named embedding fingerprints; use `qmd embed --force` if old named fingerprints or vector sample mismatches remain.");
            }
            const details = rows.length === 0
                ? `no vectors yet; current fingerprint ${fingerprint}`
                : ok
                    ? `${formatCount(currentDocs)} docs on current fingerprint (${fingerprint})`
                    : `${formatCount(currentDocs)} docs current, ${formatCount(otherDocs)} docs legacy/stale. ${groups}. Next: \`qmd embed\``;
            doctorCheck("embedding fingerprints", ok, details);
            if (!ok) {
                nextSteps.push("Run `qmd embed` to migrate active documents to the current embedding fingerprint; use `qmd embed --force` if vector samples still fail afterward.");
            }
        }
        catch (error) {
            doctorCheck("embedding fingerprints", false, error instanceof Error ? error.message : String(error));
        }
        try {
            const vectorSample = await checkEmbeddingVectorSamples(db, embedModel, fingerprint);
            doctorCheck("embedding vector sample", vectorSample.ok, vectorSample.details);
            if (!vectorSample.ok) {
                nextSteps.push("Run `qmd embed --force` to rebuild existing vectors that no longer reproduce under the current embedding pipeline.");
            }
        }
        catch (error) {
            const message = error instanceof Error ? sanitizeDiagnosticMessage(error.message) : sanitizeDiagnosticMessage(String(error));
            doctorCheck("embedding vector sample", false, `${message}; rebuild with \`qmd embed --force\``);
            nextSteps.push("Run `qmd embed --force` to rebuild existing vectors, then rerun `qmd doctor`.");
        }
        const steps = normalizedDoctorNextSteps(nextSteps);
        if (steps.length > 0) {
            console.log(`\n${c.bold}Recommended next step${steps.length === 1 ? "" : "s"}${c.reset}`);
            for (const step of steps) {
                console.log(` - ${step}`);
            }
        }
    }
    finally {
        // Release the database even when an unguarded check above throws.
        closeDb();
    }
}
3599
/**
 * Write a one-line pointer to `qmd doctor` on stderr.
 *
 * Used after error output so the user knows where to look for diagnostics.
 */
function printDoctorHint() {
    const hint = "If qmd still behaves unexpectedly, run 'qmd doctor' for diagnostics.";
    console.error(hint);
}
3602
/**
 * Report a failure on stderr, print the `qmd doctor` hint, and terminate.
 *
 * @param {unknown} error - Thrown value; the `message` of an Error is printed,
 *   anything else is stringified.
 * @param {number} [code=1] - Process exit code.
 */
function exitWithError(error, code = 1) {
    let message;
    if (error instanceof Error) {
        message = error.message;
    }
    else {
        message = String(error);
    }
    console.error(message);
    printDoctorHint();
    process.exit(code);
}
3607
/**
 * Load and parse the `package.json` located two directories above this module.
 *
 * @returns {any} The parsed manifest object.
 * @throws If the file cannot be read or is not valid JSON.
 */
function readPackageJson() {
    const modulePath = fileURLToPath(import.meta.url);
    const manifestPath = resolve(dirname(modulePath), "..", "..", "package.json");
    const manifestText = readFileSync(manifestPath, "utf-8");
    return JSON.parse(manifestText);
}
3612
/**
 * Print the CLI version as `qmd-ja <version>`, followed by the short git
 * commit hash in parentheses when one can be determined.
 *
 * The commit lookup is best-effort: if git is missing, or the module does not
 * live inside a git work tree, only the package version is printed.
 *
 * @returns {Promise<void>}
 */
async function showVersion() {
    const scriptDir = dirname(fileURLToPath(import.meta.url));
    const pkg = readPackageJson();
    let commit = "";
    try {
        // Pass the directory as a separate argv element instead of splicing it
        // into a shell command line: install paths containing spaces or shell
        // metacharacters would otherwise break (or be interpreted by) the shell.
        const { execFileSync } = await import("node:child_process");
        commit = execFileSync("git", ["-C", scriptDir, "rev-parse", "--short", "HEAD"], {
            encoding: "utf-8",
            stdio: ["pipe", "pipe", "pipe"],
        }).trim();
    }
    catch {
        // Not a git repo or git not available
    }
    const versionStr = commit ? `${pkg.version} (${commit})` : pkg.version;
    console.log(`qmd-ja ${versionStr}`);
}
3625
// Main CLI - only run if this is the main module.
//
// The module counts as the entrypoint when process.argv[1] is this file, ends
// in /qmd.ts or /qmd.js, or resolves (through symlinks) to this file.
const __filename = fileURLToPath(import.meta.url);
const argv1 = process.argv[1];
const isMain = argv1 === __filename
    || argv1?.endsWith("/qmd.ts")
    || argv1?.endsWith("/qmd.js")
    || (argv1 != null && (() => {
        // realpathSync throws when argv1 does not name an existing path; that
        // must not crash a plain import of this module, so treat it as
        // "not the entrypoint".
        try {
            return realpathSync(argv1) === __filename;
        }
        catch {
            return false;
        }
    })());
if (isMain) {
    // Flip to production mode only when this module is executed as the CLI
    // entrypoint, not when imported for its exports. Tests must set INDEX_PATH
    // or use createStore() with an explicit path.
    enableProductionMode();
    const cli = parseCLI();
    // Global flags are handled before command dispatch.
    if (cli.values.version) {
        await showVersion();
        process.exit(0);
    }
    if (cli.values.skill) {
        showSkill();
        process.exit(0);
    }
    if (cli.values.help && cli.command === "skill") {
        console.log("Usage: qmd skill <show|install> [options]");
        console.log("");
        console.log("Commands:");
        console.log(" show Print the QMD skill");
        console.log(" install Install QMD skill into ./.agents/skills/qmd");
        console.log("");
        console.log("Options:");
        console.log(" --global Install into ~/.agents/skills/qmd");
        console.log(" --yes Also create the .claude/skills/qmd symlink");
        console.log(" -f, --force Replace existing install or symlink");
        process.exit(0);
    }
    if (!cli.command || cli.values.help) {
        showHelp();
        // Explicit --help is a success; a missing command is a usage error.
        process.exit(cli.values.help ? 0 : 1);
    }
    switch (cli.command) {
        case "context": {
            const subcommand = cli.args[0];
            if (!subcommand) {
                console.error("Usage: qmd context <add|list|rm>");
                console.error("");
                console.error("Commands:");
                console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)");
                console.error(" qmd context add / \"text\" - Add global context to all collections");
                console.error(" qmd context list - List all contexts");
                console.error(" qmd context rm <path> - Remove context");
                process.exit(1);
            }
            switch (subcommand) {
                case "add": {
                    if (cli.args.length < 2) {
                        console.error("Usage: qmd context add [path] \"text\"");
                        console.error("");
                        console.error("Examples:");
                        console.error(" qmd context add \"Context for current directory\"");
                        console.error(" qmd context add . \"Context for current directory\"");
                        console.error(" qmd context add /subfolder \"Context for subfolder\"");
                        console.error(" qmd context add / \"Global context for all collections\"");
                        console.error("");
                        console.error(" Using virtual paths:");
                        console.error(" qmd context add qmd://journals/ \"Context for entire journals collection\"");
                        console.error(" qmd context add qmd://journals/2024 \"Context for 2024 journals\"");
                        process.exit(1);
                    }
                    let pathArg;
                    let contextText;
                    // Check if first arg looks like a path or if it's the context text
                    const firstArg = cli.args[1] || '';
                    const secondArg = cli.args[2];
                    if (secondArg) {
                        // Two args: path + context
                        pathArg = firstArg;
                        contextText = cli.args.slice(2).join(" ");
                    }
                    else {
                        // One arg: context only (use current directory)
                        pathArg = undefined;
                        contextText = firstArg;
                    }
                    await contextAdd(pathArg, contextText);
                    break;
                }
                case "list": {
                    contextList();
                    break;
                }
                case "rm":
                case "remove": {
                    if (cli.args.length < 2 || !cli.args[1]) {
                        console.error("Usage: qmd context rm <path>");
                        console.error("Examples:");
                        console.error(" qmd context rm /");
                        console.error(" qmd context rm qmd://journals/2024");
                        process.exit(1);
                    }
                    contextRemove(cli.args[1]);
                    break;
                }
                default:
                    console.error(`Unknown subcommand: ${subcommand}`);
                    console.error("Available: add, list, rm");
                    process.exit(1);
            }
            break;
        }
        case "get": {
            if (!cli.args[0]) {
                console.error("Usage: qmd get <filepath>[:from[:count]] [--from <line>] [-l <lines>] [--no-line-numbers] [--full-path]");
                process.exit(1);
            }
            const fromLine = cli.values.from ? parseInt(cli.values.from, 10) : undefined;
            const maxLines = cli.values.l ? parseInt(cli.values.l, 10) : undefined;
            // Line numbers default ON for get; opt out with --no-line-numbers.
            const getLineNumbers = !cli.values["no-line-numbers"];
            getDocument(cli.args[0], fromLine, maxLines, getLineNumbers, !!cli.values["full-path"]);
            break;
        }
        case "multi-get": {
            if (!cli.args[0]) {
                console.error("Usage: qmd multi-get <pattern> [-l <lines>] [--max-bytes <bytes>] [--no-line-numbers] [--full-path] [--format json|csv|md|xml|files]");
                console.error(" pattern: glob (e.g., 'journals/2025-05*.md') or comma-separated list");
                process.exit(1);
            }
            const maxLinesMulti = cli.values.l ? parseInt(cli.values.l, 10) : undefined;
            const maxBytes = cli.values["max-bytes"] ? parseInt(cli.values["max-bytes"], 10) : DEFAULT_MULTI_GET_MAX_BYTES;
            // Line numbers default ON for multi-get; opt out with --no-line-numbers.
            const mgLineNumbers = !cli.values["no-line-numbers"];
            multiGet(cli.args[0], maxLinesMulti, maxBytes, cli.opts.format, mgLineNumbers, !!cli.values["full-path"]);
            break;
        }
        case "ls": {
            listFiles(cli.args[0]);
            break;
        }
        case "collection": {
            const subcommand = cli.args[0];
            switch (subcommand) {
                case "list": {
                    collectionList();
                    break;
                }
                case "add": {
                    const pwd = cli.args[1] || getPwd();
                    const resolvedPwd = pwd === '.' ? getPwd() : getRealPath(resolve(pwd));
                    const globPattern = cli.values.mask || DEFAULT_GLOB;
                    const name = cli.values.name;
                    await collectionAdd(resolvedPwd, globPattern, name);
                    break;
                }
                case "remove":
                case "rm": {
                    if (!cli.args[1]) {
                        console.error("Usage: qmd collection remove <name>");
                        console.error(" Use 'qmd collection list' to see available collections");
                        process.exit(1);
                    }
                    collectionRemove(cli.args[1]);
                    break;
                }
                case "rename":
                case "mv": {
                    if (!cli.args[1] || !cli.args[2]) {
                        console.error("Usage: qmd collection rename <old-name> <new-name>");
                        console.error(" Use 'qmd collection list' to see available collections");
                        process.exit(1);
                    }
                    collectionRename(cli.args[1], cli.args[2]);
                    break;
                }
                case "set-update":
                case "update-cmd": {
                    const name = cli.args[1];
                    // Everything after the name is the command; none means "clear".
                    const cmd = cli.args.slice(2).join(' ') || null;
                    if (!name) {
                        console.error("Usage: qmd collection update-cmd <name> [command]");
                        console.error(" Set the command to run before indexing (e.g., 'git pull')");
                        console.error(" Omit command to clear it");
                        process.exit(1);
                    }
                    const { updateCollectionSettings, getCollection } = await import("../collections.js");
                    const col = getCollection(name);
                    if (!col) {
                        console.error(`Collection not found: ${name}`);
                        process.exit(1);
                    }
                    updateCollectionSettings(name, { update: cmd });
                    if (cmd) {
                        console.log(`✓ Set update command for '${name}': ${cmd}`);
                    }
                    else {
                        console.log(`✓ Cleared update command for '${name}'`);
                    }
                    break;
                }
                case "include":
                case "exclude": {
                    const name = cli.args[1];
                    if (!name) {
                        console.error(`Usage: qmd collection ${subcommand} <name>`);
                        console.error(` ${subcommand === 'include' ? 'Include' : 'Exclude'} collection in default queries`);
                        process.exit(1);
                    }
                    const { updateCollectionSettings, getCollection } = await import("../collections.js");
                    const col = getCollection(name);
                    if (!col) {
                        console.error(`Collection not found: ${name}`);
                        process.exit(1);
                    }
                    const include = subcommand === 'include';
                    updateCollectionSettings(name, { includeByDefault: include });
                    console.log(`✓ Collection '${name}' ${include ? 'included in' : 'excluded from'} default queries`);
                    break;
                }
                case "show":
                case "info": {
                    const name = cli.args[1];
                    if (!name) {
                        console.error("Usage: qmd collection show <name>");
                        process.exit(1);
                    }
                    const { getCollection } = await import("../collections.js");
                    const col = getCollection(name);
                    if (!col) {
                        console.error(`Collection not found: ${name}`);
                        process.exit(1);
                    }
                    console.log(`Collection: ${name}`);
                    console.log(` Path: ${col.path}`);
                    console.log(` Pattern: ${col.pattern}`);
                    console.log(` Include: ${col.includeByDefault !== false ? 'yes (default)' : 'no'}`);
                    if (col.update) {
                        console.log(` Update: ${col.update}`);
                    }
                    if (col.context) {
                        const ctxCount = Object.keys(col.context).length;
                        console.log(` Contexts: ${ctxCount}`);
                    }
                    break;
                }
                case "help":
                case undefined: {
                    console.log("Usage: qmd collection <command> [options]");
                    console.log("");
                    console.log("Commands:");
                    console.log(" list List all collections");
                    console.log(" add <path> [--name NAME] Add a collection");
                    console.log(" remove <name> Remove a collection");
                    console.log(" rename <old> <new> Rename a collection");
                    console.log(" show <name> Show collection details");
                    console.log(" update-cmd <name> [cmd] Set pre-update command (e.g., 'git pull')");
                    console.log(" include <name> Include in default queries");
                    console.log(" exclude <name> Exclude from default queries");
                    console.log("");
                    console.log("Examples:");
                    console.log(" qmd collection add ~/notes --name notes");
                    console.log(" qmd collection update-cmd brain 'git pull'");
                    console.log(" qmd collection exclude archive");
                    process.exit(0);
                }
                default:
                    console.error(`Unknown subcommand: ${subcommand}`);
                    console.error("Run 'qmd collection help' for usage");
                    printDoctorHint();
                    process.exit(1);
            }
            break;
        }
        case "init":
            try {
                initLocalIndex();
            }
            catch (error) {
                exitWithError(error);
            }
            break;
        case "status":
            await showStatus();
            break;
        case "doctor":
            await showDoctor();
            break;
        case "update":
            await updateCollections();
            break;
        case "embed":
            try {
                const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
                const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
                const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
                // Validate -c against configured collections before dispatching, so a
                // typo errors with "Collection not found: X" instead of silently
                // reporting success because no pending docs match a nonexistent name.
                // embed operates on a single collection; only the first value is used.
                const embedValidatedCollections = resolveCollectionFilter(cli.opts.collection, false);
                const embedCollection = embedValidatedCollections[0];
                await vectorIndex(resolveEmbedModelForCli(), !!cli.values.force, {
                    maxDocsPerBatch,
                    // The CLI option is in megabytes; the indexer option is in bytes.
                    maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
                    chunkStrategy: embedChunkStrategy,
                    collection: embedCollection,
                });
            }
            catch (error) {
                exitWithError(error);
            }
            break;
        case "pull": {
            const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh);
            const activeModels = resolveModelsForCli();
            const models = [
                activeModels.embed,
                activeModels.generate,
                activeModels.rerank,
            ];
            console.log(`${c.bold}Pulling models${c.reset}`);
            const results = await pullModels(models, {
                refresh,
                cacheDir: DEFAULT_MODEL_CACHE_DIR,
            });
            for (const result of results) {
                const size = formatBytes(result.sizeBytes);
                const note = result.refreshed ? "refreshed" : "cached/checked";
                console.log(`- ${result.model} -> ${result.path} (${size}, ${note})`);
            }
            break;
        }
        case "search":
            if (!cli.query) {
                console.error("Usage: qmd search [options] <query>");
                process.exit(1);
            }
            await initializeKuromojiTokenizer(); // kuromoji: normalize CJK query
            search(cli.query, cli.opts);
            break;
        case "vsearch":
        case "vector-search": // undocumented alias
            if (!cli.query) {
                console.error("Usage: qmd vsearch [options] <query>");
                process.exit(1);
            }
            // Default min-score for vector search is 0.3
            if (!cli.values["min-score"]) {
                cli.opts.minScore = 0.3;
            }
            await vectorSearch(cli.query, cli.opts);
            break;
        case "query":
        case "deep-search": // undocumented alias
            if (!cli.query) {
                console.error("Usage: qmd query [options] <query>");
                process.exit(1);
            }
            await initializeKuromojiTokenizer(); // kuromoji: normalize CJK query
            await querySearch(cli.query, cli.opts);
            break;
        case "bench": {
            const fixturePath = cli.args[0];
            if (!fixturePath) {
                console.error("Usage: qmd bench <fixture.json> [--json] [-c collection]");
                console.error("");
                console.error("Run search quality benchmarks against a fixture file.");
                console.error("See src/bench/fixtures/example.json for the fixture format.");
                process.exit(1);
            }
            const { runBenchmark } = await import("../bench/bench.js");
            const benchCollection = cli.opts.collection;
            await runBenchmark(fixturePath, {
                json: !!cli.values.json,
                collection: Array.isArray(benchCollection) ? benchCollection[0] : benchCollection,
                dbPath: getDbPath(),
                configPath: configExists() ? getConfigPath() : undefined,
            });
            break;
        }
        case "mcp": {
            const sub = cli.args[0]; // stop | status | undefined
            // Cache dir for PID/log files — same dir as the index
            const cacheDir = process.env.XDG_CACHE_HOME
                ? resolve(process.env.XDG_CACHE_HOME, "qmd")
                : resolve(homedir(), ".cache", "qmd");
            const pidPath = resolve(cacheDir, "mcp.pid");
            // Read the PID file and return the PID only if it is a positive
            // integer. process.kill() with pid 0 or a negative pid addresses a
            // whole process group, so a corrupt file must never reach it.
            const readPidFile = () => {
                const parsed = Number.parseInt(readFileSync(pidPath, "utf-8").trim(), 10);
                return Number.isInteger(parsed) && parsed > 0 ? parsed : null;
            };
            // Subcommands take priority over flags
            if (sub === "stop") {
                if (!existsSync(pidPath)) {
                    console.log("Not running (no PID file).");
                    process.exit(0);
                }
                const pid = readPidFile();
                let stopped = false;
                if (pid !== null) {
                    try {
                        process.kill(pid, 0); // alive?
                        process.kill(pid, "SIGTERM");
                        stopped = true;
                    }
                    catch {
                        // Process could not be signalled — treat the PID file as stale.
                    }
                }
                unlinkSync(pidPath);
                if (stopped) {
                    console.log(`Stopped QMD MCP server (PID ${pid}).`);
                }
                else {
                    console.log("Cleaned up stale PID file (server was not running).");
                }
                process.exit(0);
            }
            if (cli.values.http) {
                const port = Number(cli.values.port) || 8181;
                if (cli.values.daemon) {
                    // Guard: check if already running
                    if (existsSync(pidPath)) {
                        const existingPid = readPidFile();
                        if (existingPid !== null) {
                            try {
                                process.kill(existingPid, 0); // alive?
                                console.error(`Already running (PID ${existingPid}). Run 'qmd mcp stop' first.`);
                                process.exit(1);
                            }
                            catch {
                                // Stale PID file — continue
                            }
                        }
                    }
                    mkdirSync(cacheDir, { recursive: true });
                    const logPath = resolve(cacheDir, "mcp.log");
                    const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
                    const selfPath = fileURLToPath(import.meta.url);
                    const indexArgs = cli.values.index ? ["--index", String(cli.values.index)] : [];
                    // When running from TypeScript sources, re-launch through the tsx loader.
                    const spawnArgs = selfPath.endsWith(".ts")
                        ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)]
                        : [selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)];
                    const child = nodeSpawn(process.execPath, spawnArgs, {
                        stdio: ["ignore", logFd, logFd],
                        detached: true,
                    });
                    child.unref();
                    closeSync(logFd); // parent's copy; child inherited the fd
                    // child.pid is undefined when the process failed to spawn;
                    // do not record a bogus PID or report success in that case.
                    if (child.pid === undefined) {
                        console.error("Failed to start QMD MCP server (could not spawn process).");
                        process.exit(1);
                    }
                    writeFileSync(pidPath, String(child.pid));
                    console.log(`Started on http://localhost:${port}/mcp (PID ${child.pid})`);
                    console.log(`Logs: ${logPath}`);
                    process.exit(0);
                }
                // Foreground HTTP mode — remove top-level cursor handlers so the
                // async cleanup handlers in startMcpHttpServer actually run.
                process.removeAllListeners("SIGTERM");
                process.removeAllListeners("SIGINT");
                const { startMcpHttpServer } = await import("../mcp/server.js");
                try {
                    await startMcpHttpServer(port, { dbPath: getDbPath() });
                }
                catch (e) {
                    if (typeof e === "object" && e !== null && "code" in e && e.code === "EADDRINUSE") {
                        console.error(`Port ${port} already in use. Try a different port with --port.`);
                        process.exit(1);
                    }
                    throw e;
                }
            }
            else {
                // Default: stdio transport
                const { startMcpServer } = await import("../mcp/server.js");
                await startMcpServer({ dbPath: getDbPath() });
            }
            break;
        }
        case "skills": {
            try {
                if (cli.values.help || cli.args[0] === "help") {
                    showSkillsHelp();
                }
                else {
                    runSkillsCommand(cli.args, Boolean(cli.values.json), Boolean(cli.values.full), Boolean(cli.values.all));
                }
            }
            catch (error) {
                // In --json mode failures are reported as a JSON payload rather than on stderr.
                if (cli.values.json) {
                    outputSkillsJson({ success: false, error: error instanceof Error ? error.message : String(error) });
                }
                else {
                    console.error(error instanceof Error ? error.message : String(error));
                }
                process.exit(1);
            }
            break;
        }
        case "skill": {
            const subcommand = cli.args[0];
            switch (subcommand) {
                case "show": {
                    showSkill();
                    break;
                }
                case "install": {
                    try {
                        await installSkill(Boolean(cli.values.global), Boolean(cli.values.force), Boolean(cli.values.yes));
                    }
                    catch (error) {
                        exitWithError(error);
                    }
                    break;
                }
                case "help":
                case undefined: {
                    console.log("Usage: qmd skill <show|install> [options]");
                    console.log("");
                    console.log("Commands:");
                    console.log(" show Print the QMD skill");
                    console.log(" install Install QMD skill into ./.agents/skills/qmd");
                    console.log("");
                    console.log("Options:");
                    console.log(" --global Install into ~/.agents/skills/qmd");
                    console.log(" --yes Also create the .claude/skills/qmd symlink");
                    console.log(" -f, --force Replace existing install or symlink");
                    process.exit(0);
                }
                default:
                    console.error(`Unknown subcommand: ${subcommand}`);
                    console.error("Run 'qmd skill help' for usage");
                    printDoctorHint();
                    process.exit(1);
            }
            break;
        }
        case "cleanup": {
            const db = getDb();
            // 1. Clear llm_cache
            const cacheCount = deleteLLMCache(db);
            console.log(`${c.green}✓${c.reset} Cleared ${cacheCount} cached API responses`);
            // 2. Remove orphaned vectors
            const orphanedVecs = cleanupOrphanedVectors(db);
            if (orphanedVecs > 0) {
                console.log(`${c.green}✓${c.reset} Removed ${orphanedVecs} orphaned embedding chunks`);
            }
            else {
                console.log(`${c.dim}No orphaned embeddings to remove${c.reset}`);
            }
            // 3. Remove inactive documents
            const inactiveDocs = deleteInactiveDocuments(db);
            if (inactiveDocs > 0) {
                console.log(`${c.green}✓${c.reset} Removed ${inactiveDocs} inactive document records`);
            }
            // 4. Vacuum to reclaim space
            vacuumDatabase(db);
            console.log(`${c.green}✓${c.reset} Database vacuumed`);
            closeDb();
            break;
        }
        default:
            console.error(`Unknown command: ${cli.command}`);
            console.error("Run 'qmd --help' for usage.");
            printDoctorHint();
            process.exit(1);
    }
    // The mcp command is the only one that skips the shared post-command step.
    if (cli.command !== "mcp") {
        await finishSuccessfulCliCommand({
            command: cli.command,
            format: cli.opts.format,
        });
    }
} // end if (main module)