@tobilu/qmd 1.0.0 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/qmd.ts DELETED
@@ -1,2568 +0,0 @@
1
- import { openDatabase } from "./db.js";
2
- import type { Database } from "./db.js";
3
- import fastGlob from "fast-glob";
4
- import { execSync, spawn as nodeSpawn } from "child_process";
5
- import { fileURLToPath } from "url";
6
- import { dirname, join as pathJoin } from "path";
7
- import { parseArgs } from "util";
8
- import { readFileSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync } from "fs";
9
- import {
10
- getPwd,
11
- getRealPath,
12
- homedir,
13
- resolve,
14
- enableProductionMode,
15
- searchFTS,
16
- extractSnippet,
17
- getContextForFile,
18
- getContextForPath,
19
- listCollections,
20
- removeCollection,
21
- renameCollection,
22
- findSimilarFiles,
23
- findDocumentByDocid,
24
- isDocid,
25
- matchFilesByGlob,
26
- getHashesNeedingEmbedding,
27
- getHashesForEmbedding,
28
- clearAllEmbeddings,
29
- insertEmbedding,
30
- getStatus,
31
- hashContent,
32
- extractTitle,
33
- formatDocForEmbedding,
34
- chunkDocumentByTokens,
35
- clearCache,
36
- getCacheKey,
37
- getCachedResult,
38
- setCachedResult,
39
- getIndexHealth,
40
- parseVirtualPath,
41
- buildVirtualPath,
42
- isVirtualPath,
43
- resolveVirtualPath,
44
- toVirtualPath,
45
- insertContent,
46
- insertDocument,
47
- findActiveDocument,
48
- updateDocumentTitle,
49
- updateDocument,
50
- deactivateDocument,
51
- getActiveDocumentPaths,
52
- cleanupOrphanedContent,
53
- deleteLLMCache,
54
- deleteInactiveDocuments,
55
- cleanupOrphanedVectors,
56
- vacuumDatabase,
57
- getCollectionsWithoutContext,
58
- getTopLevelPathsWithoutContext,
59
- handelize,
60
- hybridQuery,
61
- vectorSearchQuery,
62
- addLineNumbers,
63
- type ExpandedQuery,
64
- DEFAULT_EMBED_MODEL,
65
- DEFAULT_RERANK_MODEL,
66
- DEFAULT_GLOB,
67
- DEFAULT_MULTI_GET_MAX_BYTES,
68
- createStore,
69
- getDefaultDbPath,
70
- } from "./store.js";
71
- import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "./llm.js";
72
- import {
73
- formatSearchResults,
74
- formatDocuments,
75
- escapeXml,
76
- escapeCSV,
77
- type OutputFormat,
78
- } from "./formatter.js";
79
- import {
80
- getCollection as getCollectionFromYaml,
81
- listCollections as yamlListCollections,
82
- addContext as yamlAddContext,
83
- removeContext as yamlRemoveContext,
84
- setGlobalContext,
85
- listAllContexts,
86
- setConfigIndexName,
87
- } from "./collections.js";
88
-
89
// Production mode permits falling back to the default database path.
// Test code is expected to set INDEX_PATH or call createStore() with an explicit path.
enableProductionMode();

// =============================================================================
// Store/DB lifecycle (store.ts keeps no legacy singletons; the handle lives here)
// =============================================================================

/** Store handle shared by the commands in this module; created on first use. */
let store: ReturnType<typeof createStore> | null = null;
/** Database path selected through setIndexName(), when one was given. */
let storeDbPathOverride: string | undefined;

/**
 * Returns the shared store, creating it with the current path override
 * the first time it is requested.
 */
function getStore(): ReturnType<typeof createStore> {
  return store ? store : (store = createStore(storeDbPathOverride));
}
106
-
107
/** Returns the database handle owned by the shared store. */
function getDb(): Database {
  const { db } = getStore();
  return db;
}
110
-
111
/** Closes the shared store if one is open and forgets the handle. */
function closeDb(): void {
  if (!store) return;
  store.close();
  store = null;
}
117
-
118
/**
 * Resolves the database path in priority order: the open store's path,
 * then the explicit override, then the default path.
 */
function getDbPath(): string {
  const openPath = store?.dbPath;
  if (openPath != null) return openPath;
  return storeDbPathOverride ?? getDefaultDbPath();
}
121
-
122
/**
 * Selects a named index (or clears the selection when `name` is empty/null)
 * and closes any open handle so the next access opens the newly selected index.
 */
function setIndexName(name: string | null): void {
  if (name) {
    storeDbPathOverride = getDefaultDbPath(name);
  } else {
    storeDbPathOverride = undefined;
  }
  closeDb();
}
127
-
128
/**
 * Ensures the vector table exists with the given dimensionality.
 * The `_db` argument is ignored: the call is always made on the active store.
 */
function ensureVecTable(_db: Database, dimensions: number): void {
  const activeStore = getStore();
  activeStore.ensureVecTable(dimensions);
}
132
-
133
// ANSI colours are emitted only when NO_COLOR is unset/empty and stdout is a TTY.
const useColor = !process.env.NO_COLOR && process.stdout.isTTY;

// Colour table: each entry is the SGR escape sequence, or "" when colour is disabled.
const c = (() => {
  const sgr = (code: number): string => (useColor ? `\x1b[${code}m` : "");
  return {
    reset: sgr(0),
    dim: sgr(2),
    bold: sgr(1),
    cyan: sgr(36),
    yellow: sgr(33),
    green: sgr(32),
    magenta: sgr(35),
    blue: sgr(34),
  };
})();
145
-
146
// Show/hide the terminal cursor by writing the escape sequences to stderr.
const cursor = {
  hide(): void {
    process.stderr.write('\x1b[?25l');
  },
  show(): void {
    process.stderr.write('\x1b[?25h');
  },
};

// On interruption or termination, restore the cursor and exit with 128 + signal number.
for (const [signal, exitCode] of [['SIGINT', 130], ['SIGTERM', 143]] as const) {
  process.on(signal, () => {
    cursor.show();
    process.exit(exitCode);
  });
}
155
-
156
// Terminal progress reporting through the OSC 9;4 escape sequence, written to stderr.
const progress = (() => {
  // Wraps a state payload in the OSC 9;4 envelope (ESC ] 9;4;<payload> BEL).
  const emit = (payload: string): void => {
    process.stderr.write(`\x1b]9;4;${payload}\x07`);
  };
  return {
    /** Report a percentage, rounded to the nearest integer. */
    set(percent: number) {
      emit(`1;${Math.round(percent)}`);
    },
    /** Remove the progress indicator. */
    clear() {
      emit("0");
    },
    /** Switch to the indeterminate state. */
    indeterminate() {
      emit("3");
    },
    /** Switch to the error state. */
    error() {
      emit("2");
    },
  };
})();
171
-
172
/**
 * Formats a duration in seconds as a short human-readable ETA:
 * "42s", "3m 7s" or "2h 15m".
 *
 * The total is rounded to whole seconds before it is split into units, so the
 * seconds component can never round up to 60 (e.g. 119.7 is "2m 0s", not "1m 60s").
 *
 * @param seconds Remaining time in seconds (may be fractional).
 * @returns The formatted duration.
 */
function formatETA(seconds: number): string {
  const total = Math.round(seconds);
  if (total < 60) return `${total}s`;
  if (total < 3600) return `${Math.floor(total / 60)}m ${total % 60}s`;
  return `${Math.floor(total / 3600)}h ${Math.floor((total % 3600) / 60)}m`;
}
178
-
179
-
180
/**
 * Prints index-health hints to stderr: a warning or tip when documents still
 * need embeddings, and a tip when the index has not been updated for 14+ days.
 */
function checkIndexHealth(db: Database): void {
  const health = getIndexHealth(db);
  const pending = health.needsEmbedding;

  if (pending > 0) {
    // Share of documents without embeddings decides between warning and tip.
    const pct = Math.round((pending / health.totalDocs) * 100);
    const message = pct >= 10
      ? `${c.yellow}Warning: ${pending} documents (${pct}%) need embeddings. Run 'qmd embed' for better results.${c.reset}\n`
      : `${c.dim}Tip: ${pending} documents need embeddings. Run 'qmd embed' to index them.${c.reset}\n`;
    process.stderr.write(message);
  }

  // Two weeks without an update is treated as stale.
  const stale = health.daysStale;
  if (stale !== null && stale >= 14) {
    process.stderr.write(`${c.dim}Tip: Index last updated ${stale} days ago. Run 'qmd update' to refresh.${c.reset}\n`);
  }
}
199
-
200
/**
 * Computes a short display path for a document that is not already in use.
 *
 * The path is taken relative to the collection (prefixed with the collection's
 * directory name). The shortest candidate is "parent/filename" (or just the
 * filename when that is all there is); parent segments are prepended one at a
 * time until the candidate is absent from `existingPaths`.
 *
 * @param filepath Path of the document.
 * @param collectionPath Root directory of the collection (trailing slash optional).
 * @param existingPaths Display paths that are already taken.
 * @returns The first free candidate, or `filepath` itself when all are taken.
 */
function computeDisplayPath(
  filepath: string,
  collectionPath: string,
  existingPaths: Set<string>
): string {
  const root = collectionPath.replace(/\/$/, '');
  const rootName = root.split('/').pop() || '';

  // Inside the collection: "<collection dir name>/<relative path>"; otherwise the path as given.
  const scoped = filepath.startsWith(root + '/')
    ? rootName + filepath.slice(root.length)
    : filepath;

  const segments = scoped.split('/').filter(segment => segment.length > 0);

  // Grow the suffix from min(2, n) segments up to the whole path.
  let take = Math.min(2, segments.length);
  while (take <= segments.length) {
    const candidate = segments.slice(segments.length - take).join('/');
    if (!existingPaths.has(candidate)) {
      return candidate;
    }
    take += 1;
  }

  // Every suffix is taken: fall back to the full path.
  return filepath;
}
235
-
236
-
237
/**
 * Formats the time elapsed since `date` as a compact relative age:
 * "12s ago", "5m ago", "3h ago" or "9d ago".
 *
 * Timestamps in the future (clock skew, files with a future mtime) are
 * clamped to "0s ago" instead of producing a negative age.
 *
 * @param date The moment to measure from.
 * @returns The relative age, using the largest unit that is not zero.
 */
function formatTimeAgo(date: Date): string {
  const seconds = Math.max(0, Math.floor((Date.now() - date.getTime()) / 1000));
  if (seconds < 60) return `${seconds}s ago`;
  const minutes = Math.floor(seconds / 60);
  if (minutes < 60) return `${minutes}m ago`;
  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours}h ago`;
  const days = Math.floor(hours / 24);
  return `${days}d ago`;
}
247
-
248
/**
 * Formats a byte count using binary multiples: whole bytes below 1024,
 * otherwise one decimal place in KB, MB or GB (GB is the largest unit used).
 */
function formatBytes(bytes: number): string {
  const KIB = 1024;
  const MIB = KIB * KIB;
  const GIB = MIB * KIB;

  if (bytes < KIB) return `${bytes} B`;

  // [exclusive upper bound, divisor, unit label]
  const tiers: Array<[number, number, string]> = [
    [MIB, KIB, "KB"],
    [GIB, MIB, "MB"],
  ];
  for (const [limit, divisor, unit] of tiers) {
    if (bytes < limit) return `${(bytes / divisor).toFixed(1)} ${unit}`;
  }
  return `${(bytes / GIB).toFixed(1)} GB`;
}
254
-
255
/**
 * Prints the `qmd status` report to stdout: index path and size, MCP daemon
 * liveness, document/vector counts, per-collection details with their
 * contexts, usage examples, and device (GPU/CPU) information.
 *
 * The database handle is closed when the report is done, including when an
 * error interrupts it.
 */
async function showStatus(): Promise<void> {
  const dbPath = getDbPath();
  const db = getDb();

  try {
    // Collections are defined in YAML; no duplicate cleanup needed.

    // Index size (best effort: stays 0 when the file cannot be stat'ed)
    let indexSize = 0;
    try {
      indexSize = statSync(dbPath).size;
    } catch { }

    // Collections info (from YAML + database stats)
    const collections = listCollections(db);

    // Overall stats
    const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get() as { count: number };
    const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get() as { count: number };
    const needsEmbedding = getHashesNeedingEmbedding(db);

    // Most recent update across all collections
    const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null };

    console.log(`${c.bold}QMD Status${c.reset}\n`);
    console.log(`Index: ${dbPath}`);
    console.log(`Size:  ${formatBytes(indexSize)}`);

    // MCP daemon status (check PID file liveness)
    const mcpCacheDir = process.env.XDG_CACHE_HOME
      ? resolve(process.env.XDG_CACHE_HOME, "qmd")
      : resolve(homedir(), ".cache", "qmd");
    const mcpPidPath = resolve(mcpCacheDir, "mcp.pid");
    if (existsSync(mcpPidPath)) {
      let mcpPid = Number.NaN;
      try {
        mcpPid = Number.parseInt(readFileSync(mcpPidPath, "utf-8").trim(), 10);
      } catch {
        // PID file vanished or is unreadable; handled as stale below
      }

      // Only positive PIDs name a single process: 0 and negative values address
      // process groups and must never be probed.
      let running = false;
      if (Number.isInteger(mcpPid) && mcpPid > 0) {
        try {
          process.kill(mcpPid, 0); // signal 0 = existence check, nothing is delivered
          running = true;
        } catch (err) {
          // EPERM: the process exists but belongs to another user, so it is alive.
          running = (err as { code?: string } | null)?.code === "EPERM";
        }
      }

      if (running) {
        console.log(`MCP:   ${c.green}running${c.reset} (PID ${mcpPid})`);
      } else {
        // Stale PID file: clean up silently, and never fail status over it
        try {
          unlinkSync(mcpPidPath);
        } catch { }
      }
    }
    console.log("");

    console.log(`${c.bold}Documents${c.reset}`);
    console.log(`  Total:    ${totalDocs.count} files indexed`);
    console.log(`  Vectors:  ${vectorCount.count} embedded`);
    if (needsEmbedding > 0) {
      console.log(`  ${c.yellow}Pending:  ${needsEmbedding} need embedding${c.reset} (run 'qmd embed')`);
    }
    if (mostRecent.latest) {
      const lastUpdate = new Date(mostRecent.latest);
      console.log(`  Updated:  ${formatTimeAgo(lastUpdate)}`);
    }

    // Get all contexts grouped by collection (from YAML)
    const contextsByCollection = new Map<string, { path_prefix: string; context: string }[]>();
    for (const ctx of listAllContexts()) {
      let bucket = contextsByCollection.get(ctx.collection);
      if (!bucket) {
        bucket = [];
        contextsByCollection.set(ctx.collection, bucket);
      }
      bucket.push({ path_prefix: ctx.path, context: ctx.context });
    }

    if (collections.length > 0) {
      console.log(`\n${c.bold}Collections${c.reset}`);
      for (const col of collections) {
        const lastMod = col.last_modified ? formatTimeAgo(new Date(col.last_modified)) : "never";
        const contexts = contextsByCollection.get(col.name) || [];

        console.log(`  ${c.cyan}${col.name}${c.reset} ${c.dim}(qmd://${col.name}/)${c.reset}`);
        console.log(`    ${c.dim}Pattern:${c.reset}  ${col.glob_pattern}`);
        console.log(`    ${c.dim}Files:${c.reset}    ${col.active_count} (updated ${lastMod})`);

        if (contexts.length > 0) {
          console.log(`    ${c.dim}Contexts:${c.reset} ${contexts.length}`);
          for (const ctx of contexts) {
            // Handle both empty string and '/' as root context
            const pathDisplay = (ctx.path_prefix === '' || ctx.path_prefix === '/') ? '/' : `/${ctx.path_prefix}`;
            // Long context text is cut to 60 characters including the ellipsis
            const contextPreview = ctx.context.length > 60
              ? ctx.context.substring(0, 57) + '...'
              : ctx.context;
            console.log(`      ${c.dim}${pathDisplay}:${c.reset} ${contextPreview}`);
          }
        }
      }

      // Show examples of virtual paths, using the first collection as the sample
      const sample = collections[0];
      console.log(`\n${c.bold}Examples${c.reset}`);
      console.log(`  ${c.dim}# List files in a collection${c.reset}`);
      if (sample) {
        console.log(`  qmd ls ${sample.name}`);
      }
      console.log(`  ${c.dim}# Get a document${c.reset}`);
      if (sample) {
        console.log(`  qmd get qmd://${sample.name}/path/to/file.md`);
      }
      console.log(`  ${c.dim}# Search within a collection${c.reset}`);
      if (sample) {
        console.log(`  qmd search "query" -c ${sample.name}`);
      }
    } else {
      console.log(`\n${c.dim}No collections. Run 'qmd collection add .' to index markdown files.${c.reset}`);
    }

    // Device / GPU info
    try {
      const llm = getDefaultLlamaCpp();
      const device = await llm.getDeviceInfo();
      console.log(`\n${c.bold}Device${c.reset}`);
      if (device.gpu) {
        console.log(`  GPU:      ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
        if (device.gpuDevices.length > 0) {
          // Deduplicate and count GPUs
          const counts = new Map<string, number>();
          for (const name of device.gpuDevices) {
            counts.set(name, (counts.get(name) || 0) + 1);
          }
          const deviceStr = Array.from(counts.entries())
            .map(([name, count]) => count > 1 ? `${count}× ${name}` : name)
            .join(', ');
          console.log(`  Devices:  ${deviceStr}`);
        }
        if (device.vram) {
          console.log(`  VRAM:     ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
        }
      } else {
        console.log(`  GPU:      ${c.yellow}none${c.reset} (running on CPU — models will be slow)`);
        console.log(`  ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
      }
      console.log(`  CPU:      ${device.cpuCores} math cores`);
    } catch {
      // Don't fail status if LLM init fails
    }
  } finally {
    closeDb();
  }
}
400
-
401
/**
 * Re-indexes every configured collection.
 *
 * For each collection, an optional `update` shell command from the YAML
 * config is run first (via `bash -c` in the collection directory); a failing
 * command terminates the process with its exit code. A summary of pending
 * embeddings is printed once at the end.
 */
async function updateCollections(): Promise<void> {
  const db = getDb();
  // Collections are defined in YAML; no duplicate cleanup needed.

  // Cached results are cleared on every update
  clearCache(db);

  const collections = listCollections(db);
  if (collections.length === 0) {
    console.log(`${c.dim}No collections found. Run 'qmd collection add .' to index markdown files.${c.reset}`);
    closeDb();
    return;
  }

  // The db stays open here: indexFiles reuses it and closes it when done
  console.log(`${c.bold}Updating ${collections.length} collection(s)...${c.reset}\n`);

  // Trims command output and indents every line by four spaces.
  const indentBlock = (text: string): string =>
    text.trim().split('\n').map(line => `    ${line}`).join('\n');

  for (const [index, col] of collections.entries()) {
    if (!col) continue;
    console.log(`${c.cyan}[${index + 1}/${collections.length}]${c.reset} ${c.bold}${col.name}${c.reset} ${c.dim}(${col.glob_pattern})${c.reset}`);

    // Execute custom update command if specified in YAML
    const updateCommand = getCollectionFromYaml(col.name)?.update;
    if (updateCommand) {
      console.log(`${c.dim}    Running update command: ${updateCommand}${c.reset}`);
      try {
        const child = nodeSpawn("bash", ["-c", updateCommand], {
          cwd: col.pwd,
          stdio: ["ignore", "pipe", "pipe"],
        });

        // Buffer both streams and settle once the child has closed them.
        const [stdoutText, stderrText, exitCode] = await new Promise<[string, string, number]>((done, fail) => {
          let collectedOut = "";
          let collectedErr = "";
          child.stdout?.on("data", (chunk: Buffer) => { collectedOut += chunk.toString(); });
          child.stderr?.on("data", (chunk: Buffer) => { collectedErr += chunk.toString(); });
          child.on("error", fail);
          // A null code (killed by a signal) is reported as failure code 1
          child.on("close", (code) => done([collectedOut, collectedErr, code ?? 1]));
        });

        for (const text of [stdoutText, stderrText]) {
          if (text.trim()) {
            console.log(indentBlock(text));
          }
        }

        if (exitCode !== 0) {
          console.log(`${c.yellow}✗ Update command failed with exit code ${exitCode}${c.reset}`);
          process.exit(exitCode);
        }
      } catch (err) {
        console.log(`${c.yellow}✗ Update command failed: ${err}${c.reset}`);
        process.exit(1);
      }
    }

    await indexFiles(col.pwd, col.glob_pattern, col.name, true);
    console.log("");
  }

  // Check if any documents need embedding (show once at end)
  const pendingEmbeddings = getHashesNeedingEmbedding(getDb());
  closeDb();

  console.log(`${c.green}✓ All collections updated.${c.reset}`);
  if (pendingEmbeddings > 0) {
    console.log(`\nRun 'qmd embed' to update embeddings (${pendingEmbeddings} unique hashes need vectors)`);
  }
}
474
-
475
/**
 * Finds the collection that contains the given filesystem path.
 *
 * The path is canonicalised with getRealPath() and compared against every
 * collection from the YAML config; when several collection roots contain it,
 * the one with the longest root path wins. The `db` argument is not used.
 *
 * @returns `{ collectionName, relativePath }` (relativePath is '' for the
 *          collection root itself), or null when no collection contains the path.
 */
function detectCollectionFromPath(db: Database, fsPath: string): { collectionName: string; relativePath: string } | null {
  const realPath = getRealPath(fsPath);

  // Keep the first collection with the longest root that contains realPath.
  let winner: { name: string; path: string } | null = null;
  for (const candidate of yamlListCollections()) {
    const isRoot = realPath === candidate.path;
    const isInside = realPath.startsWith(candidate.path + '/');
    if (!isInside && !isRoot) continue;
    if (winner === null || candidate.path.length > winner.path.length) {
      winner = { name: candidate.name, path: candidate.path };
    }
  }

  if (winner === null) return null;

  // Strip "<root>/" to get the collection-relative path; the root itself maps to ''.
  const relativePath = realPath === winner.path
    ? ''
    : realPath.slice(winner.path.length + 1);

  return { collectionName: winner.name, relativePath };
}
510
-
511
/**
 * Attaches a context description to a path.
 *
 * `pathArg` may be "/" (global context for all collections), a virtual path
 * (qmd://collection/path), or a filesystem path (absolute, `~/`-relative or
 * relative to the working directory; defaults to the working directory).
 * Exits the process with code 1 when the target cannot be resolved.
 */
async function contextAdd(pathArg: string | undefined, contextText: string): Promise<void> {
  const db = getDb();

  // Shared tail of every success path: headline, echoed context, close the db.
  const finish = (headline: string): void => {
    console.log(headline);
    console.log(`${c.dim}Context: ${contextText}${c.reset}`);
    closeDb();
  };

  // "/" means the global context (applies to all collections)
  if (pathArg === '/') {
    setGlobalContext(contextText);
    finish(`${c.green}✓${c.reset} Set global context`);
    return;
  }

  // Resolve the target; the current directory is the default
  let fsPath = pathArg || '.';
  if (fsPath === '.' || fsPath === './') {
    fsPath = getPwd();
  } else if (fsPath.startsWith('~/')) {
    fsPath = homedir() + fsPath.slice(1);
  } else if (!fsPath.startsWith('/') && !fsPath.startsWith('qmd://')) {
    fsPath = resolve(getPwd(), fsPath);
  }

  // Virtual paths (qmd://collection/path) name the collection directly
  if (isVirtualPath(fsPath)) {
    const parsed = parseVirtualPath(fsPath);
    if (!parsed) {
      console.error(`${c.yellow}Invalid virtual path: ${fsPath}${c.reset}`);
      process.exit(1);
    }

    if (!getCollectionFromYaml(parsed.collectionName)) {
      console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`);
      process.exit(1);
    }

    yamlAddContext(parsed.collectionName, parsed.path, contextText);

    const shownPath = parsed.path
      ? `qmd://${parsed.collectionName}/${parsed.path}`
      : `qmd://${parsed.collectionName}/ (collection root)`;
    finish(`${c.green}✓${c.reset} Added context for: ${shownPath}`);
    return;
  }

  // Filesystem path: work out which collection it belongs to
  const detected = detectCollectionFromPath(db, fsPath);
  if (!detected) {
    console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
    console.error(`${c.dim}Run 'qmd status' to see indexed collections${c.reset}`);
    process.exit(1);
  }

  yamlAddContext(detected.collectionName, detected.relativePath, contextText);

  const shownPath = detected.relativePath
    ? `qmd://${detected.collectionName}/${detected.relativePath}`
    : `qmd://${detected.collectionName}/`;
  finish(`${c.green}✓${c.reset} Added context for: ${shownPath}`);
}
573
-
574
/**
 * Prints every configured context, grouped under its collection name.
 * A new collection heading is printed whenever the collection changes
 * between consecutive entries.
 */
function contextList(): void {
  // Opens the store so that the closeDb() calls below release it.
  getDb();

  const contexts = listAllContexts();
  if (contexts.length === 0) {
    console.log(`${c.dim}No contexts configured. Use 'qmd context add' to add one.${c.reset}`);
    closeDb();
    return;
  }

  console.log(`\n${c.bold}Configured Contexts${c.reset}\n`);

  let currentHeading = '';
  for (const entry of contexts) {
    if (entry.collection !== currentHeading) {
      currentHeading = entry.collection;
      console.log(`${c.cyan}${entry.collection}${c.reset}`);
    }

    // An empty path denotes the collection root
    const shownPath = entry.path ? `  ${entry.path}` : '  / (root)';
    console.log(`${shownPath}`);
    console.log(`    ${c.dim}${entry.context}${c.reset}`);
  }

  closeDb();
}
601
-
602
/**
 * Removes the context attached to a path.
 *
 * `pathArg` may be "/" (global context), a virtual path
 * (qmd://collection/path), or a filesystem path. Exits the process with
 * code 1 when the target cannot be resolved or has no context.
 */
function contextRemove(pathArg: string): void {
  // Global context
  if (pathArg === '/') {
    setGlobalContext(undefined);
    console.log(`${c.green}✓${c.reset} Removed global context`);
    return;
  }

  // Virtual path: the collection is named explicitly
  if (isVirtualPath(pathArg)) {
    const parsed = parseVirtualPath(pathArg);
    if (!parsed) {
      console.error(`${c.yellow}Invalid virtual path: ${pathArg}${c.reset}`);
      process.exit(1);
    }

    const collection = getCollectionFromYaml(parsed.collectionName);
    if (!collection) {
      console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`);
      process.exit(1);
    }

    if (!yamlRemoveContext(collection.name, parsed.path)) {
      console.error(`${c.yellow}No context found for: ${pathArg}${c.reset}`);
      process.exit(1);
    }

    console.log(`${c.green}✓${c.reset} Removed context for: ${pathArg}`);
    return;
  }

  // Filesystem path: normalise to an absolute path first
  let fsPath = pathArg;
  if (fsPath === '.' || fsPath === './') {
    fsPath = getPwd();
  } else if (fsPath.startsWith('~/')) {
    fsPath = homedir() + fsPath.slice(1);
  } else if (!fsPath.startsWith('/')) {
    fsPath = resolve(getPwd(), fsPath);
  }

  const detected = detectCollectionFromPath(getDb(), fsPath);
  closeDb();

  if (!detected) {
    console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
    process.exit(1);
  }

  const target = `qmd://${detected.collectionName}/${detected.relativePath}`;
  if (!yamlRemoveContext(detected.collectionName, detected.relativePath)) {
    console.error(`${c.yellow}No context found for: ${target}${c.reset}`);
    process.exit(1);
  }

  console.log(`${c.green}✓${c.reset} Removed context for: ${target}`);
}
663
-
664
/**
 * Reports where context is missing: collections without any context, and
 * top-level paths without context inside collections that do have some.
 * Each finding is printed together with a ready-to-run `qmd context add` suggestion.
 */
function contextCheck(): void {
  const db = getDb();

  const uncovered = getCollectionsWithoutContext(db);
  const allCollections = listCollections(db);
  const nothingUncovered = uncovered.length === 0;

  if (nothingUncovered && allCollections.length > 0) {
    console.log(`\n${c.green}✓${c.reset} ${c.bold}All collections have context configured${c.reset}\n`);
  }

  if (!nothingUncovered) {
    console.log(`\n${c.yellow}Collections without any context:${c.reset}\n`);

    for (const coll of uncovered) {
      console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(${coll.doc_count} documents)${c.reset}`);
      console.log(`  ${c.dim}Suggestion: qmd context add qmd://${coll.name}/ "Description of ${coll.name}"${c.reset}\n`);
    }
  }

  // Only collections that already have some context are checked path by path
  const uncoveredNames = new Set(uncovered.map(entry => entry.name));
  const covered = allCollections.filter(entry => entry && !uncoveredNames.has(entry.name));

  let printedPathHeading = false;

  for (const coll of covered) {
    if (!coll) continue;
    const missingPaths = getTopLevelPathsWithoutContext(db, coll.name);
    if (missingPaths.length === 0) continue;

    // The section heading is printed once, before the first finding
    if (!printedPathHeading) {
      console.log(`${c.yellow}Top-level directories without context:${c.reset}\n`);
      printedPathHeading = true;
    }

    console.log(`${c.cyan}${coll.name}${c.reset}`);
    for (const path of missingPaths) {
      console.log(`  ${path}`);
      console.log(`    ${c.dim}Suggestion: qmd context add qmd://${coll.name}/${path} "Description of ${path}"${c.reset}`);
    }
    console.log('');
  }

  if (nothingUncovered && !printedPathHeading) {
    console.log(`${c.dim}All collections and major paths have context configured.${c.reset}`);
    console.log(`${c.dim}Use 'qmd context list' to see all configured contexts.${c.reset}\n`);
  }

  closeDb();
}
720
-
721
/**
 * Prints a single document to stdout, preceded by its folder context when
 * one is configured.
 *
 * `filename` is resolved in this order:
 *   1. docid (resolved to the path stored for that document),
 *   2. virtual path (qmd://collection/path),
 *   3. "collection/path" when the first segment names an indexed collection,
 *   4. filesystem path inside a known collection,
 *   5. any active document whose path ends with the given filename.
 * Steps 2 and 3 fall back to a path-suffix match within the collection.
 * Suffix matches escape LIKE wildcards, so `_` and `%` in a name match only
 * themselves.
 *
 * Exits the process with code 1 when nothing matches.
 *
 * @param filename    Document reference; a trailing ":N" selects a start line
 *                    when `fromLine` is not given.
 * @param fromLine    1-based first line to print (values below 1 are treated as 1).
 * @param maxLines    Maximum number of lines to print.
 * @param lineNumbers Prefix the output with line numbers.
 */
function getDocument(filename: string, fromLine?: number, maxLines?: number, lineNumbers?: boolean): void {
  type DocRow = { collectionName: string; path: string; body: string };

  const db = getDb();

  // Escapes LIKE metacharacters so user input only ever matches literally.
  const escapeLike = (value: string): string => value.replace(/[\\%_]/g, ch => `\\${ch}`);

  // Exact lookup of an active document by collection and path.
  const findExact = (collection: string, path: string): DocRow | null => {
    const row = db.prepare(`
      SELECT d.collection as collectionName, d.path, content.doc as body
      FROM documents d
      JOIN content ON content.hash = d.hash
      WHERE d.collection = ? AND d.path = ? AND d.active = 1
    `).get(collection, path) as DocRow | null | undefined;
    return row ?? null;
  };

  // First active document whose path ends with `suffix`, optionally within one collection.
  const findBySuffix = (suffix: string, collection?: string): DocRow | null => {
    const pattern = `%${escapeLike(suffix)}`;
    const row = (collection !== undefined
      ? db.prepare(`
          SELECT d.collection as collectionName, d.path, content.doc as body
          FROM documents d
          JOIN content ON content.hash = d.hash
          WHERE d.collection = ? AND d.path LIKE ? ESCAPE '\\' AND d.active = 1
          LIMIT 1
        `).get(collection, pattern)
      : db.prepare(`
          SELECT d.collection as collectionName, d.path, content.doc as body
          FROM documents d
          JOIN content ON content.hash = d.hash
          WHERE d.path LIKE ? ESCAPE '\\' AND d.active = 1
          LIMIT 1
        `).get(pattern)) as DocRow | null | undefined;
    return row ?? null;
  };

  // Parse :linenum suffix from filename (e.g., "file.md:100")
  let inputPath = filename;
  const colonMatch = inputPath.match(/:(\d+)$/);
  if (colonMatch && !fromLine) {
    const matched = colonMatch[1];
    if (matched) {
      fromLine = parseInt(matched, 10);
      inputPath = inputPath.slice(0, -colonMatch[0].length);
    }
  }

  // Handle docid lookup (#abc123, abc123, "#abc123", "abc123", etc.)
  if (isDocid(inputPath)) {
    const docidMatch = findDocumentByDocid(db, inputPath);
    if (!docidMatch) {
      console.error(`Document not found: ${filename}`);
      closeDb();
      process.exit(1);
    }
    inputPath = docidMatch.filepath;
  }

  let doc: DocRow | null = null;

  if (isVirtualPath(inputPath)) {
    // Virtual path (qmd://collection/path)
    const parsed = parseVirtualPath(inputPath);
    if (!parsed) {
      console.error(`Invalid virtual path: ${inputPath}`);
      closeDb();
      process.exit(1);
    }

    doc = findExact(parsed.collectionName, parsed.path)
      ?? findBySuffix(parsed.path, parsed.collectionName);
  } else {
    // Relative input (no / or ~ prefix): try "collection/path" before the filesystem
    if (!inputPath.startsWith('/') && !inputPath.startsWith('~')) {
      const parts = inputPath.split('/');
      const possibleCollection = parts[0];
      if (parts.length >= 2 && possibleCollection) {
        const possiblePath = parts.slice(1).join('/');

        // Only treat the first segment as a collection if it has active documents
        const collExists = db.prepare(`
          SELECT 1 FROM documents WHERE collection = ? AND active = 1 LIMIT 1
        `).get(possibleCollection);

        if (collExists) {
          doc = findExact(possibleCollection, possiblePath)
            ?? findBySuffix(possiblePath, possibleCollection);
        }
      }
    }

    // Not found as collection/path: handle as a filesystem path
    if (!doc) {
      let fsPath = inputPath;
      if (fsPath.startsWith('~/')) {
        // Expand ~ to home directory
        fsPath = homedir() + fsPath.slice(1);
      } else if (!fsPath.startsWith('/')) {
        // Relative path - resolve from current directory
        fsPath = resolve(getPwd(), fsPath);
      }
      fsPath = getRealPath(fsPath);

      const detected = detectCollectionFromPath(db, fsPath);
      if (detected) {
        doc = findExact(detected.collectionName, detected.relativePath);
      }

      // Last resort: match by filename (last path component) across all collections
      if (!doc) {
        const baseName = inputPath.split('/').pop() || inputPath;
        doc = findBySuffix(baseName);
      }
    }
  }

  if (!doc) {
    console.error(`Document not found: ${filename}`);
    closeDb();
    process.exit(1);
  }

  // Get context for this file
  const context = getContextForPath(db, doc.collectionName, doc.path);

  let output = doc.body;
  // Missing, zero, NaN or negative start lines all mean "from the first line"
  const startLine = fromLine && fromLine > 0 ? fromLine : 1;

  // Apply line filtering if specified
  if (fromLine !== undefined || maxLines !== undefined) {
    const lines = output.split('\n');
    const start = startLine - 1; // Convert to 0-indexed
    const end = maxLines !== undefined ? start + maxLines : lines.length;
    output = lines.slice(start, end).join('\n');
  }

  // Add line numbers if requested
  if (lineNumbers) {
    output = addLineNumbers(output, startLine);
  }

  // Output context header if exists
  if (context) {
    console.log(`Folder Context: ${context}\n---\n`);
  }
  console.log(output);
  closeDb();
}
900
-
901
/**
 * Multi-get: fetch several documents in one call and print them.
 *
 * `pattern` is treated as a comma-separated list of names when it contains a
 * comma and no `*` / `?`; otherwise it is handed to `matchFilesByGlob`.
 * Documents whose stored length exceeds `maxBytes` are reported as skipped
 * instead of being printed. When `maxLines` is given, each body is cut to that
 * many lines and a truncation notice is appended.
 *
 * @param pattern  Glob pattern or comma-separated list (virtual paths or plain paths).
 * @param maxLines Optional per-document line limit.
 * @param maxBytes Size threshold above which a document is skipped.
 * @param format   Output format; anything other than json/csv/files/md/xml prints the CLI layout.
 */
function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT_MULTI_GET_MAX_BYTES, format: OutputFormat = "cli"): void {
  type DocRow = { virtual_path: string; body_length: number; collection: string; path: string };
  type FileEntry = { filepath: string; displayPath: string; bodyLength: number; collection?: string; path?: string };
  type ResultEntry = { file: string; displayPath: string; title: string; body: string; context: string | null; skipped: boolean; skipReason?: string };

  const db = getDb();

  // One SELECT shared by every single-document lookup; only the WHERE condition varies.
  const lookupOne = (condition: string, ...args: string[]): DocRow | null =>
    db.prepare(`
      SELECT
        'qmd://' || d.collection || '/' || d.path as virtual_path,
        LENGTH(content.doc) as body_length,
        d.collection,
        d.path
      FROM documents d
      JOIN content ON content.hash = d.hash
      WHERE ${condition} AND d.active = 1
      LIMIT 1
    `).get(...args) as DocRow | null;

  const lastSegment = (p: string): string => p.split('/').pop() || p;

  const hasWildcard = pattern.includes('*') || pattern.includes('?');
  let files: FileEntry[];

  if (pattern.includes(',') && !hasWildcard) {
    // Explicit list: resolve each name on its own and report the ones that are missing.
    files = [];
    const requested = pattern.split(',').map(s => s.trim()).filter(Boolean);
    for (const name of requested) {
      let row: DocRow | null = null;

      if (isVirtualPath(name)) {
        // qmd://collection/path: exact match on collection + path only
        const parsed = parseVirtualPath(name);
        if (parsed) {
          row = lookupOne("d.collection = ? AND d.path = ?", parsed.collectionName, parsed.path);
        }
      } else {
        // Plain name: exact path first, then fall back to a suffix match
        row = lookupOne("d.path = ?", name) ?? lookupOne("d.path LIKE ?", `%${name}`);
      }

      if (!row) {
        console.error(`File not found: ${name}`);
        continue;
      }
      files.push({
        filepath: row.virtual_path,
        displayPath: row.virtual_path,
        bodyLength: row.body_length,
        collection: row.collection,
        path: row.path
      });
    }
  } else {
    // Glob: collection/path are left unset here and recovered from the virtual path below
    files = matchFilesByGlob(db, pattern).map(f => ({
      ...f,
      collection: undefined,
      path: undefined
    }));
    if (files.length === 0) {
      console.error(`No files matched pattern: ${pattern}`);
      closeDb();
      process.exit(1);
    }
  }

  // Gather everything first so the database can be closed before printing.
  const results: ResultEntry[] = [];

  for (const file of files) {
    let { collection, path } = file;
    if (!collection || !path) {
      const parsed = parseVirtualPath(file.filepath);
      if (parsed) {
        collection = parsed.collectionName;
        path = parsed.path;
      }
    }

    const context = collection && path ? getContextForPath(db, collection, path) : null;

    // Oversized documents are listed but not loaded
    if (file.bodyLength > maxBytes) {
      results.push({
        file: file.filepath,
        displayPath: file.displayPath,
        title: lastSegment(file.displayPath),
        body: "",
        context,
        skipped: true,
        skipReason: `File too large (${Math.round(file.bodyLength / 1024)}KB > ${Math.round(maxBytes / 1024)}KB). Use 'qmd get ${file.displayPath}' to retrieve.`,
      });
      continue;
    }

    // Without a collection and path there is nothing to look up
    if (!collection || !path) continue;

    const doc = db.prepare(`
      SELECT content.doc as body, d.title
      FROM documents d
      JOIN content ON content.hash = d.hash
      WHERE d.collection = ? AND d.path = ? AND d.active = 1
    `).get(collection, path) as { body: string; title: string } | null;

    if (!doc) continue;

    let body = doc.body;
    if (maxLines !== undefined) {
      const allLines = body.split('\n');
      body = allLines.slice(0, maxLines).join('\n');
      if (allLines.length > maxLines) {
        body += `\n\n[... truncated ${allLines.length - maxLines} more lines]`;
      }
    }

    results.push({
      file: file.filepath,
      displayPath: file.displayPath,
      title: doc.title || lastSegment(file.displayPath),
      body,
      context,
      skipped: false,
    });
  }

  closeDb();

  switch (format) {
    case "json": {
      const output = results.map(r => ({
        file: r.displayPath,
        title: r.title,
        ...(r.context && { context: r.context }),
        ...(r.skipped ? { skipped: true, reason: r.skipReason } : { body: r.body }),
      }));
      console.log(JSON.stringify(output, null, 2));
      break;
    }

    case "csv": {
      // Quote a field only when it contains a comma, a quote or a newline
      const escapeField = (val: string | null | undefined): string => {
        if (val === null || val === undefined) return "";
        const str = String(val);
        const needsQuoting = str.includes(",") || str.includes('"') || str.includes("\n");
        return needsQuoting ? `"${str.replace(/"/g, '""')}"` : str;
      };
      console.log("file,title,context,skipped,body");
      for (const r of results) {
        const row = [r.displayPath, r.title, r.context, r.skipped ? "true" : "false", r.skipped ? r.skipReason : r.body];
        console.log(row.map(escapeField).join(","));
      }
      break;
    }

    case "files": {
      for (const r of results) {
        const ctx = r.context ? `,"${r.context.replace(/"/g, '""')}"` : "";
        const skippedTag = r.skipped ? ",[SKIPPED]" : "";
        console.log(`${r.displayPath}${ctx}${skippedTag}`);
      }
      break;
    }

    case "md": {
      for (const r of results) {
        console.log(`## ${r.displayPath}\n`);
        if (r.title && r.title !== r.displayPath) console.log(`**Title:** ${r.title}\n`);
        if (r.context) console.log(`**Context:** ${r.context}\n`);
        if (r.skipped) {
          console.log(`> ${r.skipReason}\n`);
          continue;
        }
        console.log("```");
        console.log(r.body);
        console.log("```\n");
      }
      break;
    }

    case "xml": {
      console.log('<?xml version="1.0" encoding="UTF-8"?>');
      console.log("<documents>");
      for (const r of results) {
        console.log(" <document>");
        console.log(` <file>${escapeXml(r.displayPath)}</file>`);
        console.log(` <title>${escapeXml(r.title)}</title>`);
        if (r.context) console.log(` <context>${escapeXml(r.context)}</context>`);
        if (r.skipped) {
          console.log(` <skipped>true</skipped>`);
          console.log(` <reason>${escapeXml(r.skipReason || "")}</reason>`);
        } else {
          console.log(` <body>${escapeXml(r.body)}</body>`);
        }
        console.log(" </document>");
      }
      console.log("</documents>");
      break;
    }

    default: {
      // CLI format (default)
      const rule = '='.repeat(60);
      for (const r of results) {
        console.log(`\n${rule}`);
        console.log(`File: ${r.displayPath}`);
        console.log(`${rule}\n`);

        if (r.skipped) {
          console.log(`[SKIPPED: ${r.skipReason}]`);
          continue;
        }

        if (r.context) {
          console.log(`Folder Context: ${r.context}\n---\n`);
        }
        console.log(r.body);
      }
    }
  }
}
1134
-
1135
/**
 * List files in the virtual file tree.
 *
 * - No argument: prints every collection from the YAML config with its count
 *   of active documents.
 * - `qmd://collection/path`, `collection` or `collection/path`: prints the
 *   active documents of that collection (optionally restricted to paths that
 *   start with the given prefix) in an `ls -l` like layout: size, time, path.
 *
 * Exits the process with status 1 when the virtual path cannot be parsed or
 * the collection is not defined in the YAML config.
 *
 * @param pathArg Optional collection name, `collection/prefix` or `qmd://` virtual path.
 */
function listFiles(pathArg?: string): void {
  const db = getDb();

  if (!pathArg) {
    // No argument - list all collections
    const yamlCollections = yamlListCollections();

    if (yamlCollections.length === 0) {
      console.log("No collections found. Run 'qmd add .' to index files.");
      closeDb();
      return;
    }

    // Get file counts from database for each collection
    const collections = yamlCollections.map(coll => {
      const stats = db.prepare(`
        SELECT COUNT(*) as file_count
        FROM documents d
        WHERE d.collection = ? AND d.active = 1
      `).get(coll.name) as { file_count: number } | null;

      return {
        name: coll.name,
        file_count: stats?.file_count || 0
      };
    });

    console.log(`${c.bold}Collections:${c.reset}\n`);
    for (const coll of collections) {
      console.log(` ${c.dim}qmd://${c.reset}${c.cyan}${coll.name}/${c.reset} ${c.dim}(${coll.file_count} files)${c.reset}`);
    }
    closeDb();
    return;
  }

  // Parse the path argument
  let collectionName: string;
  let pathPrefix: string | null = null;

  if (pathArg.startsWith('qmd://')) {
    // Virtual path format: qmd://collection/path
    const parsed = parseVirtualPath(pathArg);
    if (!parsed) {
      console.error(`Invalid virtual path: ${pathArg}`);
      closeDb();
      process.exit(1);
    }
    collectionName = parsed.collectionName;
    pathPrefix = parsed.path;
  } else {
    // Just collection name or collection/path
    const parts = pathArg.split('/');
    collectionName = parts[0] || '';
    if (parts.length > 1) {
      pathPrefix = parts.slice(1).join('/');
    }
  }

  // Get the collection
  const coll = getCollectionFromYaml(collectionName);
  if (!coll) {
    console.error(`Collection not found: ${collectionName}`);
    console.error(`Run 'qmd ls' to see available collections.`);
    closeDb();
    process.exit(1);
  }

  // List files in the collection with size and modification time
  let query: string;
  let params: string[];

  if (pathPrefix) {
    // List files under a specific path. The prefix is user input, so the LIKE
    // wildcards (% and _) and the escape character itself are escaped; without
    // this a prefix such as "my_notes" would also match "myXnotes/...".
    const escapedPrefix = pathPrefix.replace(/[\\%_]/g, '\\$&');
    query = `
      SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size
      FROM documents d
      JOIN content ct ON d.hash = ct.hash
      WHERE d.collection = ? AND d.path LIKE ? ESCAPE '\\' AND d.active = 1
      ORDER BY d.path
    `;
    params = [coll.name, `${escapedPrefix}%`];
  } else {
    // List all files in the collection
    query = `
      SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size
      FROM documents d
      JOIN content ct ON d.hash = ct.hash
      WHERE d.collection = ? AND d.active = 1
      ORDER BY d.path
    `;
    params = [coll.name];
  }

  const files = db.prepare(query).all(...params) as { path: string; title: string; modified_at: string; size: number }[];

  if (files.length === 0) {
    if (pathPrefix) {
      console.log(`No files found under qmd://${collectionName}/${pathPrefix}`);
    } else {
      console.log(`No files found in collection: ${collectionName}`);
    }
    closeDb();
    return;
  }

  // Format each size once, then find the widest one for right-alignment.
  // A reduce is used instead of Math.max(...array) so that very large
  // collections cannot exceed the engine's argument-count limit.
  const sizeLabels = files.map(f => formatBytes(f.size));
  const sizeWidth = sizeLabels.reduce((widest, label) => Math.max(widest, label.length), 0);

  // Output in ls -l style
  files.forEach((file, i) => {
    const sizeStr = sizeLabels[i]!.padStart(sizeWidth);
    const timeStr = formatLsTime(new Date(file.modified_at));

    // Dim the qmd:// prefix, highlight the filename
    console.log(`${sizeStr} ${timeStr} ${c.dim}qmd://${collectionName}/${c.reset}${c.cyan}${file.path}${c.reset}`);
  });

  closeDb();
}
1256
-
1257
/**
 * Render a date the way `ls -l` does: "Mon DD HH:MM" for recent dates and
 * "Mon DD YYYY" for dates more than six months (6 x 30 days) in the past.
 * All components use the local time zone.
 */
function formatLsTime(date: Date): string {
  const MONTH_NAMES = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
  const SIX_MONTHS_MS = 6 * 30 * 24 * 60 * 60 * 1000;

  const cutoff = new Date(Date.now() - SIX_MONTHS_MS);
  const pad2 = (value: number, fill: string): string => value.toString().padStart(2, fill);

  // Month and day are shared by both layouts; the day is space-padded like ls
  const monthAndDay = `${MONTH_NAMES[date.getMonth()]} ${pad2(date.getDate(), ' ')}`;

  if (date < cutoff) {
    // Old entry: the year replaces the time of day
    return `${monthAndDay} ${date.getFullYear()}`;
  }
  return `${monthAndDay} ${pad2(date.getHours(), '0')}:${pad2(date.getMinutes(), '0')}`;
}
1276
-
1277
- // Collection management commands
1278
/**
 * Print every collection returned by `listCollections` together with its glob
 * pattern, active file count and how long ago it was last modified.
 * Prints a hint instead when there are no collections.
 */
function collectionList(): void {
  const db = getDb();
  const collections = listCollections(db);

  if (collections.length > 0) {
    console.log(`${c.bold}Collections (${collections.length}):${c.reset}\n`);

    for (const coll of collections) {
      // Collections without a recorded modification time are shown relative to "now"
      const lastTouched = coll.last_modified ? new Date(coll.last_modified) : new Date();

      const block = [
        `${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}`,
        ` ${c.dim}Pattern:${c.reset} ${coll.glob_pattern}`,
        ` ${c.dim}Files:${c.reset} ${coll.active_count}`,
        ` ${c.dim}Updated:${c.reset} ${formatTimeAgo(lastTouched)}`,
        "", // blank separator line between collections
      ];
      for (const line of block) console.log(line);
    }
  } else {
    console.log("No collections found. Run 'qmd add .' to create one.");
  }

  closeDb();
}
1303
-
1304
/**
 * Register a new collection in the YAML config and index its files.
 *
 * The process exits with status 1 when a collection with the same name, or
 * one with the same path and glob pattern, is already configured.
 *
 * @param pwd         Directory the collection is rooted at.
 * @param globPattern Glob pattern selecting the files to index.
 * @param name        Optional collection name; defaults to the last segment of
 *                    `pwd`, or "root" when `pwd` has no segments.
 */
async function collectionAdd(pwd: string, globPattern: string, name?: string): Promise<void> {
  // Fall back to the directory's basename when no explicit name was given
  const collName = name || pwd.split('/').filter(Boolean).pop() || 'root';

  // Reject a name that is already taken in the YAML config
  if (getCollectionFromYaml(collName)) {
    console.error(`${c.yellow}Collection '${collName}' already exists.${c.reset}`);
    console.error(`Use a different name with --name <name>`);
    process.exit(1);
  }

  // Reject a second collection over the same path + pattern
  const duplicate = yamlListCollections().find(entry => entry.path === pwd && entry.pattern === globPattern);
  if (duplicate) {
    console.error(`${c.yellow}A collection already exists for this path and pattern:${c.reset}`);
    console.error(` Name: ${duplicate.name} (qmd://${duplicate.name}/)`);
    console.error(` Pattern: ${globPattern}`);
    console.error(`\nUse 'qmd update' to re-index it, or remove it first with 'qmd collection remove ${duplicate.name}'`);
    process.exit(1);
  }

  // Write the config entry first, then index the files
  const collectionsModule = await import("./collections.js");
  collectionsModule.addCollection(collName, pwd, globPattern);

  console.log(`Creating collection '${collName}'...`);
  await indexFiles(pwd, globPattern, collName);
  console.log(`${c.green}✓${c.reset} Collection '${collName}' created successfully`);
}
1341
-
1342
/**
 * Remove a collection by name and report how many documents were deleted and
 * how many orphaned content hashes were cleaned up.
 * Exits with status 1 when the name is not present in the YAML config.
 */
function collectionRemove(name: string): void {
  // The YAML config decides whether the collection exists
  if (!getCollectionFromYaml(name)) {
    console.error(`${c.yellow}Collection not found: ${name}${c.reset}`);
    console.error(`Run 'qmd collection list' to see available collections.`);
    process.exit(1);
  }

  const db = getDb();
  const { deletedDocs, cleanedHashes } = removeCollection(db, name);
  closeDb();

  console.log(`${c.green}✓${c.reset} Removed collection '${name}'`);
  console.log(` Deleted ${deletedDocs} documents`);
  if (cleanedHashes > 0) {
    console.log(` Cleaned up ${cleanedHashes} orphaned content hashes`);
  }
}
1361
-
1362
/**
 * Rename a collection in the database and print the old and new virtual path prefixes.
 * Exits with status 1 when `oldName` is unknown or `newName` is already in
 * use according to the YAML config.
 */
function collectionRename(oldName: string, newName: string): void {
  const sourceExists = Boolean(getCollectionFromYaml(oldName));
  if (!sourceExists) {
    console.error(`${c.yellow}Collection not found: ${oldName}${c.reset}`);
    console.error(`Run 'qmd collection list' to see available collections.`);
    process.exit(1);
  }

  const targetTaken = Boolean(getCollectionFromYaml(newName));
  if (targetTaken) {
    console.error(`${c.yellow}Collection name already exists: ${newName}${c.reset}`);
    console.error(`Choose a different name or remove the existing collection first.`);
    process.exit(1);
  }

  const db = getDb();
  renameCollection(db, oldName, newName);
  closeDb();

  const oldPrefix = `${c.cyan}qmd://${oldName}/${c.reset}`;
  const newPrefix = `${c.cyan}qmd://${newName}/${c.reset}`;
  console.log(`${c.green}✓${c.reset} Renamed collection '${oldName}' to '${newName}'`);
  console.log(` Virtual paths updated: ${oldPrefix} → ${newPrefix}`);
}
1386
-
1387
/**
 * Scan a directory with a glob pattern and synchronise the matching files
 * into the given collection.
 *
 * For every non-empty file the content hash and title are compared with the
 * active document at the same (normalised) path: new files are inserted,
 * changed files get new content, and files whose title alone changed get a
 * title update. Active documents whose path was not seen in this scan are
 * deactivated, then orphaned content is cleaned up. The database is closed
 * before returning.
 *
 * @param pwd                 Directory to scan; defaults to `getPwd()`.
 * @param globPattern         Glob selecting files, relative to `pwd`.
 * @param collectionName      Target collection; required.
 * @param suppressEmbedNotice When true, the "Run 'qmd embed'" hint is not printed.
 * @throws Error when `collectionName` is missing.
 */
async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, collectionName?: string, suppressEmbedNotice: boolean = false): Promise<void> {
  const db = getDb();
  const rootDir = pwd || getPwd();
  const now = new Date().toISOString();
  const ignoreGlobs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"].map(dir => `**/${dir}/**`);

  // Cached results are cleared on every index run
  clearCache(db);

  // Collection name must be provided (from YAML)
  if (!collectionName) {
    throw new Error("Collection name is required. Collections must be defined in ~/.config/qmd/index.yml");
  }

  console.log(`Collection: ${rootDir} (${globPattern})`);

  progress.indeterminate();
  const globbed: string[] = await fastGlob(globPattern, {
    cwd: rootDir,
    onlyFiles: true,
    followSymbolicLinks: false,
    dot: false,
    ignore: ignoreGlobs,
  });

  // Drop anything that lives in, or is itself, a dot-prefixed path segment
  const files = globbed.filter(file => file.split("/").every(segment => !segment.startsWith(".")));

  const total = files.length;
  if (total === 0) {
    progress.clear();
    console.log("No files found matching pattern.");
    closeDb();
    return;
  }

  const tally = { indexed: 0, updated: 0, unchanged: 0 };
  let processed = 0;
  const seenPaths = new Set<string>();
  const startTime = Date.now();

  for (const relativeFile of files) {
    const filepath = getRealPath(resolve(rootDir, relativeFile));
    const path = handelize(relativeFile); // normalised form used as the document path
    seenPaths.add(path);

    const content = readFileSync(filepath, "utf-8");

    // Whitespace-only files are counted as processed but never stored
    if (!content.trim()) {
      processed++;
      continue;
    }

    const hash = await hashContent(content);
    const title = extractTitle(content, relativeFile);
    const existing = findActiveDocument(db, collectionName, path);

    if (!existing) {
      // New document - insert content and document
      tally.indexed++;
      insertContent(db, hash, content, now);
      const stat = statSync(filepath);
      insertDocument(db, collectionName, path, title, hash,
        new Date(stat.birthtime).toISOString(),
        new Date(stat.mtime).toISOString());
    } else if (existing.hash !== hash) {
      // Content changed - store the new content and repoint the document
      insertContent(db, hash, content, now);
      const stat = statSync(filepath);
      updateDocument(db, existing.id, title, hash, new Date(stat.mtime).toISOString());
      tally.updated++;
    } else if (existing.title !== title) {
      // Same content, but the extracted title differs from the stored one
      updateDocumentTitle(db, existing.id, title, now);
      tally.updated++;
    } else {
      tally.unchanged++;
    }

    processed++;
    progress.set((processed / total) * 100);

    // ETA is only shown once a few files have been timed
    const elapsedSec = (Date.now() - startTime) / 1000;
    const filesPerSec = processed / elapsedSec;
    const remainingSec = (total - processed) / filesPerSec;
    const eta = processed > 2 ? ` ETA: ${formatETA(remainingSec)}` : "";
    process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
  }

  // Deactivate documents in this collection that were not seen in this scan
  let removed = 0;
  for (const activePath of getActiveDocumentPaths(db, collectionName)) {
    if (seenPaths.has(activePath)) continue;
    deactivateDocument(db, collectionName, activePath);
    removed++;
  }

  // Content no longer referenced by any document
  const orphanedContent = cleanupOrphanedContent(db);

  // Hashes that still lack vectors
  const needsEmbedding = getHashesNeedingEmbedding(db);

  progress.clear();
  console.log(`\nIndexed: ${tally.indexed} new, ${tally.updated} updated, ${tally.unchanged} unchanged, ${removed} removed`);
  if (orphanedContent > 0) {
    console.log(`Cleaned up ${orphanedContent} orphaned content hash(es)`);
  }

  if (needsEmbedding > 0 && !suppressEmbedNotice) {
    console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
  }

  closeDb();
}
1512
-
1513
/**
 * Render a textual progress bar made of filled (█) and empty (░) cells.
 *
 * `percent` is clamped to the 0-100 range (NaN is treated as 0) so that an
 * out-of-range value can never produce a negative repeat count, which would
 * make `String.prototype.repeat` throw a RangeError.
 *
 * @param percent Completion percentage, nominally 0-100.
 * @param width   Total number of cells in the bar (default 30).
 * @returns The bar, `width` cells long.
 */
function renderProgressBar(percent: number, width: number = 30): string {
  const clamped = Number.isNaN(percent) ? 0 : Math.min(100, Math.max(0, percent));
  const filled = Math.round((clamped / 100) * width);
  const empty = width - filled;
  return "█".repeat(filled) + "░".repeat(empty);
}
1519
-
1520
/**
 * Create embeddings for every content hash that does not have one yet.
 *
 * Documents are split into chunks with `chunkDocumentByTokens`, embedded in
 * batches of 32 inside a single LLM session (30 minute maximum duration) and
 * stored with `insertEmbedding`. When a batch call throws, its chunks are
 * retried one at a time. Progress, throughput and an ETA are written to stderr.
 *
 * The cursor is restored and the database is closed in `finally` blocks, so a
 * failure inside the session no longer leaves the terminal cursor hidden or
 * the database handle open.
 *
 * @param model Embedding model name recorded with each vector.
 * @param force When true, all existing embeddings are cleared first.
 */
async function vectorIndex(model: string = DEFAULT_EMBED_MODEL, force: boolean = false): Promise<void> {
  const db = getDb();

  try {
    const now = new Date().toISOString();

    // If force, clear all vectors
    if (force) {
      console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
      clearAllEmbeddings(db);
    }

    // Find unique hashes that need embedding
    const hashesToEmbed = getHashesForEmbedding(db);

    if (hashesToEmbed.length === 0) {
      console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
      return;
    }

    // Prepare documents with chunks
    type ChunkItem = { hash: string; title: string; text: string; seq: number; pos: number; tokens: number; bytes: number; displayName: string };
    const allChunks: ChunkItem[] = [];
    let multiChunkDocs = 0;

    // One encoder is enough for all byte-length measurements
    const encoder = new TextEncoder();

    process.stderr.write(`Chunking ${hashesToEmbed.length} documents by token count...\n`);
    for (const item of hashesToEmbed) {
      if (encoder.encode(item.body).length === 0) continue; // Skip empty

      const title = extractTitle(item.body, item.path);
      const chunks = await chunkDocumentByTokens(item.body);

      if (chunks.length > 1) multiChunkDocs++;

      chunks.forEach((chunk, seq) => {
        allChunks.push({
          hash: item.hash,
          title,
          text: chunk.text,
          seq,
          pos: chunk.pos,
          tokens: chunk.tokens,
          bytes: encoder.encode(chunk.text).length,
          displayName: item.path,
        });
      });
    }

    if (allChunks.length === 0) {
      console.log(`${c.green}✓ No non-empty documents to embed.${c.reset}`);
      return;
    }

    const totalBytes = allChunks.reduce((sum, chk) => sum + chk.bytes, 0);
    const totalChunks = allChunks.length;
    const totalDocs = hashesToEmbed.length;

    console.log(`${c.bold}Embedding ${totalDocs} documents${c.reset} ${c.dim}(${totalChunks} chunks, ${formatBytes(totalBytes)})${c.reset}`);
    if (multiChunkDocs > 0) {
      console.log(`${c.dim}${multiChunkDocs} documents split into multiple chunks${c.reset}`);
    }
    console.log(`${c.dim}Model: ${model}${c.reset}\n`);

    // Hide cursor during embedding; the finally below always restores it
    cursor.hide();

    try {
      // Wrap all LLM embedding operations in a session for lifecycle management
      await withLLMSession(async (session) => {
        // Embed the first chunk once up front to learn the vector dimension
        progress.indeterminate();
        const firstChunk = allChunks[0];
        if (!firstChunk) {
          throw new Error("No chunks available to embed");
        }
        const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title);
        const firstResult = await session.embed(firstText);
        if (!firstResult) {
          throw new Error("Failed to get embedding dimensions from first chunk");
        }
        ensureVecTable(db, firstResult.embedding.length);

        let chunksEmbedded = 0, errors = 0, bytesProcessed = 0;
        const startTime = Date.now();

        // Process in batches of 32 to balance memory usage and efficiency
        const BATCH_SIZE = 32;

        for (let batchStart = 0; batchStart < allChunks.length; batchStart += BATCH_SIZE) {
          const batch = allChunks.slice(batchStart, batchStart + BATCH_SIZE);
          const texts = batch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title));

          try {
            // Batch embed all texts at once
            const embeddings = await session.embedBatch(texts);

            batch.forEach((chunk, i) => {
              const embedding = embeddings[i];
              if (embedding) {
                insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
                chunksEmbedded++;
              } else {
                errors++;
                console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}${c.reset}`);
              }
              bytesProcessed += chunk.bytes;
            });
          } catch {
            // If the batch fails, retry its chunks one at a time
            for (const chunk of batch) {
              try {
                const text = formatDocForEmbedding(chunk.text, chunk.title);
                const result = await session.embed(text);
                if (result) {
                  insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
                  chunksEmbedded++;
                } else {
                  // Report the failure the same way the batch path does
                  errors++;
                  console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}${c.reset}`);
                }
              } catch (innerErr) {
                errors++;
                console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}: ${innerErr}${c.reset}`);
              }
              bytesProcessed += chunk.bytes;
            }
          }

          const percent = (bytesProcessed / totalBytes) * 100;
          progress.set(percent);

          const elapsed = (Date.now() - startTime) / 1000;
          // Avoid Infinity/NaN throughput when no measurable time has passed yet
          const bytesPerSec = elapsed > 0 ? bytesProcessed / elapsed : 0;
          const remainingBytes = totalBytes - bytesProcessed;
          const etaSec = remainingBytes / bytesPerSec;

          const bar = renderProgressBar(percent);
          const percentStr = percent.toFixed(0).padStart(3);
          const throughput = `${formatBytes(bytesPerSec)}/s`;
          const eta = elapsed > 2 ? formatETA(etaSec) : "...";
          const errStr = errors > 0 ? ` ${c.yellow}${errors} err${c.reset}` : "";

          process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${chunksEmbedded}/${totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
        }

        progress.clear();
        const totalTimeSec = (Date.now() - startTime) / 1000;
        const avgThroughput = formatBytes(totalBytes / totalTimeSec);

        console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
        console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${chunksEmbedded}${c.reset} chunks from ${c.bold}${totalDocs}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset} ${c.dim}(${avgThroughput}/s)${c.reset}`);
        if (errors > 0) {
          console.log(`${c.yellow}⚠ ${errors} chunks failed${c.reset}`);
        }
      }, { maxDuration: 30 * 60 * 1000, name: 'embed-command' });
    } finally {
      // Restore the cursor whether the session succeeded or threw
      cursor.show();
    }
  } finally {
    closeDb();
  }
}
1689
-
1690
/**
 * Sanitize a single term for FTS5: keep letters, combining marks, digits,
 * underscores and apostrophes (for contractions like "don't"); drop the rest.
 *
 * Unicode property escapes are used instead of `\w`, which is ASCII-only and
 * would strip accented and non-Latin characters ("café" -> "caf").
 */
function sanitizeFTS5Term(term: string): string {
  return term.replace(/[^\p{L}\p{M}\p{N}_']/gu, '').trim();
}

/**
 * Build an FTS5 query: phrase-aware with fallback to individual terms.
 *
 * - No usable term (every term shorter than 2 characters): returns "".
 * - One term: returns that term double-quoted.
 * - Several terms: returns `(phrase) OR (NEAR(terms, 10)) OR (t1 OR t2 ...)`,
 *   i.e. the exact phrase, then a proximity match, then any single term.
 *
 * @param query Raw user query.
 * @returns An FTS5 MATCH expression, or "" when nothing searchable remains.
 */
function buildFTS5Query(query: string): string {
  // Wrap a value in double quotes, doubling any embedded quote
  const quote = (value: string): string => `"${value.replace(/"/g, '""')}"`;

  // Same character policy as sanitizeFTS5Term, but whitespace is kept so the
  // words stay separated inside the phrase
  const sanitizedQuery = query.replace(/[^\p{L}\p{M}\p{N}_\s']/gu, '').trim();

  const terms = query
    .split(/\s+/)
    .map(sanitizeFTS5Term)
    .filter(term => term.length >= 2); // Skip single chars and empty

  if (terms.length === 0) return "";
  if (terms.length === 1) return quote(terms[0]!);

  const phrase = quote(sanitizedQuery);
  const quotedTerms = terms.map(quote);

  // FTS5 NEAR syntax: NEAR(term1 term2, distance)
  const nearPhrase = `NEAR(${quotedTerms.join(' ')}, 10)`;
  const orTerms = quotedTerms.join(' OR ');

  // Exact phrase > proximity > any term
  return `(${phrase}) OR (${nearPhrase}) OR (${orTerms})`;
}
1721
-
1722
/**
 * Squash a raw BM25 score into the 0-1 range with a logistic curve.
 *
 * Only the magnitude of the score is used, so the sign of the input does not matter.
 * The curve is centred on a magnitude of 5 (which maps to exactly 0.5) with a scale
 * of 3, so larger magnitudes approach 1 and smaller ones approach 0.
 *
 * @param score - Raw BM25 score (the original note says SQLite reports these as negatives).
 * @returns A value in (0, 1) where higher means a better match.
 */
function normalizeBM25(score: number): number {
  const magnitude = Math.abs(score);
  const exponent = (5 - magnitude) / 3;
  return 1 / (1 + Math.exp(exponent));
}
1731
-
1732
/** Options shared by the search commands and the result printer. */
type OutputOptions = {
  /** Which renderer prints the results. */
  format: OutputFormat;
  /** Emit the whole document body instead of a snippet (in formats that support it). */
  full: boolean;
  /** Maximum number of results to print. */
  limit: number;
  /** Results scoring below this value are dropped. */
  minScore: number;
  /** Return every match instead of only the top `limit`. */
  all?: boolean;
  /** Restrict results to the named collection. */
  collection?: string;
  /** Add line numbers to output. */
  lineNumbers?: boolean;
  /** Optional context for query expansion. */
  context?: string;
};
1742
-
1743
/**
 * Highlight query terms in `text` with bold-yellow colour codes.
 *
 * Words shorter than 3 characters are ignored. Matching is case-insensitive and the
 * original casing of the matched text is preserved. Returns `text` untouched when colour
 * output is disabled or no term qualifies.
 *
 * All terms are applied in a single pass. Replacing term by term would re-scan text that
 * already contains inserted colour codes, which nests the codes when a term is repeated
 * in the query and stops a longer term from matching once its prefix has been wrapped.
 *
 * @param text - Text to decorate.
 * @param query - Raw user query; split on whitespace into terms.
 * @returns The decorated text.
 */
function highlightTerms(text: string, query: string): string {
  if (!useColor) return text;

  const uniqueTerms = [...new Set(query.toLowerCase().split(/\s+/).filter(t => t.length >= 3))];
  if (uniqueTerms.length === 0) return text;

  // Longest first, so "foobar" wins over "foo" when both start at the same position.
  const alternatives = uniqueTerms
    .sort((a, b) => b.length - a.length)
    .map(term => term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')); // escape regex metacharacters

  const pattern = new RegExp(`(${alternatives.join('|')})`, 'gi');
  return text.replace(pattern, match => `${c.yellow}${c.bold}${match}${c.reset}`);
}
1754
-
1755
/**
 * Render a 0-1 score as a right-aligned percentage, coloured by quality.
 *
 * With colour enabled: green at 0.7 and above, yellow at 0.4 and above, dim otherwise.
 * With colour disabled the bare percentage is returned.
 *
 * @param score - Normalised relevance score.
 * @returns The percentage padded to three characters plus "%", optionally coloured.
 */
function formatScore(score: number): string {
  const label = `${(score * 100).toFixed(0).padStart(3)}%`;
  if (!useColor) return label;

  const tint = score >= 0.7 ? c.green : score >= 0.4 ? c.yellow : c.dim;
  return `${tint}${label}${c.reset}`;
}
1763
-
1764
/**
 * Shorten a directory path for display by replacing the home directory with `~`
 * (used for context paths, not documents).
 *
 * The home directory only counts as a prefix at a path boundary: `/home/al` must not
 * shorten `/home/alice/notes`. Paths outside the home directory, and every path when the
 * home directory is unknown (empty), are returned unchanged.
 *
 * @param dirpath - Absolute directory path.
 * @returns `~`-relative form when `dirpath` is the home directory or inside it.
 */
function shortPath(dirpath: string): string {
  const home = homedir();
  if (!home) return dirpath;

  if (dirpath === home || dirpath.startsWith(`${home}/`)) {
    return '~' + dirpath.slice(home.length);
  }
  return dirpath;
}
1772
-
1773
/**
 * Print search results to stdout in the format selected by `opts.format`.
 *
 * Results scoring below `opts.minScore` are dropped and at most `opts.limit` are printed.
 * Supported formats: "json", "files", "cli", "md", "xml"; anything else is rendered as CSV.
 * A result's docid is its explicit `docid`, or else the first six characters of its `hash`.
 *
 * XML attribute values (docid, name, title, context) are escaped for `&`, `"`, `<` and `>`.
 * The element content is emitted as-is so document text is not altered.
 *
 * @param results - Candidate results, already ordered by the caller.
 * @param query - The user's query; used for snippet extraction and highlighting.
 * @param opts - Output options (format, limits, line numbers, full body).
 */
function outputResults(results: { file: string; displayPath: string; title: string; body: string; score: number; context?: string | null; chunkPos?: number; hash?: string; docid?: string }[], query: string, opts: OutputOptions): void {
  const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);

  if (filtered.length === 0) {
    console.log("No results found above minimum score threshold.");
    return;
  }

  // Helper to create qmd:// URI from displayPath
  const toQmdPath = (displayPath: string) => `qmd://${displayPath}`;

  // Explicit docid wins; otherwise fall back to a six-character hash prefix.
  const shortDocid = (row: { docid?: string; hash?: string }): string | undefined =>
    row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);

  // Escape a value for use inside a double-quoted XML attribute ('&' must go first).
  const escapeXmlAttr = (value: string): string =>
    value
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');

  if (opts.format === "json") {
    // JSON output for LLM consumption
    const output = filtered.map(row => {
      const docid = shortDocid(row);
      let body = opts.full ? row.body : undefined;
      let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos).snippet : undefined;
      if (opts.lineNumbers) {
        if (body) body = addLineNumbers(body);
        if (snippet) snippet = addLineNumbers(snippet);
      }
      return {
        ...(docid && { docid: `#${docid}` }),
        score: Math.round(row.score * 100) / 100,
        file: toQmdPath(row.displayPath),
        title: row.title,
        ...(row.context && { context: row.context }),
        ...(body && { body }),
        ...(snippet && { snippet }),
      };
    });
    console.log(JSON.stringify(output, null, 2));
  } else if (opts.format === "files") {
    // Simple docid,score,filepath,context output
    for (const row of filtered) {
      const docid = shortDocid(row) ?? "";
      const ctx = row.context ? `,"${row.context.replace(/"/g, '""')}"` : "";
      console.log(`#${docid},${row.score.toFixed(2)},${toQmdPath(row.displayPath)}${ctx}`);
    }
  } else if (opts.format === "cli") {
    for (let i = 0; i < filtered.length; i++) {
      const row = filtered[i];
      if (!row) continue;
      const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
      const docid = shortDocid(row);

      // Line 1: filepath with docid
      const path = toQmdPath(row.displayPath);
      // Only show :line if we actually found a term match in the snippet body (exclude header line).
      const snippetBody = snippet.split("\n").slice(1).join("\n").toLowerCase();
      const hasMatch = query.toLowerCase().split(/\s+/).some(t => t.length > 0 && snippetBody.includes(t));
      const lineInfo = hasMatch ? `:${line}` : "";
      const docidStr = docid ? ` ${c.dim}#${docid}${c.reset}` : "";
      console.log(`${c.cyan}${path}${c.dim}${lineInfo}${c.reset}${docidStr}`);

      // Line 2: Title (if available)
      if (row.title) {
        console.log(`${c.bold}Title: ${row.title}${c.reset}`);
      }

      // Line 3: Context (if available)
      if (row.context) {
        console.log(`${c.dim}Context: ${row.context}${c.reset}`);
      }

      // Line 4: Score
      const score = formatScore(row.score);
      console.log(`Score: ${c.bold}${score}${c.reset}`);
      console.log();

      // Snippet with highlighting (diff-style header included)
      const displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
      const highlighted = highlightTerms(displaySnippet, query);
      console.log(highlighted);

      // Double empty line between results
      if (i < filtered.length - 1) console.log('\n');
    }
  } else if (opts.format === "md") {
    for (let i = 0; i < filtered.length; i++) {
      const row = filtered[i];
      if (!row) continue;
      const heading = row.title || row.displayPath;
      const docid = shortDocid(row);
      let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
      if (opts.lineNumbers) {
        content = addLineNumbers(content);
      }
      const docidLine = docid ? `**docid:** \`#${docid}\`\n` : "";
      const contextLine = row.context ? `**context:** ${row.context}\n` : "";
      console.log(`---\n# ${heading}\n${docidLine}${contextLine}\n${content}\n`);
    }
  } else if (opts.format === "xml") {
    for (const row of filtered) {
      const titleAttr = row.title ? ` title="${escapeXmlAttr(row.title)}"` : "";
      const contextAttr = row.context ? ` context="${escapeXmlAttr(row.context)}"` : "";
      const docid = shortDocid(row) ?? "";
      let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
      if (opts.lineNumbers) {
        content = addLineNumbers(content);
      }
      console.log(`<file docid="#${escapeXmlAttr(docid)}" name="${escapeXmlAttr(toQmdPath(row.displayPath))}"${titleAttr}${contextAttr}>\n${content}\n</file>\n`);
    }
  } else {
    // CSV format
    console.log("docid,score,file,title,context,line,snippet");
    for (const row of filtered) {
      const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
      let content = opts.full ? row.body : snippet;
      if (opts.lineNumbers) {
        content = addLineNumbers(content, line);
      }
      const docid = shortDocid(row) ?? "";
      const snippetText = content || "";
      console.log(`#${docid},${row.score.toFixed(4)},${escapeCSV(toQmdPath(row.displayPath))},${escapeCSV(row.title || "")},${escapeCSV(row.context || "")},${line},${escapeCSV(snippetText)}`);
    }
  }
}
1891
-
1892
/**
 * Run a full-text (FTS) search and print the results.
 *
 * Exits the process with status 1 when `opts.collection` names an unknown collection.
 * The database is closed before anything is printed.
 *
 * @param query - Search text passed to `searchFTS`.
 * @param opts - Output options; `collection`, `all` and `limit` also shape the lookup.
 */
function search(query: string, opts: OutputOptions): void {
  const db = getDb();

  // Reject an unknown collection filter before doing any work.
  if (opts.collection && !getCollectionFromYaml(opts.collection)) {
    console.error(`Collection not found: ${opts.collection}`);
    closeDb();
    process.exit(1);
  }
  const collectionName: string | undefined = opts.collection || undefined;

  // --all uses a very large cap; otherwise over-fetch and let outputResults trim.
  const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
  const hits = searchFTS(db, query, fetchLimit, collectionName);

  // Attach the context for each hit while the database is still open.
  const rows = hits.map(hit => {
    const context = getContextForFile(db, hit.filepath);
    return {
      file: hit.filepath,
      displayPath: hit.displayPath,
      title: hit.title,
      body: hit.body || "",
      score: hit.score,
      context,
      hash: hit.hash,
      docid: hit.docid,
    };
  });

  closeDb();

  if (rows.length > 0) {
    outputResults(rows, query, opts);
    return;
  }
  console.log("No results found.");
}
1931
-
1932
/**
 * Write the original query and its expansions to stderr as a small tree
 * (CLI progress feedback).
 *
 * Each expansion is shown as `type: text` with newlines flattened to spaces and the text
 * cut to 72 characters (69 plus "..."). The final row uses the closing branch glyph.
 *
 * @param originalQuery - The query as entered by the user.
 * @param expanded - Expansions generated for that query.
 */
function logExpansionTree(originalQuery: string, expanded: ExpandedQuery[]): void {
  const branch = (label: string): string => `${c.dim}├─ ${label}${c.reset}`;

  const rows: string[] = [
    branch(originalQuery),
    ...expanded.map(q => {
      const flat = q.text.replace(/\n/g, ' ');
      const preview = flat.length > 72 ? flat.substring(0, 69) + '...' : flat;
      return branch(`${q.type}: ${preview}`);
    }),
  ];

  // There is always at least the original-query row; close the tree on the last one.
  const lastIndex = rows.length - 1;
  rows[lastIndex] = rows[lastIndex]!.replace('├─', '└─');

  for (const row of rows) {
    process.stderr.write(row + '\n');
  }
}
1946
-
1947
/**
 * Run a vector similarity search and print the results.
 *
 * Exits the process with status 1 when `opts.collection` names an unknown collection.
 * The search runs inside an LLM session capped at 10 minutes, and the database is closed
 * before anything is printed.
 *
 * `opts.minScore` is used as given, including an explicit 0; the 0.3 fallback only applies
 * when no value is present at all.
 *
 * @param query - Search text.
 * @param opts - Output options; `collection`, `all`, `limit` and `minScore` shape the lookup.
 * @param _model - Unused; kept for call compatibility.
 */
async function vectorSearch(query: string, opts: OutputOptions, _model: string = DEFAULT_EMBED_MODEL): Promise<void> {
  const store = getStore();

  if (opts.collection) {
    const coll = getCollectionFromYaml(opts.collection);
    if (!coll) {
      console.error(`Collection not found: ${opts.collection}`);
      closeDb();
      process.exit(1);
    }
  }

  checkIndexHealth(store.db);

  await withLLMSession(async () => {
    const results = await vectorSearchQuery(store, query, {
      collection: opts.collection,
      limit: opts.all ? 500 : (opts.limit || 10),
      // `??` rather than `||`: a caller asking for --min-score 0 must not get 0.3.
      minScore: opts.minScore ?? 0.3,
      hooks: {
        onExpand: (original, expanded) => {
          logExpansionTree(original, expanded);
          process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
        },
      },
    });

    closeDb();

    if (results.length === 0) {
      console.log("No results found.");
      return;
    }

    // The query already applied the limit, so print everything that came back.
    outputResults(results.map(r => ({
      file: r.file,
      displayPath: r.displayPath,
      title: r.title,
      body: r.body,
      score: r.score,
      context: r.context,
      docid: r.docid,
    })), query, { ...opts, limit: results.length });
  }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
}
1992
-
1993
/**
 * Run the hybrid query pipeline (`hybridQuery`) and print the results.
 *
 * Exits the process with status 1 when `opts.collection` names an unknown collection.
 * Progress messages from the pipeline hooks go to stderr. The search runs inside an LLM
 * session capped at 10 minutes, and the database is closed before anything is printed.
 *
 * @param query - Search text.
 * @param opts - Output options; `collection`, `all`, `limit` and `minScore` shape the lookup.
 * @param _embedModel - Unused; kept for call compatibility.
 * @param _rerankModel - Unused; kept for call compatibility.
 */
async function querySearch(query: string, opts: OutputOptions, _embedModel: string = DEFAULT_EMBED_MODEL, _rerankModel: string = DEFAULT_RERANK_MODEL): Promise<void> {
  const store = getStore();

  if (opts.collection && !getCollectionFromYaml(opts.collection)) {
    console.error(`Collection not found: ${opts.collection}`);
    closeDb();
    process.exit(1);
  }

  checkIndexHealth(store.db);

  // Dimmed one-line status message on stderr.
  const note = (message: string): void => {
    process.stderr.write(`${c.dim}${message}${c.reset}\n`);
  };

  await withLLMSession(async () => {
    const maxResults = opts.all ? 500 : (opts.limit || 10);
    const results = await hybridQuery(store, query, {
      collection: opts.collection,
      limit: maxResults,
      minScore: opts.minScore || 0,
      hooks: {
        onStrongSignal: (score) => {
          note(`Strong BM25 signal (${score.toFixed(2)}) — skipping expansion`);
        },
        onExpand: (original, expanded) => {
          logExpansionTree(original, expanded);
          note(`Searching ${expanded.length + 1} queries...`);
        },
        onRerankStart: (chunkCount) => {
          note(`Reranking ${chunkCount} chunks...`);
          progress.indeterminate();
        },
        onRerankDone: () => {
          progress.clear();
        },
      },
    });

    closeDb();

    if (results.length === 0) {
      console.log("No results found.");
      return;
    }

    // Map to CLI output format; the best chunk is what gets shown as the snippet.
    const rows = results.map(r => ({
      file: r.file,
      displayPath: r.displayPath,
      title: r.title,
      body: r.bestChunk,
      chunkPos: r.bestChunkPos,
      score: r.score,
      context: r.context,
      docid: r.docid,
    }));
    outputResults(rows, query, { ...opts, limit: results.length });
  }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
}
2050
-
2051
/**
 * Parse `process.argv` with `util.parseArgs` into a command, its arguments and options.
 *
 * Unknown options are tolerated (`strict: false`). When `--index` is given, the index name
 * is applied through `setIndexName` and `setConfigIndexName` as a side effect.
 *
 * Output format precedence when several flags are given: csv, md, xml, files, json;
 * "cli" when none is set. The default limit is 20 for files/json and 5 otherwise;
 * `--all` forces a limit of 100000.
 *
 * @returns `command` (first positional or ""), `args` (remaining positionals), `query`
 *          (args joined by spaces), the derived `opts`, and the raw parsed `values`.
 */
function parseCLI() {
  const { values, positionals } = parseArgs({
    args: process.argv.slice(2), // drop the node binary and the script path
    options: {
      // Global options
      index: { type: "string" },
      context: { type: "string" },
      "no-lex": { type: "boolean" },
      help: { type: "boolean", short: "h" },
      version: { type: "boolean", short: "v" },
      // Search options
      n: { type: "string" },
      "min-score": { type: "string" },
      all: { type: "boolean" },
      full: { type: "boolean" },
      csv: { type: "boolean" },
      md: { type: "boolean" },
      xml: { type: "boolean" },
      files: { type: "boolean" },
      json: { type: "boolean" },
      collection: { type: "string", short: "c" }, // filter by collection
      // Collection options
      name: { type: "string" }, // collection name
      mask: { type: "string" }, // glob pattern
      // Embed options
      force: { type: "boolean", short: "f" },
      // Update options
      pull: { type: "boolean" }, // git pull before update
      refresh: { type: "boolean" },
      // Get options
      l: { type: "string" }, // max lines
      from: { type: "string" }, // start line
      "max-bytes": { type: "string" }, // max bytes for multi-get
      "line-numbers": { type: "boolean" }, // add line numbers to output
      // MCP HTTP transport options
      http: { type: "boolean" },
      daemon: { type: "boolean" },
      port: { type: "string" },
    },
    allowPositionals: true,
    strict: false, // let unknown options pass through
  });

  // Select index name (default: "index")
  const indexName = values.index as string | undefined;
  if (indexName) {
    setIndexName(indexName);
    setConfigIndexName(indexName);
  }

  // First matching flag wins, in this fixed precedence order.
  const formatFlags = ["csv", "md", "xml", "files", "json"] as const;
  const format: OutputFormat = formatFlags.find(flag => values[flag]) ?? "cli";

  const isAll = !!values.all;
  const defaultLimit = format === "files" || format === "json" ? 20 : 5;

  // A missing, non-numeric or zero -n falls back to the default limit.
  const requestedLimit = values.n ? parseInt(String(values.n), 10) : NaN;
  const rawMinScore = values["min-score"];
  const requestedMinScore = rawMinScore ? parseFloat(String(rawMinScore)) : 0;

  const opts: OutputOptions = {
    format,
    full: !!values.full,
    limit: isAll ? 100000 : (requestedLimit || defaultLimit),
    minScore: requestedMinScore || 0,
    all: isAll,
    collection: values.collection as string | undefined,
    lineNumbers: !!values["line-numbers"],
  };

  const args = positionals.slice(1);
  return {
    command: positionals[0] || "",
    args,
    query: args.join(" "),
    opts,
    values,
  };
}
2139
-
2140
/**
 * Print the CLI usage summary to stdout, ending with the path of the active index.
 */
function showHelp(): void {
  const helpLines: string[] = [
    "Usage:",
    " qmd collection add [path] --name <name> --mask <pattern> - Create/index collection",
    " qmd collection list - List all collections with details",
    " qmd collection remove <name> - Remove a collection by name",
    " qmd collection rename <old> <new> - Rename a collection",
    " qmd ls [collection[/path]] - List collections or files in a collection",
    " qmd context add [path] \"text\" - Add context for path (defaults to current dir)",
    " qmd context list - List all contexts",
    " qmd context rm <path> - Remove context",
    " qmd get <file>[:line] [-l N] [--from N] - Get document (optionally from line, max N lines)",
    " qmd multi-get <pattern> [-l N] [--max-bytes N] - Get multiple docs by glob or comma-separated list",
    " qmd status - Show index status and collections",
    " qmd update [--pull] - Re-index all collections (--pull: git pull first)",
    " qmd embed [-f] - Create vector embeddings (900 tokens/chunk, 15% overlap)",
    " qmd cleanup - Remove cache and orphaned data, vacuum DB",
    " qmd query <query> - Search with query expansion + reranking (recommended)",
    " qmd search <query> - Full-text keyword search (BM25, no LLM)",
    " qmd vsearch <query> - Vector similarity search (no reranking)",
    " qmd mcp - Start MCP server (stdio transport)",
    " qmd mcp --http [--port N] - Start MCP server (HTTP transport, default port 8181)",
    " qmd mcp --http --daemon - Start MCP server as background daemon",
    " qmd mcp stop - Stop background MCP daemon",
    "",
    "Global options:",
    " --index <name> - Use custom index name (default: index)",
    "",
    "Search options:",
    " -n <num> - Number of results (default: 5, or 20 for --files)",
    " --all - Return all matches (use with --min-score to filter)",
    " --min-score <num> - Minimum similarity score",
    " --full - Output full document instead of snippet",
    " --line-numbers - Add line numbers to output",
    " --files - Output docid,score,filepath,context (default: 20 results)",
    " --json - JSON output with snippets (default: 20 results)",
    " --csv - CSV output with snippets",
    " --md - Markdown output",
    " --xml - XML output",
    " -c, --collection <name> - Filter results to a specific collection",
    "",
    "Multi-get options:",
    " -l <num> - Maximum lines per file",
    " --max-bytes <num> - Skip files larger than N bytes (default: 10240)",
    " --json/--csv/--md/--xml/--files - Output format (same as search)",
    "",
    "Models (auto-downloaded from HuggingFace):",
    " Embedding: embeddinggemma-300M-Q8_0",
    " Reranking: qwen3-reranker-0.6b-q8_0",
    " Generation: Qwen3-0.6B-Q8_0",
    "",
  ];

  for (const helpLine of helpLines) {
    console.log(helpLine);
  }
  // Resolved last, after the static text has been printed.
  console.log(`Index: ${getDbPath()}`);
}
2192
-
2193
/**
 * Print `qmd <version>`, followed by the short git commit in parentheses when one can
 * be determined.
 *
 * The version is read from the `package.json` one directory above this script. The commit
 * lookup is best-effort: any failure (not a git checkout, git not installed) is ignored
 * and only the version is printed.
 */
async function showVersion(): Promise<void> {
  const scriptDir = dirname(fileURLToPath(import.meta.url));
  const pkgPath = resolve(scriptDir, "..", "package.json");
  const pkg = JSON.parse(readFileSync(pkgPath, "utf-8")) as { version: string };

  let commit = "";
  try {
    // Run git from the script directory via `cwd` instead of interpolating the path into
    // the shell command, so install paths containing spaces or shell characters work.
    commit = execSync("git rev-parse --short HEAD", {
      cwd: scriptDir,
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"],
    }).trim();
  } catch {
    // Not a git repo or git not available
  }

  const versionStr = commit ? `${pkg.version} (${commit})` : pkg.version;
  console.log(`qmd ${versionStr}`);
}
2208
-
2209
// Main CLI - only run if this is the main module.
//
// Flow: parse argv, handle --version / --help, dispatch on the command, then (for every
// command except "mcp") dispose the default llama.cpp instance and exit with status 0.
// Usage errors print to stderr and exit with status 1.
if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsWith("/qmd.ts")) {
  const cli = parseCLI();

  if (cli.values.version) {
    await showVersion();
    process.exit(0);
  }

  // No command is a usage error (exit 1); an explicit --help is a success (exit 0).
  if (!cli.command || cli.values.help) {
    showHelp();
    process.exit(cli.values.help ? 0 : 1);
  }

  switch (cli.command) {
    case "context": {
      const subcommand = cli.args[0];
      if (!subcommand) {
        console.error("Usage: qmd context <add|list|check|rm>");
        console.error("");
        console.error("Commands:");
        console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)");
        console.error(" qmd context add / \"text\" - Add global context to all collections");
        console.error(" qmd context list - List all contexts");
        console.error(" qmd context check - Check for missing contexts");
        console.error(" qmd context rm <path> - Remove context");
        process.exit(1);
      }

      switch (subcommand) {
        case "add": {
          if (cli.args.length < 2) {
            console.error("Usage: qmd context add [path] \"text\"");
            console.error("");
            console.error("Examples:");
            console.error(" qmd context add \"Context for current directory\"");
            console.error(" qmd context add . \"Context for current directory\"");
            console.error(" qmd context add /subfolder \"Context for subfolder\"");
            console.error(" qmd context add / \"Global context for all collections\"");
            console.error("");
            console.error(" Using virtual paths:");
            console.error(" qmd context add qmd://journals/ \"Context for entire journals collection\"");
            console.error(" qmd context add qmd://journals/2024 \"Context for 2024 journals\"");
            process.exit(1);
          }

          let pathArg: string | undefined;
          let contextText: string;

          // Check if first arg looks like a path or if it's the context text
          const firstArg = cli.args[1] || '';
          const secondArg = cli.args[2];

          if (secondArg) {
            // Two args: path + context
            pathArg = firstArg;
            contextText = cli.args.slice(2).join(" ");
          } else {
            // One arg: context only (use current directory)
            pathArg = undefined;
            contextText = firstArg;
          }

          await contextAdd(pathArg, contextText);
          break;
        }

        case "list": {
          contextList();
          break;
        }

        case "check": {
          contextCheck();
          break;
        }

        case "rm":
        case "remove": {
          if (cli.args.length < 2 || !cli.args[1]) {
            console.error("Usage: qmd context rm <path>");
            console.error("Examples:");
            console.error(" qmd context rm /");
            console.error(" qmd context rm qmd://journals/2024");
            process.exit(1);
          }
          contextRemove(cli.args[1]);
          break;
        }

        default:
          console.error(`Unknown subcommand: ${subcommand}`);
          console.error("Available: add, list, check, rm");
          process.exit(1);
      }
      break;
    }

    case "get": {
      if (!cli.args[0]) {
        console.error("Usage: qmd get <filepath>[:line] [--from <line>] [-l <lines>] [--line-numbers]");
        process.exit(1);
      }
      const fromLine = cli.values.from ? parseInt(cli.values.from as string, 10) : undefined;
      const maxLines = cli.values.l ? parseInt(cli.values.l as string, 10) : undefined;
      getDocument(cli.args[0], fromLine, maxLines, cli.opts.lineNumbers);
      break;
    }

    case "multi-get": {
      if (!cli.args[0]) {
        console.error("Usage: qmd multi-get <pattern> [-l <lines>] [--max-bytes <bytes>] [--json|--csv|--md|--xml|--files]");
        console.error(" pattern: glob (e.g., 'journals/2025-05*.md') or comma-separated list");
        process.exit(1);
      }
      const maxLinesMulti = cli.values.l ? parseInt(cli.values.l as string, 10) : undefined;
      const maxBytes = cli.values["max-bytes"] ? parseInt(cli.values["max-bytes"] as string, 10) : DEFAULT_MULTI_GET_MAX_BYTES;
      multiGet(cli.args[0], maxLinesMulti, maxBytes, cli.opts.format);
      break;
    }

    case "ls": {
      listFiles(cli.args[0]);
      break;
    }

    case "collection": {
      const subcommand = cli.args[0];
      // Without this guard the default branch would report "Unknown subcommand: undefined".
      if (!subcommand) {
        console.error("Usage: qmd collection <list|add|remove|rename>");
        process.exit(1);
      }

      switch (subcommand) {
        case "list": {
          collectionList();
          break;
        }

        case "add": {
          const pwd = cli.args[1] || getPwd();
          const resolvedPwd = pwd === '.' ? getPwd() : getRealPath(resolve(pwd));
          const globPattern = cli.values.mask as string || DEFAULT_GLOB;
          const name = cli.values.name as string | undefined;

          await collectionAdd(resolvedPwd, globPattern, name);
          break;
        }

        case "remove":
        case "rm": {
          if (!cli.args[1]) {
            console.error("Usage: qmd collection remove <name>");
            console.error(" Use 'qmd collection list' to see available collections");
            process.exit(1);
          }
          collectionRemove(cli.args[1]);
          break;
        }

        case "rename":
        case "mv": {
          if (!cli.args[1] || !cli.args[2]) {
            console.error("Usage: qmd collection rename <old-name> <new-name>");
            console.error(" Use 'qmd collection list' to see available collections");
            process.exit(1);
          }
          collectionRename(cli.args[1], cli.args[2]);
          break;
        }

        default:
          console.error(`Unknown subcommand: ${subcommand}`);
          console.error("Available: list, add, remove, rename");
          process.exit(1);
      }
      break;
    }

    case "status":
      await showStatus();
      break;

    case "update":
      await updateCollections();
      break;

    case "embed":
      await vectorIndex(DEFAULT_EMBED_MODEL, !!cli.values.force);
      break;

    case "pull": {
      const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh);
      const models = [
        DEFAULT_EMBED_MODEL_URI,
        DEFAULT_GENERATE_MODEL_URI,
        DEFAULT_RERANK_MODEL_URI,
      ];
      console.log(`${c.bold}Pulling models${c.reset}`);
      const results = await pullModels(models, {
        refresh,
        cacheDir: DEFAULT_MODEL_CACHE_DIR,
      });
      for (const result of results) {
        const size = formatBytes(result.sizeBytes);
        const note = result.refreshed ? "refreshed" : "cached/checked";
        console.log(`- ${result.model} -> ${result.path} (${size}, ${note})`);
      }
      break;
    }

    case "search":
      if (!cli.query) {
        console.error("Usage: qmd search [options] <query>");
        process.exit(1);
      }
      search(cli.query, cli.opts);
      break;

    case "vsearch":
    case "vector-search": // undocumented alias
      if (!cli.query) {
        console.error("Usage: qmd vsearch [options] <query>");
        process.exit(1);
      }
      // Default min-score for vector search is 0.3
      if (!cli.values["min-score"]) {
        cli.opts.minScore = 0.3;
      }
      await vectorSearch(cli.query, cli.opts);
      break;

    case "query":
    case "deep-search": // undocumented alias
      if (!cli.query) {
        console.error("Usage: qmd query [options] <query>");
        process.exit(1);
      }
      await querySearch(cli.query, cli.opts);
      break;

    case "mcp": {
      const sub = cli.args[0]; // stop | status | undefined

      // Cache dir for PID/log files — same dir as the index
      const cacheDir = process.env.XDG_CACHE_HOME
        ? resolve(process.env.XDG_CACHE_HOME, "qmd")
        : resolve(homedir(), ".cache", "qmd");
      const pidPath = resolve(cacheDir, "mcp.pid");

      // Subcommands take priority over flags
      if (sub === "stop") {
        if (!existsSync(pidPath)) {
          console.log("Not running (no PID file).");
          process.exit(0);
        }
        const pid = parseInt(readFileSync(pidPath, "utf-8").trim(), 10);
        // Never signal a non-positive PID: 0 and negative values address whole process
        // groups. A corrupt PID file is simply removed.
        if (!Number.isInteger(pid) || pid <= 0) {
          unlinkSync(pidPath);
          console.log("Removed invalid PID file.");
          process.exit(0);
        }
        try {
          process.kill(pid, 0); // alive?
          process.kill(pid, "SIGTERM");
          unlinkSync(pidPath);
          console.log(`Stopped QMD MCP server (PID ${pid}).`);
        } catch {
          unlinkSync(pidPath);
          console.log("Cleaned up stale PID file (server was not running).");
        }
        process.exit(0);
      }

      if (cli.values.http) {
        const port = Number(cli.values.port) || 8181;

        if (cli.values.daemon) {
          // Guard: check if already running
          if (existsSync(pidPath)) {
            const existingPid = parseInt(readFileSync(pidPath, "utf-8").trim(), 10);
            // Only probe a plausible PID; an invalid file is treated like a stale one.
            if (Number.isInteger(existingPid) && existingPid > 0) {
              try {
                process.kill(existingPid, 0); // alive?
                console.error(`Already running (PID ${existingPid}). Run 'qmd mcp stop' first.`);
                process.exit(1);
              } catch {
                // Stale PID file — continue
              }
            }
          }

          mkdirSync(cacheDir, { recursive: true });
          const logPath = resolve(cacheDir, "mcp.log");
          const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
          const tsxLoader = pathJoin(dirname(fileURLToPath(import.meta.url)), "..", "node_modules", "tsx", "dist", "esm", "index.mjs");
          const child = nodeSpawn(process.execPath, ["--import", tsxLoader, fileURLToPath(import.meta.url), "mcp", "--http", "--port", String(port)], {
            stdio: ["ignore", logFd, logFd],
            detached: true,
          });
          child.unref();
          closeSync(logFd); // parent's copy; child inherited the fd

          // No PID means the spawn failed; do not record "undefined" or report success.
          if (child.pid === undefined) {
            console.error(`Failed to start MCP daemon. See logs: ${logPath}`);
            process.exit(1);
          }

          writeFileSync(pidPath, String(child.pid));
          console.log(`Started on http://localhost:${port}/mcp (PID ${child.pid})`);
          console.log(`Logs: ${logPath}`);
          process.exit(0);
        }

        // Foreground HTTP mode — remove top-level cursor handlers so the
        // async cleanup handlers in startMcpHttpServer actually run.
        process.removeAllListeners("SIGTERM");
        process.removeAllListeners("SIGINT");
        const { startMcpHttpServer } = await import("./mcp.js");
        try {
          await startMcpHttpServer(port);
        } catch (e: any) {
          if (e?.code === "EADDRINUSE") {
            console.error(`Port ${port} already in use. Try a different port with --port.`);
            process.exit(1);
          }
          throw e;
        }
      } else {
        // Default: stdio transport
        const { startMcpServer } = await import("./mcp.js");
        await startMcpServer();
      }
      break;
    }

    case "cleanup": {
      const db = getDb();

      // 1. Clear llm_cache
      const cacheCount = deleteLLMCache(db);
      console.log(`${c.green}✓${c.reset} Cleared ${cacheCount} cached API responses`);

      // 2. Remove orphaned vectors
      const orphanedVecs = cleanupOrphanedVectors(db);
      if (orphanedVecs > 0) {
        console.log(`${c.green}✓${c.reset} Removed ${orphanedVecs} orphaned embedding chunks`);
      } else {
        console.log(`${c.dim}No orphaned embeddings to remove${c.reset}`);
      }

      // 3. Remove inactive documents
      const inactiveDocs = deleteInactiveDocuments(db);
      if (inactiveDocs > 0) {
        console.log(`${c.green}✓${c.reset} Removed ${inactiveDocs} inactive document records`);
      }

      // 4. Vacuum to reclaim space
      vacuumDatabase(db);
      console.log(`${c.green}✓${c.reset} Database vacuumed`);

      closeDb();
      break;
    }

    default:
      console.error(`Unknown command: ${cli.command}`);
      console.error("Run 'qmd --help' for usage.");
      process.exit(1);
  }

  // The "mcp" command is excluded from this dispose-and-exit step.
  if (cli.command !== "mcp") {
    await disposeDefaultLlamaCpp();
    process.exit(0);
  }

} // end if (main module)