@ambicuity/kindx 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/kindx.js ADDED
@@ -0,0 +1,2621 @@
1
+ #!/usr/bin/env node
2
+ import fastGlob from "fast-glob";
3
+ import { execSync, spawn as nodeSpawn } from "child_process";
4
+ import { fileURLToPath } from "url";
5
+ import { dirname, join as pathJoin } from "path";
6
+ import { parseArgs } from "util";
7
+ import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync } from "fs";
8
+ import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, getHashesForEmbedding, clearAllEmbeddings, insertEmbedding, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, } from "./repository.js";
9
+ import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "./inference.js";
10
+ import { escapeXml, escapeCSV, } from "./renderer.js";
11
+ import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, setGlobalContext, listAllContexts, setConfigIndexName, } from "./catalogs.js";
12
+ // Enable production mode - allows using default database path
13
+ // Tests must set INDEX_PATH or use createStore() with explicit path
14
+ enableProductionMode();
15
+ // =============================================================================
16
+ // Store/DB lifecycle (no legacy singletons in repository.ts)
17
+ // =============================================================================
18
// Currently open store handle; stays null until getStore() opens one.
let store = null;
// Database path override set by setIndexName(); undefined means "no override".
let storeDbPathOverride;
/**
 * Return the active store, opening it on first use.
 *
 * @returns {object} The store created by createStore(storeDbPathOverride).
 */
function getStore() {
    if (store) {
        return store;
    }
    store = createStore(storeDbPathOverride);
    return store;
}
26
/**
 * Return the database handle of the active store (opening the store if needed).
 *
 * @returns {object} The `db` property of the store returned by getStore().
 */
function getDb() {
    const { db } = getStore();
    return db;
}
29
/**
 * Close the active store, if any, and forget the handle so the next
 * getStore() call opens a fresh one. Does nothing when no store is open.
 */
function closeDb() {
    if (!store) {
        return;
    }
    store.close();
    store = null;
}
35
/**
 * Report the database path in effect, without opening the store.
 *
 * Preference order: the open store's `dbPath`, then the override set by
 * setIndexName(), then getDefaultDbPath().
 *
 * @returns {string} The database file path.
 */
function getDbPath() {
    const openPath = store?.dbPath;
    if (openPath !== undefined && openPath !== null) {
        return openPath;
    }
    if (storeDbPathOverride !== undefined && storeDbPathOverride !== null) {
        return storeDbPathOverride;
    }
    return getDefaultDbPath();
}
38
/**
 * Select the index (database) used by subsequent store operations.
 *
 * A name containing '/' is treated as a path: it is resolved against the
 * current working directory and its separators are replaced with underscores
 * so the result is usable as a single file name. A falsy name clears the
 * override. Any open store is closed so the next use opens the new index.
 *
 * @param {string|null|undefined} name - Index name or relative/absolute path.
 */
function setIndexName(name) {
    let normalizedName = name;
    // Normalize relative paths to prevent malformed database paths
    if (name && name.includes('/')) {
        // This file is an ES module, where `require` is not defined; use the
        // module-level `resolve` import and the global `process` instead.
        const absolutePath = resolve(process.cwd(), name);
        // Replace path separators with underscores to create a valid filename
        normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
    }
    storeDbPathOverride = normalizedName ? getDefaultDbPath(normalizedName) : undefined;
    // Reset open handle so next use opens the new index
    closeDb();
}
52
/**
 * Ensure the vector table exists on the active store.
 *
 * @param {*} _db - Ignored; the store owns its database handle.
 * @param {number} dimensions - Passed through to the store's ensureVecTable().
 */
function ensureVecTable(_db, dimensions) {
    const activeStore = getStore();
    activeStore.ensureVecTable(dimensions);
}
56
+ // Terminal colors (respects NO_COLOR env)
57
// Colors are emitted only when NO_COLOR is unset and stdout is a terminal.
const useColor = !process.env.NO_COLOR && process.stdout.isTTY;
// Map of style name -> ANSI SGR escape sequence (or "" when colors are off).
const c = Object.fromEntries(Object.entries({
    reset: 0,
    dim: 2,
    bold: 1,
    cyan: 36,
    yellow: 33,
    green: 32,
    magenta: 35,
    blue: 34,
}).map(([styleName, sgrCode]) => [styleName, useColor ? `\x1b[${sgrCode}m` : ""]));
68
+ // Terminal cursor control
69
// Cursor visibility control; escape sequences are written to stderr.
const cursor = {
    hide: () => { process.stderr.write('\x1b[?25l'); },
    show: () => { process.stderr.write('\x1b[?25h'); },
};
// Ensure cursor is restored on exit: each signal re-shows the cursor and
// exits with its own status code.
for (const [signalName, exitStatus] of [['SIGINT', 130], ['SIGTERM', 143]]) {
    process.on(signalName, () => {
        cursor.show();
        process.exit(exitStatus);
    });
}
76
+ // Terminal progress bar using OSC 9;4 escape sequence (TTY only)
77
// Progress reporting is active only when stderr is a terminal.
const isTTY = process.stderr.isTTY;
// Progress indicator driven by OSC 9;4 escape sequences written to stderr.
const progress = (() => {
    // Write one OSC 9;4 sequence with the given state payload (TTY only).
    const emit = (payload) => {
        if (isTTY) {
            process.stderr.write(`\x1b]9;4;${payload}\x07`);
        }
    };
    return {
        /** Report a percentage (rounded to the nearest integer). */
        set(percent) {
            emit(`1;${Math.round(percent)}`);
        },
        /** Remove the progress indicator. */
        clear() {
            emit('0');
        },
        /** Show progress without a known percentage. */
        indeterminate() {
            emit('3');
        },
        /** Put the indicator into its error state. */
        error() {
            emit('2');
        },
    };
})();
96
+ // Format seconds into human-readable ETA
97
/**
 * Format a duration in seconds as a short human-readable ETA.
 *
 * The value is rounded to whole seconds before the unit is chosen, so a
 * fractional input can never render as "60s" or "1m 60s".
 *
 * @param {number} seconds - Remaining time in seconds (may be fractional).
 * @returns {string} "Ns", "Nm Ns", or "Nh Nm".
 */
function formatETA(seconds) {
    const total = Math.round(seconds);
    if (total < 60) {
        return `${total}s`;
    }
    if (total < 3600) {
        return `${Math.floor(total / 60)}m ${total % 60}s`;
    }
    return `${Math.floor(total / 3600)}h ${Math.floor((total % 3600) / 60)}m`;
}
104
+ // Check index health and print warnings/tips
105
/**
 * Inspect index health and print warnings/tips to stderr.
 *
 * Prints a warning when at least 10% of documents need embeddings, a dimmer
 * tip when fewer do, and a staleness tip when the index was last updated 14 or
 * more days ago.
 *
 * @param {object} db - Database handle passed through to getIndexHealth().
 */
function checkIndexHealth(db) {
    const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db);
    // Warn if many docs need embedding
    if (needsEmbedding > 0) {
        // Guard the division: with no counted documents every pending item is
        // outstanding, so report 100% instead of "Infinity%".
        const pct = totalDocs > 0 ? Math.round((needsEmbedding / totalDocs) * 100) : 100;
        if (pct >= 10) {
            process.stderr.write(`${c.yellow}Warning: ${needsEmbedding} documents (${pct}%) need embeddings. Run 'kindx embed' for better results.${c.reset}\n`);
        }
        else {
            process.stderr.write(`${c.dim}Tip: ${needsEmbedding} documents need embeddings. Run 'kindx embed' to index them.${c.reset}\n`);
        }
    }
    // Check if most recent document update is older than 2 weeks
    if (daysStale !== null && daysStale >= 14) {
        process.stderr.write(`${c.dim}Tip: Index last updated ${daysStale} days ago. Run 'kindx update' to refresh.${c.reset}\n`);
    }
}
122
+ // Compute unique display path for a document
123
+ // Always include at least parent folder + filename, add more parent dirs until unique
124
/**
 * Pick the shortest display path for a file that is not already taken.
 *
 * Starts from the last two path segments (parent folder + filename, or fewer
 * if the path is shorter) and prepends parent segments one at a time until the
 * candidate is absent from `existingPaths`. Files under the collection are
 * expressed relative to it, prefixed with the collection directory's name.
 *
 * @param {string} filepath - Path of the file.
 * @param {string} collectionPath - Collection root (trailing '/' tolerated).
 * @param {{has(path: string): boolean}} existingPaths - Display paths in use.
 * @returns {string} A free display path, or `filepath` when every candidate is taken.
 */
function computeDisplayPath(filepath, collectionPath, existingPaths) {
    const root = collectionPath.replace(/\/$/, '');
    const rootName = root.split('/').pop() || '';
    const scopedPath = filepath.startsWith(root + '/')
        ? rootName + filepath.slice(root.length)
        : filepath;
    const segments = scopedPath.split('/').filter(Boolean);
    const minSegments = Math.min(2, segments.length);
    let startIndex = segments.length - minSegments;
    while (startIndex >= 0) {
        const candidate = segments.slice(startIndex).join('/');
        if (!existingPaths.has(candidate)) {
            return candidate;
        }
        startIndex -= 1;
    }
    // Every candidate is already in use: fall back to the full path.
    return filepath;
}
150
/**
 * Describe how long ago `date` was, in the largest fitting unit.
 *
 * @param {Date} date - The moment to compare against the current time.
 * @returns {string} "Ns ago", "Nm ago", "Nh ago", or "Nd ago".
 */
function formatTimeAgo(date) {
    let amount = Math.floor((Date.now() - date.getTime()) / 1000);
    // Each entry: how many of this unit make up the next larger one, and its suffix.
    for (const [unitsPerNext, suffix] of [[60, 's'], [60, 'm'], [24, 'h']]) {
        if (amount < unitsPerNext) {
            return `${amount}${suffix} ago`;
        }
        amount = Math.floor(amount / unitsPerNext);
    }
    return `${amount}d ago`;
}
163
/**
 * Format a millisecond duration: raw milliseconds below one second,
 * otherwise seconds with one decimal place.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} e.g. "250ms" or "1.5s".
 */
function formatMs(ms) {
    return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
}
168
/**
 * Format a byte count using binary (1024-based) units with one decimal place.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} e.g. "512 B", "1.5 KB", "5.0 MB", "3.0 GB".
 */
function formatBytes(bytes) {
    const KIB = 1024;
    if (bytes < KIB) {
        return `${bytes} B`;
    }
    // [exclusive upper bound, divisor, unit label]
    const scaledUnits = [
        [KIB * KIB, KIB, 'KB'],
        [KIB * KIB * KIB, KIB * KIB, 'MB'],
    ];
    for (const [upperBound, divisor, label] of scaledUnits) {
        if (bytes < upperBound) {
            return `${(bytes / divisor).toFixed(1)} ${label}`;
        }
    }
    return `${(bytes / (KIB * KIB * KIB)).toFixed(1)} GB`;
}
177
/**
 * Print an overview of the index to stdout: database path and size, MCP
 * daemon liveness, document/vector counts, per-collection details with their
 * contexts, example commands, model links, device/GPU information and tips.
 * Closes the store before returning.
 *
 * @returns {Promise<void>}
 */
async function showStatus() {
    const dbPath = getDbPath();
    const db = getDb();
    // Collections are defined in YAML; no duplicate cleanup needed.
    // Index size (best-effort: a missing or unreadable file is shown as 0 B)
    let indexSize = 0;
    try {
        indexSize = statSync(dbPath).size;
    }
    catch {
        // Keep the 0 default; a stat failure must not break the status output.
    }
    // Collections info (from YAML + database stats)
    const collections = listCollections(db);
    // Overall stats
    const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get();
    const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get();
    const needsEmbedding = getHashesNeedingEmbedding(db);
    // Most recent update across all collections
    const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get();
    console.log(`${c.bold}KINDX Status${c.reset}\n`);
    console.log(`Index: ${dbPath}`);
    console.log(`Size:  ${formatBytes(indexSize)}`);
    // MCP daemon status (check PID file liveness)
    const mcpCacheDir = process.env.XDG_CACHE_HOME
        ? resolve(process.env.XDG_CACHE_HOME, "kindx")
        : resolve(homedir(), ".cache", "kindx");
    const mcpPidPath = resolve(mcpCacheDir, "mcp.pid");
    if (existsSync(mcpPidPath)) {
        const mcpPid = Number.parseInt(readFileSync(mcpPidPath, "utf-8").trim(), 10);
        let mcpRunning = false;
        // Only probe real process IDs: 0 and negative values address process
        // groups, and NaN means the PID file is corrupt.
        if (Number.isInteger(mcpPid) && mcpPid > 0) {
            try {
                // Signal 0 performs the existence/permission check without
                // delivering a signal.
                process.kill(mcpPid, 0);
                mcpRunning = true;
            }
            catch (err) {
                // EPERM means the process exists but we may not signal it, so
                // it is still running; anything else is treated as not running.
                mcpRunning = err?.code === 'EPERM';
            }
        }
        if (mcpRunning) {
            console.log(`MCP:   ${c.green}running${c.reset} (PID ${mcpPid})`);
        }
        else {
            try {
                // Stale PID file cleaned up silently
                unlinkSync(mcpPidPath);
            }
            catch {
                // Already removed or not removable; cleanup is best-effort.
            }
        }
    }
    console.log("");
    console.log(`${c.bold}Documents${c.reset}`);
    console.log(`  Total:    ${totalDocs.count} files indexed`);
    console.log(`  Vectors:  ${vectorCount.count} embedded`);
    if (needsEmbedding > 0) {
        console.log(`  ${c.yellow}Pending:  ${needsEmbedding} need embedding${c.reset} (run 'kindx embed')`);
    }
    if (mostRecent.latest) {
        const lastUpdate = new Date(mostRecent.latest);
        console.log(`  Updated:  ${formatTimeAgo(lastUpdate)}`);
    }
    // Get all contexts grouped by collection (from YAML)
    const allContexts = listAllContexts();
    const contextsByCollection = new Map();
    for (const ctx of allContexts) {
        // Group contexts by collection name
        if (!contextsByCollection.has(ctx.collection)) {
            contextsByCollection.set(ctx.collection, []);
        }
        contextsByCollection.get(ctx.collection).push({
            path_prefix: ctx.path,
            context: ctx.context
        });
    }
    if (collections.length > 0) {
        console.log(`\n${c.bold}Collections${c.reset}`);
        for (const col of collections) {
            const lastMod = col.last_modified ? formatTimeAgo(new Date(col.last_modified)) : "never";
            const contexts = contextsByCollection.get(col.name) || [];
            console.log(`  ${c.cyan}${col.name}${c.reset} ${c.dim}(kindx://${col.name}/)${c.reset}`);
            console.log(`    ${c.dim}Pattern:${c.reset}  ${col.glob_pattern}`);
            console.log(`    ${c.dim}Files:${c.reset}    ${col.active_count} (updated ${lastMod})`);
            if (contexts.length > 0) {
                console.log(`    ${c.dim}Contexts:${c.reset} ${contexts.length}`);
                for (const ctx of contexts) {
                    // Handle both empty string and '/' as root context
                    const pathDisplay = (ctx.path_prefix === '' || ctx.path_prefix === '/') ? '/' : `/${ctx.path_prefix}`;
                    const contextPreview = ctx.context.length > 60
                        ? ctx.context.substring(0, 57) + '...'
                        : ctx.context;
                    console.log(`      ${c.dim}${pathDisplay}:${c.reset} ${contextPreview}`);
                }
            }
        }
        // Show examples of virtual paths, using the first collection's name
        const firstCollection = collections[0];
        console.log(`\n${c.bold}Examples${c.reset}`);
        console.log(`  ${c.dim}# List files in a collection${c.reset}`);
        if (firstCollection) {
            console.log(`  kindx ls ${firstCollection.name}`);
        }
        console.log(`  ${c.dim}# Get a document${c.reset}`);
        if (firstCollection) {
            console.log(`  kindx get kindx://${firstCollection.name}/path/to/file.md`);
        }
        console.log(`  ${c.dim}# Search within a collection${c.reset}`);
        if (firstCollection) {
            console.log(`  kindx search "query" -c ${firstCollection.name}`);
        }
    }
    else {
        console.log(`\n${c.dim}No collections. Run 'kindx collection add .' to index markdown files.${c.reset}`);
    }
    // Models
    {
        // hf:org/repo/file.gguf → https://huggingface.co/org/repo
        const hfLink = (uri) => {
            const match = uri.match(/^hf:([^/]+\/[^/]+)\//);
            return match ? `https://huggingface.co/${match[1]}` : uri;
        };
        console.log(`\n${c.bold}Models${c.reset}`);
        console.log(`  Embedding:   ${hfLink(DEFAULT_EMBED_MODEL_URI)}`);
        console.log(`  Reranking:   ${hfLink(DEFAULT_RERANK_MODEL_URI)}`);
        console.log(`  Generation:  ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
    }
    // Device / GPU info
    try {
        const llm = getDefaultLlamaCpp();
        const device = await llm.getDeviceInfo();
        console.log(`\n${c.bold}Device${c.reset}`);
        if (device.gpu) {
            console.log(`  GPU:      ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
            if (device.gpuDevices.length > 0) {
                // Deduplicate and count GPUs
                const counts = new Map();
                for (const name of device.gpuDevices) {
                    counts.set(name, (counts.get(name) || 0) + 1);
                }
                const deviceStr = Array.from(counts.entries())
                    .map(([name, count]) => count > 1 ? `${count}× ${name}` : name)
                    .join(', ');
                console.log(`  Devices:  ${deviceStr}`);
            }
            if (device.vram) {
                console.log(`  VRAM:     ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
            }
        }
        else {
            console.log(`  GPU:      ${c.yellow}none${c.reset} (running on CPU — models will be slow)`);
            console.log(`  ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
        }
        console.log(`  CPU:      ${device.cpuCores} math cores`);
    }
    catch {
        // Don't fail status if LLM init fails
    }
    // Tips section
    const tips = [];
    // Check for collections without context
    const collectionsWithoutContext = collections.filter(col => {
        const contexts = contextsByCollection.get(col.name) || [];
        return contexts.length === 0;
    });
    if (collectionsWithoutContext.length > 0) {
        const names = collectionsWithoutContext.map(col => col.name).slice(0, 3).join(', ');
        const more = collectionsWithoutContext.length > 3 ? ` +${collectionsWithoutContext.length - 3} more` : '';
        tips.push(`Add context to collections for better search results: ${names}${more}`);
        tips.push(`  ${c.dim}kindx context add kindx://<name>/ "What this collection contains"${c.reset}`);
        tips.push(`  ${c.dim}kindx context add kindx://<name>/meeting-notes "Weekly team meeting notes"${c.reset}`);
    }
    // Check for collections without update commands
    const collectionsWithoutUpdate = collections.filter(col => {
        const yamlCol = getCollectionFromYaml(col.name);
        return !yamlCol?.update;
    });
    if (collectionsWithoutUpdate.length > 0 && collections.length > 1) {
        const names = collectionsWithoutUpdate.map(col => col.name).slice(0, 3).join(', ');
        const more = collectionsWithoutUpdate.length > 3 ? ` +${collectionsWithoutUpdate.length - 3} more` : '';
        tips.push(`Add update commands to keep collections fresh: ${names}${more}`);
        tips.push(`  ${c.dim}kindx collection update-cmd <name> 'git stash && git pull --rebase --ff-only && git stash pop'${c.reset}`);
    }
    if (tips.length > 0) {
        console.log(`\n${c.bold}Tips${c.reset}`);
        for (const tip of tips) {
            console.log(`  ${tip}`);
        }
    }
    closeDb();
}
354
/**
 * Re-index every collection, or only the one named by `collectionFilter`.
 *
 * For each collection, the optional `update` command from its YAML entry is
 * run through `bash -c` in the collection directory before indexFiles() is
 * called. A failing update command terminates the process with that command's
 * exit code (or 1 if it could not be run). Afterwards the number of hashes
 * still needing embeddings is reported.
 *
 * @param {string} [collectionFilter] - Name of the single collection to update.
 * @returns {Promise<void>}
 */
async function updateCollections(collectionFilter) {
    const db = getDb();
    // Collections are defined in YAML; no duplicate cleanup needed.
    // Clear the cache before re-indexing
    clearCache(db);
    let targets = listCollections(db);
    if (targets.length === 0) {
        console.log(`${c.dim}No collections found. Run 'kindx collection add .' to index markdown files.${c.reset}`);
        closeDb();
        return;
    }
    // Narrow to a single collection if --collection flag was provided
    if (collectionFilter) {
        targets = targets.filter(candidate => candidate.name === collectionFilter);
        if (targets.length === 0) {
            console.error(`${c.yellow}Collection not found: ${collectionFilter}${c.reset}`);
            console.error(`Run 'kindx collection list' to see available collections.`);
            closeDb();
            process.exit(1);
        }
    }
    // Indent every line of captured command output for display.
    const indentOutput = (text) => text.trim().split('\n').map(line => `    ${line}`).join('\n');
    // Don't close db here - indexFiles will reuse it and close at the end
    console.log(`${c.bold}Updating ${targets.length} collection(s)...${c.reset}\n`);
    for (const [index, col] of targets.entries()) {
        if (!col) {
            continue;
        }
        console.log(`${c.cyan}[${index + 1}/${targets.length}]${c.reset} ${c.bold}${col.name}${c.reset} ${c.dim}(${col.glob_pattern})${c.reset}`);
        // Execute custom update command if specified in YAML
        const yamlCol = getCollectionFromYaml(col.name);
        if (yamlCol?.update) {
            console.log(`${c.dim}    Running update command: ${yamlCol.update}${c.reset}`);
            try {
                const child = nodeSpawn("bash", ["-c", yamlCol.update], {
                    cwd: col.pwd,
                    stdio: ["ignore", "pipe", "pipe"],
                });
                // Collect both streams and settle once the child has closed.
                const [stdoutText, stderrText, exitCode] = await new Promise((settle, fail) => {
                    let capturedOut = "";
                    let capturedErr = "";
                    child.stdout?.on("data", (chunk) => { capturedOut += chunk.toString(); });
                    child.stderr?.on("data", (chunk) => { capturedErr += chunk.toString(); });
                    child.on("error", fail);
                    child.on("close", (code) => settle([capturedOut, capturedErr, code ?? 1]));
                });
                if (stdoutText.trim()) {
                    console.log(indentOutput(stdoutText));
                }
                if (stderrText.trim()) {
                    console.log(indentOutput(stderrText));
                }
                if (exitCode !== 0) {
                    console.log(`${c.yellow}✗ Update command failed with exit code ${exitCode}${c.reset}`);
                    process.exit(exitCode);
                }
            }
            catch (err) {
                console.log(`${c.yellow}✗ Update command failed: ${err}${c.reset}`);
                process.exit(1);
            }
        }
        await indexFiles(col.pwd, col.glob_pattern, col.name, true, yamlCol?.ignore);
        console.log("");
    }
    // Check if any documents need embedding (show once at end)
    const needsEmbedding = getHashesNeedingEmbedding(getDb());
    closeDb();
    console.log(`${c.green}✓ All collections updated.${c.reset}`);
    if (needsEmbedding > 0) {
        console.log(`\nRun 'kindx embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
    }
}
427
/**
 * Find the YAML-defined collection that contains a filesystem path.
 *
 * When several collections contain the path, the one with the longest root
 * path wins.
 *
 * @param {*} db - Unused; kept for call-site compatibility.
 * @param {string} fsPath - Filesystem path to look up (resolved via getRealPath).
 * @returns {{collectionName: string, relativePath: string} | null} The owning
 *   collection and the path relative to its root ('' for the root itself), or
 *   null when no collection contains the path.
 */
function detectCollectionFromPath(db, fsPath) {
    const realPath = getRealPath(fsPath);
    let winner = null;
    for (const candidate of yamlListCollections()) {
        const containsPath = realPath === candidate.path || realPath.startsWith(candidate.path + '/');
        if (!containsPath) {
            continue;
        }
        if (!winner || candidate.path.length > winner.path.length) {
            winner = { name: candidate.name, path: candidate.path };
        }
    }
    if (!winner) {
        return null;
    }
    // The winner either equals the path (root) or is a strict prefix of it.
    const relativePath = realPath === winner.path
        ? ''
        : realPath.slice(winner.path.length + 1);
    return { collectionName: winner.name, relativePath };
}
459
/**
 * Attach context text to a collection path, or set the global context.
 *
 * `pathArg` may be '/' (global context), a `kindx://collection/path` virtual
 * path, or a filesystem path ('.', '~/...', relative or absolute) that lies
 * inside a configured collection. An empty `pathArg` means the current
 * directory. Exits the process with status 1 when the target cannot be
 * resolved to a collection.
 *
 * @param {string} pathArg - Target path (see above).
 * @param {string} contextText - Context description to store.
 * @returns {Promise<void>}
 */
async function contextAdd(pathArg, contextText) {
    const db = getDb();
    // Print the confirmation headline plus the stored text, then release the DB.
    const confirm = (headline) => {
        console.log(headline);
        console.log(`${c.dim}Context: ${contextText}${c.reset}`);
        closeDb();
    };
    // Handle "/" as global context (applies to all collections)
    if (pathArg === '/') {
        setGlobalContext(contextText);
        confirm(`${c.green}✓${c.reset} Set global context`);
        return;
    }
    // Resolve path - defaults to current directory if not provided
    let fsPath = pathArg || '.';
    const isCurrentDir = fsPath === '.' || fsPath === './';
    if (isCurrentDir) {
        fsPath = getPwd();
    }
    else if (fsPath.startsWith('~/')) {
        fsPath = homedir() + fsPath.slice(1);
    }
    else if (!fsPath.startsWith('/') && !fsPath.startsWith('kindx://')) {
        fsPath = resolve(getPwd(), fsPath);
    }
    // Handle virtual paths (kindx://collection/path)
    if (isVirtualPath(fsPath)) {
        const parsed = parseVirtualPath(fsPath);
        if (!parsed) {
            console.error(`${c.yellow}Invalid virtual path: ${fsPath}${c.reset}`);
            process.exit(1);
        }
        const coll = getCollectionFromYaml(parsed.collectionName);
        if (!coll) {
            console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`);
            process.exit(1);
        }
        yamlAddContext(parsed.collectionName, parsed.path, contextText);
        const virtualTarget = parsed.path
            ? `kindx://${parsed.collectionName}/${parsed.path}`
            : `kindx://${parsed.collectionName}/ (collection root)`;
        confirm(`${c.green}✓${c.reset} Added context for: ${virtualTarget}`);
        return;
    }
    // Detect collection from filesystem path
    const detected = detectCollectionFromPath(db, fsPath);
    if (!detected) {
        console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
        console.error(`${c.dim}Run 'kindx status' to see indexed collections${c.reset}`);
        process.exit(1);
    }
    yamlAddContext(detected.collectionName, detected.relativePath, contextText);
    const detectedTarget = detected.relativePath
        ? `kindx://${detected.collectionName}/${detected.relativePath}`
        : `kindx://${detected.collectionName}/`;
    confirm(`${c.green}✓${c.reset} Added context for: ${detectedTarget}`);
}
514
/**
 * Print every configured context, grouped under its collection name.
 * Prints a hint instead when no contexts exist. Opens the store on entry and
 * closes it before returning.
 */
function contextList() {
    getDb();
    const entries = listAllContexts();
    if (entries.length === 0) {
        console.log(`${c.dim}No contexts configured. Use 'kindx context add' to add one.${c.reset}`);
        closeDb();
        return;
    }
    console.log(`\n${c.bold}Configured Contexts${c.reset}\n`);
    let currentCollection = '';
    entries.forEach(({ collection, path, context }) => {
        // Print the collection heading once, when the group changes.
        if (collection !== currentCollection) {
            console.log(`${c.cyan}${collection}${c.reset}`);
            currentCollection = collection;
        }
        console.log(path ? `  ${path}` : '  / (root)');
        console.log(`    ${c.dim}${context}${c.reset}`);
    });
    closeDb();
}
535
/**
 * Remove the context stored for a path, or the global context for '/'.
 *
 * Accepts the same path forms as the add command: '/', a
 * `kindx://collection/path` virtual path, or a filesystem path inside a
 * configured collection. Exits the process with status 1 when the path cannot
 * be resolved or no context is stored for it.
 *
 * @param {string} pathArg - Path whose context should be removed.
 */
function contextRemove(pathArg) {
    // Report a problem on stderr and terminate with a failure status.
    const abort = (message) => {
        console.error(`${c.yellow}${message}${c.reset}`);
        process.exit(1);
    };
    if (pathArg === '/') {
        // Remove global context
        setGlobalContext(undefined);
        console.log(`${c.green}✓${c.reset} Removed global context`);
        return;
    }
    // Handle virtual paths
    if (isVirtualPath(pathArg)) {
        const parsed = parseVirtualPath(pathArg);
        if (!parsed) {
            abort(`Invalid virtual path: ${pathArg}`);
        }
        const coll = getCollectionFromYaml(parsed.collectionName);
        if (!coll) {
            abort(`Collection not found: ${parsed.collectionName}`);
        }
        if (!yamlRemoveContext(coll.name, parsed.path)) {
            abort(`No context found for: ${pathArg}`);
        }
        console.log(`${c.green}✓${c.reset} Removed context for: ${pathArg}`);
        return;
    }
    // Handle filesystem paths
    let fsPath = pathArg;
    const isCurrentDir = fsPath === '.' || fsPath === './';
    if (isCurrentDir) {
        fsPath = getPwd();
    }
    else if (fsPath.startsWith('~/')) {
        fsPath = homedir() + fsPath.slice(1);
    }
    else if (!fsPath.startsWith('/')) {
        fsPath = resolve(getPwd(), fsPath);
    }
    const detected = detectCollectionFromPath(getDb(), fsPath);
    closeDb();
    if (!detected) {
        abort(`Path is not in any indexed collection: ${fsPath}`);
    }
    const target = `kindx://${detected.collectionName}/${detected.relativePath}`;
    if (!yamlRemoveContext(detected.collectionName, detected.relativePath)) {
        abort(`No context found for: ${target}`);
    }
    console.log(`${c.green}✓${c.reset} Removed context for: ${target}`);
}
587
/**
 * Print one indexed document to stdout.
 *
 * `filename` may be a docid, a `kindx://collection/path` virtual path, a
 * `collection/path` pair, or a filesystem path; a trailing `:N` selects the
 * starting line when `fromLine` is not given. Lookup falls back from exact
 * matches to path-suffix matches. If a folder context exists for the document
 * it is printed as a header first. Exits the process with status 1 when
 * nothing matches.
 *
 * @param {string} filename - Document reference (see above).
 * @param {number} [fromLine] - 1-based first line to print.
 * @param {number} [maxLines] - Maximum number of lines to print.
 * @param {boolean} [lineNumbers] - Prefix output lines with line numbers.
 */
function getDocument(filename, fromLine, maxLines, lineNumbers) {
    const db = getDb();
    // Shared SELECT head for every lookup below.
    const selectDoc = `
      SELECT d.collection as collectionName, d.path, content.doc as body
      FROM documents d
      JOIN content ON content.hash = d.hash`;
    // Exact match on collection + path.
    const findExact = (collection, path) => db.prepare(`${selectDoc}
      WHERE d.collection = ? AND d.path = ? AND d.active = 1
    `).get(collection, path);
    // First active document in the collection whose path ends with `path`.
    const findBySuffix = (collection, path) => db.prepare(`${selectDoc}
      WHERE d.collection = ? AND d.path LIKE ? AND d.active = 1
      LIMIT 1
    `).get(collection, `%${path}`);
    // First active document in any collection whose path ends with `suffix`.
    const findAnywhereBySuffix = (suffix) => db.prepare(`${selectDoc}
      WHERE d.path LIKE ? AND d.active = 1
      LIMIT 1
    `).get(`%${suffix}`);
    // Exact match first, then the suffix fallback.
    const findInCollection = (collection, path) => findExact(collection, path) || findBySuffix(collection, path);
    // Report the failure, release the database and terminate.
    const fail = (message) => {
        console.error(message);
        closeDb();
        process.exit(1);
    };
    // Parse :linenum suffix from filename (e.g., "file.md:100")
    let inputPath = filename;
    let requestedLine = fromLine;
    const colonMatch = inputPath.match(/:(\d+)$/);
    if (colonMatch && !requestedLine) {
        requestedLine = parseInt(colonMatch[1], 10);
        inputPath = inputPath.slice(0, -colonMatch[0].length);
    }
    // Handle docid lookup (#abc123, abc123, "#abc123", "abc123", etc.)
    if (isDocid(inputPath)) {
        const docidMatch = findDocumentByDocid(db, inputPath);
        if (docidMatch) {
            inputPath = docidMatch.filepath;
        }
        else {
            fail(`Document not found: ${filename}`);
        }
    }
    let doc = null;
    // Handle virtual paths (kindx://collection/path)
    if (isVirtualPath(inputPath)) {
        const parsed = parseVirtualPath(inputPath);
        if (!parsed) {
            fail(`Invalid virtual path: ${inputPath}`);
        }
        doc = findInCollection(parsed.collectionName, parsed.path);
    }
    else {
        // Try to interpret as collection/path format first (before filesystem path)
        // If path is relative (no / or ~ prefix), check if first component is a collection name
        if (!inputPath.startsWith('/') && !inputPath.startsWith('~')) {
            const [possibleCollection, ...restParts] = inputPath.split('/');
            if (restParts.length >= 1 && possibleCollection) {
                // Check if this collection exists
                const collExists = db.prepare(`
      SELECT 1 FROM documents WHERE collection = ? AND active = 1 LIMIT 1
    `).get(possibleCollection);
                if (collExists) {
                    doc = findInCollection(possibleCollection, restParts.join('/'));
                }
            }
        }
        // If not found as collection/path, handle as filesystem paths
        if (!doc) {
            let fsPath = inputPath;
            // Expand ~ to home directory
            if (fsPath.startsWith('~/')) {
                fsPath = homedir() + fsPath.slice(1);
            }
            else if (!fsPath.startsWith('/')) {
                // Relative path - resolve from current directory
                fsPath = resolve(getPwd(), fsPath);
            }
            fsPath = getRealPath(fsPath);
            // Try to detect which collection contains this path
            const detected = detectCollectionFromPath(db, fsPath);
            if (detected) {
                // Found collection - query by collection name + relative path
                doc = findExact(detected.collectionName, detected.relativePath);
            }
            // Fuzzy match by filename (last component of path)
            if (!doc) {
                const baseName = inputPath.split('/').pop() || inputPath;
                doc = findAnywhereBySuffix(baseName);
            }
        }
    }
    // Ensure doc is not null before proceeding
    if (!doc) {
        fail(`Document not found: ${filename}`);
    }
    // Get context for this file
    const context = getContextForPath(db, doc.collectionName, doc.path);
    let output = doc.body;
    const startLine = requestedLine || 1;
    // Apply line filtering if specified
    if (requestedLine !== undefined || maxLines !== undefined) {
        const lines = output.split('\n');
        const start = startLine - 1; // Convert to 0-indexed
        const end = maxLines !== undefined ? start + maxLines : lines.length;
        output = lines.slice(start, end).join('\n');
    }
    // Add line numbers if requested
    if (lineNumbers) {
        output = addLineNumbers(output, startLine);
    }
    // Output context header if exists
    if (context) {
        console.log(`Folder Context: ${context}\n---\n`);
    }
    console.log(output);
    closeDb();
}
747
/**
 * Multi-get: fetch several documents at once and print them in one format.
 *
 * `pattern` is treated as a comma-separated list of names when it contains a
 * comma and no `*` / `?`; otherwise it is handed to `matchFilesByGlob`.
 * Each listed name may be a virtual path, an exact stored path, or a path
 * suffix (tried in that order for non-virtual names).
 *
 * @param {string} pattern - Glob pattern or comma-separated list of names.
 * @param {number} [maxLines] - When defined, keep only the first `maxLines`
 *   lines of each body and append a truncation marker.
 * @param {number} [maxBytes] - Documents whose recorded length exceeds this
 *   are reported as skipped instead of being printed.
 * @param {string} [format] - "json", "csv", "files", "md" or "xml"; any other
 *   value selects the plain CLI layout.
 * @returns {void} Output goes to stdout; lookup failures go to stderr. Exits
 *   the process with code 1 when a glob pattern matches nothing.
 */
function multiGet(pattern, maxLines, maxBytes = DEFAULT_MULTI_GET_MAX_BYTES, format = "cli") {
    const db = getDb();
    // One SELECT shared by every lookup strategy; only the WHERE clause varies.
    const lookupEntry = (whereClause, ...params) => db.prepare(`
        SELECT
            'kindx://' || d.collection || '/' || d.path as virtual_path,
            LENGTH(content.doc) as body_length,
            d.collection,
            d.path
        FROM documents d
        JOIN content ON content.hash = d.hash
        WHERE ${whereClause} AND d.active = 1
        LIMIT 1
    `).get(...params);
    // Escape LIKE metacharacters so `_` and `%` in a file name match literally.
    const escapeLike = (value) => value.replace(/[\\%_]/g, '\\$&');
    // Resolve one user-supplied name to a document row (falsy when not found).
    const resolveName = (name) => {
        if (isVirtualPath(name)) {
            const parsed = parseVirtualPath(name);
            return parsed
                ? lookupEntry("d.collection = ? AND d.path = ?", parsed.collectionName, parsed.path)
                : null;
        }
        // Exact path first, then fall back to a suffix match.
        return lookupEntry("d.path = ?", name)
            || lookupEntry("d.path LIKE ? ESCAPE '\\'", `%${escapeLike(name)}`);
    };
    // Check if it's a comma-separated list or a glob pattern
    const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?');
    let files;
    if (isCommaSeparated) {
        files = [];
        const names = pattern.split(',').map(s => s.trim()).filter(Boolean);
        for (const name of names) {
            const doc = resolveName(name);
            if (!doc) {
                // A missing entry is reported but does not abort the other lookups.
                console.error(`File not found: ${name}`);
                continue;
            }
            files.push({
                filepath: doc.virtual_path,
                displayPath: doc.virtual_path,
                bodyLength: doc.body_length,
                collection: doc.collection,
                path: doc.path
            });
        }
    }
    else {
        // Glob pattern - collection/path are derived from the virtual path below.
        files = matchFilesByGlob(db, pattern).map(f => ({
            ...f,
            collection: undefined,
            path: undefined
        }));
        if (files.length === 0) {
            console.error(`No files matched pattern: ${pattern}`);
            closeDb();
            process.exit(1);
        }
    }
    // Collect results for structured output
    const bodyStmt = db.prepare(`
        SELECT content.doc as body, d.title
        FROM documents d
        JOIN content ON content.hash = d.hash
        WHERE d.collection = ? AND d.path = ? AND d.active = 1
    `);
    const results = [];
    for (const file of files) {
        // Parse virtual path to get collection info if not already available
        let collection = file.collection;
        let path = file.path;
        if (!collection || !path) {
            const parsed = parseVirtualPath(file.filepath);
            if (parsed) {
                collection = parsed.collectionName;
                path = parsed.path;
            }
        }
        const context = collection && path ? getContextForPath(db, collection, path) : null;
        const fallbackTitle = file.displayPath.split('/').pop() || file.displayPath;
        // Oversized documents are listed with a hint instead of their body.
        if (file.bodyLength > maxBytes) {
            results.push({
                file: file.filepath,
                displayPath: file.displayPath,
                title: fallbackTitle,
                body: "",
                context,
                skipped: true,
                skipReason: `File too large (${Math.round(file.bodyLength / 1024)}KB > ${Math.round(maxBytes / 1024)}KB). Use 'kindx get ${file.displayPath}' to retrieve.`,
            });
            continue;
        }
        if (!collection || !path) {
            continue;
        }
        const doc = bodyStmt.get(collection, path);
        if (!doc) {
            continue;
        }
        let body = doc.body;
        // Apply line limit if specified
        if (maxLines !== undefined) {
            const lines = body.split('\n');
            body = lines.slice(0, maxLines).join('\n');
            if (lines.length > maxLines) {
                body += `\n\n[... truncated ${lines.length - maxLines} more lines]`;
            }
        }
        results.push({
            file: file.filepath,
            displayPath: file.displayPath,
            title: doc.title || fallbackTitle,
            body,
            context,
            skipped: false,
        });
    }
    closeDb();
    // Output based on format
    switch (format) {
        case "json": {
            const output = results.map(r => ({
                file: r.displayPath,
                title: r.title,
                ...(r.context && { context: r.context }),
                ...(r.skipped ? { skipped: true, reason: r.skipReason } : { body: r.body }),
            }));
            console.log(JSON.stringify(output, null, 2));
            break;
        }
        case "csv": {
            // Quote any field containing a delimiter, a quote or a line break (CR or LF).
            const escapeField = (val) => {
                if (val === null || val === undefined) {
                    return "";
                }
                const str = String(val);
                return /[",\r\n]/.test(str) ? `"${str.replace(/"/g, '""')}"` : str;
            };
            console.log("file,title,context,skipped,body");
            for (const r of results) {
                console.log([r.displayPath, r.title, r.context, r.skipped ? "true" : "false", r.skipped ? r.skipReason : r.body].map(escapeField).join(","));
            }
            break;
        }
        case "files": {
            for (const r of results) {
                const ctx = r.context ? `,"${r.context.replace(/"/g, '""')}"` : "";
                const status = r.skipped ? "[SKIPPED]" : "";
                console.log(`${r.displayPath}${ctx}${status ? `,${status}` : ""}`);
            }
            break;
        }
        case "md": {
            for (const r of results) {
                console.log(`## ${r.displayPath}\n`);
                if (r.title && r.title !== r.displayPath) {
                    console.log(`**Title:** ${r.title}\n`);
                }
                if (r.context) {
                    console.log(`**Context:** ${r.context}\n`);
                }
                if (r.skipped) {
                    console.log(`> ${r.skipReason}\n`);
                }
                else {
                    console.log("```");
                    console.log(r.body);
                    console.log("```\n");
                }
            }
            break;
        }
        case "xml": {
            console.log('<?xml version="1.0" encoding="UTF-8"?>');
            console.log("<documents>");
            for (const r of results) {
                console.log(" <document>");
                console.log(` <file>${escapeXml(r.displayPath)}</file>`);
                console.log(` <title>${escapeXml(r.title)}</title>`);
                if (r.context) {
                    console.log(` <context>${escapeXml(r.context)}</context>`);
                }
                if (r.skipped) {
                    console.log(` <skipped>true</skipped>`);
                    console.log(` <reason>${escapeXml(r.skipReason || "")}</reason>`);
                }
                else {
                    console.log(` <body>${escapeXml(r.body)}</body>`);
                }
                console.log(" </document>");
            }
            console.log("</documents>");
            break;
        }
        default: {
            // CLI format (default)
            for (const r of results) {
                console.log(`\n${'='.repeat(60)}`);
                console.log(`File: ${r.displayPath}`);
                console.log(`${'='.repeat(60)}\n`);
                if (r.skipped) {
                    console.log(`[SKIPPED: ${r.skipReason}]`);
                    continue;
                }
                if (r.context) {
                    console.log(`Folder Context: ${r.context}\n---\n`);
                }
                console.log(r.body);
            }
        }
    }
}
975
/**
 * List files in the virtual file tree.
 *
 * With no argument, prints every configured collection with its active file
 * count. With an argument, prints one `ls -l` style line (size, time, virtual
 * path) per active document in the collection, optionally restricted to
 * paths starting with the given prefix.
 *
 * @param {string} [pathArg] - `kindx://collection/path`, `collection`, or
 *   `collection/path`.
 * @returns {void} Exits the process with code 1 on an invalid virtual path or
 *   an unknown collection.
 */
function listFiles(pathArg) {
    const db = getDb();
    if (!pathArg) {
        // No argument - list all collections
        const yamlCollections = yamlListCollections();
        if (yamlCollections.length === 0) {
            console.log("No collections found. Run 'kindx collection add .' to index files.");
            closeDb();
            return;
        }
        // Prepare the count query once and reuse it for every collection.
        const countStmt = db.prepare(`
            SELECT COUNT(*) as file_count
            FROM documents d
            WHERE d.collection = ? AND d.active = 1
        `);
        const collections = yamlCollections.map(coll => ({
            name: coll.name,
            file_count: countStmt.get(coll.name)?.file_count || 0
        }));
        console.log(`${c.bold}Collections:${c.reset}\n`);
        for (const coll of collections) {
            console.log(` ${c.dim}kindx://${c.reset}${c.cyan}${coll.name}/${c.reset} ${c.dim}(${coll.file_count} files)${c.reset}`);
        }
        closeDb();
        return;
    }
    // Parse the path argument
    let collectionName;
    let pathPrefix = null;
    if (pathArg.startsWith('kindx://')) {
        // Virtual path format: kindx://collection/path
        const parsed = parseVirtualPath(pathArg);
        if (!parsed) {
            console.error(`Invalid virtual path: ${pathArg}`);
            closeDb();
            process.exit(1);
        }
        collectionName = parsed.collectionName;
        pathPrefix = parsed.path;
    }
    else {
        // Just collection name or collection/path
        const parts = pathArg.split('/');
        collectionName = parts[0] || '';
        if (parts.length > 1) {
            pathPrefix = parts.slice(1).join('/');
        }
    }
    // Get the collection
    const coll = getCollectionFromYaml(collectionName);
    if (!coll) {
        console.error(`Collection not found: ${collectionName}`);
        console.error(`Run 'kindx ls' to see available collections.`);
        closeDb();
        process.exit(1);
    }
    // The prefix filter is optional. LIKE metacharacters in the prefix are
    // escaped so `_` and `%` in a directory name are matched literally.
    const prefixClause = pathPrefix ? "AND d.path LIKE ? ESCAPE '\\'" : "";
    const params = pathPrefix
        ? [coll.name, `${pathPrefix.replace(/[\\%_]/g, '\\$&')}%`]
        : [coll.name];
    const files = db.prepare(`
        SELECT d.path, d.title, d.modified_at, LENGTH(ct.doc) as size
        FROM documents d
        JOIN content ct ON d.hash = ct.hash
        WHERE d.collection = ? ${prefixClause} AND d.active = 1
        ORDER BY d.path
    `).all(...params);
    if (files.length === 0) {
        if (pathPrefix) {
            console.log(`No files found under kindx://${collectionName}/${pathPrefix}`);
        }
        else {
            console.log(`No files found in collection: ${collectionName}`);
        }
        closeDb();
        return;
    }
    // Format each size once. The column width is folded with reduce because
    // Math.max(...array) passes one argument per file and can overflow the
    // call stack for very large collections.
    const rows = files.map(file => ({ file, sizeLabel: formatBytes(file.size) }));
    const maxSize = rows.reduce((width, row) => Math.max(width, row.sizeLabel.length), 0);
    // Output in ls -l style
    for (const { file, sizeLabel } of rows) {
        const sizeStr = sizeLabel.padStart(maxSize);
        const timeStr = formatLsTime(new Date(file.modified_at));
        // Dim the kindx:// prefix, highlight the filename
        console.log(`${sizeStr} ${timeStr} ${c.dim}kindx://${collectionName}/${c.reset}${c.cyan}${file.path}${c.reset}`);
    }
    closeDb();
}
1083
/**
 * Format a timestamp the way `ls -l` does.
 *
 * Dates older than roughly six months (6 x 30 days before now) are rendered
 * as "Mon DD YYYY"; everything else as "Mon DD HH:MM". The day is
 * space-padded to two characters, hours and minutes are zero-padded.
 *
 * @param {Date} date - Timestamp to format (local time fields are used).
 * @returns {string} The formatted timestamp.
 */
function formatLsTime(date) {
    const MONTH_NAMES = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    const HALF_YEAR_MS = 6 * 30 * 24 * 60 * 60 * 1000;
    const pad = (value, fill) => value.toString().padStart(2, fill);
    const monthLabel = MONTH_NAMES[date.getMonth()];
    const dayLabel = pad(date.getDate(), ' ');
    const cutoff = new Date(Date.now() - HALF_YEAR_MS);
    // Recent entries show a clock time; older ones show the year instead.
    if (!(date < cutoff)) {
        return `${monthLabel} ${dayLabel} ${pad(date.getHours(), '0')}:${pad(date.getMinutes(), '0')}`;
    }
    return `${monthLabel} ${dayLabel} ${date.getFullYear()}`;
}
1101
+ // Collection management commands
1102
/**
 * Print every collection known to the database, one block per collection:
 * name and virtual root, glob pattern, optional ignore list from the YAML
 * config, active file count and a relative "updated" time.
 *
 * Collections whose YAML entry has `includeByDefault === false` are tagged
 * as excluded.
 *
 * @returns {void}
 */
function collectionList() {
    const db = getDb();
    const rows = listCollections(db);
    if (rows.length === 0) {
        console.log("No collections found. Run 'kindx collection add .' to create one.");
        closeDb();
        return;
    }
    console.log(`${c.bold}Collections (${rows.length}):${c.reset}\n`);
    rows.forEach((row) => {
        // Fall back to "now" when the collection has no recorded modification time.
        const lastTouched = row.last_modified ? new Date(row.last_modified) : new Date();
        const age = formatTimeAgo(lastTouched);
        // The YAML entry carries the includeByDefault flag and the ignore list.
        const config = getCollectionFromYaml(row.name);
        const marker = config?.includeByDefault === false ? ` ${c.yellow}[excluded]${c.reset}` : '';
        console.log(`${c.cyan}${row.name}${c.reset} ${c.dim}(kindx://${row.name}/)${c.reset}${marker}`);
        console.log(` ${c.dim}Pattern:${c.reset} ${row.glob_pattern}`);
        if (config?.ignore?.length) {
            console.log(` ${c.dim}Ignore:${c.reset} ${config.ignore.join(', ')}`);
        }
        console.log(` ${c.dim}Files:${c.reset} ${row.active_count}`);
        console.log(` ${c.dim}Updated:${c.reset} ${age}`);
        console.log();
    });
    closeDb();
}
1129
/**
 * Register a new collection in the YAML config and index its files.
 *
 * Exits the process with code 1 when the name is already taken or when
 * another collection already covers the same path and glob pattern.
 *
 * @param {string} pwd - Root directory of the collection.
 * @param {string} globPattern - Glob selecting the files to index.
 * @param {string} [name] - Collection name; defaults to the last non-empty
 *   segment of `pwd`, or "root" when there is none.
 * @returns {Promise<void>}
 */
async function collectionAdd(pwd, globPattern, name) {
    // Derive the name from the directory when the caller did not supply one.
    const collName = name || pwd.split('/').filter(Boolean).pop() || 'root';
    // Reject a name that is already present in the YAML config.
    if (getCollectionFromYaml(collName)) {
        console.error(`${c.yellow}Collection '${collName}' already exists.${c.reset}`);
        console.error(`Use a different name with --name <name>`);
        process.exit(1);
    }
    // Reject a second collection over the same path + pattern pair.
    const duplicate = yamlListCollections().find((entry) => entry.path === pwd && entry.pattern === globPattern);
    if (duplicate) {
        console.error(`${c.yellow}A collection already exists for this path and pattern:${c.reset}`);
        console.error(` Name: ${duplicate.name} (kindx://${duplicate.name}/)`);
        console.error(` Pattern: ${globPattern}`);
        console.error(`\nUse 'kindx update' to re-index it, or remove it first with 'kindx collection remove ${duplicate.name}'`);
        process.exit(1);
    }
    // Persist the definition first, then build the index from it.
    const catalogs = await import("./catalogs.js");
    catalogs.addCollection(collName, pwd, globPattern);
    console.log(`Creating collection '${collName}'...`);
    const created = getCollectionFromYaml(collName);
    await indexFiles(pwd, globPattern, collName, false, created?.ignore);
    console.log(`${c.green}✓${c.reset} Collection '${collName}' created successfully`);
}
1162
/**
 * Remove a collection and report how much was deleted.
 *
 * Exits the process with code 1 when the collection is not defined in the
 * YAML config.
 *
 * @param {string} name - Name of the collection to remove.
 * @returns {void}
 */
function collectionRemove(name) {
    const isKnown = Boolean(getCollectionFromYaml(name));
    if (!isKnown) {
        console.error(`${c.yellow}Collection not found: ${name}${c.reset}`);
        console.error(`Run 'kindx collection list' to see available collections.`);
        process.exit(1);
    }
    const db = getDb();
    const summary = removeCollection(db, name);
    closeDb();
    console.log(`${c.green}✓${c.reset} Removed collection '${name}'`);
    console.log(` Deleted ${summary.deletedDocs} documents`);
    // The orphan line is only printed when something was actually cleaned.
    if (summary.cleanedHashes > 0) {
        console.log(` Cleaned up ${summary.cleanedHashes} orphaned content hashes`);
    }
}
1179
/**
 * Rename a collection.
 *
 * Exits the process with code 1 when `oldName` is not defined in the YAML
 * config or when `newName` is already taken.
 *
 * @param {string} oldName - Current collection name.
 * @param {string} newName - Desired collection name.
 * @returns {void}
 */
function collectionRename(oldName, newName) {
    // The source collection must exist...
    if (!getCollectionFromYaml(oldName)) {
        console.error(`${c.yellow}Collection not found: ${oldName}${c.reset}`);
        console.error(`Run 'kindx collection list' to see available collections.`);
        process.exit(1);
    }
    // ...and the target name must still be free.
    if (getCollectionFromYaml(newName)) {
        console.error(`${c.yellow}Collection name already exists: ${newName}${c.reset}`);
        console.error(`Choose a different name or remove the existing collection first.`);
        process.exit(1);
    }
    const db = getDb();
    renameCollection(db, oldName, newName);
    closeDb();
    const fromRoot = `${c.cyan}kindx://${oldName}/${c.reset}`;
    const toRoot = `${c.cyan}kindx://${newName}/${c.reset}`;
    console.log(`${c.green}✓${c.reset} Renamed collection '${oldName}' to '${newName}'`);
    console.log(` Virtual paths updated: ${fromRoot} → ${toRoot}`);
}
1200
/**
 * Scan a directory and bring one collection's document index up to date.
 *
 * Matching files are read, hashed and compared with the active document for
 * the same collection + path: new files are inserted, changed files get a
 * new content hash, and unchanged files only have their title refreshed when
 * it differs. Active documents whose path was not seen in this scan are
 * deactivated, and orphaned content is cleaned up afterwards.
 *
 * @param {string} [pwd] - Directory to scan; defaults to `getPwd()`.
 * @param {string} [globPattern] - Glob of files to index.
 * @param {string} collectionName - Target collection (required).
 * @param {boolean} [suppressEmbedNotice] - When true, do not print the
 *   "run 'kindx embed'" hint.
 * @param {string[]} [ignorePatterns] - Extra ignore globs, added to the
 *   built-in excluded directories.
 * @returns {Promise<void>}
 * @throws {Error} When `collectionName` is missing.
 */
async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppressEmbedNotice = false, ignorePatterns) {
    const db = getDb();
    const resolvedPwd = pwd || getPwd();
    const now = new Date().toISOString();
    const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"];
    // Clear the database-backed cache before indexing (the original comment
    // calls this the "Ollama cache").
    clearCache(db);
    // Collection name must be provided (from YAML)
    if (!collectionName) {
        throw new Error("Collection name is required. Collections must be defined in ~/.config/kindx/index.yml");
    }
    console.log(`Collection: ${resolvedPwd} (${globPattern})`);
    progress.indeterminate();
    const allIgnore = [
        ...excludeDirs.map(d => `**/${d}/**`),
        ...(ignorePatterns || []),
    ];
    const allFiles = await fastGlob(globPattern, {
        cwd: resolvedPwd,
        onlyFiles: true,
        followSymbolicLinks: false,
        dot: false,
        ignore: allIgnore,
    });
    // Drop anything with a dot-prefixed path segment (hidden files/folders).
    const files = allFiles.filter(file => !file.split("/").some(part => part.startsWith(".")));
    const total = files.length;
    if (total === 0) {
        progress.clear();
        console.log("No files found matching pattern.");
        // Continue so the deactivation pass can mark previously indexed docs as inactive.
    }
    // statSync throws rather than returning a falsy value, so the fallback to
    // `now` has to live in a catch: a file that disappears between the read
    // and the stat must not abort the whole run.
    const readTimestamps = (filepath) => {
        try {
            const stat = statSync(filepath);
            return {
                created: new Date(stat.birthtime).toISOString(),
                modified: new Date(stat.mtime).toISOString(),
            };
        }
        catch {
            return { created: now, modified: now };
        }
    };
    let indexed = 0;
    let updated = 0;
    let unchanged = 0;
    let processed = 0;
    const seenPaths = new Set();
    const startTime = Date.now();
    for (const relativeFile of files) {
        const filepath = getRealPath(resolve(resolvedPwd, relativeFile));
        const path = handelize(relativeFile); // Normalize path for token-friendliness
        // Recorded before reading, so unreadable or empty files are not deactivated below.
        seenPaths.add(path);
        let content;
        try {
            content = readFileSync(filepath, "utf-8");
        }
        catch (err) {
            // Deliberate best-effort: skip files that can't be read
            // (e.g. iCloud evicted files returning EAGAIN).
            processed++;
            progress.set((processed / total) * 100);
            continue;
        }
        // Skip empty files - nothing useful to index
        if (!content.trim()) {
            processed++;
            progress.set((processed / total) * 100);
            continue;
        }
        const hash = await hashContent(content);
        const title = extractTitle(content, relativeFile);
        // Check if document exists in this collection with this path
        const existing = findActiveDocument(db, collectionName, path);
        if (!existing) {
            // New document - insert content and document
            indexed++;
            insertContent(db, hash, content, now);
            const times = readTimestamps(filepath);
            insertDocument(db, collectionName, path, title, hash, times.created, times.modified);
        }
        else if (existing.hash !== hash) {
            // Content changed - insert new content hash and update document
            insertContent(db, hash, content, now);
            updateDocument(db, existing.id, title, hash, readTimestamps(filepath).modified);
            updated++;
        }
        else if (existing.title !== title) {
            // Hash unchanged, but the extracted title differs
            updateDocumentTitle(db, existing.id, title, now);
            updated++;
        }
        else {
            unchanged++;
        }
        processed++;
        progress.set((processed / total) * 100);
        const elapsed = (Date.now() - startTime) / 1000;
        const rate = processed / elapsed;
        const remaining = (total - processed) / rate;
        // The first few samples are too noisy for a useful estimate.
        const eta = processed > 2 ? ` ETA: ${formatETA(remaining)}` : "";
        if (isTTY) {
            process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
        }
    }
    // Deactivate documents in this collection that no longer exist
    let removed = 0;
    for (const activePath of getActiveDocumentPaths(db, collectionName)) {
        if (!seenPaths.has(activePath)) {
            deactivateDocument(db, collectionName, activePath);
            removed++;
        }
    }
    // Clean up orphaned content hashes (content not referenced by any document)
    const orphanedContent = cleanupOrphanedContent(db);
    // Check if vector index needs updating
    const needsEmbedding = getHashesNeedingEmbedding(db);
    progress.clear();
    console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`);
    if (orphanedContent > 0) {
        console.log(`Cleaned up ${orphanedContent} orphaned content hash(es)`);
    }
    if (needsEmbedding > 0 && !suppressEmbedNotice) {
        console.log(`\nRun 'kindx embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
    }
    closeDb();
}
1320
/**
 * Render a fixed-width text progress bar.
 *
 * @param {number} percent - Completion percentage. Values outside 0..100 are
 *   clamped and NaN is treated as 0, so an over- or under-shooting caller
 *   cannot trigger a RangeError from `String.prototype.repeat`.
 * @param {number} [width=30] - Total number of bar characters.
 * @returns {string} `width` characters: filled cells followed by empty cells.
 */
function renderProgressBar(percent, width = 30) {
    const value = Number(percent);
    const clamped = Number.isNaN(value) ? 0 : Math.min(100, Math.max(0, value));
    const filled = Math.round((clamped / 100) * width);
    return "█".repeat(filled) + "░".repeat(width - filled);
}
1326
/**
 * Embed every content hash that still needs vectors.
 *
 * Documents are chunked with `chunkDocumentByTokens`, embedded in batches of
 * 32 inside a single LLM session (30 minute limit), and stored with
 * `insertEmbedding`. If a batch call throws, its chunks are retried one by
 * one. Progress is written to stderr when attached to a TTY.
 *
 * @param {string} [model] - Model name recorded with each embedding.
 * @param {boolean} [force=false] - When true, clear all existing embeddings
 *   first.
 * @returns {Promise<void>}
 * @throws Propagates session errors, after restoring the cursor and clearing
 *   the progress indicator.
 */
async function vectorIndex(model = DEFAULT_EMBED_MODEL, force = false) {
    const db = getDb();
    const now = new Date().toISOString();
    // If force, clear all vectors
    if (force) {
        console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
        clearAllEmbeddings(db);
    }
    // Find unique hashes that need embedding (from active documents)
    const hashesToEmbed = getHashesForEmbedding(db);
    if (hashesToEmbed.length === 0) {
        console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
        closeDb();
        return;
    }
    const allChunks = [];
    let multiChunkDocs = 0;
    // One encoder is enough; it is only used to measure UTF-8 byte lengths.
    const encoder = new TextEncoder();
    // Chunk all documents using actual token counts
    process.stderr.write(`Chunking ${hashesToEmbed.length} documents by token count...\n`);
    for (const item of hashesToEmbed) {
        if (encoder.encode(item.body).length === 0) {
            continue; // Skip empty
        }
        const title = extractTitle(item.body, item.path);
        const displayName = item.path;
        const chunks = await chunkDocumentByTokens(item.body); // Uses actual tokenizer
        if (chunks.length > 1) {
            multiChunkDocs++;
        }
        chunks.forEach((chunk, seq) => {
            allChunks.push({
                hash: item.hash,
                title,
                text: chunk.text,
                seq,
                pos: chunk.pos,
                tokens: chunk.tokens,
                bytes: encoder.encode(chunk.text).length,
                displayName,
            });
        });
    }
    if (allChunks.length === 0) {
        console.log(`${c.green}✓ No non-empty documents to embed.${c.reset}`);
        closeDb();
        return;
    }
    const totalBytes = allChunks.reduce((sum, chk) => sum + chk.bytes, 0);
    const totalChunks = allChunks.length;
    const totalDocs = hashesToEmbed.length;
    console.log(`${c.bold}Embedding ${totalDocs} documents${c.reset} ${c.dim}(${totalChunks} chunks, ${formatBytes(totalBytes)})${c.reset}`);
    if (multiChunkDocs > 0) {
        console.log(`${c.dim}${multiChunkDocs} documents split into multiple chunks${c.reset}`);
    }
    console.log(`${c.dim}Model: ${model}${c.reset}\n`);
    // Hide cursor during embedding. `cursorRestored` lets the finally block
    // undo this if the session fails before the normal cleanup runs.
    cursor.hide();
    let cursorRestored = false;
    try {
        // Wrap all LLM embedding operations in a session for lifecycle management
        // Use 30 minute timeout for large collections
        await withLLMSession(async (session) => {
            // Get embedding dimensions from first chunk
            progress.indeterminate();
            const firstChunk = allChunks[0];
            if (!firstChunk) {
                throw new Error("No chunks available to embed");
            }
            const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title);
            const firstResult = await session.embed(firstText);
            if (!firstResult) {
                throw new Error("Failed to get embedding dimensions from first chunk");
            }
            ensureVecTable(db, firstResult.embedding.length);
            let chunksEmbedded = 0;
            let errors = 0;
            let bytesProcessed = 0;
            const startTime = Date.now();
            // Store one embedding result; returns false when the result is missing.
            const store = (chunk, result) => {
                if (!result) {
                    errors++;
                    return false;
                }
                insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
                chunksEmbedded++;
                return true;
            };
            // Process in batches of 32 to balance memory usage and efficiency
            const BATCH_SIZE = 32;
            for (let batchStart = 0; batchStart < allChunks.length; batchStart += BATCH_SIZE) {
                const batchEnd = Math.min(batchStart + BATCH_SIZE, allChunks.length);
                const batch = allChunks.slice(batchStart, batchEnd);
                // Format texts for embedding
                const texts = batch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title));
                try {
                    // Batch embed all texts at once
                    const embeddings = await session.embedBatch(texts);
                    for (let i = 0; i < batch.length; i++) {
                        const chunk = batch[i];
                        if (!store(chunk, embeddings[i])) {
                            console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}${c.reset}`);
                        }
                        bytesProcessed += chunk.bytes;
                    }
                }
                catch (err) {
                    // If batch fails, try individual embeddings as fallback
                    for (const chunk of batch) {
                        try {
                            const text = formatDocForEmbedding(chunk.text, chunk.title);
                            store(chunk, await session.embed(text));
                        }
                        catch (innerErr) {
                            errors++;
                            console.error(`\n${c.yellow}⚠ Error embedding "${chunk.displayName}" chunk ${chunk.seq}: ${innerErr}${c.reset}`);
                        }
                        bytesProcessed += chunk.bytes;
                    }
                }
                const percent = (bytesProcessed / totalBytes) * 100;
                progress.set(percent);
                const elapsed = (Date.now() - startTime) / 1000;
                const bytesPerSec = bytesProcessed / elapsed;
                const remainingBytes = totalBytes - bytesProcessed;
                const etaSec = remainingBytes / bytesPerSec;
                const bar = renderProgressBar(percent);
                const percentStr = percent.toFixed(0).padStart(3);
                const throughput = `${formatBytes(bytesPerSec)}/s`;
                // Too little data in the first two seconds for a meaningful ETA.
                const eta = elapsed > 2 ? formatETA(etaSec) : "...";
                const errStr = errors > 0 ? ` ${c.yellow}${errors} err${c.reset}` : "";
                if (isTTY) {
                    process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${chunksEmbedded}/${totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
                }
            }
            progress.clear();
            cursor.show();
            cursorRestored = true;
            const totalTimeSec = (Date.now() - startTime) / 1000;
            const avgThroughput = formatBytes(totalBytes / totalTimeSec);
            console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
            console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${chunksEmbedded}${c.reset} chunks from ${c.bold}${totalDocs}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset} ${c.dim}(${avgThroughput}/s)${c.reset}`);
            if (errors > 0) {
                console.log(`${c.yellow}⚠ ${errors} chunks failed${c.reset}`);
            }
        }, { maxDuration: 30 * 60 * 1000, name: 'embed-command' });
    }
    finally {
        // On failure, mirror the success-path cleanup so the terminal is not
        // left with a hidden cursor or an active progress indicator.
        if (!cursorRestored) {
            progress.clear();
            cursor.show();
        }
    }
    closeDb();
}
1473
/**
 * Sanitize a single search term for use in an FTS5 query.
 *
 * Keeps letters, digits and combining marks from any script, plus
 * underscores (so snake_case identifiers stay one term) and apostrophes.
 * Everything else, including whitespace and punctuation, is removed.
 *
 * The previous ASCII-only `\w` class dropped every non-ASCII character, so
 * "café" became "caf" and CJK terms were reduced to an empty string.
 *
 * @param {string} term - Raw term taken from the user's query.
 * @returns {string} The term with disallowed characters removed.
 */
function sanitizeFTS5Term(term) {
    // No trim() needed: whitespace is not in the allowed class, so none survives.
    return term.replace(/[^\p{L}\p{N}\p{M}_']/gu, '');
}
1479
/**
 * Build an FTS5 MATCH expression from a free-text query.
 *
 * Terms shorter than two characters after sanitizing are dropped. The result
 * is "" when no term survives, a single quoted term when exactly one does,
 * and otherwise `(exact phrase) OR (NEAR(terms, 10)) OR (term OR term ...)`.
 *
 * The phrase is sanitized with a Unicode-aware character class, so
 * non-ASCII letters and digits are kept instead of being stripped by an
 * ASCII-only `\w`.
 *
 * @param {string} query - Raw user query.
 * @returns {string} FTS5 query string, or "" when there is nothing to search.
 */
function buildFTS5Query(query) {
    // Sanitize the full query for phrase matching (whitespace is kept here).
    const sanitizedQuery = query.replace(/[^\p{L}\p{N}\p{M}_\s']/gu, '').trim();
    const terms = query
        .split(/\s+/)
        .map(sanitizeFTS5Term)
        .filter(term => term.length >= 2); // Skip single chars and empty
    if (terms.length === 0) {
        return "";
    }
    // Double any embedded quote so the value stays a single FTS5 string.
    const quote = (value) => `"${value.replace(/"/g, '""')}"`;
    if (terms.length === 1) {
        return quote(terms[0]);
    }
    const phrase = quote(sanitizedQuery);
    const quotedTerms = terms.map(quote);
    // FTS5 NEAR syntax: NEAR(term1 term2, distance)
    const nearPhrase = `NEAR(${quotedTerms.join(' ')}, 10)`;
    const orTerms = quotedTerms.join(' OR ');
    // Exact phrase > proximity > any term
    return `(${phrase}) OR (${nearPhrase}) OR (${orTerms})`;
}
1501
/**
 * Map a BM25 score onto 0..1 (higher is better) with a logistic curve.
 *
 * Only the magnitude of the score is used. The curve is centred on a
 * magnitude of 5 (which maps to exactly 0.5) with a scale of 3.
 *
 * @param {number} score - Raw BM25 score; the sign is ignored.
 * @returns {number} Normalized score between 0 and 1.
 */
function normalizeBM25(score) {
    const MIDPOINT = 5;
    const SCALE = 3;
    const magnitude = Math.abs(score);
    const exponent = -(magnitude - MIDPOINT) / SCALE;
    return 1 / (1 + Math.exp(exponent));
}
1510
/**
 * Highlight query terms in text (terms shorter than 3 characters are skipped).
 *
 * All terms are combined into one case-insensitive pattern and applied in a
 * single pass. Replacing term by term let a later term match inside the
 * escape sequences inserted for an earlier one, and wrapped repeated terms
 * twice. Longer terms are tried first, so "foobar" wins over "foo" at the
 * same position.
 *
 * @param {string} text - Text to decorate.
 * @param {string} query - Whitespace-separated search terms.
 * @returns {string} `text` unchanged when colour is disabled or no term
 *   qualifies; otherwise `text` with each match wrapped in
 *   yellow + bold ... reset.
 */
function highlightTerms(text, query) {
    if (!useColor) {
        return text;
    }
    const terms = [...new Set(query.toLowerCase().split(/\s+/).filter(t => t.length >= 3))];
    if (terms.length === 0) {
        return text;
    }
    const alternatives = terms
        .sort((a, b) => b.length - a.length)
        // Escape regex metacharacters so each term is matched literally.
        .map(term => term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
        .join('|');
    const matcher = new RegExp(alternatives, 'gi');
    // A replacer function keeps `$` in the match or colour codes from being
    // interpreted as a replacement pattern.
    return text.replace(matcher, (match) => `${c.yellow}${c.bold}${match}${c.reset}`);
}
1522
/**
 * Format a 0..1 score as a right-aligned percentage, coloured by value.
 *
 * The percentage is rounded to a whole number and padded to three
 * characters. With colour enabled, scores >= 0.7 are green, scores >= 0.4
 * are yellow and everything else is dim.
 *
 * @param {number} score - Score to format.
 * @returns {string} e.g. " 50%", optionally wrapped in colour codes.
 */
function formatScore(score) {
    const label = `${(score * 100).toFixed(0).padStart(3)}%`;
    if (!useColor) {
        return label;
    }
    let tint;
    if (score >= 0.7) {
        tint = c.green;
    }
    else if (score >= 0.4) {
        tint = c.yellow;
    }
    else {
        tint = c.dim;
    }
    return `${tint}${label}${c.reset}`;
}
1533
/**
 * Format a number with exactly four digits after the decimal point.
 *
 * @param {number} value - Number to format.
 * @returns {string} Fixed-point representation, e.g. "0.1235".
 */
function formatExplainNumber(value) {
    const FRACTION_DIGITS = 4;
    const formatted = value.toFixed(FRACTION_DIGITS);
    return formatted;
}
1536
/**
 * Shorten a directory path for display by replacing the home directory with
 * `~` (used for context paths, not documents).
 *
 * The home prefix is only replaced at a path boundary: the path must equal
 * the home directory or continue with a separator. A bare `startsWith`
 * check turned `/home/bobby` into `~by` when home was `/home/bob`.
 *
 * @param {string} dirpath - Path to shorten.
 * @returns {string} `~`-relative path when `dirpath` is inside the home
 *   directory, otherwise `dirpath` unchanged.
 */
function shortPath(dirpath) {
    // Ignore trailing separators on the home directory itself.
    const home = (homedir() || '').replace(/[\\/]+$/, '');
    if (!home) {
        // Empty or root-only home: nothing meaningful to abbreviate.
        return dirpath;
    }
    if (dirpath === home) {
        return '~';
    }
    const boundary = dirpath.charAt(home.length);
    if (dirpath.startsWith(home) && (boundary === '/' || boundary === '\\')) {
        return `~${dirpath.slice(home.length)}`;
    }
    return dirpath;
}
1544
/**
 * Emit format-safe empty output for search commands.
 *
 * Structured formats get an empty but valid document ("[]" for json, the
 * header row for csv, an empty root element for xml); "md" and "files" print
 * nothing; every other format prints a human-readable message chosen by
 * `reason`.
 *
 * @param {string} format - Requested output format.
 * @param {string} [reason="no_results"] - "min_score" selects the
 *   threshold-specific message.
 * @returns {void}
 */
function printEmptySearchResults(format, reason = "no_results") {
    switch (format) {
        case "json":
            console.log("[]");
            return;
        case "csv":
            console.log("docid,score,file,title,context,line,snippet");
            return;
        case "xml":
            console.log("<results></results>");
            return;
        case "md":
        case "files":
            // These formats represent "no results" as no output at all.
            return;
        default:
            console.log(reason === "min_score"
                ? "No results found above minimum score threshold."
                : "No results found.");
    }
}
1567
/**
 * Print search results in the requested output format.
 *
 * Rows scoring below `opts.minScore` are dropped and the remainder is capped at
 * `opts.limit`; if nothing is left the format-specific empty output is printed.
 * Supported formats: "json", "files", "cli", "md", "xml"; any other value is
 * rendered as CSV.
 *
 * @param {Array<object>} results - Rows with score, displayPath, body and optional title/context/docid/hash/chunkPos/explain.
 * @param {string} query - Query text used for snippet extraction and highlighting.
 * @param {object} opts - Output options (format, minScore, limit, full, lineNumbers, explain).
 */
function outputResults(results, query, opts) {
    const visible = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
    if (visible.length === 0) {
        printEmptySearchResults(opts.format, "min_score");
        return;
    }
    // kindx:// URI for a display path
    const toQmdPath = (displayPath) => `kindx://${displayPath}`;
    // Explicit docid, else the first six characters of the content hash, else `fallback`
    const docidOf = (row, fallback) => row.docid || (row.hash ? row.hash.slice(0, 6) : fallback);
    // Whole body with --full, otherwise a 500-character snippet
    const bodyOrSnippet = (row) => (opts.full
        ? row.body
        : extractSnippet(row.body, query, 500, row.chunkPos).snippet);
    switch (opts.format) {
        case "json": {
            // JSON output for LLM consumption
            const payload = visible.map(row => {
                const docid = docidOf(row, undefined);
                let body;
                let snippet;
                if (opts.full) {
                    body = row.body;
                }
                else {
                    snippet = extractSnippet(row.body, query, 300, row.chunkPos).snippet;
                }
                if (opts.lineNumbers) {
                    if (body) {
                        body = addLineNumbers(body);
                    }
                    if (snippet) {
                        snippet = addLineNumbers(snippet);
                    }
                }
                return {
                    ...(docid && { docid: `#${docid}` }),
                    score: Math.round(row.score * 100) / 100,
                    file: toQmdPath(row.displayPath),
                    title: row.title,
                    ...(row.context && { context: row.context }),
                    ...(body && { body }),
                    ...(snippet && { snippet }),
                    ...(opts.explain && row.explain && { explain: row.explain }),
                };
            });
            console.log(JSON.stringify(payload, null, 2));
            break;
        }
        case "files": {
            // One "docid,score,filepath[,context]" line per result
            for (const row of visible) {
                const docid = docidOf(row, "");
                const contextField = row.context ? `,"${row.context.replace(/"/g, '""')}"` : "";
                console.log(`#${docid},${row.score.toFixed(2)},${toQmdPath(row.displayPath)}${contextField}`);
            }
            break;
        }
        case "cli": {
            const joinScores = (scores) => (scores.length > 0
                ? scores.map(formatExplainNumber).join(", ")
                : "none");
            visible.forEach((row, position) => {
                const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
                const docid = docidOf(row, undefined);
                const uri = toQmdPath(row.displayPath);
                // Only show :line when a query term really occurs in the snippet body (header line excluded).
                const snippetText = snippet.split("\n").slice(1).join("\n").toLowerCase();
                const termFound = query.toLowerCase().split(/\s+/).some(term => term.length > 0 && snippetText.includes(term));
                const lineSuffix = termFound ? `:${line}` : "";
                const docidSuffix = docid ? ` ${c.dim}#${docid}${c.reset}` : "";
                // Path (with optional line and docid), then title, context and score
                console.log(`${c.cyan}${uri}${c.dim}${lineSuffix}${c.reset}${docidSuffix}`);
                if (row.title) {
                    console.log(`${c.bold}Title: ${row.title}${c.reset}`);
                }
                if (row.context) {
                    console.log(`${c.dim}Context: ${row.context}${c.reset}`);
                }
                const scoreLabel = formatScore(row.score);
                console.log(`Score: ${c.bold}${scoreLabel}${c.reset}`);
                if (opts.explain && row.explain) {
                    const trace = row.explain;
                    const ftsScores = joinScores(trace.ftsScores);
                    const vecScores = joinScores(trace.vectorScores);
                    // Three largest RRF contributions, biggest first
                    const topContributions = [...trace.rrf.contributions]
                        .sort((a, b) => b.rrfContribution - a.rrfContribution)
                        .slice(0, 3)
                        .map(entry => `${entry.source}/${entry.queryType}#${entry.rank}:${formatExplainNumber(entry.rrfContribution)}`)
                        .join(" | ");
                    console.log(`${c.dim}Explain: fts=[${ftsScores}] vec=[${vecScores}]${c.reset}`);
                    console.log(`${c.dim} RRF: total=${formatExplainNumber(trace.rrf.totalScore)} base=${formatExplainNumber(trace.rrf.baseScore)} bonus=${formatExplainNumber(trace.rrf.topRankBonus)} rank=${trace.rrf.rank}${c.reset}`);
                    console.log(`${c.dim} Blend: ${Math.round(trace.rrf.weight * 100)}%*${formatExplainNumber(trace.rrf.positionScore)} + ${Math.round((1 - trace.rrf.weight) * 100)}%*${formatExplainNumber(trace.rerankScore)} = ${formatExplainNumber(trace.blendedScore)}${c.reset}`);
                    if (topContributions.length > 0) {
                        console.log(`${c.dim} Top RRF contributions: ${topContributions}${c.reset}`);
                    }
                }
                console.log();
                // Snippet with highlighting (diff-style header included)
                const shownSnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
                console.log(highlightTerms(shownSnippet, query));
                // Double empty line between results
                if (position < visible.length - 1) {
                    console.log('\n');
                }
            });
            break;
        }
        case "md": {
            for (const row of visible) {
                const heading = row.title || row.displayPath;
                const docid = docidOf(row, undefined);
                let content = bodyOrSnippet(row);
                if (opts.lineNumbers) {
                    content = addLineNumbers(content);
                }
                const docidLine = docid ? `**docid:** \`#${docid}\`\n` : "";
                const contextLine = row.context ? `**context:** ${row.context}\n` : "";
                console.log(`---\n# ${heading}\n${docidLine}${contextLine}\n${content}\n`);
            }
            break;
        }
        case "xml": {
            for (const row of visible) {
                const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '&quot;')}"` : "";
                const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '&quot;')}"` : "";
                const docid = docidOf(row, "");
                let content = bodyOrSnippet(row);
                if (opts.lineNumbers) {
                    content = addLineNumbers(content);
                }
                console.log(`<file docid="#${docid}" name="${toQmdPath(row.displayPath)}"${titleAttr}${contextAttr}>\n${content}\n</file>\n`);
            }
            break;
        }
        default: {
            // CSV format
            console.log("docid,score,file,title,context,line,snippet");
            for (const row of visible) {
                const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
                let content = opts.full ? row.body : snippet;
                if (opts.lineNumbers) {
                    content = addLineNumbers(content, line);
                }
                const docid = docidOf(row, "");
                const cells = [
                    `#${docid}`,
                    row.score.toFixed(4),
                    escapeCSV(toQmdPath(row.displayPath)),
                    escapeCSV(row.title || ""),
                    escapeCSV(row.context || ""),
                    line,
                    escapeCSV(content || ""),
                ];
                console.log(cells.join(","));
            }
        }
    }
}
1708
/**
 * Normalise the value of the -c/--collection option into a list of names.
 *
 * Accepts a single name, an array of names, or nothing. With no value the
 * result is the default collection list when `useDefaults` is set, otherwise an
 * empty list. Every supplied name is looked up in the configuration; the first
 * unknown one is reported on stderr, the database is closed and the process
 * exits with status 1.
 *
 * @param {string|string[]|undefined} raw - Raw option value.
 * @param {boolean} [useDefaults=false] - Fall back to the default collections when no filter is given.
 * @returns {string[]} Validated collection names.
 */
function resolveCollectionFilter(raw, useDefaults = false) {
    if (!raw) {
        return useDefaults ? getDefaultCollectionNames() : [];
    }
    const requested = Array.isArray(raw) ? raw : [raw];
    const accepted = [];
    for (const candidate of requested) {
        const known = Boolean(getCollectionFromYaml(candidate));
        if (!known) {
            console.error(`Collection not found: ${candidate}`);
            closeDb();
            process.exit(1);
        }
        accepted.push(candidate);
    }
    return accepted;
}
1730
/**
 * Keep only results whose virtual path lies inside one of the given collections.
 *
 * With zero or one collection name the input array is returned as-is (no
 * filtering happens here in that case). Otherwise a result is kept when its
 * `filepath` (or, failing that, `file`) starts with `kindx://<name>/` for any
 * of the names.
 *
 * @param {Array<object>} results - Search results carrying `filepath` or `file`.
 * @param {string[]} collectionNames - Collections to keep.
 * @returns {Array<object>} The same array, or a filtered copy.
 */
function filterByCollections(results, collectionNames) {
    if (collectionNames.length <= 1) {
        return results;
    }
    const allowedPrefixes = collectionNames.map(name => `kindx://${name}/`);
    const isAllowed = (entry) => {
        const location = entry.filepath || entry.file || '';
        return allowedPrefixes.some(prefix => location.startsWith(prefix));
    };
    return results.filter(entry => isAllowed(entry));
}
1740
/**
 * Parse the structured search query syntax.
 *
 * Blank lines are ignored. Each remaining line is either typed with a
 * lex:, vec: or hyde: prefix (case-insensitive), or the query consists of one
 * single line that is plain text or starts with expand:.
 *
 * Returns null for a plain query (one line, no prefix, or a lone `expand:` line)
 * and for an input with no non-blank lines.
 * Returns an array of { type, query, line } when typed lines are present.
 * Throws when a multi-line document contains an untyped or expand: line, when a
 * prefix has no text after it, or when a typed line contains a carriage return.
 *
 * Examples:
 *   "CAP theorem"                   -> null (plain query, use expansion)
 *   "lex: CAP theorem"              -> [{ type: 'lex', query: 'CAP theorem', line: 1 }]
 *   "lex: CAP\nvec: consistency"    -> [{ type: 'lex', ... }, { type: 'vec', ... }]
 *   "CAP\nconsistency"              -> throws (untyped line in a multi-line document)
 */
function parseStructuredQuery(query) {
    const TYPED_PREFIX = /^(lex|vec|hyde):\s*/i;
    const EXPAND_PREFIX = /^expand:\s*/i;
    // Non-blank lines, remembering their 1-based position in the original input
    const entries = [];
    query.split('\n').forEach((rawLine, index) => {
        const trimmed = rawLine.trim();
        if (trimmed.length > 0) {
            entries.push({ trimmed, number: index + 1 });
        }
    });
    if (entries.length === 0) {
        return null;
    }
    const isSingleLine = entries.length === 1;
    const subSearches = [];
    for (const { trimmed, number } of entries) {
        if (EXPAND_PREFIX.test(trimmed)) {
            if (!isSingleLine) {
                throw new Error(`Line ${number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
            }
            if (!trimmed.replace(EXPAND_PREFIX, '').trim()) {
                throw new Error('expand: query must include text.');
            }
            return null; // standalone expand query
        }
        const prefixMatch = TYPED_PREFIX.exec(trimmed);
        if (!prefixMatch) {
            if (isSingleLine) {
                return null; // single plain line -> implicit expand
            }
            throw new Error(`Line ${number} is missing a lex:/vec:/hyde: prefix. Each line in a query document must start with one.`);
        }
        const type = prefixMatch[1].toLowerCase();
        const text = trimmed.slice(prefixMatch[0].length).trim();
        if (!text) {
            throw new Error(`Line ${number} (${type}:) must include text.`);
        }
        if (/\r|\n/.test(text)) {
            throw new Error(`Line ${number} (${type}:) contains a newline. Keep each query on a single line.`);
        }
        subSearches.push({ type, query: text, line: number });
    }
    return subSearches.length > 0 ? subSearches : null;
}
1798
/**
 * Run a full-text (FTS) search and print the results.
 *
 * The collection filter is validated first (default collections apply when none
 * is given). A single collection is passed straight to the FTS query; several
 * collections are handled by post-filtering. Each hit is enriched with its
 * context, the database is closed, and the rows are handed to the printer.
 *
 * @param {string} query - Search text.
 * @param {object} opts - CLI search options (collection, all, limit, format, ...).
 */
function search(query, opts) {
    const db = getDb();
    const collectionNames = resolveCollectionFilter(opts.collection, true);
    const onlyCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
    // --all uses a very large cap; otherwise over-fetch and let outputResults trim.
    const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
    const hits = searchFTS(db, query, fetchLimit, onlyCollection);
    const rows = filterByCollections(hits, collectionNames).map(hit => ({
        file: hit.filepath,
        displayPath: hit.displayPath,
        title: hit.title,
        body: hit.body || "",
        score: hit.score,
        context: getContextForFile(db, hit.filepath),
        hash: hit.hash,
        docid: hit.docid,
    }));
    closeDb();
    if (rows.length > 0) {
        outputResults(rows, query, opts);
    }
    else {
        printEmptySearchResults(opts.format);
    }
}
1825
/**
 * Write the query expansion to stderr as a small tree (CLI progress feedback).
 *
 * The first row is the original query, followed by one `type: text` row per
 * expanded query. Newlines in the text are flattened to spaces and previews
 * longer than 72 characters are cut to 69 characters plus "...". The last row
 * uses the closing branch glyph.
 *
 * @param {string} originalQuery - The query as typed by the user.
 * @param {Array<{type: string, text: string}>} expanded - Generated sub-queries.
 */
function logExpansionTree(originalQuery, expanded) {
    const MAX_PREVIEW = 72;
    const branch = (label) => `${c.dim}├─ ${label}${c.reset}`;
    const rows = [branch(originalQuery)];
    for (const entry of expanded) {
        const flat = entry.text.replace(/\n/g, ' ');
        const preview = flat.length > MAX_PREVIEW ? flat.substring(0, 69) + '...' : flat;
        rows.push(branch(`${entry.type}: ${preview}`));
    }
    // There is always at least the original-query row; close the tree on the last one.
    const lastIndex = rows.length - 1;
    rows[lastIndex] = rows[lastIndex].replace('├─', '└─');
    rows.forEach(row => {
        process.stderr.write(row + '\n');
    });
}
1841
/**
 * Run a vector-similarity search inside an LLM session and print the results.
 *
 * The collection filter is validated (defaults apply when none is given) and
 * the index health check runs before the session starts. A single collection is
 * pushed down into the query; several collections are post-filtered by their
 * kindx:// prefix. The database is closed before printing.
 *
 * @param {string} query - Search text.
 * @param {object} opts - CLI search options (collection, all, limit, minScore, format, ...).
 * @param {string} [_model] - Unused; kept for interface compatibility.
 * @returns {Promise<void>}
 */
async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
    const store = getStore();
    const collectionNames = resolveCollectionFilter(opts.collection, true);
    const onlyCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
    checkIndexHealth(store.db);
    const runSearch = async () => {
        const hits = await vectorSearchQuery(store, query, {
            collection: onlyCollection,
            limit: opts.all ? 500 : (opts.limit || 10),
            minScore: opts.minScore || 0.3,
            hooks: {
                onExpand: (original, expanded) => {
                    logExpansionTree(original, expanded);
                    process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
                },
            },
        });
        // Post-filter for multi-collection
        let scoped = hits;
        if (collectionNames.length > 1) {
            const prefixes = collectionNames.map(name => `kindx://${name}/`);
            scoped = hits.filter(hit => prefixes.some(prefix => hit.file.startsWith(prefix)));
        }
        closeDb();
        if (scoped.length === 0) {
            printEmptySearchResults(opts.format);
            return;
        }
        const rows = scoped.map(hit => ({
            file: hit.file,
            displayPath: hit.displayPath,
            title: hit.title,
            body: hit.body,
            score: hit.score,
            context: hit.context,
            docid: hit.docid,
        }));
        // The query already applied the limit, so print everything that survived.
        outputResults(rows, query, { ...opts, limit: scoped.length });
    };
    await withLLMSession(runSearch, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
}
1883
/**
 * Run the hybrid `query` command inside an LLM session and print the results.
 *
 * If the query uses the structured syntax (lex:/vec:/hyde: lines) the
 * user-supplied sub-queries are executed as given; otherwise the standard
 * hybrid pipeline with automatic expansion is used. Progress is reported on
 * stderr through hooks. Several collections are handled by post-filtering on
 * the kindx:// prefix. The best chunk of each hit is used as the body for
 * snippet display.
 *
 * NOTE(review): for a lone `expand: ...` line parseStructuredQuery returns null,
 * so the raw text (prefix included) is what reaches hybridQuery — confirm that
 * hybridQuery handles the prefix.
 *
 * @param {string} query - Plain query or structured query document.
 * @param {object} opts - CLI search options (collection, all, limit, minScore, candidateLimit, explain, format, ...).
 * @param {string} [_embedModel] - Unused; kept for interface compatibility.
 * @param {string} [_rerankModel] - Unused; kept for interface compatibility.
 * @returns {Promise<void>}
 */
async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rerankModel = DEFAULT_RERANK_MODEL) {
    const store = getStore();
    const collectionNames = resolveCollectionFilter(opts.collection, true);
    const onlyCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
    checkIndexHealth(store.db);
    // Check for structured query syntax (lex:/vec:/hyde: prefixes)
    const structuredQueries = parseStructuredQuery(query);
    // Dimmed progress text on stderr
    const say = (text, tail = '') => {
        process.stderr.write(`${c.dim}${text}${c.reset}${tail}`);
    };
    const runQuery = async () => {
        // Options and progress hooks common to both search paths
        const commonOptions = {
            limit: opts.all ? 500 : (opts.limit || 10),
            minScore: opts.minScore || 0,
            candidateLimit: opts.candidateLimit,
            explain: !!opts.explain,
        };
        const pipelineHooks = {
            onEmbedStart: (count) => {
                say(`Embedding ${count} ${count === 1 ? 'query' : 'queries'}...`);
            },
            onEmbedDone: (ms) => {
                say(` (${formatMs(ms)})`, '\n');
            },
            onRerankStart: (chunkCount) => {
                say(`Reranking ${chunkCount} chunks...`);
                progress.indeterminate();
            },
            onRerankDone: (ms) => {
                progress.clear();
                say(` (${formatMs(ms)})`, '\n');
            },
        };
        let hits;
        if (structuredQueries) {
            // Structured search — user provided their own query expansions
            const typeLabels = structuredQueries.map(sub => sub.type).join('+');
            say(`Structured search: ${structuredQueries.length} queries (${typeLabels})`, '\n');
            for (const sub of structuredQueries) {
                const flat = sub.query.replace(/\n/g, ' ');
                const preview = flat.length > 72 ? flat.substring(0, 69) + '...' : flat;
                say(`├─ ${sub.type}: ${preview}`, '\n');
            }
            say('└─ Searching...', '\n');
            hits = await structuredSearch(store, structuredQueries, {
                collections: onlyCollection ? [onlyCollection] : undefined,
                ...commonOptions,
                hooks: pipelineHooks,
            });
        }
        else {
            // Standard hybrid query with automatic expansion
            hits = await hybridQuery(store, query, {
                collection: onlyCollection,
                ...commonOptions,
                hooks: {
                    onStrongSignal: (score) => {
                        say(`Strong BM25 signal (${score.toFixed(2)}) — skipping expansion`, '\n');
                    },
                    onExpandStart: () => {
                        say('Expanding query...');
                    },
                    onExpand: (original, expanded, ms) => {
                        say(` (${formatMs(ms)})`, '\n');
                        logExpansionTree(original, expanded);
                        say(`Searching ${expanded.length + 1} queries...`, '\n');
                    },
                    ...pipelineHooks,
                },
            });
        }
        // Post-filter for multi-collection
        if (collectionNames.length > 1) {
            const prefixes = collectionNames.map(name => `kindx://${name}/`);
            hits = hits.filter(hit => prefixes.some(prefix => hit.file.startsWith(prefix)));
        }
        closeDb();
        if (hits.length === 0) {
            printEmptySearchResults(opts.format);
            return;
        }
        // Use the first lex query, else the first vec query, else the original text for snippets
        let displayQuery = query;
        if (structuredQueries) {
            const ofType = (type) => structuredQueries.find(sub => sub.type === type)?.query;
            displayQuery = ofType('lex') || ofType('vec') || query;
        }
        // Map to CLI output format — use bestChunk for snippet display
        const rows = hits.map(hit => ({
            file: hit.file,
            displayPath: hit.displayPath,
            title: hit.title,
            body: hit.bestChunk,
            chunkPos: hit.bestChunkPos,
            score: hit.score,
            context: hit.context,
            docid: hit.docid,
            explain: hit.explain,
        }));
        outputResults(rows, displayQuery, { ...opts, limit: hits.length });
    };
    await withLLMSession(runQuery, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
}
1997
/**
 * Parse the command line (via util.parseArgs) into the command, its positional
 * arguments and the normalised search options.
 *
 * Side effect: when --index is given, the index name is applied to both the
 * store and the configuration before returning.
 *
 * Numeric options are validated: `-n` and `--candidate-limit` must be positive
 * integers. Anything else (non-numeric text, zero, negative numbers) falls back
 * to the default limit / `undefined` instead of leaking NaN or a negative
 * count into the search pipeline. A non-numeric `--min-score` becomes 0.
 *
 * @returns {{command: string, args: string[], query: string, opts: object, values: object}}
 *   `command` is the first positional ("" when absent), `args` the remaining
 *   positionals, `query` those joined with spaces, `opts` the normalised search
 *   options and `values` the raw parsed option values.
 */
function parseCLI() {
    const { values, positionals } = parseArgs({
        args: process.argv.slice(2), // Skip node and script path
        options: {
            // Global options
            index: {
                type: "string",
            },
            context: {
                type: "string",
            },
            help: { type: "boolean", short: "h" },
            version: { type: "boolean", short: "v" },
            skill: { type: "boolean" },
            // Search options
            n: { type: "string" },
            "min-score": { type: "string" },
            all: { type: "boolean" },
            full: { type: "boolean" },
            csv: { type: "boolean" },
            md: { type: "boolean" },
            xml: { type: "boolean" },
            files: { type: "boolean" },
            json: { type: "boolean" },
            explain: { type: "boolean" },
            collection: { type: "string", short: "c", multiple: true }, // Filter by collection(s)
            // Collection options
            name: { type: "string" }, // collection name
            mask: { type: "string" }, // glob pattern
            // Embed options
            force: { type: "boolean", short: "f" },
            // Update options
            pull: { type: "boolean" }, // git pull before update
            refresh: { type: "boolean" },
            // Get options
            l: { type: "string" }, // max lines
            from: { type: "string" }, // start line
            "max-bytes": { type: "string" }, // max bytes for multi-get
            "line-numbers": { type: "boolean" }, // add line numbers to output
            // Query options
            "candidate-limit": { type: "string", short: "C" },
            // MCP HTTP transport options
            http: { type: "boolean" },
            daemon: { type: "boolean" },
            port: { type: "string" },
        },
        allowPositionals: true,
        strict: false, // Allow unknown options to pass through
    });
    // Select index name (default: "index")
    const indexName = values.index;
    if (indexName) {
        setIndexName(indexName);
        setConfigIndexName(indexName);
    }
    // Determine output format (first matching flag wins, in this order)
    let format = "cli";
    if (values.csv)
        format = "csv";
    else if (values.md)
        format = "md";
    else if (values.xml)
        format = "xml";
    else if (values.files)
        format = "files";
    else if (values.json)
        format = "json";
    // Positive integer, or undefined for missing / non-numeric / non-positive input.
    // With strict:false an option given without a value may arrive as `true`, hence String().
    const toPositiveInt = (raw) => {
        if (!raw)
            return undefined;
        const parsed = parseInt(String(raw), 10);
        return Number.isInteger(parsed) && parsed > 0 ? parsed : undefined;
    };
    // Default limit: 20 for --files/--json, 5 otherwise
    // --all means return all results (use very large limit)
    const defaultLimit = (format === "files" || format === "json") ? 20 : 5;
    const isAll = !!values.all;
    const opts = {
        format,
        full: !!values.full,
        limit: isAll ? 100000 : (toPositiveInt(values.n) ?? defaultLimit),
        minScore: values["min-score"] ? parseFloat(String(values["min-score"])) || 0 : 0,
        all: isAll,
        collection: values.collection,
        lineNumbers: !!values["line-numbers"],
        candidateLimit: toPositiveInt(values["candidate-limit"]),
        explain: !!values.explain,
    };
    return {
        command: positionals[0] || "",
        args: positionals.slice(1),
        query: positionals.slice(1).join(" "),
        opts,
        values,
    };
}
2088
/**
 * Print the packaged SKILL.md: a header with its relative path, its resolved
 * location, a blank line, then the file contents (newline-terminated).
 *
 * The file is looked up relative to the package root (the parent of this
 * script's directory). `skills/kindx/SKILL.md` is tried first and
 * `capabilities/kindx/SKILL.md` second, because the messages in this CLI refer
 * to the latter while the lookup historically used the former. When neither
 * exists, an error naming the checked paths is written to stderr.
 */
function showSkill() {
    const scriptDir = dirname(fileURLToPath(import.meta.url));
    const candidates = [
        pathJoin("skills", "kindx", "SKILL.md"),
        pathJoin("capabilities", "kindx", "SKILL.md"),
    ];
    // First candidate that exists; fall back to the first one for the header/error output.
    const relativePath = candidates.find(rel => existsSync(pathJoin(scriptDir, "..", rel))) ?? candidates[0];
    const skillPath = pathJoin(scriptDir, "..", relativePath);
    console.log(`KINDX Skill (${relativePath})`);
    console.log(`Location: ${skillPath}`);
    console.log("");
    if (!existsSync(skillPath)) {
        console.error(`SKILL.md not found. If you built from source, ensure ${candidates.join(" or ")} exists.`);
        return;
    }
    const content = readFileSync(skillPath, "utf-8");
    process.stdout.write(content.endsWith("\n") ? content : content + "\n");
}
2102
/**
 * Print the CLI usage text to stdout, one console.log call per line, ending
 * with the path of the active index database.
 */
function showHelp() {
    const printAll = (lines) => {
        for (const text of lines) {
            console.log(text);
        }
    };
    // Commands and the prose introduction to the query syntax
    const overview = [
        "kindx -- Knowledge INDexer",
        "",
        "Usage:",
        " kindx <command> [options]",
        "",
        "Primary commands:",
        " kindx query <query> - Hybrid search with auto expansion + reranking (recommended)",
        " kindx query 'lex:..\\nvec:...' - Structured query document (you provide lex/vec/hyde lines)",
        " kindx search <query> - Full-text BM25 keywords (no LLM)",
        " kindx vsearch <query> - Vector similarity only",
        " kindx get <file>[:line] [-l N] - Show a single document, optional line slice",
        " kindx multi-get <pattern> - Batch fetch via glob or comma-separated list",
        " kindx mcp - Start the MCP server (stdio transport for AI agents)",
        "",
        "Collections & context:",
        " kindx collection add/list/remove/rename/show - Manage indexed folders",
        " kindx context add/list/rm - Attach human-written summaries",
        " kindx ls [collection[/path]] - Inspect indexed files",
        "",
        "Maintenance:",
        " kindx status - View index + collection health",
        " kindx update [--pull] - Re-index collections (optionally git pull first)",
        " kindx embed [-f] - Generate/refresh vector embeddings",
        " kindx cleanup - Clear caches, vacuum DB",
        "",
        "Query syntax (kindx query):",
        " KINDX queries are either a single expand query (no prefix) or a multi-line",
        " document where every line is typed with lex:, vec:, or hyde:. This grammar",
        " matches the docs in docs/SYNTAX.md and is enforced in the CLI.",
        "",
    ];
    // Grammar productions of the query language; each is printed with a one-space indent
    const grammar = [
        `query = expand_query | query_document ;`,
        `expand_query = text | explicit_expand ;`,
        `explicit_expand= "expand:" text ;`,
        `query_document = { typed_line } ;`,
        `typed_line = type ":" text newline ;`,
        `type = "lex" | "vec" | "hyde" ;`,
        `text = quoted_phrase | plain_text ;`,
        `quoted_phrase = '"' { character } '"' ;`,
        `plain_text = { character } ;`,
        `newline = "\\n" ;`,
    ];
    // Examples, constraints, integrations and option reference
    const reference = [
        "",
        " Examples:",
        " kindx query \"how does auth work\" # single-line → implicit expand",
        " kindx query $'lex: CAP theorem\\nvec: consistency' # typed query document",
        " kindx query $'lex: \"exact matches\" sports -baseball' # phrase + negation lex search",
        " kindx query $'hyde: Hypothetical answer text' # hyde-only document",
        "",
        " Constraints:",
        " - Standalone expand queries cannot mix with typed lines.",
        " - Query documents allow only lex:, vec:, or hyde: prefixes.",
        " - Each typed line must be single-line text with balanced quotes.",
        "",
        "AI agents & integrations:",
        " - Run `kindx mcp` to expose the MCP server (stdio) to agents/IDEs.",
        " - `kindx --skill` prints the packaged capabilities/kindx/SKILL.md (path + contents).",
        " - Advanced: `kindx mcp --http ...` and `kindx mcp --http --daemon` are optional for custom transports.",
        "",
        "Global options:",
        " --index <name> - Use a named index (default: index)",
        "",
        "Search options:",
        " -n <num> - Max results (default 5, or 20 for --files/--json)",
        " --all - Return all matches (pair with --min-score)",
        " --min-score <num> - Minimum similarity score",
        " --full - Output full document instead of snippet",
        " -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)",
        " --line-numbers - Include line numbers in output",
        " --explain - Include retrieval score traces (query --json/CLI)",
        " --files | --json | --csv | --md | --xml - Output format",
        " -c, --collection <name> - Filter by one or more collections",
        "",
        "Multi-get options:",
        " -l <num> - Maximum lines per file",
        " --max-bytes <num> - Skip files larger than N bytes (default 10240)",
        " --json/--csv/--md/--xml/--files - Same formats as search",
        "",
    ];
    printAll(overview);
    console.log(" Grammar:");
    printAll(grammar.map(rule => ` ${rule}`));
    printAll(reference);
    // Resolved last so the static help is already printed if the lookup fails.
    console.log(`Index: ${getDbPath()}`);
}
2187
/**
 * Print `kindx <version>`, followed by the short git commit in parentheses when
 * the script directory is inside a git work tree.
 *
 * The version is read from the package.json one level above this script. The
 * git lookup is best-effort: any failure (git missing, not a repository) simply
 * omits the commit.
 *
 * The directory is handed to git through the `cwd` option rather than being
 * interpolated into the command string, so install paths containing spaces or
 * shell metacharacters cannot break or alter the command.
 *
 * @returns {Promise<void>}
 */
async function showVersion() {
    const scriptDir = dirname(fileURLToPath(import.meta.url));
    const pkgPath = resolve(scriptDir, "..", "package.json");
    const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
    let commit = "";
    try {
        commit = execSync("git rev-parse --short HEAD", {
            cwd: scriptDir,
            encoding: "utf-8",
            stdio: ["pipe", "pipe", "pipe"],
        }).trim();
    }
    catch {
        // Not a git repo or git not available
    }
    const versionStr = commit ? `${pkg.version} (${commit})` : pkg.version;
    console.log(`kindx ${versionStr}`);
}
2201
+ // Main CLI - only run if this is the main module
2202
+ const __filename = fileURLToPath(import.meta.url);
2203
+ const argv1 = process.argv[1];
2204
+ const isMain = argv1 === __filename
2205
+ || argv1?.endsWith("/kindx.ts")
2206
+ || argv1?.endsWith("/kindx.js")
2207
+ || (argv1 != null && realpathSync(argv1) === __filename);
2208
+ if (isMain) {
2209
+ const cli = parseCLI();
2210
+ if (cli.values.version) {
2211
+ await showVersion();
2212
+ process.exit(0);
2213
+ }
2214
+ if (cli.values.skill) {
2215
+ showSkill();
2216
+ process.exit(0);
2217
+ }
2218
+ if (!cli.command || cli.values.help) {
2219
+ showHelp();
2220
+ process.exit(cli.values.help ? 0 : 1);
2221
+ }
2222
+ switch (cli.command) {
2223
+ case "context": {
2224
+ const subcommand = cli.args[0];
2225
+ if (!subcommand) {
2226
+ console.error("Usage: kindx context <add|list|rm>");
2227
+ console.error("");
2228
+ console.error("Commands:");
2229
+ console.error(" kindx context add [path] \"text\" - Add context (defaults to current dir)");
2230
+ console.error(" kindx context add / \"text\" - Add global context to all collections");
2231
+ console.error(" kindx context list - List all contexts");
2232
+ console.error(" kindx context rm <path> - Remove context");
2233
+ process.exit(1);
2234
+ }
2235
// Dispatch the `kindx context <subcommand>` family: add, list, rm/remove.
switch (subcommand) {
    case "add": {
        // `add` needs at least one positional after the subcommand itself.
        if (cli.args.length < 2) {
            const addUsage = [
                "Usage: kindx context add [path] \"text\"",
                "",
                "Examples:",
                " kindx context add \"Context for current directory\"",
                " kindx context add . \"Context for current directory\"",
                " kindx context add /subfolder \"Context for subfolder\"",
                " kindx context add / \"Global context for all collections\"",
                "",
                " Using virtual paths:",
                " kindx context add kindx://journals/ \"Context for entire journals collection\"",
                " kindx context add kindx://journals/2024 \"Context for 2024 journals\"",
            ];
            for (const usageLine of addUsage) {
                console.error(usageLine);
            }
            process.exit(1);
        }
        // When a second positional is present, the first one is the path and
        // everything from the second onwards is joined into the context text.
        // Otherwise the single positional is the text and the path is left
        // undefined (the usage text says this means the current directory).
        const leading = cli.args[1] || '';
        const hasExplicitPath = Boolean(cli.args[2]);
        const pathArg = hasExplicitPath ? leading : undefined;
        const contextText = hasExplicitPath ? cli.args.slice(2).join(" ") : leading;
        await contextAdd(pathArg, contextText);
        break;
    }
    case "list": {
        contextList();
        break;
    }
    case "rm":
    case "remove": {
        const doomedPath = cli.args[1];
        if (cli.args.length < 2 || !doomedPath) {
            const removeUsage = [
                "Usage: kindx context rm <path>",
                "Examples:",
                " kindx context rm /",
                " kindx context rm kindx://journals/2024",
            ];
            for (const usageLine of removeUsage) {
                console.error(usageLine);
            }
            process.exit(1);
        }
        contextRemove(cli.args[1]);
        break;
    }
    default: {
        console.error(`Unknown subcommand: ${subcommand}`);
        console.error("Available: add, list, rm");
        process.exit(1);
    }
}
2290
+ break;
2291
+ }
2292
+ case "get": {
2293
+ if (!cli.args[0]) {
2294
+ console.error("Usage: kindx get <filepath>[:line] [--from <line>] [-l <lines>] [--line-numbers]");
2295
+ process.exit(1);
2296
+ }
2297
+ const fromLine = cli.values.from ? parseInt(cli.values.from, 10) : undefined;
2298
+ const maxLines = cli.values.l ? parseInt(cli.values.l, 10) : undefined;
2299
+ getDocument(cli.args[0], fromLine, maxLines, cli.opts.lineNumbers);
2300
+ break;
2301
+ }
2302
+ case "multi-get": {
2303
+ if (!cli.args[0]) {
2304
+ console.error("Usage: kindx multi-get <pattern> [-l <lines>] [--max-bytes <bytes>] [--json|--csv|--md|--xml|--files]");
2305
+ console.error(" pattern: glob (e.g., 'journals/2025-05*.md') or comma-separated list");
2306
+ process.exit(1);
2307
+ }
2308
+ const maxLinesMulti = cli.values.l ? parseInt(cli.values.l, 10) : undefined;
2309
+ const maxBytes = cli.values["max-bytes"] ? parseInt(cli.values["max-bytes"], 10) : DEFAULT_MULTI_GET_MAX_BYTES;
2310
+ multiGet(cli.args[0], maxLinesMulti, maxBytes, cli.opts.format);
2311
+ break;
2312
+ }
2313
+ case "ls": {
2314
+ listFiles(cli.args[0]);
2315
+ break;
2316
+ }
2317
+ case "collection": {
2318
+ const subcommand = cli.args[0];
2319
+ switch (subcommand) {
2320
+ case "list": {
2321
+ collectionList();
2322
+ break;
2323
+ }
2324
+ case "add": {
2325
+ const pwd = cli.args[1] || getPwd();
2326
+ const resolvedPwd = pwd === '.' ? getPwd() : getRealPath(resolve(pwd));
2327
+ const globPattern = cli.values.mask || DEFAULT_GLOB;
2328
+ const name = cli.values.name;
2329
+ await collectionAdd(resolvedPwd, globPattern, name);
2330
+ break;
2331
+ }
2332
+ case "remove":
2333
+ case "rm": {
2334
+ if (!cli.args[1]) {
2335
+ console.error("Usage: kindx collection remove <name>");
2336
+ console.error(" Use 'kindx collection list' to see available collections");
2337
+ process.exit(1);
2338
+ }
2339
+ collectionRemove(cli.args[1]);
2340
+ break;
2341
+ }
2342
+ case "rename":
2343
+ case "mv": {
2344
+ if (!cli.args[1] || !cli.args[2]) {
2345
+ console.error("Usage: kindx collection rename <old-name> <new-name>");
2346
+ console.error(" Use 'kindx collection list' to see available collections");
2347
+ process.exit(1);
2348
+ }
2349
+ collectionRename(cli.args[1], cli.args[2]);
2350
+ break;
2351
+ }
2352
+ case "set-update":
2353
+ case "update-cmd": {
2354
+ const name = cli.args[1];
2355
+ const cmd = cli.args.slice(2).join(' ') || null;
2356
+ if (!name) {
2357
+ console.error("Usage: kindx collection update-cmd <name> [command]");
2358
+ console.error(" Set the command to run before indexing (e.g., 'git pull')");
2359
+ console.error(" Omit command to clear it");
2360
+ process.exit(1);
2361
+ }
2362
+ const { updateCollectionSettings, getCollection } = await import("./catalogs.js");
2363
+ const col = getCollection(name);
2364
+ if (!col) {
2365
+ console.error(`Collection not found: ${name}`);
2366
+ process.exit(1);
2367
+ }
2368
+ updateCollectionSettings(name, { update: cmd });
2369
+ if (cmd) {
2370
+ console.log(`✓ Set update command for '${name}': ${cmd}`);
2371
+ }
2372
+ else {
2373
+ console.log(`✓ Cleared update command for '${name}'`);
2374
+ }
2375
+ break;
2376
+ }
2377
+ case "include":
2378
+ case "exclude": {
2379
+ const name = cli.args[1];
2380
+ if (!name) {
2381
+ console.error(`Usage: kindx collection ${subcommand} <name>`);
2382
+ console.error(` ${subcommand === 'include' ? 'Include' : 'Exclude'} collection in default queries`);
2383
+ process.exit(1);
2384
+ }
2385
+ const { updateCollectionSettings, getCollection } = await import("./catalogs.js");
2386
+ const col = getCollection(name);
2387
+ if (!col) {
2388
+ console.error(`Collection not found: ${name}`);
2389
+ process.exit(1);
2390
+ }
2391
+ const include = subcommand === 'include';
2392
+ updateCollectionSettings(name, { includeByDefault: include });
2393
+ console.log(`✓ Collection '${name}' ${include ? 'included in' : 'excluded from'} default queries`);
2394
+ break;
2395
+ }
2396
+ case "show":
2397
+ case "info": {
2398
+ const name = cli.args[1];
2399
+ if (!name) {
2400
+ console.error("Usage: kindx collection show <name>");
2401
+ process.exit(1);
2402
+ }
2403
+ const { getCollection } = await import("./catalogs.js");
2404
+ const col = getCollection(name);
2405
+ if (!col) {
2406
+ console.error(`Collection not found: ${name}`);
2407
+ process.exit(1);
2408
+ }
2409
+ console.log(`Collection: ${name}`);
2410
+ console.log(` Path: ${col.path}`);
2411
+ console.log(` Pattern: ${col.pattern}`);
2412
+ console.log(` Include: ${col.includeByDefault !== false ? 'yes (default)' : 'no'}`);
2413
+ if (col.update) {
2414
+ console.log(` Update: ${col.update}`);
2415
+ }
2416
+ if (col.context) {
2417
+ const ctxCount = Object.keys(col.context).length;
2418
+ console.log(` Contexts: ${ctxCount}`);
2419
+ }
2420
+ break;
2421
+ }
2422
+ case "help":
2423
+ case undefined: {
2424
+ console.log("Usage: kindx collection <command> [options]");
2425
+ console.log("");
2426
+ console.log("Commands:");
2427
+ console.log(" list List all collections");
2428
+ console.log(" add <path> [--name NAME] Add a collection");
2429
+ console.log(" remove <name> Remove a collection");
2430
+ console.log(" rename <old> <new> Rename a collection");
2431
+ console.log(" show <name> Show collection details");
2432
+ console.log(" update-cmd <name> [cmd] Set pre-update command (e.g., 'git pull')");
2433
+ console.log(" include <name> Include in default queries");
2434
+ console.log(" exclude <name> Exclude from default queries");
2435
+ console.log("");
2436
+ console.log("Examples:");
2437
+ console.log(" kindx collection add ~/notes --name notes");
2438
+ console.log(" kindx collection update-cmd brain 'git pull'");
2439
+ console.log(" kindx collection exclude archive");
2440
+ process.exit(0);
2441
+ }
2442
+ default:
2443
+ console.error(`Unknown subcommand: ${subcommand}`);
2444
+ console.error("Run 'kindx collection help' for usage");
2445
+ process.exit(1);
2446
+ }
2447
+ break;
2448
+ }
2449
+ case "status":
2450
+ await showStatus();
2451
+ break;
2452
+ case "update": {
2453
+ const collFilter = cli.values.collection;
2454
+ await updateCollections(collFilter);
2455
+ break;
2456
+ }
2457
+ case "embed":
2458
+ await vectorIndex(DEFAULT_EMBED_MODEL, !!cli.values.force);
2459
+ break;
2460
+ case "pull": {
2461
+ const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh);
2462
+ const models = [
2463
+ DEFAULT_EMBED_MODEL_URI,
2464
+ DEFAULT_GENERATE_MODEL_URI,
2465
+ DEFAULT_RERANK_MODEL_URI,
2466
+ ];
2467
+ console.log(`${c.bold}Pulling models${c.reset}`);
2468
+ const results = await pullModels(models, {
2469
+ refresh,
2470
+ cacheDir: DEFAULT_MODEL_CACHE_DIR,
2471
+ });
2472
+ for (const result of results) {
2473
+ const size = formatBytes(result.sizeBytes);
2474
+ const note = result.refreshed ? "refreshed" : "cached/checked";
2475
+ console.log(`- ${result.model} -> ${result.path} (${size}, ${note})`);
2476
+ }
2477
+ break;
2478
+ }
2479
+ case "search":
2480
+ if (!cli.query) {
2481
+ console.error("Usage: kindx search [options] <query>");
2482
+ process.exit(1);
2483
+ }
2484
+ search(cli.query, cli.opts);
2485
+ break;
2486
+ case "vsearch":
2487
+ case "vector-search": // undocumented alias
2488
+ if (!cli.query) {
2489
+ console.error("Usage: kindx vsearch [options] <query>");
2490
+ process.exit(1);
2491
+ }
2492
+ // Default min-score for vector search is 0.3
2493
+ if (!cli.values["min-score"]) {
2494
+ cli.opts.minScore = 0.3;
2495
+ }
2496
+ await vectorSearch(cli.query, cli.opts);
2497
+ break;
2498
+ case "query":
2499
+ case "deep-search": // undocumented alias
2500
+ if (!cli.query) {
2501
+ console.error("Usage: kindx query [options] <query>");
2502
+ process.exit(1);
2503
+ }
2504
+ await querySearch(cli.query, cli.opts);
2505
+ break;
2506
+ case "mcp": {
2507
+ const sub = cli.args[0]; // stop | status | undefined
2508
+ // Cache dir for PID/log files — same dir as the index
2509
+ const cacheDir = process.env.XDG_CACHE_HOME
2510
+ ? resolve(process.env.XDG_CACHE_HOME, "kindx")
2511
+ : resolve(homedir(), ".cache", "kindx");
2512
+ const pidPath = resolve(cacheDir, "mcp.pid");
2513
+ // Subcommands take priority over flags
2514
+ if (sub === "stop") {
2515
+ if (!existsSync(pidPath)) {
2516
+ console.log("Not running (no PID file).");
2517
+ process.exit(0);
2518
+ }
2519
+ const pid = parseInt(readFileSync(pidPath, "utf-8").trim());
2520
+ try {
2521
+ process.kill(pid, 0); // alive?
2522
+ process.kill(pid, "SIGTERM");
2523
+ unlinkSync(pidPath);
2524
+ console.log(`Stopped KINDX MCP server (PID ${pid}).`);
2525
+ }
2526
+ catch {
2527
+ unlinkSync(pidPath);
2528
+ console.log("Cleaned up stale PID file (server was not running).");
2529
+ }
2530
+ process.exit(0);
2531
+ }
2532
+ if (cli.values.http) {
2533
+ const port = Number(cli.values.port) || 8181;
2534
+ if (cli.values.daemon) {
2535
+ // Guard: check if already running
2536
+ if (existsSync(pidPath)) {
2537
+ const existingPid = parseInt(readFileSync(pidPath, "utf-8").trim());
2538
+ try {
2539
+ process.kill(existingPid, 0); // alive?
2540
+ console.error(`Already running (PID ${existingPid}). Run 'kindx mcp stop' first.`);
2541
+ process.exit(1);
2542
+ }
2543
+ catch {
2544
+ // Stale PID file — continue
2545
+ }
2546
+ }
2547
+ mkdirSync(cacheDir, { recursive: true });
2548
+ const logPath = resolve(cacheDir, "mcp.log");
2549
+ const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
2550
+ const selfPath = fileURLToPath(import.meta.url);
2551
+ const spawnArgs = selfPath.endsWith(".ts")
2552
+ ? ["--import", pathJoin(dirname(selfPath), "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, "mcp", "--http", "--port", String(port)]
2553
+ : [selfPath, "mcp", "--http", "--port", String(port)];
2554
+ const child = nodeSpawn(process.execPath, spawnArgs, {
2555
+ stdio: ["ignore", logFd, logFd],
2556
+ detached: true,
2557
+ });
2558
+ child.unref();
2559
+ closeSync(logFd); // parent's copy; child inherited the fd
2560
+ writeFileSync(pidPath, String(child.pid));
2561
+ console.log(`Started on http://localhost:${port}/mcp (PID ${child.pid})`);
2562
+ console.log(`Logs: ${logPath}`);
2563
+ process.exit(0);
2564
+ }
2565
+ // Foreground HTTP mode — remove top-level cursor handlers so the
2566
+ // async cleanup handlers in startMcpHttpServer actually run.
2567
+ process.removeAllListeners("SIGTERM");
2568
+ process.removeAllListeners("SIGINT");
2569
+ const { startMcpHttpServer } = await import("./protocol.js");
2570
+ try {
2571
+ await startMcpHttpServer(port);
2572
+ }
2573
+ catch (e) {
2574
+ if (e?.code === "EADDRINUSE") {
2575
+ console.error(`Port ${port} already in use. Try a different port with --port.`);
2576
+ process.exit(1);
2577
+ }
2578
+ throw e;
2579
+ }
2580
+ }
2581
+ else {
2582
+ // Default: stdio transport
2583
+ const { startMcpServer } = await import("./protocol.js");
2584
+ await startMcpServer();
2585
+ }
2586
+ break;
2587
+ }
2588
+ case "cleanup": {
2589
+ const db = getDb();
2590
+ // 1. Clear llm_cache
2591
+ const cacheCount = deleteLLMCache(db);
2592
+ console.log(`${c.green}✓${c.reset} Cleared ${cacheCount} cached API responses`);
2593
+ // 2. Remove orphaned vectors
2594
+ const orphanedVecs = cleanupOrphanedVectors(db);
2595
+ if (orphanedVecs > 0) {
2596
+ console.log(`${c.green}✓${c.reset} Removed ${orphanedVecs} orphaned embedding chunks`);
2597
+ }
2598
+ else {
2599
+ console.log(`${c.dim}No orphaned embeddings to remove${c.reset}`);
2600
+ }
2601
+ // 3. Remove inactive documents
2602
+ const inactiveDocs = deleteInactiveDocuments(db);
2603
+ if (inactiveDocs > 0) {
2604
+ console.log(`${c.green}✓${c.reset} Removed ${inactiveDocs} inactive document records`);
2605
+ }
2606
+ // 4. Vacuum to reclaim space
2607
+ vacuumDatabase(db);
2608
+ console.log(`${c.green}✓${c.reset} Database vacuumed`);
2609
+ closeDb();
2610
+ break;
2611
+ }
2612
+ default:
2613
+ console.error(`Unknown command: ${cli.command}`);
2614
+ console.error("Run 'kindx --help' for usage.");
2615
+ process.exit(1);
2616
+ }
2617
+ if (cli.command !== "mcp") {
2618
+ await disposeDefaultLlamaCpp();
2619
+ process.exit(0);
2620
+ }
2621
+ } // end if (main module)