nano-brain 2026.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/AGENTS_SNIPPET.md +36 -0
  2. package/CHANGELOG.md +68 -0
  3. package/README.md +281 -0
  4. package/SKILL.md +153 -0
  5. package/bin/cli.js +18 -0
  6. package/index.html +929 -0
  7. package/nano-brain +4 -0
  8. package/opencode-mcp.json +9 -0
  9. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/.openspec.yaml +2 -0
  10. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/design.md +68 -0
  11. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/proposal.md +27 -0
  12. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/mcp-integration-testing/spec.md +50 -0
  13. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/mcp-server/spec.md +40 -0
  14. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/search-pipeline/spec.md +29 -0
  15. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/tasks.md +37 -0
  16. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/.openspec.yaml +2 -0
  17. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/design.md +111 -0
  18. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/proposal.md +30 -0
  19. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/mcp-server/spec.md +33 -0
  20. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/storage-limits/spec.md +90 -0
  21. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/workspace-scoping/spec.md +66 -0
  22. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/tasks.md +199 -0
  23. package/openspec/changes/codebase-indexing/.openspec.yaml +2 -0
  24. package/openspec/changes/codebase-indexing/design.md +169 -0
  25. package/openspec/changes/codebase-indexing/proposal.md +30 -0
  26. package/openspec/changes/codebase-indexing/specs/codebase-collection/spec.md +187 -0
  27. package/openspec/changes/codebase-indexing/specs/mcp-server/spec.md +36 -0
  28. package/openspec/changes/codebase-indexing/tasks.md +56 -0
  29. package/openspec/specs/mcp-integration-testing/spec.md +50 -0
  30. package/openspec/specs/mcp-server/spec.md +75 -0
  31. package/openspec/specs/search-pipeline/spec.md +29 -0
  32. package/openspec/specs/storage-limits/spec.md +94 -0
  33. package/openspec/specs/workspace-scoping/spec.md +70 -0
  34. package/package.json +34 -0
  35. package/site/build.js +66 -0
  36. package/site/partials/_api.html +83 -0
  37. package/site/partials/_compare.html +100 -0
  38. package/site/partials/_config.html +23 -0
  39. package/site/partials/_features.html +43 -0
  40. package/site/partials/_footer.html +6 -0
  41. package/site/partials/_hero.html +9 -0
  42. package/site/partials/_how-it-works.html +26 -0
  43. package/site/partials/_models.html +18 -0
  44. package/site/partials/_quick-start.html +15 -0
  45. package/site/partials/_stats.html +1 -0
  46. package/site/partials/_tech-stack.html +13 -0
  47. package/site/script.js +12 -0
  48. package/site/shell.html +44 -0
  49. package/site/styles.css +548 -0
  50. package/src/chunker.ts +427 -0
  51. package/src/codebase.ts +331 -0
  52. package/src/collections.ts +192 -0
  53. package/src/embeddings.ts +293 -0
  54. package/src/expansion.ts +79 -0
  55. package/src/harvester.ts +306 -0
  56. package/src/index.ts +503 -0
  57. package/src/reranker.ts +103 -0
  58. package/src/search.ts +294 -0
  59. package/src/server.ts +664 -0
  60. package/src/storage.ts +221 -0
  61. package/src/store.ts +623 -0
  62. package/src/types.ts +202 -0
  63. package/src/watcher.ts +384 -0
  64. package/test/chunker.test.ts +479 -0
  65. package/test/cli.test.ts +309 -0
  66. package/test/codebase-chunker.test.ts +446 -0
  67. package/test/codebase.test.ts +678 -0
  68. package/test/collections.test.ts +571 -0
  69. package/test/harvester.test.ts +636 -0
  70. package/test/integration.test.ts +150 -0
  71. package/test/llm.test.ts +322 -0
  72. package/test/search.test.ts +572 -0
  73. package/test/server.test.ts +541 -0
  74. package/test/storage.test.ts +302 -0
  75. package/test/store.test.ts +465 -0
  76. package/test/watcher.test.ts +656 -0
  77. package/test/workspace.test.ts +239 -0
  78. package/tsconfig.json +19 -0
  79. package/vitest.config.ts +16 -0
package/src/store.ts ADDED
@@ -0,0 +1,623 @@
1
+ import Database from 'better-sqlite3';
2
+ import * as sqliteVec from 'sqlite-vec';
3
+ import type { Store, Document, SearchResult, IndexHealth } from './types.js';
4
+ import * as fs from 'fs';
5
+ import * as path from 'path';
6
+ import * as crypto from 'crypto';
7
+ import { chunkMarkdown } from './chunker.js';
8
+
9
/**
 * Turns free-form user input into a single quoted FTS5 phrase.
 *
 * Surrounding whitespace is stripped, every embedded double quote is doubled,
 * and the result is wrapped in double quotes so that the whole input is
 * passed to `MATCH` as one string literal.
 *
 * @param query Raw search text.
 * @returns The quoted phrase, or `''` when the input is empty or whitespace-only.
 */
export function sanitizeFTS5Query(query: string): string {
  const phrase = query.trim();
  if (phrase.length === 0) {
    return '';
  }

  // Doubling is how a literal `"` is written inside a quoted FTS5 string.
  const doubled = phrase.split('"').join('""');
  return '"' + doubled + '"';
}
15
+
16
/**
 * Opens (creating it if necessary) the SQLite database at `dbPath` and returns
 * a `Store` backed by it.
 *
 * Set-up performed on every call:
 *  - creates the parent directory of `dbPath` when it does not exist;
 *  - enables WAL journaling and foreign-key enforcement;
 *  - tries to load the `sqlite-vec` extension; when that fails, vector search
 *    is disabled and only a warning is logged;
 *  - creates the tables, indexes, FTS5 index and FTS sync triggers if missing;
 *  - adds and backfills `documents.project_hash` on databases that predate it;
 *  - creates the `vectors_vec` table with 768 dimensions if missing
 *    (`ensureVecTable` can later rebuild it with a different width).
 *
 * @param dbPath Filesystem path of the SQLite database file.
 * @returns A `Store` whose methods operate on that database.
 */
export function createStore(dbPath: string): Store {
  type PendingEmbeddingRow = { hash: string; body: string; path: string };

  const dir = path.dirname(dbPath);
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir, { recursive: true });
  }
  const db = new Database(dbPath);

  db.pragma('journal_mode = WAL');
  db.pragma('foreign_keys = ON');

  let vecAvailable = false;

  try {
    sqliteVec.load(db);
    vecAvailable = true;
  } catch {
    console.warn('sqlite-vec extension not available, vector search disabled');
  }

  db.exec(`
    CREATE TABLE IF NOT EXISTS content (
      hash TEXT PRIMARY KEY,
      body TEXT NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE TABLE IF NOT EXISTS documents (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      collection TEXT NOT NULL,
      path TEXT NOT NULL,
      title TEXT NOT NULL,
      hash TEXT NOT NULL,
      agent TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      modified_at TEXT NOT NULL DEFAULT (datetime('now')),
      active INTEGER NOT NULL DEFAULT 1,
      FOREIGN KEY (hash) REFERENCES content(hash),
      UNIQUE(collection, path)
    );

    CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection, active);
    CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash);
    CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path, active);

    CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
      filepath,
      title,
      body,
      tokenize='porter unicode61'
    );

    CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents BEGIN
      INSERT INTO documents_fts(filepath, title, body)
      SELECT NEW.collection || '/' || NEW.path, NEW.title, c.body
      FROM content c WHERE c.hash = NEW.hash;
    END;

    CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
      DELETE FROM documents_fts WHERE filepath = OLD.collection || '/' || OLD.path;
    END;

    CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE OF hash ON documents BEGIN
      DELETE FROM documents_fts WHERE filepath = OLD.collection || '/' || OLD.path;
      INSERT INTO documents_fts(filepath, title, body)
      SELECT NEW.collection || '/' || NEW.path, NEW.title, c.body
      FROM content c WHERE c.hash = NEW.hash;
    END;

    CREATE TABLE IF NOT EXISTS content_vectors (
      hash TEXT NOT NULL,
      seq INTEGER NOT NULL DEFAULT 0,
      pos INTEGER NOT NULL DEFAULT 0,
      model TEXT NOT NULL,
      embedded_at TEXT NOT NULL DEFAULT (datetime('now')),
      PRIMARY KEY (hash, seq)
    );

    CREATE TABLE IF NOT EXISTS llm_cache (
      hash TEXT PRIMARY KEY,
      result TEXT NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
  `);

  // One-time migration for databases created before `project_hash` existed.
  const documentColumns = db.prepare("PRAGMA table_info(documents)").all() as Array<{ name: string }>;
  const hasProjectHash = documentColumns.some(col => col.name === 'project_hash');
  if (!hasProjectHash) {
    // ALTER and backfill run in one transaction: the presence of the column is
    // what marks the migration as done, so an interruption between the two
    // steps would otherwise leave session documents tagged 'global' for good.
    const migrateProjectHash = db.transaction(() => {
      db.exec("ALTER TABLE documents ADD COLUMN project_hash TEXT DEFAULT 'global'");
      const sessionPathRegex = /sessions\/([a-f0-9]{12})\//i;
      const rows = db.prepare("SELECT id, path FROM documents").all() as Array<{ id: number; path: string }>;
      const updateStmt = db.prepare("UPDATE documents SET project_hash = ? WHERE id = ?");
      for (const row of rows) {
        // Paths containing `sessions/<12 hex chars>/` take that segment as their project hash.
        const match = row.path.match(sessionPathRegex);
        if (match) {
          updateStmt.run(match[1], row.id);
        }
      }
    });
    migrateProjectHash();
  }
  db.exec("CREATE INDEX IF NOT EXISTS idx_documents_project_hash ON documents(project_hash, active)");

  if (vecAvailable) {
    try {
      db.exec(`
        CREATE VIRTUAL TABLE IF NOT EXISTS vectors_vec USING vec0(
          hash_seq TEXT PRIMARY KEY,
          embedding float[768] distance_metric=cosine
        );
      `);
    } catch (err) {
      console.warn('Failed to create vector table:', err);
      vecAvailable = false;
    }
  }

  // ---- Prepared statements -------------------------------------------------
  // Statements touching `vectors_vec` are prepared at their call sites instead,
  // because that table may be missing or be dropped and recreated later.

  const insertContentStmt = db.prepare(`
    INSERT OR IGNORE INTO content (hash, body) VALUES (?, ?)
  `);

  const insertDocumentStmt = db.prepare(`
    INSERT INTO documents (collection, path, title, hash, agent, created_at, modified_at, active, project_hash)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(collection, path) DO UPDATE SET
      title = excluded.title,
      hash = excluded.hash,
      agent = excluded.agent,
      modified_at = excluded.modified_at,
      active = excluded.active,
      project_hash = excluded.project_hash
  `);

  const getDocumentIdStmt = db.prepare(`
    SELECT id FROM documents WHERE collection = ? AND path = ?
  `);

  const findDocumentByPathStmt = db.prepare(`
    SELECT id, collection, path, title, hash, agent, created_at as createdAt, modified_at as modifiedAt, active, project_hash as projectHash
    FROM documents WHERE path = ? AND active = 1
  `);

  const findDocumentByDocidStmt = db.prepare(`
    SELECT id, collection, path, title, hash, agent, created_at as createdAt, modified_at as modifiedAt, active, project_hash as projectHash
    FROM documents WHERE substr(hash, 1, 6) = ? AND active = 1
  `);

  const getContentStmt = db.prepare(`
    SELECT body FROM content WHERE hash = ?
  `);

  const deactivateDocumentStmt = db.prepare(`
    UPDATE documents SET active = 0 WHERE collection = ? AND path = ?
  `);

  const bulkDeactivateExceptStmt = db.prepare(`
    UPDATE documents SET active = 0
    WHERE collection = ? AND path NOT IN (SELECT value FROM json_each(?))
  `);

  const deleteDocumentsByPathStmt = db.prepare(`
    DELETE FROM documents WHERE path = ? AND active = 1
  `);

  const searchFTSStmt = db.prepare(`
    SELECT
      d.id, d.path, d.collection, d.title, d.hash, d.agent,
      snippet(documents_fts, 2, '<mark>', '</mark>', '...', 64) as snippet,
      bm25(documents_fts) as score
    FROM documents_fts f
    JOIN documents d ON f.filepath = d.collection || '/' || d.path
    WHERE documents_fts MATCH ? AND d.active = 1
    ORDER BY bm25(documents_fts)
    LIMIT ?
  `);

  const searchFTSWithCollectionStmt = db.prepare(`
    SELECT
      d.id, d.path, d.collection, d.title, d.hash, d.agent,
      snippet(documents_fts, 2, '<mark>', '</mark>', '...', 64) as snippet,
      bm25(documents_fts) as score
    FROM documents_fts f
    JOIN documents d ON f.filepath = d.collection || '/' || d.path
    WHERE documents_fts MATCH ? AND d.active = 1 AND d.collection = ?
    ORDER BY bm25(documents_fts)
    LIMIT ?
  `);

  const searchFTSWithWorkspaceStmt = db.prepare(`
    SELECT
      d.id, d.path, d.collection, d.title, d.hash, d.agent,
      snippet(documents_fts, 2, '<mark>', '</mark>', '...', 64) as snippet,
      bm25(documents_fts) as score
    FROM documents_fts f
    JOIN documents d ON f.filepath = d.collection || '/' || d.path
    WHERE documents_fts MATCH ? AND d.active = 1 AND d.project_hash IN (?, 'global')
    ORDER BY bm25(documents_fts)
    LIMIT ?
  `);

  const searchFTSWithWorkspaceAndCollectionStmt = db.prepare(`
    SELECT
      d.id, d.path, d.collection, d.title, d.hash, d.agent,
      snippet(documents_fts, 2, '<mark>', '</mark>', '...', 64) as snippet,
      bm25(documents_fts) as score
    FROM documents_fts f
    JOIN documents d ON f.filepath = d.collection || '/' || d.path
    WHERE documents_fts MATCH ? AND d.active = 1 AND d.collection = ? AND d.project_hash IN (?, 'global')
    ORDER BY bm25(documents_fts)
    LIMIT ?
  `);

  const insertEmbeddingStmt = db.prepare(`
    INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model)
    VALUES (?, ?, ?, ?)
  `);

  const deleteOrphanedContentVectorsStmt = db.prepare(`
    DELETE FROM content_vectors WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
  `);

  const getCachedResultStmt = db.prepare(`
    SELECT result FROM llm_cache WHERE hash = ?
  `);

  const setCachedResultStmt = db.prepare(`
    INSERT OR REPLACE INTO llm_cache (hash, result) VALUES (?, ?)
  `);

  const getDocumentCountStmt = db.prepare(`
    SELECT COUNT(*) as count FROM documents WHERE active = 1
  `);

  const getChunkCountStmt = db.prepare(`
    SELECT COUNT(*) as count FROM content_vectors
  `);

  const getCollectionStatsStmt = db.prepare(`
    SELECT collection as name, COUNT(*) as documentCount, MIN(path) as path
    FROM documents WHERE active = 1
    GROUP BY collection
  `);

  const getWorkspaceStatsStmt = db.prepare(`
    SELECT project_hash as projectHash, COUNT(*) as count
    FROM documents WHERE active = 1
    GROUP BY project_hash
  `);

  const getCollectionStorageSizeStmt = db.prepare(`
    SELECT COALESCE(SUM(LENGTH(c.body)), 0) as totalSize
    FROM documents d
    JOIN content c ON c.hash = d.hash
    WHERE d.collection = ? AND d.active = 1
  `);

  const getHashesNeedingEmbeddingStmt = db.prepare(`
    SELECT c.hash, c.body, d.path
    FROM content c
    JOIN documents d ON d.hash = c.hash AND d.active = 1
    LEFT JOIN content_vectors cv ON cv.hash = c.hash
    WHERE cv.hash IS NULL
  `);

  // Same join as getHashesNeedingEmbeddingStmt, but counted inside SQLite so
  // that no document bodies have to be materialised just to obtain a number.
  const countHashesNeedingEmbeddingStmt = db.prepare(`
    SELECT COUNT(*) as count
    FROM content c
    JOIN documents d ON d.hash = c.hash AND d.active = 1
    LEFT JOIN content_vectors cv ON cv.hash = c.hash
    WHERE cv.hash IS NULL
  `);

  const getHashesNeedingEmbeddingByWorkspaceStmt = db.prepare(`
    SELECT c.hash, c.body, d.path
    FROM content c
    JOIN documents d ON d.hash = c.hash AND d.active = 1
    LEFT JOIN content_vectors cv ON cv.hash = c.hash
    WHERE cv.hash IS NULL AND d.project_hash IN (?, 'global')
  `);

  const getNextHashNeedingEmbeddingStmt = db.prepare(`
    SELECT c.hash, c.body, d.path
    FROM content c
    JOIN documents d ON d.hash = c.hash AND d.active = 1
    LEFT JOIN content_vectors cv ON cv.hash = c.hash
    WHERE cv.hash IS NULL
    LIMIT 1
  `);

  const getNextHashNeedingEmbeddingByWorkspaceStmt = db.prepare(`
    SELECT c.hash, c.body, d.path
    FROM content c
    JOIN documents d ON d.hash = c.hash AND d.active = 1
    LEFT JOIN content_vectors cv ON cv.hash = c.hash
    WHERE cv.hash IS NULL AND d.project_hash IN (?, 'global')
    LIMIT 1
  `);

  return {
    // Starts as 'missing' for every model; nothing in this function changes it.
    modelStatus: {
      embedding: 'missing',
      reranker: 'missing',
      expander: 'missing',
    },

    /** Closes the underlying database connection. */
    close() {
      db.close();
    },

    /** Stores `body` under `hash`; a row that already exists for `hash` is left untouched. */
    insertContent(hash: string, body: string) {
      insertContentStmt.run(hash, body);
    },

    /**
     * Inserts a document, or updates the existing row with the same
     * `(collection, path)`. `created_at` is only written on first insert.
     *
     * @returns The id of the inserted or updated row.
     */
    insertDocument(doc: Omit<Document, 'id'>): number {
      const result = insertDocumentStmt.run(
        doc.collection,
        doc.path,
        doc.title,
        doc.hash,
        doc.agent ?? null,
        doc.createdAt,
        doc.modifiedAt,
        doc.active ? 1 : 0,
        doc.projectHash ?? 'global'
      );
      // When the upsert takes the DO UPDATE path no row is inserted, so
      // lastInsertRowid still refers to some earlier insert. Look the row up
      // by its unique key to return the correct id in both cases.
      const row = getDocumentIdStmt.get(doc.collection, doc.path) as { id: number } | undefined;
      return row ? row.id : Number(result.lastInsertRowid);
    },

    /**
     * Looks up an active document.
     *
     * A 6-character hexadecimal argument is first tried as a docid (the first
     * six characters of the content hash, compared in lowercase). If that
     * finds nothing, or the argument has another shape, it is matched against
     * `path` exactly. The path lookup does not filter by collection.
     *
     * @returns The first matching document, or `null`.
     */
    findDocument(pathOrDocid: string): Document | null {
      let row: Record<string, unknown> | undefined;

      if (pathOrDocid.length === 6 && /^[a-f0-9]+$/i.test(pathOrDocid)) {
        row = findDocumentByDocidStmt.get(pathOrDocid.toLowerCase()) as Record<string, unknown> | undefined;
      }

      if (!row) {
        row = findDocumentByPathStmt.get(pathOrDocid) as Record<string, unknown> | undefined;
      }

      if (!row) return null;

      return {
        id: row.id as number,
        collection: row.collection as string,
        path: row.path as string,
        title: row.title as string,
        hash: row.hash as string,
        agent: row.agent as string | undefined,
        createdAt: row.createdAt as string,
        modifiedAt: row.modifiedAt as string,
        active: Boolean(row.active),
        projectHash: row.projectHash as string | undefined,
      };
    },

    /**
     * Returns the stored body for `hash`, optionally restricted to a line window.
     *
     * @param fromLine Index of the first line to return; used directly as a
     *   zero-based offset into the body split on `\n`. Defaults to 0.
     * @param maxLines Maximum number of lines to return. Defaults to "to the end".
     * @returns The (possibly sliced) body, or `null` when no content has that hash.
     */
    getDocumentBody(hash: string, fromLine?: number, maxLines?: number): string | null {
      const row = getContentStmt.get(hash) as { body: string } | undefined;
      if (!row) return null;

      if (fromLine === undefined && maxLines === undefined) {
        return row.body;
      }

      const lines = row.body.split('\n');
      const start = fromLine ?? 0;
      const end = maxLines !== undefined ? start + maxLines : lines.length;
      return lines.slice(start, end).join('\n');
    },

    /** Marks the document at `(collection, path)` as inactive; the row is kept. */
    deactivateDocument(collection: string, path: string) {
      deactivateDocumentStmt.run(collection, path);
    },

    /**
     * Marks every document of `collection` whose path is not in `activePaths`
     * as inactive. The paths are handed to SQLite as one JSON array.
     *
     * @returns The number of rows the UPDATE touched.
     */
    bulkDeactivateExcept(collection: string, activePaths: string[]): number {
      const result = bulkDeactivateExceptStmt.run(collection, JSON.stringify(activePaths));
      return result.changes;
    },

    /**
     * Records that chunk `seq` of content `hash` has been embedded with
     * `model`, and, when vector search is available, stores the vector under
     * the key `"<hash>:<seq>"`. A failure to store the vector is logged and
     * does not undo the tracking row.
     */
    insertEmbedding(hash: string, seq: number, pos: number, embedding: number[], model: string) {
      insertEmbeddingStmt.run(hash, seq, pos, model);

      if (vecAvailable) {
        try {
          const hashSeq = `${hash}:${seq}`;
          const insertVecStmt = db.prepare(`
            INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)
          `);
          insertVecStmt.run(hashSeq, new Float32Array(embedding));
        } catch (err) {
          console.warn('Failed to insert vector:', err);
        }
      }
    },

    /**
     * Makes sure `vectors_vec` can hold embeddings of the given width.
     *
     * The table is probed with a zero vector of `dimensions` elements. If the
     * probe succeeds the table is kept; in that case, if it is empty while
     * `content_vectors` still has rows, those tracking rows are cleared so the
     * content is embedded again. If the probe (or the consistency check)
     * throws, the table is dropped and recreated with `dimensions` columns and
     * `content_vectors` is cleared.
     *
     * Does nothing when vector search is unavailable or `dimensions` is not a
     * positive integer.
     */
    ensureVecTable(dimensions: number) {
      if (!vecAvailable) return;

      // `dimensions` is interpolated into DDL below. Rejecting bad values here
      // also avoids dropping the table and clearing content_vectors only to
      // have the subsequent CREATE fail.
      if (!Number.isInteger(dimensions) || dimensions <= 0) {
        console.warn(`[store] Ignoring invalid vector dimensions: ${dimensions}`);
        return;
      }

      try {
        // Check if existing table has correct dimensions by trying a dummy query
        try {
          const testVec = new Float32Array(dimensions);
          db.prepare('SELECT hash_seq FROM vectors_vec WHERE embedding MATCH ? LIMIT 1').get(testVec);
          // Table exists with correct dimensions — check consistency
          const vecCount = (db.prepare('SELECT COUNT(*) as count FROM vectors_vec').get() as { count: number }).count;
          const cvCount = (db.prepare('SELECT COUNT(*) as count FROM content_vectors').get() as { count: number }).count;
          if (vecCount === 0 && cvCount > 0) {
            // vectors_vec was rebuilt but content_vectors has stale tracking rows
            console.error(`[store] vectors_vec empty but content_vectors has ${cvCount} stale rows, clearing for re-embedding`);
            db.exec(`DELETE FROM content_vectors`);
          }
          return;
        } catch {
          // Probe failed: fall through and rebuild the table.
        }

        db.exec(`DROP TABLE IF EXISTS vectors_vec`);
        db.exec(`DELETE FROM content_vectors`);
        db.exec(`
          CREATE VIRTUAL TABLE vectors_vec USING vec0(
            hash_seq TEXT PRIMARY KEY,
            embedding float[${dimensions}] distance_metric=cosine
          );
        `);
        console.error(`[store] Recreated vectors_vec with ${dimensions} dimensions, cleared content_vectors for re-embedding`);
      } catch (err) {
        console.warn('Failed to recreate vector table:', err);
      }
    },

    /**
     * Full-text search over active documents.
     *
     * The query is passed through `sanitizeFTS5Query`, so it is matched as a
     * single quoted phrase. Rows are ordered by `bm25()`; the returned `score`
     * is the absolute value of that rank.
     *
     * @param limit Maximum number of rows (default 10).
     * @param collection When set, only this collection is searched.
     * @param projectHash When set and not `'all'`, only documents of that
     *   project or of `'global'` are returned.
     * @returns Matching documents; empty when the query is blank.
     */
    searchFTS(query: string, limit = 10, collection?: string, projectHash?: string): SearchResult[] {
      const sanitized = sanitizeFTS5Query(query);
      if (!sanitized) return [];

      let rows: unknown[];
      if (projectHash && projectHash !== 'all') {
        if (collection) {
          rows = searchFTSWithWorkspaceAndCollectionStmt.all(sanitized, collection, projectHash, limit);
        } else {
          rows = searchFTSWithWorkspaceStmt.all(sanitized, projectHash, limit);
        }
      } else {
        rows = collection
          ? searchFTSWithCollectionStmt.all(sanitized, collection, limit)
          : searchFTSStmt.all(sanitized, limit);
      }

      return (rows as Array<Record<string, unknown>>).map(row => ({
        id: String(row.id),
        path: row.path as string,
        collection: row.collection as string,
        title: row.title as string,
        snippet: row.snippet as string,
        score: Math.abs(row.score as number),
        startLine: 0,
        endLine: 0,
        docid: (row.hash as string).substring(0, 6),
        agent: row.agent as string | undefined,
      }));
    },

    /**
     * Nearest-neighbour search over stored chunk vectors.
     *
     * `limit` is passed as the KNN `k`; the collection / project filters are
     * additional conditions on the joined `documents` row. `score` is
     * `1 - distance`. The `query` text itself is not used here.
     *
     * @returns Matching documents, or `[]` when vector search is unavailable
     *   or the query throws (the error is logged).
     */
    searchVec(query: string, embedding: number[], limit = 10, collection?: string, projectHash?: string): SearchResult[] {
      if (!vecAvailable) {
        return [];
      }

      try {
        let sql = `
          SELECT v.hash_seq, v.distance, d.id, d.path, d.collection, d.title, d.hash, d.agent
          FROM vectors_vec v
          JOIN documents d ON substr(v.hash_seq, 1, instr(v.hash_seq, ':') - 1) = d.hash
          WHERE v.embedding MATCH ?
          AND k = ?
          AND d.active = 1
        `;

        const params: (Float32Array | string | number)[] = [new Float32Array(embedding), limit];
        if (collection) {
          sql += ` AND d.collection = ?`;
          params.push(collection);
        }
        if (projectHash && projectHash !== 'all') {
          sql += ` AND d.project_hash IN (?, 'global')`;
          params.push(projectHash);
        }
        sql += ` ORDER BY v.distance`;

        const stmt = db.prepare(sql);
        const rows = stmt.all(...params) as Array<Record<string, unknown>>;

        return rows.map(row => ({
          id: String(row.id),
          path: row.path as string,
          collection: row.collection as string,
          title: row.title as string,
          snippet: '',
          score: 1 - (row.distance as number),
          startLine: 0,
          endLine: 0,
          docid: (row.hash as string).substring(0, 6),
          agent: row.agent as string | undefined,
        }));
      } catch (err) {
        console.warn('Vector search failed:', err);
        return [];
      }
    },

    /** Returns the cached LLM result stored under `hash`, or `null`. */
    getCachedResult(hash: string): string | null {
      const row = getCachedResultStmt.get(hash) as { result: string } | undefined;
      return row?.result ?? null;
    },

    /** Stores (or overwrites) the cached LLM result for `hash`. */
    setCachedResult(hash: string, result: string) {
      setCachedResultStmt.run(hash, result);
    },

    /**
     * Collects summary counters for the index: active documents, tracked
     * chunks, rows still awaiting embedding, per-collection and per-project
     * document counts, the database file size in bytes (0 when the file cannot
     * be stat'ed) and the current `modelStatus`.
     */
    getIndexHealth(): IndexHealth {
      const docCount = (getDocumentCountStmt.get() as { count: number }).count;
      const chunkCount = (getChunkCountStmt.get() as { count: number }).count;
      const collections = getCollectionStatsStmt.all() as Array<{ name: string; documentCount: number; path: string }>;
      const pending = (countHashesNeedingEmbeddingStmt.get() as { count: number }).count;
      const workspaceStats = this.getWorkspaceStats();

      let dbSize = 0;
      try {
        const stats = fs.statSync(dbPath);
        dbSize = stats.size;
      } catch {
        // ignore
      }

      return {
        documentCount: docCount,
        chunkCount: chunkCount,
        pendingEmbeddings: pending,
        collections: collections,
        databaseSize: dbSize,
        modelStatus: this.modelStatus,
        workspaceStats: workspaceStats,
      };
    },

    /**
     * Lists content of active documents that has no row in `content_vectors`.
     *
     * @param projectHash When set and not `'all'`, restricts the result to
     *   documents of that project or of `'global'`.
     */
    getHashesNeedingEmbedding(projectHash?: string): PendingEmbeddingRow[] {
      if (projectHash && projectHash !== 'all') {
        return getHashesNeedingEmbeddingByWorkspaceStmt.all(projectHash) as PendingEmbeddingRow[];
      }
      return getHashesNeedingEmbeddingStmt.all() as PendingEmbeddingRow[];
    },

    /**
     * Like `getHashesNeedingEmbedding`, but returns only one row.
     *
     * @returns One pending row, or `null` when nothing is pending.
     */
    getNextHashNeedingEmbedding(projectHash?: string): PendingEmbeddingRow | null {
      const row = (
        projectHash && projectHash !== 'all'
          ? getNextHashNeedingEmbeddingByWorkspaceStmt.get(projectHash)
          : getNextHashNeedingEmbeddingStmt.get()
      ) as PendingEmbeddingRow | undefined;
      // `.get()` yields undefined for "no row"; the declared contract is null.
      return row ?? null;
    },

    /** Returns the number of active documents per project hash. */
    getWorkspaceStats(): Array<{ projectHash: string; count: number }> {
      return getWorkspaceStatsStmt.all() as Array<{ projectHash: string; count: number }>;
    },

    /**
     * Deletes every active document row whose path equals `filePath`,
     * regardless of collection.
     *
     * @returns The number of rows deleted.
     */
    deleteDocumentsByPath(filePath: string): number {
      const result = deleteDocumentsByPathStmt.run(filePath);
      return result.changes;
    },

    /**
     * Removes embedding bookkeeping and vectors whose content hash is no
     * longer referenced by any active document.
     *
     * @returns Rows deleted from `content_vectors` plus, when vector search is
     *   available and the delete succeeds, rows deleted from `vectors_vec`.
     */
    cleanOrphanedEmbeddings(): number {
      let totalDeleted = 0;

      const cvResult = deleteOrphanedContentVectorsStmt.run();
      totalDeleted += cvResult.changes;

      if (vecAvailable) {
        try {
          const deleteVecStmt = db.prepare(`
            DELETE FROM vectors_vec WHERE substr(hash_seq, 1, instr(hash_seq, ':') - 1) NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
          `);
          const vecResult = deleteVecStmt.run();
          totalDeleted += vecResult.changes;
        } catch (err) {
          // Still best-effort, but no longer silent.
          console.warn('Failed to clean orphaned vectors:', err);
        }
      }

      return totalDeleted;
    },

    /**
     * Sums `LENGTH(body)` over the active documents of `collection`.
     *
     * @returns The total, or 0 when the collection has no active documents.
     */
    getCollectionStorageSize(collection: string): number {
      const row = getCollectionStorageSizeStmt.get(collection) as { totalSize: number } | undefined;
      return row?.totalSize ?? 0;
    },
  };
}
586
+
587
/**
 * Computes the SHA-256 digest of `content`.
 *
 * @param content Text to hash.
 * @returns The digest as a hexadecimal string.
 */
export function computeHash(content: string): string {
  const hasher = crypto.createHash('sha256');
  hasher.update(content);
  return hasher.digest('hex');
}
590
+
591
/**
 * Stores `content` and upserts the document row for `(collection, filePath)`.
 *
 * The content is hashed with `computeHash`. When the store already holds this
 * exact document (same collection, same path) with the same hash, nothing is
 * written and the call reports `skipped: true`. Otherwise the content and the
 * document row are written; an existing document keeps its `createdAt`.
 *
 * @param store Store to write to.
 * @param collection Collection the document belongs to.
 * @param filePath Path stored as the document's `path`.
 * @param content Full document text.
 * @param title Title stored with the document.
 * @param projectHash Optional project scope forwarded to the store.
 * @returns The content hash, the number of chunks `chunkMarkdown` produced
 *   (0 when skipped), and whether the write was skipped.
 */
export function indexDocument(
  store: Store,
  collection: string,
  filePath: string,
  content: string,
  title: string,
  projectHash?: string
): { hash: string; chunks: number; skipped: boolean } {
  const hash = computeHash(content);

  // findDocument() is given only the path, so it may hand back a document that
  // lives in another collection (or, for a 6-hex-character path, one found by
  // docid). Such a row must neither trigger the "unchanged" shortcut nor lend
  // its createdAt to this document, so accept it only on an exact match.
  const found = store.findDocument(filePath);
  const existingDoc =
    found && found.collection === collection && found.path === filePath ? found : null;

  if (existingDoc && existingDoc.hash === hash) {
    return { hash, chunks: 0, skipped: true };
  }

  store.insertContent(hash, content);

  // Only the chunk count is reported here; the chunks themselves are not stored.
  const chunks = chunkMarkdown(content, hash);

  const now = new Date().toISOString();
  store.insertDocument({
    collection,
    path: filePath,
    title,
    hash,
    createdAt: existingDoc?.createdAt ?? now,
    modifiedAt: now,
    active: true,
    projectHash,
  });

  return { hash, chunks: chunks.length, skipped: false };
}