nano-brain 2026.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS_SNIPPET.md +36 -0
- package/CHANGELOG.md +68 -0
- package/README.md +281 -0
- package/SKILL.md +153 -0
- package/bin/cli.js +18 -0
- package/index.html +929 -0
- package/nano-brain +4 -0
- package/opencode-mcp.json +9 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/.openspec.yaml +2 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/design.md +68 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/proposal.md +27 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/mcp-integration-testing/spec.md +50 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/mcp-server/spec.md +40 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/search-pipeline/spec.md +29 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/tasks.md +37 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/.openspec.yaml +2 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/design.md +111 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/proposal.md +30 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/mcp-server/spec.md +33 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/storage-limits/spec.md +90 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/workspace-scoping/spec.md +66 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/tasks.md +199 -0
- package/openspec/changes/codebase-indexing/.openspec.yaml +2 -0
- package/openspec/changes/codebase-indexing/design.md +169 -0
- package/openspec/changes/codebase-indexing/proposal.md +30 -0
- package/openspec/changes/codebase-indexing/specs/codebase-collection/spec.md +187 -0
- package/openspec/changes/codebase-indexing/specs/mcp-server/spec.md +36 -0
- package/openspec/changes/codebase-indexing/tasks.md +56 -0
- package/openspec/specs/mcp-integration-testing/spec.md +50 -0
- package/openspec/specs/mcp-server/spec.md +75 -0
- package/openspec/specs/search-pipeline/spec.md +29 -0
- package/openspec/specs/storage-limits/spec.md +94 -0
- package/openspec/specs/workspace-scoping/spec.md +70 -0
- package/package.json +34 -0
- package/site/build.js +66 -0
- package/site/partials/_api.html +83 -0
- package/site/partials/_compare.html +100 -0
- package/site/partials/_config.html +23 -0
- package/site/partials/_features.html +43 -0
- package/site/partials/_footer.html +6 -0
- package/site/partials/_hero.html +9 -0
- package/site/partials/_how-it-works.html +26 -0
- package/site/partials/_models.html +18 -0
- package/site/partials/_quick-start.html +15 -0
- package/site/partials/_stats.html +1 -0
- package/site/partials/_tech-stack.html +13 -0
- package/site/script.js +12 -0
- package/site/shell.html +44 -0
- package/site/styles.css +548 -0
- package/src/chunker.ts +427 -0
- package/src/codebase.ts +331 -0
- package/src/collections.ts +192 -0
- package/src/embeddings.ts +293 -0
- package/src/expansion.ts +79 -0
- package/src/harvester.ts +306 -0
- package/src/index.ts +503 -0
- package/src/reranker.ts +103 -0
- package/src/search.ts +294 -0
- package/src/server.ts +664 -0
- package/src/storage.ts +221 -0
- package/src/store.ts +623 -0
- package/src/types.ts +202 -0
- package/src/watcher.ts +384 -0
- package/test/chunker.test.ts +479 -0
- package/test/cli.test.ts +309 -0
- package/test/codebase-chunker.test.ts +446 -0
- package/test/codebase.test.ts +678 -0
- package/test/collections.test.ts +571 -0
- package/test/harvester.test.ts +636 -0
- package/test/integration.test.ts +150 -0
- package/test/llm.test.ts +322 -0
- package/test/search.test.ts +572 -0
- package/test/server.test.ts +541 -0
- package/test/storage.test.ts +302 -0
- package/test/store.test.ts +465 -0
- package/test/watcher.test.ts +656 -0
- package/test/workspace.test.ts +239 -0
- package/tsconfig.json +19 -0
- package/vitest.config.ts +16 -0
package/src/store.ts
ADDED
|
@@ -0,0 +1,623 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
2
|
+
import * as sqliteVec from 'sqlite-vec';
|
|
3
|
+
import type { Store, Document, SearchResult, IndexHealth } from './types.js';
|
|
4
|
+
import * as fs from 'fs';
|
|
5
|
+
import * as path from 'path';
|
|
6
|
+
import * as crypto from 'crypto';
|
|
7
|
+
import { chunkMarkdown } from './chunker.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Converts free-form user input into a single FTS5 string literal.
 *
 * The input is trimmed; blank input yields an empty string. Otherwise every
 * double quote is doubled and the whole text is wrapped in double quotes.
 *
 * @param query Raw search text.
 * @returns The quoted literal, or `''` when the trimmed input is empty.
 */
export function sanitizeFTS5Query(query: string): string {
  const phrase = query.trim();
  if (phrase.length === 0) {
    return '';
  }
  // Doubling embedded quotes keeps them inside the surrounding literal.
  const doubled = phrase.split('"').join('""');
  return '"' + doubled + '"';
}
|
|
15
|
+
|
|
16
|
+
export function createStore(dbPath: string): Store {
|
|
17
|
+
const dir = path.dirname(dbPath);
|
|
18
|
+
if (!fs.existsSync(dir)) {
|
|
19
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
20
|
+
}
|
|
21
|
+
const db = new Database(dbPath);
|
|
22
|
+
|
|
23
|
+
db.pragma('journal_mode = WAL');
|
|
24
|
+
db.pragma('foreign_keys = ON');
|
|
25
|
+
|
|
26
|
+
let vecAvailable = false;
|
|
27
|
+
|
|
28
|
+
try {
|
|
29
|
+
sqliteVec.load(db);
|
|
30
|
+
vecAvailable = true;
|
|
31
|
+
} catch {
|
|
32
|
+
console.warn('sqlite-vec extension not available, vector search disabled');
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
db.exec(`
|
|
36
|
+
CREATE TABLE IF NOT EXISTS content (
|
|
37
|
+
hash TEXT PRIMARY KEY,
|
|
38
|
+
body TEXT NOT NULL,
|
|
39
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
40
|
+
);
|
|
41
|
+
|
|
42
|
+
CREATE TABLE IF NOT EXISTS documents (
|
|
43
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
44
|
+
collection TEXT NOT NULL,
|
|
45
|
+
path TEXT NOT NULL,
|
|
46
|
+
title TEXT NOT NULL,
|
|
47
|
+
hash TEXT NOT NULL,
|
|
48
|
+
agent TEXT,
|
|
49
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
50
|
+
modified_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
51
|
+
active INTEGER NOT NULL DEFAULT 1,
|
|
52
|
+
FOREIGN KEY (hash) REFERENCES content(hash),
|
|
53
|
+
UNIQUE(collection, path)
|
|
54
|
+
);
|
|
55
|
+
|
|
56
|
+
CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection, active);
|
|
57
|
+
CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash);
|
|
58
|
+
CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path, active);
|
|
59
|
+
|
|
60
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
|
|
61
|
+
filepath,
|
|
62
|
+
title,
|
|
63
|
+
body,
|
|
64
|
+
tokenize='porter unicode61'
|
|
65
|
+
);
|
|
66
|
+
|
|
67
|
+
CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents BEGIN
|
|
68
|
+
INSERT INTO documents_fts(filepath, title, body)
|
|
69
|
+
SELECT NEW.collection || '/' || NEW.path, NEW.title, c.body
|
|
70
|
+
FROM content c WHERE c.hash = NEW.hash;
|
|
71
|
+
END;
|
|
72
|
+
|
|
73
|
+
CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
|
|
74
|
+
DELETE FROM documents_fts WHERE filepath = OLD.collection || '/' || OLD.path;
|
|
75
|
+
END;
|
|
76
|
+
|
|
77
|
+
CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE OF hash ON documents BEGIN
|
|
78
|
+
DELETE FROM documents_fts WHERE filepath = OLD.collection || '/' || OLD.path;
|
|
79
|
+
INSERT INTO documents_fts(filepath, title, body)
|
|
80
|
+
SELECT NEW.collection || '/' || NEW.path, NEW.title, c.body
|
|
81
|
+
FROM content c WHERE c.hash = NEW.hash;
|
|
82
|
+
END;
|
|
83
|
+
|
|
84
|
+
CREATE TABLE IF NOT EXISTS content_vectors (
|
|
85
|
+
hash TEXT NOT NULL,
|
|
86
|
+
seq INTEGER NOT NULL DEFAULT 0,
|
|
87
|
+
pos INTEGER NOT NULL DEFAULT 0,
|
|
88
|
+
model TEXT NOT NULL,
|
|
89
|
+
embedded_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
90
|
+
PRIMARY KEY (hash, seq)
|
|
91
|
+
);
|
|
92
|
+
|
|
93
|
+
CREATE TABLE IF NOT EXISTS llm_cache (
|
|
94
|
+
hash TEXT PRIMARY KEY,
|
|
95
|
+
result TEXT NOT NULL,
|
|
96
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
97
|
+
);
|
|
98
|
+
`);
|
|
99
|
+
|
|
100
|
+
const hasProjectHash = (db.prepare("PRAGMA table_info(documents)").all() as Array<{ name: string }>).some(col => col.name === 'project_hash');
|
|
101
|
+
if (!hasProjectHash) {
|
|
102
|
+
db.exec("ALTER TABLE documents ADD COLUMN project_hash TEXT DEFAULT 'global'");
|
|
103
|
+
const sessionPathRegex = /sessions\/([a-f0-9]{12})\//i;
|
|
104
|
+
const rows = db.prepare("SELECT id, path FROM documents").all() as Array<{ id: number; path: string }>;
|
|
105
|
+
const updateStmt = db.prepare("UPDATE documents SET project_hash = ? WHERE id = ?");
|
|
106
|
+
for (const row of rows) {
|
|
107
|
+
const match = row.path.match(sessionPathRegex);
|
|
108
|
+
if (match) {
|
|
109
|
+
updateStmt.run(match[1], row.id);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
db.exec("CREATE INDEX IF NOT EXISTS idx_documents_project_hash ON documents(project_hash, active)");
|
|
114
|
+
|
|
115
|
+
if (vecAvailable) {
|
|
116
|
+
try {
|
|
117
|
+
db.exec(`
|
|
118
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS vectors_vec USING vec0(
|
|
119
|
+
hash_seq TEXT PRIMARY KEY,
|
|
120
|
+
embedding float[768] distance_metric=cosine
|
|
121
|
+
);
|
|
122
|
+
`);
|
|
123
|
+
} catch (err) {
|
|
124
|
+
console.warn('Failed to create vector table:', err);
|
|
125
|
+
vecAvailable = false;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
const insertContentStmt = db.prepare(`
|
|
130
|
+
INSERT OR IGNORE INTO content (hash, body) VALUES (?, ?)
|
|
131
|
+
`);
|
|
132
|
+
|
|
133
|
+
const insertDocumentStmt = db.prepare(`
|
|
134
|
+
INSERT INTO documents (collection, path, title, hash, agent, created_at, modified_at, active, project_hash)
|
|
135
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
136
|
+
ON CONFLICT(collection, path) DO UPDATE SET
|
|
137
|
+
title = excluded.title,
|
|
138
|
+
hash = excluded.hash,
|
|
139
|
+
agent = excluded.agent,
|
|
140
|
+
modified_at = excluded.modified_at,
|
|
141
|
+
active = excluded.active,
|
|
142
|
+
project_hash = excluded.project_hash
|
|
143
|
+
`);
|
|
144
|
+
|
|
145
|
+
const findDocumentByPathStmt = db.prepare(`
|
|
146
|
+
SELECT id, collection, path, title, hash, agent, created_at as createdAt, modified_at as modifiedAt, active, project_hash as projectHash
|
|
147
|
+
FROM documents WHERE path = ? AND active = 1
|
|
148
|
+
`);
|
|
149
|
+
|
|
150
|
+
const findDocumentByDocidStmt = db.prepare(`
|
|
151
|
+
SELECT id, collection, path, title, hash, agent, created_at as createdAt, modified_at as modifiedAt, active, project_hash as projectHash
|
|
152
|
+
FROM documents WHERE substr(hash, 1, 6) = ? AND active = 1
|
|
153
|
+
`);
|
|
154
|
+
|
|
155
|
+
const getContentStmt = db.prepare(`
|
|
156
|
+
SELECT body FROM content WHERE hash = ?
|
|
157
|
+
`);
|
|
158
|
+
|
|
159
|
+
const deactivateDocumentStmt = db.prepare(`
|
|
160
|
+
UPDATE documents SET active = 0 WHERE collection = ? AND path = ?
|
|
161
|
+
`);
|
|
162
|
+
|
|
163
|
+
const bulkDeactivateExceptStmt = db.prepare(`
|
|
164
|
+
UPDATE documents SET active = 0
|
|
165
|
+
WHERE collection = ? AND path NOT IN (SELECT value FROM json_each(?))
|
|
166
|
+
`);
|
|
167
|
+
|
|
168
|
+
const searchFTSStmt = db.prepare(`
|
|
169
|
+
SELECT
|
|
170
|
+
d.id, d.path, d.collection, d.title, d.hash, d.agent,
|
|
171
|
+
snippet(documents_fts, 2, '<mark>', '</mark>', '...', 64) as snippet,
|
|
172
|
+
bm25(documents_fts) as score
|
|
173
|
+
FROM documents_fts f
|
|
174
|
+
JOIN documents d ON f.filepath = d.collection || '/' || d.path
|
|
175
|
+
WHERE documents_fts MATCH ? AND d.active = 1
|
|
176
|
+
ORDER BY bm25(documents_fts)
|
|
177
|
+
LIMIT ?
|
|
178
|
+
`);
|
|
179
|
+
|
|
180
|
+
const searchFTSWithCollectionStmt = db.prepare(`
|
|
181
|
+
SELECT
|
|
182
|
+
d.id, d.path, d.collection, d.title, d.hash, d.agent,
|
|
183
|
+
snippet(documents_fts, 2, '<mark>', '</mark>', '...', 64) as snippet,
|
|
184
|
+
bm25(documents_fts) as score
|
|
185
|
+
FROM documents_fts f
|
|
186
|
+
JOIN documents d ON f.filepath = d.collection || '/' || d.path
|
|
187
|
+
WHERE documents_fts MATCH ? AND d.active = 1 AND d.collection = ?
|
|
188
|
+
ORDER BY bm25(documents_fts)
|
|
189
|
+
LIMIT ?
|
|
190
|
+
`);
|
|
191
|
+
|
|
192
|
+
const searchFTSWithWorkspaceStmt = db.prepare(`
|
|
193
|
+
SELECT
|
|
194
|
+
d.id, d.path, d.collection, d.title, d.hash, d.agent,
|
|
195
|
+
snippet(documents_fts, 2, '<mark>', '</mark>', '...', 64) as snippet,
|
|
196
|
+
bm25(documents_fts) as score
|
|
197
|
+
FROM documents_fts f
|
|
198
|
+
JOIN documents d ON f.filepath = d.collection || '/' || d.path
|
|
199
|
+
WHERE documents_fts MATCH ? AND d.active = 1 AND d.project_hash IN (?, 'global')
|
|
200
|
+
ORDER BY bm25(documents_fts)
|
|
201
|
+
LIMIT ?
|
|
202
|
+
`);
|
|
203
|
+
|
|
204
|
+
const searchFTSWithWorkspaceAndCollectionStmt = db.prepare(`
|
|
205
|
+
SELECT
|
|
206
|
+
d.id, d.path, d.collection, d.title, d.hash, d.agent,
|
|
207
|
+
snippet(documents_fts, 2, '<mark>', '</mark>', '...', 64) as snippet,
|
|
208
|
+
bm25(documents_fts) as score
|
|
209
|
+
FROM documents_fts f
|
|
210
|
+
JOIN documents d ON f.filepath = d.collection || '/' || d.path
|
|
211
|
+
WHERE documents_fts MATCH ? AND d.active = 1 AND d.collection = ? AND d.project_hash IN (?, 'global')
|
|
212
|
+
ORDER BY bm25(documents_fts)
|
|
213
|
+
LIMIT ?
|
|
214
|
+
`);
|
|
215
|
+
|
|
216
|
+
const insertEmbeddingStmt = db.prepare(`
|
|
217
|
+
INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model)
|
|
218
|
+
VALUES (?, ?, ?, ?)
|
|
219
|
+
`);
|
|
220
|
+
|
|
221
|
+
const getCachedResultStmt = db.prepare(`
|
|
222
|
+
SELECT result FROM llm_cache WHERE hash = ?
|
|
223
|
+
`);
|
|
224
|
+
|
|
225
|
+
const setCachedResultStmt = db.prepare(`
|
|
226
|
+
INSERT OR REPLACE INTO llm_cache (hash, result) VALUES (?, ?)
|
|
227
|
+
`);
|
|
228
|
+
|
|
229
|
+
const getDocumentCountStmt = db.prepare(`
|
|
230
|
+
SELECT COUNT(*) as count FROM documents WHERE active = 1
|
|
231
|
+
`);
|
|
232
|
+
|
|
233
|
+
const getChunkCountStmt = db.prepare(`
|
|
234
|
+
SELECT COUNT(*) as count FROM content_vectors
|
|
235
|
+
`);
|
|
236
|
+
|
|
237
|
+
const getCollectionStatsStmt = db.prepare(`
|
|
238
|
+
SELECT collection as name, COUNT(*) as documentCount, MIN(path) as path
|
|
239
|
+
FROM documents WHERE active = 1
|
|
240
|
+
GROUP BY collection
|
|
241
|
+
`);
|
|
242
|
+
|
|
243
|
+
const getWorkspaceStatsStmt = db.prepare(`
|
|
244
|
+
SELECT project_hash as projectHash, COUNT(*) as count
|
|
245
|
+
FROM documents WHERE active = 1
|
|
246
|
+
GROUP BY project_hash
|
|
247
|
+
`);
|
|
248
|
+
|
|
249
|
+
const getHashesNeedingEmbeddingStmt = db.prepare(`
|
|
250
|
+
SELECT c.hash, c.body, d.path
|
|
251
|
+
FROM content c
|
|
252
|
+
JOIN documents d ON d.hash = c.hash AND d.active = 1
|
|
253
|
+
LEFT JOIN content_vectors cv ON cv.hash = c.hash
|
|
254
|
+
WHERE cv.hash IS NULL
|
|
255
|
+
`);
|
|
256
|
+
|
|
257
|
+
const getHashesNeedingEmbeddingByWorkspaceStmt = db.prepare(`
|
|
258
|
+
SELECT c.hash, c.body, d.path
|
|
259
|
+
FROM content c
|
|
260
|
+
JOIN documents d ON d.hash = c.hash AND d.active = 1
|
|
261
|
+
LEFT JOIN content_vectors cv ON cv.hash = c.hash
|
|
262
|
+
WHERE cv.hash IS NULL AND d.project_hash IN (?, 'global')
|
|
263
|
+
`);
|
|
264
|
+
const getNextHashNeedingEmbeddingStmt = db.prepare(`
|
|
265
|
+
SELECT c.hash, c.body, d.path
|
|
266
|
+
FROM content c
|
|
267
|
+
JOIN documents d ON d.hash = c.hash AND d.active = 1
|
|
268
|
+
LEFT JOIN content_vectors cv ON cv.hash = c.hash
|
|
269
|
+
WHERE cv.hash IS NULL
|
|
270
|
+
LIMIT 1
|
|
271
|
+
`);
|
|
272
|
+
|
|
273
|
+
const getNextHashNeedingEmbeddingByWorkspaceStmt = db.prepare(`
|
|
274
|
+
SELECT c.hash, c.body, d.path
|
|
275
|
+
FROM content c
|
|
276
|
+
JOIN documents d ON d.hash = c.hash AND d.active = 1
|
|
277
|
+
LEFT JOIN content_vectors cv ON cv.hash = c.hash
|
|
278
|
+
WHERE cv.hash IS NULL AND d.project_hash IN (?, 'global')
|
|
279
|
+
LIMIT 1
|
|
280
|
+
`);
|
|
281
|
+
|
|
282
|
+
return {
|
|
283
|
+
modelStatus: {
|
|
284
|
+
embedding: 'missing',
|
|
285
|
+
reranker: 'missing',
|
|
286
|
+
expander: 'missing',
|
|
287
|
+
},
|
|
288
|
+
|
|
289
|
+
/** Closes the underlying database connection. */
close(): void {
  db.close();
},
|
|
292
|
+
|
|
293
|
+
/**
 * Stores a document body keyed by its content hash.
 *
 * Runs the prepared `INSERT OR IGNORE` statement, so a hash that is already
 * present keeps its existing row.
 *
 * @param hash Content hash used as the primary key.
 * @param body Full text to store.
 */
insertContent(hash: string, body: string): void {
  insertContentStmt.run(hash, body);
},
|
|
296
|
+
|
|
297
|
+
/**
 * Inserts a document row, or updates the existing row with the same
 * `(collection, path)`.
 *
 * @param doc Document fields; `agent` defaults to NULL and `projectHash`
 *   to `'global'` when absent.
 * @returns The `id` of the inserted or updated row.
 */
insertDocument(doc: Omit<Document, 'id'>): number {
  const result = insertDocumentStmt.run(
    doc.collection,
    doc.path,
    doc.title,
    doc.hash,
    doc.agent ?? null,
    doc.createdAt,
    doc.modifiedAt,
    doc.active ? 1 : 0,
    doc.projectHash ?? 'global'
  );

  // When the ON CONFLICT ... DO UPDATE branch fires, no row is inserted and
  // lastInsertRowid still refers to an earlier, unrelated insert. Resolve the
  // id through the unique (collection, path) key instead.
  const row = db
    .prepare('SELECT id FROM documents WHERE collection = ? AND path = ?')
    .get(doc.collection, doc.path) as { id: number } | undefined;

  return row ? row.id : Number(result.lastInsertRowid);
},
|
|
311
|
+
|
|
312
|
+
/**
 * Looks up one active document by docid or by path.
 *
 * A 6-character hexadecimal argument is first tried as a docid (the first
 * six characters of the content hash, compared in lowercase). If that finds
 * nothing, or the argument is not docid-shaped, it is looked up as a path.
 *
 * @param pathOrDocid Document path or 6-hex docid.
 * @returns The matching document, or `null` when nothing matches.
 */
findDocument(pathOrDocid: string): Document | null {
  type Row = Record<string, unknown>;

  const looksLikeDocid = /^[a-f0-9]{6}$/i.test(pathOrDocid);
  const byDocid = looksLikeDocid
    ? (findDocumentByDocidStmt.get(pathOrDocid.toLowerCase()) as Row | undefined)
    : undefined;

  // Fall back to a path lookup only when the docid lookup produced nothing.
  const found = byDocid ? byDocid : (findDocumentByPathStmt.get(pathOrDocid) as Row | undefined);
  if (!found) {
    return null;
  }

  const { id, collection, path, title, hash, agent, createdAt, modifiedAt, active, projectHash } = found;
  return {
    id: id as number,
    collection: collection as string,
    path: path as string,
    title: title as string,
    hash: hash as string,
    agent: agent as string | undefined,
    createdAt: createdAt as string,
    modifiedAt: modifiedAt as string,
    active: Boolean(active),
    projectHash: projectHash as string | undefined,
  };
},
|
|
338
|
+
|
|
339
|
+
/**
 * Returns the stored body for a content hash, optionally limited to a line
 * window.
 *
 * @param hash Content hash to look up.
 * @param fromLine Index into the `'\n'`-split body at which the window
 *   starts; defaults to 0.
 * @param maxLines Maximum number of lines to return; defaults to the rest of
 *   the body.
 * @returns The full body when neither window argument is given, the joined
 *   window otherwise, or `null` when the hash is unknown.
 */
getDocumentBody(hash: string, fromLine?: number, maxLines?: number): string | null {
  const stored = getContentStmt.get(hash) as { body: string } | undefined;
  if (!stored) {
    return null;
  }

  const wantsWindow = fromLine !== undefined || maxLines !== undefined;
  if (!wantsWindow) {
    return stored.body;
  }

  const allLines = stored.body.split('\n');
  const first = fromLine ?? 0;
  const stop = maxLines === undefined ? allLines.length : first + maxLines;
  return allLines.slice(first, stop).join('\n');
},
|
|
352
|
+
|
|
353
|
+
/**
 * Marks the document at `(collection, path)` as inactive by setting
 * `active = 0`. The row itself is kept.
 *
 * @param collection Collection the document belongs to.
 * @param path Document path within the collection.
 */
deactivateDocument(collection: string, path: string): void {
  deactivateDocumentStmt.run(collection, path);
},
|
|
356
|
+
|
|
357
|
+
/**
 * Deactivates every document in a collection whose path is not listed.
 *
 * The keep-list is passed to SQLite as a JSON array and expanded with
 * `json_each`. An empty list therefore matches every row in the collection.
 *
 * @param collection Collection to sweep.
 * @param activePaths Paths that must remain untouched.
 * @returns Number of rows changed by the UPDATE.
 */
bulkDeactivateExcept(collection: string, activePaths: string[]): number {
  const keepListJson = JSON.stringify(activePaths);
  const { changes } = bulkDeactivateExceptStmt.run(collection, keepListJson);
  return changes;
},
|
|
361
|
+
|
|
362
|
+
/**
 * Records an embedding for one chunk of a content hash.
 *
 * The tracking row in `content_vectors` is always written. The vector itself
 * is written to `vectors_vec` under the key `"<hash>:<seq>"` only when the
 * vector extension is available. A failure there is logged, not thrown.
 *
 * @param hash Content hash the chunk belongs to.
 * @param seq Chunk sequence number within the content.
 * @param pos Chunk position stored alongside the tracking row.
 * @param embedding Vector values; converted to a `Float32Array` for storage.
 * @param model Name of the model that produced the embedding.
 */
insertEmbedding(hash: string, seq: number, pos: number, embedding: number[], model: string) {
  insertEmbeddingStmt.run(hash, seq, pos, model);

  if (!vecAvailable) {
    return;
  }

  try {
    const vectorKey = `${hash}:${seq}`;
    db.prepare(`
      INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)
    `).run(vectorKey, new Float32Array(embedding));
  } catch (err) {
    console.warn('Failed to insert vector:', err);
  }
},
|
|
377
|
+
|
|
378
|
+
/**
 * Makes sure `vectors_vec` can store embeddings of the given width.
 *
 * The table is probed with a dummy query of that width. If the probe
 * succeeds the table is kept; stale `content_vectors` rows are cleared when
 * the vector table is empty but tracking rows exist. If the probe (or the
 * consistency check) throws, the table is dropped and recreated with the
 * requested width, and `content_vectors` is cleared so everything is
 * re-embedded.
 *
 * Does nothing when the vector extension is unavailable. Failures are logged
 * rather than thrown.
 *
 * @param dimensions Embedding width; must be a positive integer.
 */
ensureVecTable(dimensions: number) {
  if (!vecAvailable) return;

  // Validate before anything destructive. `dimensions` is interpolated into
  // the CREATE statement, and a bad value would make the probe fail, drop the
  // existing table and clear content_vectors before the CREATE itself failed.
  if (!Number.isInteger(dimensions) || dimensions <= 0) {
    console.warn('Failed to recreate vector table:', new Error(`Invalid embedding dimensions: ${dimensions}`));
    return;
  }

  try {
    try {
      // Check if existing table has correct dimensions by trying a dummy query
      const testVec = new Float32Array(dimensions);
      db.prepare('SELECT hash_seq FROM vectors_vec WHERE embedding MATCH ? LIMIT 1').get(testVec);

      // Table exists with correct dimensions — check consistency
      const vecCount = (db.prepare('SELECT COUNT(*) as count FROM vectors_vec').get() as { count: number }).count;
      const cvCount = (db.prepare('SELECT COUNT(*) as count FROM content_vectors').get() as { count: number }).count;
      if (vecCount === 0 && cvCount > 0) {
        // vectors_vec was rebuilt but content_vectors has stale tracking rows
        console.error(`[store] vectors_vec empty but content_vectors has ${cvCount} stale rows, clearing for re-embedding`);
        db.exec(`DELETE FROM content_vectors`);
      }
      return;
    } catch {
      // Probe or consistency check failed: fall through and rebuild the table.
    }

    db.exec(`DROP TABLE IF EXISTS vectors_vec`);
    db.exec(`DELETE FROM content_vectors`);
    db.exec(`
      CREATE VIRTUAL TABLE vectors_vec USING vec0(
        hash_seq TEXT PRIMARY KEY,
        embedding float[${dimensions}] distance_metric=cosine
      );
    `);
    console.error(`[store] Recreated vectors_vec with ${dimensions} dimensions, cleared content_vectors for re-embedding`);
  } catch (err) {
    console.warn('Failed to recreate vector table:', err);
  }
},
|
|
413
|
+
|
|
414
|
+
/**
 * Full-text search over active documents.
 *
 * The query is passed through `sanitizeFTS5Query`; an empty result from that
 * step short-circuits to `[]`. One of four prepared statements is chosen
 * depending on whether a collection filter and/or a workspace filter apply.
 * A workspace filter applies when `projectHash` is set and is not `'all'`.
 *
 * @param query Raw search text.
 * @param limit Maximum number of rows to return (default 10).
 * @param collection Optional collection filter.
 * @param projectHash Optional workspace filter; `'all'` disables it.
 * @returns Results with `score` set to the absolute bm25 value, `docid` set
 *   to the first six characters of the content hash, and `startLine` /
 *   `endLine` fixed at 0.
 */
searchFTS(query: string, limit = 10, collection?: string, projectHash?: string): SearchResult[] {
  const matchExpr = sanitizeFTS5Query(query);
  if (!matchExpr) {
    return [];
  }

  const workspaceScoped = Boolean(projectHash) && projectHash !== 'all';

  let hits: unknown[];
  if (workspaceScoped && collection) {
    hits = searchFTSWithWorkspaceAndCollectionStmt.all(matchExpr, collection, projectHash, limit);
  } else if (workspaceScoped) {
    hits = searchFTSWithWorkspaceStmt.all(matchExpr, projectHash, limit);
  } else if (collection) {
    hits = searchFTSWithCollectionStmt.all(matchExpr, collection, limit);
  } else {
    hits = searchFTSStmt.all(matchExpr, limit);
  }

  const results: SearchResult[] = [];
  for (const hit of hits as Array<Record<string, unknown>>) {
    results.push({
      id: String(hit.id),
      path: hit.path as string,
      collection: hit.collection as string,
      title: hit.title as string,
      snippet: hit.snippet as string,
      score: Math.abs(hit.score as number),
      startLine: 0,
      endLine: 0,
      docid: (hit.hash as string).substring(0, 6),
      agent: hit.agent as string | undefined,
    });
  }
  return results;
},
|
|
444
|
+
|
|
445
|
+
/**
 * Nearest-neighbour search over stored embeddings.
 *
 * Returns `[]` when the vector extension is unavailable or when the query
 * throws (the error is logged). `limit` is bound to the `k = ?` constraint.
 * The active, collection and workspace filters are part of the same WHERE
 * clause.
 *
 * @param query Original query text; not used by this method.
 * @param embedding Query vector; converted to a `Float32Array`.
 * @param limit Value bound to `k` (default 10).
 * @param collection Optional collection filter.
 * @param projectHash Optional workspace filter; `'all'` disables it.
 * @returns Results with `score = 1 - distance`, an empty `snippet`, and
 *   `startLine` / `endLine` fixed at 0.
 */
searchVec(query: string, embedding: number[], limit = 10, collection?: string, projectHash?: string): SearchResult[] {
  if (!vecAvailable) {
    return [];
  }

  try {
    const baseSql = `
      SELECT v.hash_seq, v.distance, d.id, d.path, d.collection, d.title, d.hash, d.agent
      FROM vectors_vec v
      JOIN documents d ON substr(v.hash_seq, 1, instr(v.hash_seq, ':') - 1) = d.hash
      WHERE v.embedding MATCH ?
      AND k = ?
      AND d.active = 1
    `;

    const extraFilters: string[] = [];
    const bindings: (Float32Array | string | number)[] = [new Float32Array(embedding), limit];

    if (collection) {
      extraFilters.push(` AND d.collection = ?`);
      bindings.push(collection);
    }
    if (projectHash && projectHash !== 'all') {
      extraFilters.push(` AND d.project_hash IN (?, 'global')`);
      bindings.push(projectHash);
    }

    const sql = baseSql + extraFilters.join('') + ` ORDER BY v.distance`;
    const neighbours = db.prepare(sql).all(...bindings) as Array<Record<string, unknown>>;

    return neighbours.map(hit => {
      const contentHash = hit.hash as string;
      return {
        id: String(hit.id),
        path: hit.path as string,
        collection: hit.collection as string,
        title: hit.title as string,
        snippet: '',
        score: 1 - (hit.distance as number),
        startLine: 0,
        endLine: 0,
        docid: contentHash.substring(0, 6),
        agent: hit.agent as string | undefined,
      };
    });
  } catch (err) {
    console.warn('Vector search failed:', err);
    return [];
  }
},
|
|
491
|
+
|
|
492
|
+
/**
 * Reads a cached result from `llm_cache`.
 *
 * @param hash Cache key.
 * @returns The cached string, or `null` when there is no entry.
 */
getCachedResult(hash: string): string | null {
  const cached = getCachedResultStmt.get(hash) as { result: string } | undefined;
  if (cached == null) {
    return null;
  }
  return cached.result ?? null;
},
|
|
496
|
+
|
|
497
|
+
/**
 * Writes a result to `llm_cache`, replacing any existing entry for the key
 * (`INSERT OR REPLACE`).
 *
 * @param hash Cache key.
 * @param result Value to cache.
 */
setCachedResult(hash: string, result: string): void {
  setCachedResultStmt.run(hash, result);
},
|
|
500
|
+
|
|
501
|
+
/**
 * Collects summary statistics about the index.
 *
 * @returns Active document count, tracked chunk count, number of rows still
 *   awaiting embedding, per-collection stats, database file size in bytes
 *   (0 when the file cannot be stat'ed), current model status, and
 *   per-workspace document counts.
 */
getIndexHealth(): IndexHealth {
  const docCount = (getDocumentCountStmt.get() as { count: number }).count;
  const chunkCount = (getChunkCountStmt.get() as { count: number }).count;
  const collections = getCollectionStatsStmt.all() as Array<{ name: string; documentCount: number; path: string }>;

  // Count pending rows in SQL over the same join used to list them. Fetching
  // every row would pull each full document body into memory just to count.
  const pending = (db.prepare(`
    SELECT COUNT(*) as count
    FROM content c
    JOIN documents d ON d.hash = c.hash AND d.active = 1
    LEFT JOIN content_vectors cv ON cv.hash = c.hash
    WHERE cv.hash IS NULL
  `).get() as { count: number }).count;

  const workspaceStats = this.getWorkspaceStats();

  let dbSize = 0;
  try {
    dbSize = fs.statSync(dbPath).size;
  } catch {
    // Best effort: report 0 when the database file cannot be stat'ed.
  }

  return {
    documentCount: docCount,
    chunkCount: chunkCount,
    pendingEmbeddings: pending,
    collections: collections,
    databaseSize: dbSize,
    modelStatus: this.modelStatus,
    workspaceStats: workspaceStats,
  };
},
|
|
526
|
+
|
|
527
|
+
/**
 * Lists content that belongs to an active document but has no row in
 * `content_vectors`.
 *
 * @param projectHash Optional workspace filter. When set and not `'all'`,
 *   only documents in that workspace or in `'global'` are considered.
 * @returns One entry per matching (content, document) pair.
 */
getHashesNeedingEmbedding(projectHash?: string): Array<{ hash: string; body: string; path: string }> {
  type Pending = { hash: string; body: string; path: string };

  const workspaceScoped = Boolean(projectHash) && projectHash !== 'all';
  const pendingRows = workspaceScoped
    ? getHashesNeedingEmbeddingByWorkspaceStmt.all(projectHash)
    : getHashesNeedingEmbeddingStmt.all();

  return pendingRows as Pending[];
},
|
|
533
|
+
|
|
534
|
+
/**
 * Returns one piece of content that belongs to an active document but has no
 * row in `content_vectors`.
 *
 * @param projectHash Optional workspace filter. When set and not `'all'`,
 *   only documents in that workspace or in `'global'` are considered.
 * @returns A pending entry, or `null` when nothing is pending.
 */
getNextHashNeedingEmbedding(projectHash?: string): { hash: string; body: string; path: string } | null {
  type Pending = { hash: string; body: string; path: string };

  const next = projectHash && projectHash !== 'all'
    ? getNextHashNeedingEmbeddingByWorkspaceStmt.get(projectHash)
    : getNextHashNeedingEmbeddingStmt.get();

  // `.get()` yields `undefined` when no row matches. Normalise it to the
  // `null` promised by the signature so `=== null` checks in callers work.
  return (next as Pending | undefined) ?? null;
},
|
|
540
|
+
|
|
541
|
+
/**
 * Counts active documents per workspace.
 *
 * @returns One `{ projectHash, count }` entry per distinct `project_hash`
 *   among active documents.
 */
getWorkspaceStats(): Array<{ projectHash: string; count: number }> {
  const perWorkspace = getWorkspaceStatsStmt.all();
  return perWorkspace as Array<{ projectHash: string; count: number }>;
},
|
|
544
|
+
|
|
545
|
+
/**
 * Deletes active document rows whose path equals `filePath`, in any
 * collection.
 *
 * @param filePath Exact path to match.
 * @returns Number of rows deleted.
 */
deleteDocumentsByPath(filePath: string): number {
  const { changes } = db
    .prepare(`DELETE FROM documents WHERE path = ? AND active = 1`)
    .run(filePath);
  return changes;
},
|
|
550
|
+
|
|
551
|
+
/**
 * Removes embedding data whose content hash is no longer referenced by any
 * active document.
 *
 * Tracking rows in `content_vectors` are always cleaned. Rows in
 * `vectors_vec` are cleaned only when the vector extension is available. A
 * failure there is logged and does not abort the call.
 *
 * @returns Total number of rows deleted across both tables.
 */
cleanOrphanedEmbeddings(): number {
  let totalDeleted = 0;

  const deleteContentVectorsStmt = db.prepare(`
    DELETE FROM content_vectors WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
  `);
  totalDeleted += deleteContentVectorsStmt.run().changes;

  if (vecAvailable) {
    try {
      // hash_seq is "<hash>:<seq>"; the text before the first ':' is the hash.
      const deleteVecStmt = db.prepare(`
        DELETE FROM vectors_vec WHERE substr(hash_seq, 1, instr(hash_seq, ':') - 1) NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
      `);
      totalDeleted += deleteVecStmt.run().changes;
    } catch (err) {
      // Still best-effort, but log the failure so leftover vectors are visible.
      console.warn('Failed to clean orphaned vectors:', err);
    }
  }

  return totalDeleted;
},
|
|
573
|
+
|
|
574
|
+
/**
 * Sums `LENGTH(body)` over the content of every active document in a
 * collection.
 *
 * @param collection Collection name.
 * @returns The summed length, or 0 when the collection has no active
 *   documents.
 */
getCollectionStorageSize(collection: string): number {
  const sizeQuery = db.prepare(`
    SELECT COALESCE(SUM(LENGTH(c.body)), 0) as totalSize
    FROM documents d
    JOIN content c ON c.hash = d.hash
    WHERE d.collection = ? AND d.active = 1
  `);
  const summary = sizeQuery.get(collection) as { totalSize: number } | undefined;
  if (summary == null) {
    return 0;
  }
  return summary.totalSize ?? 0;
},
|
|
584
|
+
};
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param content Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
export function computeHash(content: string): string {
  const hasher = crypto.createHash('sha256');
  hasher.update(content);
  return hasher.digest('hex');
}
|
|
590
|
+
|
|
591
|
+
/**
 * Indexes one document: stores its content under a SHA-256 hash and upserts
 * the document row for `(collection, filePath)`.
 *
 * The call is a no-op when the same collection already holds an active
 * document at `filePath` with identical content.
 *
 * @param store Store to write to.
 * @param collection Collection the document belongs to.
 * @param filePath Document path within the collection.
 * @param content Full document text.
 * @param title Document title.
 * @param projectHash Optional workspace identifier passed through to the
 *   document row.
 * @returns The content hash, the number of chunks `chunkMarkdown` produced
 *   (0 when skipped), and whether indexing was skipped.
 */
export function indexDocument(
  store: Store,
  collection: string,
  filePath: string,
  content: string,
  title: string,
  projectHash?: string
): { hash: string; chunks: number; skipped: boolean } {
  const hash = computeHash(content);

  // findDocument matches on path alone (or on a 6-hex docid prefix), so the
  // hit may be a document in another collection or at another path. Accept it
  // only when it is this exact document; otherwise identical content in a
  // different collection would cause this one to be skipped and never indexed.
  const found = store.findDocument(filePath);
  const existingDoc =
    found && found.collection === collection && found.path === filePath ? found : null;

  if (existingDoc && existingDoc.hash === hash) {
    return { hash, chunks: 0, skipped: true };
  }

  store.insertContent(hash, content);

  const chunks = chunkMarkdown(content, hash);

  const now = new Date().toISOString();
  store.insertDocument({
    collection,
    path: filePath,
    title,
    hash,
    createdAt: existingDoc?.createdAt ?? now,
    modifiedAt: now,
    active: true,
    projectHash,
  });

  return { hash, chunks: chunks.length, skipped: false };
}
|