@tai-io/codesearch 2026.313.1614
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.d.ts +3 -0
- package/dist/build-info.js +4 -0
- package/dist/config.d.ts +62 -0
- package/dist/config.js +52 -0
- package/dist/core/cleanup.d.ts +8 -0
- package/dist/core/cleanup.js +41 -0
- package/dist/core/doc-indexer.d.ts +13 -0
- package/dist/core/doc-indexer.js +76 -0
- package/dist/core/doc-searcher.d.ts +13 -0
- package/dist/core/doc-searcher.js +65 -0
- package/dist/core/file-category.d.ts +7 -0
- package/dist/core/file-category.js +75 -0
- package/dist/core/indexer.d.ts +18 -0
- package/dist/core/indexer.js +177 -0
- package/dist/core/preview.d.ts +13 -0
- package/dist/core/preview.js +58 -0
- package/dist/core/repo-map.d.ts +33 -0
- package/dist/core/repo-map.js +144 -0
- package/dist/core/searcher.d.ts +12 -0
- package/dist/core/searcher.js +97 -0
- package/dist/core/sync.d.ts +15 -0
- package/dist/core/sync.js +212 -0
- package/dist/core/targeted-indexer.d.ts +19 -0
- package/dist/core/targeted-indexer.js +127 -0
- package/dist/embedding/factory.d.ts +4 -0
- package/dist/embedding/factory.js +24 -0
- package/dist/embedding/openai.d.ts +33 -0
- package/dist/embedding/openai.js +234 -0
- package/dist/embedding/truncate.d.ts +6 -0
- package/dist/embedding/truncate.js +14 -0
- package/dist/embedding/types.d.ts +18 -0
- package/dist/embedding/types.js +2 -0
- package/dist/errors.d.ts +17 -0
- package/dist/errors.js +21 -0
- package/dist/format.d.ts +18 -0
- package/dist/format.js +151 -0
- package/dist/hooks/cli-router.d.ts +7 -0
- package/dist/hooks/cli-router.js +47 -0
- package/dist/hooks/hook-output.d.ts +56 -0
- package/dist/hooks/hook-output.js +21 -0
- package/dist/hooks/post-tool-use.d.ts +13 -0
- package/dist/hooks/post-tool-use.js +123 -0
- package/dist/hooks/stop-hook.d.ts +11 -0
- package/dist/hooks/stop-hook.js +137 -0
- package/dist/hooks/targeted-runner.d.ts +11 -0
- package/dist/hooks/targeted-runner.js +58 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +138 -0
- package/dist/paths.d.ts +11 -0
- package/dist/paths.js +54 -0
- package/dist/setup-message.d.ts +4 -0
- package/dist/setup-message.js +48 -0
- package/dist/splitter/ast.d.ts +13 -0
- package/dist/splitter/ast.js +231 -0
- package/dist/splitter/line.d.ts +10 -0
- package/dist/splitter/line.js +103 -0
- package/dist/splitter/symbol-extract.d.ts +16 -0
- package/dist/splitter/symbol-extract.js +61 -0
- package/dist/splitter/types.d.ts +16 -0
- package/dist/splitter/types.js +2 -0
- package/dist/state/doc-metadata.d.ts +18 -0
- package/dist/state/doc-metadata.js +59 -0
- package/dist/state/registry.d.ts +7 -0
- package/dist/state/registry.js +46 -0
- package/dist/state/snapshot.d.ts +26 -0
- package/dist/state/snapshot.js +100 -0
- package/dist/tool-schemas.d.ts +215 -0
- package/dist/tool-schemas.js +269 -0
- package/dist/tools.d.ts +58 -0
- package/dist/tools.js +245 -0
- package/dist/vectordb/rrf.d.ts +32 -0
- package/dist/vectordb/rrf.js +88 -0
- package/dist/vectordb/sqlite.d.ts +34 -0
- package/dist/vectordb/sqlite.js +624 -0
- package/dist/vectordb/types.d.ts +63 -0
- package/dist/vectordb/types.js +2 -0
- package/messages.yaml +69 -0
- package/package.json +79 -0
|
@@ -0,0 +1,624 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
2
|
+
import * as sqliteVec from 'sqlite-vec';
|
|
3
|
+
import fs from 'node:fs';
|
|
4
|
+
import path from 'node:path';
|
|
5
|
+
import { mkdirSync } from 'node:fs';
|
|
6
|
+
import { dirname } from 'node:path';
|
|
7
|
+
import { pathToCollectionName, getCodesearchDbPath, getSnapshotDbPath, getSnapshotDir, } from '../paths.js';
|
|
8
|
+
import { VectorDBError } from '../errors.js';
|
|
9
|
+
import { reciprocalRankFusion, rankByTermFrequency, } from './rrf.js';
|
|
10
|
+
// Documents written per transaction in SqliteVectorDB.insert(); bounds the
// size of each write transaction.
const INSERT_BATCH_SIZE = 500;
// Module-level cached database connection, opened lazily by getDb().
let db = null;
|
|
12
|
+
/**
 * Open (or return the cached) SQLite connection at `dbPath`, creating the
 * schema on first use, migrating legacy snapshot stores, and registering a
 * best-effort close on process exit.
 *
 * NOTE(review): the connection is cached module-wide — a later call with a
 * different `dbPath` still returns the first connection. Confirm callers
 * never pass differing paths within one process.
 */
function getDb(dbPath) {
    if (db)
        return db;
    // Ensure the parent directory exists before better-sqlite3 opens the file.
    mkdirSync(dirname(dbPath), { recursive: true });
    db = new Database(dbPath);
    sqliteVec.load(db); // enables the vec0 virtual-table module
    db.pragma('journal_mode = WAL');
    db.pragma('foreign_keys = ON');
    // Shared schema: collection registry, chunk metadata, and snapshot store.
    db.exec(`
    CREATE TABLE IF NOT EXISTS collections (
      name TEXT PRIMARY KEY,
      dimension INTEGER NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE TABLE IF NOT EXISTS chunks (
      id TEXT PRIMARY KEY,
      collection TEXT NOT NULL REFERENCES collections(name) ON DELETE CASCADE,
      content TEXT NOT NULL,
      relative_path TEXT NOT NULL,
      start_line INTEGER NOT NULL,
      end_line INTEGER NOT NULL,
      file_extension TEXT NOT NULL,
      language TEXT NOT NULL,
      file_category TEXT,
      symbol_name TEXT,
      symbol_kind TEXT,
      symbol_signature TEXT,
      parent_symbol TEXT,
      indexed_at TEXT NOT NULL DEFAULT (datetime('now')),
      file_modified_at TEXT
    );

    CREATE INDEX IF NOT EXISTS idx_chunks_collection ON chunks(collection);
    CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(collection, relative_path);
    CREATE INDEX IF NOT EXISTS idx_chunks_ext ON chunks(collection, file_extension);
    CREATE INDEX IF NOT EXISTS idx_chunks_symbol ON chunks(collection, symbol_name)
      WHERE symbol_name IS NOT NULL;
    CREATE INDEX IF NOT EXISTS idx_chunks_indexed ON chunks(collection, indexed_at);

    CREATE TABLE IF NOT EXISTS snapshots (
      collection_name TEXT PRIMARY KEY,
      data TEXT NOT NULL,
      updated_at TEXT NOT NULL
    );
  `);
    // One-time import of snapshots from the legacy stores (best-effort).
    migrateOldSnapshotsDb(db);
    // Close the handle when the process exits; registered once because the
    // connection is cached above.
    process.on('exit', () => {
        try {
            db?.close();
        }
        catch {
            // best-effort
        }
    });
    return db;
}
|
|
69
|
+
/**
 * One-time, best-effort migration of snapshot state from legacy stores into
 * the unified `snapshots` table:
 *   1. rows from the old standalone snapshots.db (file removed after copying),
 *   2. per-collection JSON files from the old snapshot directory.
 * All failures are swallowed — migration must never block opening the DB.
 *
 * @param database open better-sqlite3 handle for the unified DB
 */
function migrateOldSnapshotsDb(database) {
    try {
        const oldDbPath = getSnapshotDbPath();
        if (fs.existsSync(oldDbPath)) {
            const oldDb = new Database(oldDbPath, { readonly: true });
            try {
                const rows = oldDb.prepare('SELECT collection_name, data, updated_at FROM snapshots').all();
                if (rows.length > 0) {
                    // OR IGNORE: rows already present in the new DB win.
                    const insert = database.prepare('INSERT OR IGNORE INTO snapshots (collection_name, data, updated_at) VALUES (?, ?, ?)');
                    const tx = database.transaction(() => {
                        for (const row of rows) {
                            insert.run(row.collection_name, row.data, row.updated_at);
                        }
                    });
                    tx();
                    console.warn(`Migrated ${rows.length} snapshot(s) from old snapshots.db`);
                }
            }
            finally {
                oldDb.close();
            }
            // Remove old DB after successful migration
            try {
                fs.unlinkSync(oldDbPath);
            }
            catch {
                // best-effort
            }
        }
        // BUG FIX: previously this ran only when the old snapshots.db existed
        // (an early return above skipped it), so JSON snapshots were never
        // migrated on installs that never had the old DB. Run unconditionally.
        migrateJsonSnapshots(database);
    }
    catch {
        // migration is best-effort
    }
}
|
|
105
|
+
/**
 * Import legacy per-collection JSON snapshot files into the `snapshots`
 * table. Corrupted files are skipped; existing rows are left untouched.
 * Entirely best-effort: any unexpected failure is swallowed.
 *
 * @param database open better-sqlite3 handle for the unified DB
 */
function migrateJsonSnapshots(database) {
    try {
        const snapshotDir = getSnapshotDir();
        if (!fs.existsSync(snapshotDir))
            return;
        const jsonFiles = fs.readdirSync(snapshotDir).filter((name) => name.endsWith('.json'));
        if (jsonFiles.length === 0)
            return;
        const upsert = database.prepare('INSERT OR IGNORE INTO snapshots (collection_name, data, updated_at) VALUES (?, ?, ?)');
        let migrated = 0;
        for (const name of jsonFiles) {
            try {
                const raw = fs.readFileSync(path.join(snapshotDir, name), 'utf-8');
                JSON.parse(raw); // reject corrupted snapshots up front
                upsert.run(name.replace(/\.json$/, ''), raw, new Date().toISOString());
                migrated += 1;
            }
            catch {
                // skip corrupted files
            }
        }
        if (migrated > 0) {
            console.warn(`Migrated ${migrated} JSON snapshot(s) to SQLite`);
        }
    }
    catch {
        // migration is best-effort
    }
}
|
|
135
|
+
/**
 * Guard a collection name before it is interpolated into dynamic SQL
 * identifiers (table and trigger names). pathToCollectionName already
 * produces safe names; this re-check keeps the interpolation injection-free.
 *
 * @param name candidate collection name
 * @returns the same name, if valid
 * @throws {VectorDBError} when the name is not purely [a-z0-9_]
 */
function sanitizeName(name) {
    const isSafe = /^[a-z0-9_]+$/.test(name);
    if (!isSafe) {
        throw new VectorDBError(`Invalid collection name: ${name}`);
    }
    return name;
}
|
|
143
|
+
/**
 * Serialize a numeric vector as the raw float32 blob format that the
 * sqlite-vec `vec0` virtual table expects for MATCH queries and inserts.
 */
function vecToBuffer(vec) {
    const floats = Float32Array.from(vec);
    return Buffer.from(floats.buffer);
}
|
|
146
|
+
/**
 * SQLite-backed hybrid vector store. A single database file holds a shared
 * `chunks` metadata table plus, per collection, a `<name>_vec` (sqlite-vec)
 * virtual table and a `<name>_fts` (FTS5 external-content) index kept in
 * sync with `chunks` via triggers.
 */
export class SqliteVectorDB {
    dbPath;
    constructor(dbPath) {
        this.dbPath = dbPath;
    }
    /** Lazily opened, module-shared connection (see getDb). */
    get db() {
        return getDb(this.dbPath);
    }
    /**
     * Register a collection and create its vector/FTS virtual tables plus
     * the triggers that mirror `chunks` rows into the FTS index.
     * No-op when the collection already exists (the stored dimension is not
     * re-validated against `dimension`).
     */
    async createCollection(name, dimension) {
        const safe = sanitizeName(name);
        try {
            const existing = this.db
                .prepare('SELECT dimension FROM collections WHERE name = ?')
                .get(safe);
            if (existing)
                return;
            this.db.transaction(() => {
                this.db
                    .prepare('INSERT INTO collections (name, dimension) VALUES (?, ?)')
                    .run(safe, dimension);
                this.db.exec(`
        CREATE VIRTUAL TABLE "${safe}_vec" USING vec0(
          id TEXT PRIMARY KEY,
          vector float[${dimension}]
        )
      `);
                // External-content FTS index backed by chunks.rowid.
                this.db.exec(`
        CREATE VIRTUAL TABLE "${safe}_fts" USING fts5(
          content,
          content=chunks,
          content_rowid=rowid
        )
      `);
                // Keep the FTS index in sync with chunks rows belonging to
                // this collection only.
                this.db.exec(`
        CREATE TRIGGER "${safe}_fts_insert" AFTER INSERT ON chunks
        WHEN NEW.collection = '${safe}'
        BEGIN
          INSERT INTO "${safe}_fts"(rowid, content) VALUES (NEW.rowid, NEW.content);
        END
      `);
                this.db.exec(`
        CREATE TRIGGER "${safe}_fts_update" AFTER UPDATE ON chunks
        WHEN OLD.collection = '${safe}' AND NEW.collection = '${safe}'
        BEGIN
          INSERT INTO "${safe}_fts"("${safe}_fts", rowid, content)
          VALUES ('delete', OLD.rowid, OLD.content);
          INSERT INTO "${safe}_fts"(rowid, content) VALUES (NEW.rowid, NEW.content);
        END
      `);
                this.db.exec(`
        CREATE TRIGGER "${safe}_fts_delete" AFTER DELETE ON chunks
        WHEN OLD.collection = '${safe}'
        BEGIN
          INSERT INTO "${safe}_fts"("${safe}_fts", rowid, content)
          VALUES ('delete', OLD.rowid, OLD.content);
        END
      `);
            })();
        }
        catch (err) {
            throw new VectorDBError(`Failed to create collection "${name}"`, err);
        }
    }
    /** Whether a collection with this name has been created. */
    async hasCollection(name) {
        const safe = sanitizeName(name);
        const row = this.db
            .prepare('SELECT 1 FROM collections WHERE name = ?')
            .get(safe);
        return row !== undefined;
    }
    /** Remove a collection, its virtual tables, triggers, and chunk rows. */
    async dropCollection(name) {
        const safe = sanitizeName(name);
        try {
            this.db.transaction(() => {
                // Drop triggers first (they reference chunks table)
                this.db.exec(`DROP TRIGGER IF EXISTS "${safe}_fts_insert"`);
                this.db.exec(`DROP TRIGGER IF EXISTS "${safe}_fts_update"`);
                this.db.exec(`DROP TRIGGER IF EXISTS "${safe}_fts_delete"`);
                // Drop virtual tables
                this.db.exec(`DROP TABLE IF EXISTS "${safe}_vec"`);
                this.db.exec(`DROP TABLE IF EXISTS "${safe}_fts"`);
                // Delete chunk rows (FK cascade won't fire for virtual tables)
                this.db.prepare('DELETE FROM chunks WHERE collection = ?').run(safe);
                // Delete collection entry
                this.db.prepare('DELETE FROM collections WHERE name = ?').run(safe);
            })();
        }
        catch (err) {
            throw new VectorDBError(`Failed to drop collection "${name}"`, err);
        }
    }
    /**
     * Upsert documents and their vectors, batched into transactions of
     * INSERT_BATCH_SIZE to keep each write transaction bounded.
     */
    async insert(name, documents) {
        if (documents.length === 0)
            return;
        const safe = sanitizeName(name);
        try {
            const insertChunk = this.db.prepare(`
      INSERT OR REPLACE INTO chunks
        (id, collection, content, relative_path, start_line, end_line,
         file_extension, language, file_category, symbol_name, symbol_kind,
         symbol_signature, parent_symbol, indexed_at, file_modified_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), ?)
    `);
            const insertVec = this.db.prepare(`
      INSERT OR REPLACE INTO "${safe}_vec" (id, vector) VALUES (?, ?)
    `);
            for (let i = 0; i < documents.length; i += INSERT_BATCH_SIZE) {
                const batch = documents.slice(i, i + INSERT_BATCH_SIZE);
                this.db.transaction(() => {
                    for (const doc of batch) {
                        insertChunk.run(doc.id, safe, doc.content, doc.relativePath, doc.startLine, doc.endLine, doc.fileExtension, doc.language, doc.fileCategory ?? null, doc.symbolName ?? null, doc.symbolKind ?? null, doc.symbolSignature ?? null, doc.parentSymbol ?? null, doc.fileModifiedAt ?? null);
                        insertVec.run(doc.id, vecToBuffer(doc.vector));
                    }
                })();
            }
        }
        catch (err) {
            throw new VectorDBError(`Failed to insert ${documents.length} documents into "${name}"`, err);
        }
    }
    /**
     * Hybrid search: dense KNN over the vec0 table fused with FTS5 keyword
     * matches via reciprocal rank fusion. `extensionFilter` restricts both
     * legs. Each leg over-fetches 2x the limit so fusion has candidates.
     */
    async search(name, params) {
        const safe = sanitizeName(name);
        try {
            const fetchLimit = params.limit * 2;
            // Extension filter clause
            let extClause = '';
            let extParams = [];
            if (params.extensionFilter?.length) {
                const placeholders = params.extensionFilter.map(() => '?').join(', ');
                extClause = `AND c.file_extension IN (${placeholders})`;
                extParams = params.extensionFilter;
            }
            // Dense vector search (extClause placeholders follow k = ?).
            const denseRows = this.db
                .prepare(`SELECT v.id, v.distance
           FROM "${safe}_vec" v
           JOIN chunks c ON c.id = v.id AND c.collection = ?
           WHERE v.vector MATCH ?
             AND k = ?
             ${extClause}
           ORDER BY v.distance ASC`)
                .all(safe, vecToBuffer(params.queryVector), fetchLimit, ...extParams);
            // Convert distance to similarity score (cosine distance → similarity)
            const denseResults = denseRows.map((row) => ({
                id: row.id,
                score: 1 - row.distance,
                payload: null,
            }));
            // FTS search
            let textResults = [];
            if (params.queryText) {
                // Tokenize query for FTS5 — escape special chars, join with OR
                const ftsQuery = params.queryText
                    .split(/\s+/)
                    .filter((t) => t.length > 0)
                    .map((t) => `"${t.replace(/"/g, '""')}"`)
                    .join(' OR ');
                if (ftsQuery) {
                    const ftsRows = this.db
                        .prepare(`SELECT c.id, c.content
             FROM "${safe}_fts" f
             JOIN chunks c ON c.rowid = f.rowid AND c.collection = ?
             WHERE "${safe}_fts" MATCH ?
             ${extClause}
             LIMIT ?`)
                        // BUG FIX: extension params must bind BEFORE the LIMIT
                        // placeholder to match their order in the SQL text.
                        // Previously fetchLimit bound to the first extension
                        // placeholder and an extension string bound to LIMIT.
                        .all(safe, ftsQuery, ...extParams, fetchLimit);
                    const pointsForRank = ftsRows.map((r) => ({
                        id: r.id,
                        payload: { content: r.content },
                    }));
                    textResults = rankByTermFrequency(pointsForRank, params.queryText);
                }
            }
            if (denseResults.length === 0 && textResults.length === 0) {
                return [];
            }
            const fused = reciprocalRankFusion(denseResults, textResults, params.limit * 2);
            if (fused.length === 0)
                return [];
            // Re-fetch full chunk metadata for every candidate id so the final
            // fusion pass can emit complete SearchResult payloads.
            const allIds = new Set();
            for (const d of denseResults)
                allIds.add(String(d.id));
            for (const t of textResults)
                allIds.add(String(t.id));
            if (allIds.size === 0)
                return [];
            const idList = [...allIds];
            const placeholders = idList.map(() => '?').join(', ');
            const metadataRows = this.db
                .prepare(`SELECT id, content, relative_path, start_line, end_line,
                file_extension, language, file_category, indexed_at
         FROM chunks
         WHERE id IN (${placeholders}) AND collection = ?`)
                .all(...idList, safe);
            const metaMap = new Map(metadataRows.map((r) => [r.id, r]));
            // Rebuild dense and text results with full payloads for RRF.
            const denseWithPayload = denseResults
                .filter((d) => metaMap.has(String(d.id)))
                .map((d) => {
                const m = metaMap.get(String(d.id));
                return {
                    id: d.id,
                    score: d.score,
                    payload: {
                        content: m.content,
                        relativePath: m.relative_path,
                        startLine: m.start_line,
                        endLine: m.end_line,
                        fileExtension: m.file_extension,
                        language: m.language,
                        fileCategory: m.file_category ?? '',
                    },
                };
            });
            const textWithPayload = textResults
                .filter((t) => metaMap.has(String(t.id)))
                .map((t) => {
                const m = metaMap.get(String(t.id));
                return {
                    id: t.id,
                    rawScore: t.rawScore,
                    payload: {
                        content: m.content,
                        relativePath: m.relative_path,
                        startLine: m.start_line,
                        endLine: m.end_line,
                        fileExtension: m.file_extension,
                        language: m.language,
                        fileCategory: m.file_category ?? '',
                    },
                };
            });
            return reciprocalRankFusion(denseWithPayload, textWithPayload, params.limit);
        }
        catch (err) {
            throw new VectorDBError(`Search failed in collection "${name}"`, err);
        }
    }
    /** Fetch one chunk (payload + vector) by id, or null if absent. */
    async getById(name, id) {
        const safe = sanitizeName(name);
        try {
            const row = this.db
                .prepare(`SELECT c.*, v.vector
         FROM chunks c
         JOIN "${safe}_vec" v ON v.id = c.id
         WHERE c.id = ? AND c.collection = ?`)
                .get(id, safe);
            if (!row)
                return null;
            const vectorBuf = row.vector;
            const vector = [...new Float32Array(vectorBuf.buffer, vectorBuf.byteOffset, vectorBuf.byteLength / 4)];
            return {
                payload: {
                    content: row.content,
                    relativePath: row.relative_path,
                    startLine: row.start_line,
                    endLine: row.end_line,
                    fileExtension: row.file_extension,
                    language: row.language,
                    fileCategory: row.file_category ?? '',
                    symbolName: row.symbol_name ?? '',
                    symbolKind: row.symbol_kind ?? '',
                    symbolSignature: row.symbol_signature ?? '',
                    parentSymbol: row.parent_symbol ?? '',
                    indexedAt: row.indexed_at ?? '',
                    fileModifiedAt: row.file_modified_at ?? '',
                },
                vector,
            };
        }
        catch (err) {
            throw new VectorDBError(`Failed to retrieve point "${id}" from "${name}"`, err);
        }
    }
    /**
     * Upsert a single chunk row and its vector atomically.
     * NOTE(review): missing optional fields are stored as '' here but as
     * NULL by insert() — confirm whether readers treat those the same.
     */
    async updatePoint(name, id, vector, payload) {
        const safe = sanitizeName(name);
        try {
            this.db.transaction(() => {
                this.db
                    .prepare(`INSERT OR REPLACE INTO chunks
           (id, collection, content, relative_path, start_line, end_line,
            file_extension, language, file_category, symbol_name, symbol_kind,
            symbol_signature, parent_symbol, indexed_at, file_modified_at)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), ?)`)
                    .run(id, safe, String(payload.content ?? ''), String(payload.relativePath ?? ''), Number(payload.startLine ?? 0), Number(payload.endLine ?? 0), String(payload.fileExtension ?? ''), String(payload.language ?? ''), String(payload.fileCategory ?? ''), String(payload.symbolName ?? ''), String(payload.symbolKind ?? ''), String(payload.symbolSignature ?? ''), String(payload.parentSymbol ?? ''), payload.fileModifiedAt != null ? String(payload.fileModifiedAt) : null);
                this.db
                    .prepare(`INSERT OR REPLACE INTO "${safe}_vec" (id, vector) VALUES (?, ?)`)
                    .run(id, vecToBuffer(vector));
            })();
        }
        catch (err) {
            throw new VectorDBError(`Failed to update point "${id}" in "${name}"`, err);
        }
    }
    /** Delete every chunk (and its vector) indexed under a relative path. */
    async deleteByPath(name, relativePath) {
        const safe = sanitizeName(name);
        try {
            const ids = this.db
                .prepare('SELECT id FROM chunks WHERE collection = ? AND relative_path = ?')
                .all(safe, relativePath);
            if (ids.length === 0)
                return;
            this.db.transaction(() => {
                // vec0 rows must be removed explicitly; no FK cascade applies.
                const deleteVec = this.db.prepare(`DELETE FROM "${safe}_vec" WHERE id = ?`);
                for (const { id } of ids) {
                    deleteVec.run(id);
                }
                this.db
                    .prepare('DELETE FROM chunks WHERE collection = ? AND relative_path = ?')
                    .run(safe, relativePath);
            })();
        }
        catch (err) {
            throw new VectorDBError(`Failed to delete documents for path "${relativePath}" from "${name}"`, err);
        }
    }
    /**
     * Delete chunks matching an equality filter over known chunk fields
     * (camelCase keys are mapped to column names; unknown keys are used as
     * column names directly, after identifier validation).
     */
    async deleteByFilter(name, filter) {
        const safe = sanitizeName(name);
        try {
            // Build WHERE clause from filter
            const columnMap = {
                relativePath: 'relative_path',
                startLine: 'start_line',
                endLine: 'end_line',
                fileExtension: 'file_extension',
                fileCategory: 'file_category',
                symbolName: 'symbol_name',
                symbolKind: 'symbol_kind',
            };
            const conditions = ['collection = ?'];
            const params = [safe];
            for (const [key, value] of Object.entries(filter)) {
                const col = columnMap[key] ?? key;
                // Defense in depth: column names are interpolated into SQL,
                // so only plain identifiers are permitted.
                if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(col)) {
                    throw new VectorDBError(`Invalid filter field: ${key}`);
                }
                conditions.push(`"${col}" = ?`);
                params.push(value);
            }
            const where = conditions.join(' AND ');
            const ids = this.db
                .prepare(`SELECT id FROM chunks WHERE ${where}`)
                .all(...params);
            if (ids.length === 0)
                return;
            this.db.transaction(() => {
                const deleteVec = this.db.prepare(`DELETE FROM "${safe}_vec" WHERE id = ?`);
                for (const { id } of ids) {
                    deleteVec.run(id);
                }
                this.db
                    .prepare(`DELETE FROM chunks WHERE ${where}`)
                    .run(...params);
            })();
        }
        catch (err) {
            throw new VectorDBError(`Failed to delete by filter from "${name}"`, err);
        }
    }
    /** List every chunk in the collection that carries a symbol name. */
    async listSymbols(name) {
        const safe = sanitizeName(name);
        try {
            const rows = this.db
                .prepare(`SELECT symbol_name, symbol_kind, relative_path, start_line,
                symbol_signature, parent_symbol
         FROM chunks
         WHERE collection = ? AND symbol_name IS NOT NULL AND symbol_name != ''`)
                .all(safe);
            return rows.map((row) => ({
                name: row.symbol_name,
                kind: row.symbol_kind,
                relativePath: row.relative_path,
                startLine: row.start_line,
                ...(row.symbol_signature ? { signature: row.symbol_signature } : {}),
                ...(row.parent_symbol ? { parentName: row.parent_symbol } : {}),
            }));
        }
        catch (err) {
            throw new VectorDBError(`Failed to list symbols from "${name}"`, err);
        }
    }
    /** Return every chunk in the collection with its payload and vector. */
    async scrollAll(name) {
        const safe = sanitizeName(name);
        try {
            const rows = this.db
                .prepare(`SELECT c.*, v.vector
         FROM chunks c
         JOIN "${safe}_vec" v ON v.id = c.id
         WHERE c.collection = ?`)
                .all(safe);
            return rows.map((row) => {
                const vectorBuf = row.vector;
                const vector = [...new Float32Array(vectorBuf.buffer, vectorBuf.byteOffset, vectorBuf.byteLength / 4)];
                return {
                    id: String(row.id),
                    vector,
                    payload: {
                        content: row.content,
                        relativePath: row.relative_path,
                        startLine: row.start_line,
                        endLine: row.end_line,
                        fileExtension: row.file_extension,
                        language: row.language,
                        fileCategory: row.file_category ?? '',
                        symbolName: row.symbol_name ?? '',
                        symbolKind: row.symbol_kind ?? '',
                        symbolSignature: row.symbol_signature ?? '',
                        parentSymbol: row.parent_symbol ?? '',
                    },
                };
            });
        }
        catch (err) {
            throw new VectorDBError(`Failed to scroll all points from "${name}"`, err);
        }
    }
}
|
|
569
|
+
// ── Snapshot functions ─────────────────────────────────────────────────────
/** Fall back to the standard codesearch DB location when no path is given. */
function resolveDbPath(dbPath) {
    if (dbPath != null)
        return dbPath;
    return getCodesearchDbPath();
}
|
|
573
|
+
/**
 * Load the persisted snapshot for a repository root.
 * Returns null when no snapshot exists or the stored JSON is corrupted
 * (corruption is logged, not thrown).
 */
export function loadSnapshot(rootPath, dbPath) {
    const name = pathToCollectionName(rootPath);
    const handle = getDb(resolveDbPath(dbPath));
    const row = handle
        .prepare('SELECT data FROM snapshots WHERE collection_name = ?')
        .get(name);
    if (row == null)
        return null;
    try {
        return JSON.parse(row.data);
    }
    catch {
        console.warn(`Corrupted snapshot for ${name}, ignoring`);
        return null;
    }
}
|
|
588
|
+
/** Upsert the snapshot JSON for a repository root, stamped with the current time. */
export function saveSnapshot(rootPath, snapshot, dbPath) {
    const name = pathToCollectionName(rootPath);
    const upsert = getDb(resolveDbPath(dbPath))
        .prepare('INSERT OR REPLACE INTO snapshots (collection_name, data, updated_at) VALUES (?, ?, ?)');
    upsert.run(name, JSON.stringify(snapshot), new Date().toISOString());
}
|
|
594
|
+
/** Remove the persisted snapshot for a repository root (no-op if absent). */
export function deleteSnapshot(rootPath, dbPath) {
    const name = pathToCollectionName(rootPath);
    getDb(resolveDbPath(dbPath))
        .prepare('DELETE FROM snapshots WHERE collection_name = ?')
        .run(name);
}
|
|
598
|
+
/** Whether a snapshot row exists for this repository root. */
export function snapshotExists(rootPath, dbPath) {
    const name = pathToCollectionName(rootPath);
    const hit = getDb(resolveDbPath(dbPath))
        .prepare('SELECT 1 FROM snapshots WHERE collection_name = ?')
        .get(name);
    return hit !== undefined;
}
|
|
605
|
+
/** All collection names that currently have a stored snapshot. */
export function listSnapshotCollections(dbPath) {
    return getDb(resolveDbPath(dbPath))
        .prepare('SELECT collection_name FROM snapshots')
        .all()
        .map((row) => row.collection_name);
}
|
|
609
|
+
/** Remove a snapshot addressed directly by its collection name. */
export function deleteSnapshotByCollection(collectionName, dbPath) {
    getDb(resolveDbPath(dbPath))
        .prepare('DELETE FROM snapshots WHERE collection_name = ?')
        .run(collectionName);
}
|
|
612
|
+
/** Reset the module-level DB connection (for testing). */
export function resetForTesting() {
    if (!db)
        return;
    try {
        db.close();
    }
    catch {
        // ignore
    }
    db = null;
}
|
|
624
|
+
//# sourceMappingURL=sqlite.js.map
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import type { EmbeddingVector } from '../embedding/types.js';
|
|
2
|
+
/** A single embedded chunk of a source file, ready to be stored. */
export interface CodeDocument {
    /** Unique chunk id (primary key in the chunks table). */
    id: string;
    /** Raw chunk text. */
    content: string;
    /** Embedding for `content`. */
    vector: EmbeddingVector;
    /** Path of the source file, relative to the indexed root. */
    relativePath: string;
    startLine: number;
    endLine: number;
    fileExtension: string;
    language: string;
    fileCategory?: string;
    /** Symbol metadata, present when the chunk maps to a named symbol. */
    symbolName?: string;
    symbolKind?: string;
    symbolSignature?: string;
    parentSymbol?: string;
    /** Timestamps as ISO-like strings; set by the store when omitted. */
    indexedAt?: string;
    fileModifiedAt?: string;
}
/** A named symbol (function, class, …) located within an indexed file. */
export interface SymbolEntry {
    name: string;
    kind: string;
    relativePath: string;
    startLine: number;
    signature?: string;
    /** Enclosing symbol (e.g. class of a method), when applicable. */
    parentName?: string;
}
/** Inputs for a hybrid (dense + keyword) search over one collection. */
export interface HybridSearchParams {
    /** Embedding of the query text. */
    queryVector: EmbeddingVector;
    /** Raw query text, used for the keyword/FTS leg. */
    queryText: string;
    /** Maximum number of fused results to return. */
    limit: number;
    /** Optional allow-list of file extensions to restrict both legs. */
    extensionFilter?: string[];
}
/** One fused search hit with its chunk metadata and relevance score. */
export interface SearchResult {
    content: string;
    relativePath: string;
    startLine: number;
    endLine: number;
    fileExtension: string;
    language: string;
    /** Fused relevance score (higher is better). */
    score: number;
    fileCategory?: string;
}
/** Storage backend contract for collections of embedded code chunks. */
export interface VectorDB {
    createCollection(name: string, dimension: number): Promise<void>;
    hasCollection(name: string): Promise<boolean>;
    dropCollection(name: string): Promise<void>;
    /** Upsert documents and their vectors. */
    insert(name: string, documents: CodeDocument[]): Promise<void>;
    /** Hybrid dense + keyword search. */
    search(name: string, params: HybridSearchParams): Promise<SearchResult[]>;
    deleteByPath(name: string, relativePath: string): Promise<void>;
    /** Delete chunks matching an equality filter over chunk fields. */
    deleteByFilter(name: string, filter: Record<string, unknown>): Promise<void>;
    /** Fetch one stored point by id, or null when absent. */
    getById(name: string, id: string): Promise<{
        payload: Record<string, unknown>;
        vector: number[];
    } | null>;
    updatePoint(name: string, id: string, vector: number[], payload: Record<string, unknown>): Promise<void>;
    listSymbols(name: string): Promise<SymbolEntry[]>;
    /** Return every stored point (id, vector, payload) in the collection. */
    scrollAll(name: string): Promise<{
        id: string | number;
        vector: number[];
        payload: Record<string, unknown>;
    }[]>;
}
|
|
63
|
+
//# sourceMappingURL=types.d.ts.map
|