indexer-cli 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -4
- package/dist/cli/commands/ensure-indexed.js +13 -6
- package/dist/cli/commands/ensure-indexed.js.map +1 -1
- package/dist/cli/commands/index.js +4 -5
- package/dist/cli/commands/index.js.map +1 -1
- package/dist/cli/commands/init.js +7 -5
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/search.js +2 -3
- package/dist/cli/commands/search.js.map +1 -1
- package/dist/cli/commands/skill-template.d.ts +1 -1
- package/dist/cli/commands/skill-template.js +6 -63
- package/dist/cli/commands/skill-template.js.map +1 -1
- package/dist/cli/commands/skill-template.md +145 -0
- package/dist/storage/vectors.d.ts +10 -19
- package/dist/storage/vectors.js +229 -352
- package/dist/storage/vectors.js.map +1 -1
- package/package.json +4 -6
package/dist/storage/vectors.js
CHANGED
|
@@ -32,12 +32,15 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
32
32
|
return result;
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
+
};
|
|
35
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.LanceDbVectorStore = exports.REQUIRED_COLUMNS = void 0;
|
|
39
|
+
exports.LanceDbVectorStore = exports.SqliteVecVectorStore = exports.REQUIRED_COLUMNS = void 0;
|
|
37
40
|
const node_fs_1 = require("node:fs");
|
|
38
|
-
const
|
|
39
|
-
const
|
|
40
|
-
const
|
|
41
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
42
|
+
const better_sqlite3_1 = __importDefault(require("better-sqlite3"));
|
|
43
|
+
const sqliteVec = __importStar(require("sqlite-vec"));
|
|
41
44
|
exports.REQUIRED_COLUMNS = [
|
|
42
45
|
"project_id",
|
|
43
46
|
"chunk_id",
|
|
@@ -48,361 +51,271 @@ exports.REQUIRED_COLUMNS = [
|
|
|
48
51
|
"content_hash",
|
|
49
52
|
"chunk_type",
|
|
50
53
|
"primary_symbol",
|
|
51
|
-
"
|
|
54
|
+
"embedding",
|
|
52
55
|
];
|
|
53
|
-
const
|
|
54
|
-
class
|
|
56
|
+
const UPSERT_BATCH_SIZE = 200;
|
|
57
|
+
class SqliteVecVectorStore {
|
|
55
58
|
dbPath;
|
|
56
59
|
vectorSize;
|
|
57
|
-
tableName;
|
|
58
|
-
cacheTTL;
|
|
59
60
|
db;
|
|
60
|
-
table;
|
|
61
61
|
initialized = false;
|
|
62
|
-
lastCacheRefresh = 0;
|
|
63
|
-
operationQueue = Promise.resolve();
|
|
64
62
|
constructor(options) {
|
|
65
63
|
this.dbPath = options.dbPath;
|
|
66
64
|
this.vectorSize = options.vectorSize;
|
|
67
|
-
this.
|
|
68
|
-
this.cacheTTL = options.cacheTTL ?? 5 * 60 * 1000;
|
|
65
|
+
this.db = this.openDatabase();
|
|
69
66
|
}
|
|
70
67
|
async initialize() {
|
|
71
68
|
if (this.initialized) {
|
|
72
69
|
return;
|
|
73
70
|
}
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
71
|
+
const db = this.getDb();
|
|
72
|
+
db.exec(`
|
|
73
|
+
CREATE TABLE IF NOT EXISTS vector_meta (
|
|
74
|
+
chunk_id TEXT PRIMARY KEY,
|
|
75
|
+
project_id TEXT NOT NULL,
|
|
76
|
+
snapshot_id TEXT NOT NULL,
|
|
77
|
+
file_path TEXT NOT NULL,
|
|
78
|
+
start_line INTEGER NOT NULL,
|
|
79
|
+
end_line INTEGER NOT NULL,
|
|
80
|
+
content_hash TEXT NOT NULL,
|
|
81
|
+
chunk_type TEXT NOT NULL DEFAULT '',
|
|
82
|
+
primary_symbol TEXT NOT NULL DEFAULT ''
|
|
83
|
+
);
|
|
84
|
+
|
|
85
|
+
CREATE INDEX IF NOT EXISTS idx_vector_meta_snapshot_id
|
|
86
|
+
ON vector_meta(snapshot_id);
|
|
87
|
+
|
|
88
|
+
CREATE INDEX IF NOT EXISTS idx_vector_meta_project_id
|
|
89
|
+
ON vector_meta(project_id);
|
|
90
|
+
|
|
91
|
+
CREATE INDEX IF NOT EXISTS idx_vector_meta_file_path
|
|
92
|
+
ON vector_meta(file_path);
|
|
93
|
+
`);
|
|
94
|
+
const vecChunksExists = db
|
|
95
|
+
.prepare("SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'vec_chunks'")
|
|
96
|
+
.get();
|
|
97
|
+
if (!vecChunksExists) {
|
|
98
|
+
db.exec(`
|
|
99
|
+
CREATE VIRTUAL TABLE vec_chunks USING vec0(
|
|
100
|
+
chunk_id TEXT PRIMARY KEY,
|
|
101
|
+
embedding float[${this.vectorSize}]
|
|
102
|
+
)
|
|
103
|
+
`);
|
|
87
104
|
}
|
|
88
105
|
this.initialized = true;
|
|
89
106
|
}
|
|
90
107
|
async close() {
|
|
108
|
+
if (this.db) {
|
|
109
|
+
this.db.close();
|
|
110
|
+
this.db = null;
|
|
111
|
+
}
|
|
91
112
|
this.initialized = false;
|
|
92
|
-
this.db = null;
|
|
93
|
-
this.table = null;
|
|
94
113
|
}
|
|
95
114
|
async upsert(vectors) {
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
115
|
+
if (vectors.length === 0) {
|
|
116
|
+
return;
|
|
117
|
+
}
|
|
118
|
+
await this.initialize();
|
|
119
|
+
const db = this.getDb();
|
|
120
|
+
const deleteVectorStatement = db.prepare("DELETE FROM vec_chunks WHERE chunk_id = ?");
|
|
121
|
+
const deleteMetaStatement = db.prepare("DELETE FROM vector_meta WHERE chunk_id = ?");
|
|
122
|
+
const insertMetaStatement = db.prepare(`
|
|
123
|
+
INSERT INTO vector_meta (
|
|
124
|
+
chunk_id,
|
|
125
|
+
project_id,
|
|
126
|
+
snapshot_id,
|
|
127
|
+
file_path,
|
|
128
|
+
start_line,
|
|
129
|
+
end_line,
|
|
130
|
+
content_hash,
|
|
131
|
+
chunk_type,
|
|
132
|
+
primary_symbol
|
|
133
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
134
|
+
`);
|
|
135
|
+
const insertVectorStatement = db.prepare("INSERT INTO vec_chunks (chunk_id, embedding) VALUES (?, vec_f32(?))");
|
|
136
|
+
const upsertBatch = db.transaction((batch) => {
|
|
137
|
+
for (const vector of batch) {
|
|
138
|
+
deleteVectorStatement.run(vector.chunkId);
|
|
139
|
+
deleteMetaStatement.run(vector.chunkId);
|
|
140
|
+
insertMetaStatement.run(vector.chunkId, vector.projectId, vector.snapshotId, vector.filePath, vector.startLine, vector.endLine, vector.contentHash, vector.chunkType ?? "", vector.primarySymbol ?? "");
|
|
141
|
+
insertVectorStatement.run(vector.chunkId, this.embeddingToBuffer(vector.embedding));
|
|
142
|
+
}
|
|
122
143
|
});
|
|
144
|
+
for (let index = 0; index < vectors.length; index += UPSERT_BATCH_SIZE) {
|
|
145
|
+
const batch = vectors.slice(index, index + UPSERT_BATCH_SIZE);
|
|
146
|
+
upsertBatch(batch);
|
|
147
|
+
}
|
|
123
148
|
}
|
|
124
149
|
async search(queryEmbedding, topK, filters) {
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
.filter(prefilter)
|
|
159
|
-
.limit(COPY_VECTORS_QUERY_LIMIT)
|
|
160
|
-
.select([
|
|
161
|
-
"chunk_id",
|
|
162
|
-
"snapshot_id",
|
|
163
|
-
"file_path",
|
|
164
|
-
"start_line",
|
|
165
|
-
"end_line",
|
|
166
|
-
"content_hash",
|
|
167
|
-
"chunk_type",
|
|
168
|
-
"primary_symbol",
|
|
169
|
-
"vector",
|
|
170
|
-
])
|
|
171
|
-
.toArray();
|
|
172
|
-
results = rows
|
|
173
|
-
.map((row) => {
|
|
174
|
-
const vector = Array.isArray(row.vector)
|
|
175
|
-
? row.vector
|
|
176
|
-
: Array.from(row.vector ?? []);
|
|
177
|
-
return {
|
|
178
|
-
...row,
|
|
179
|
-
_distance: this.euclideanDistance(queryEmbedding, vector),
|
|
180
|
-
};
|
|
181
|
-
})
|
|
182
|
-
.sort((left, right) => left._distance - right._distance)
|
|
183
|
-
.slice(0, topK);
|
|
184
|
-
}
|
|
185
|
-
else {
|
|
186
|
-
const searchQuery = this.table.search(queryEmbedding).limit(topK);
|
|
187
|
-
results = prefilter
|
|
188
|
-
? await searchQuery.where(prefilter).toArray()
|
|
189
|
-
: await searchQuery.toArray();
|
|
190
|
-
}
|
|
191
|
-
return results.map((result) => ({
|
|
192
|
-
chunkId: result.chunk_id,
|
|
193
|
-
snapshotId: result.snapshot_id,
|
|
194
|
-
filePath: result.file_path,
|
|
195
|
-
startLine: result.start_line,
|
|
196
|
-
endLine: result.end_line,
|
|
197
|
-
contentHash: result.content_hash,
|
|
198
|
-
chunkType: typeof result.chunk_type === "string"
|
|
199
|
-
? result.chunk_type
|
|
200
|
-
: undefined,
|
|
201
|
-
primarySymbol: typeof result.primary_symbol === "string"
|
|
202
|
-
? result.primary_symbol
|
|
203
|
-
: undefined,
|
|
204
|
-
score: 1 /
|
|
205
|
-
(1 + (typeof result._distance === "number" ? result._distance : 0)),
|
|
206
|
-
distance: typeof result._distance === "number" ? result._distance : 0,
|
|
207
|
-
}));
|
|
208
|
-
});
|
|
209
|
-
});
|
|
150
|
+
if (!filters.projectId) {
|
|
151
|
+
throw new Error("projectId is required in filters for search");
|
|
152
|
+
}
|
|
153
|
+
await this.initialize();
|
|
154
|
+
const db = this.getDb();
|
|
155
|
+
const conditions = ["vm.project_id = ?"];
|
|
156
|
+
const values = [filters.projectId];
|
|
157
|
+
const prefilter = this.buildPrefilter(filters, "vm");
|
|
158
|
+
if (prefilter) {
|
|
159
|
+
conditions.push(prefilter);
|
|
160
|
+
}
|
|
161
|
+
const rows = db
|
|
162
|
+
.prepare(`
|
|
163
|
+
SELECT vm.*, vec_distance_L2(vc.embedding, vec_f32(?)) AS distance
|
|
164
|
+
FROM vec_chunks vc
|
|
165
|
+
JOIN vector_meta vm ON vc.chunk_id = vm.chunk_id
|
|
166
|
+
WHERE ${conditions.join(" AND ")}
|
|
167
|
+
ORDER BY distance
|
|
168
|
+
LIMIT ?
|
|
169
|
+
`)
|
|
170
|
+
.all(this.embeddingToBuffer(queryEmbedding), ...values, topK);
|
|
171
|
+
return rows.map((row) => ({
|
|
172
|
+
chunkId: row.chunk_id,
|
|
173
|
+
snapshotId: row.snapshot_id,
|
|
174
|
+
filePath: row.file_path,
|
|
175
|
+
startLine: row.start_line,
|
|
176
|
+
endLine: row.end_line,
|
|
177
|
+
contentHash: row.content_hash,
|
|
178
|
+
chunkType: row.chunk_type || undefined,
|
|
179
|
+
primarySymbol: row.primary_symbol || undefined,
|
|
180
|
+
score: 1 / (1 + row.distance),
|
|
181
|
+
distance: row.distance,
|
|
182
|
+
}));
|
|
210
183
|
}
|
|
211
184
|
async countVectors(filters) {
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
}
|
|
228
|
-
if (typeof this.table.filter === "function") {
|
|
229
|
-
logger.warn("[LanceDB] countRows not available, falling back to filter-based count");
|
|
230
|
-
const results = await this.table
|
|
231
|
-
.filter(prefilter || "1 = 1")
|
|
232
|
-
.limit(COPY_VECTORS_QUERY_LIMIT)
|
|
233
|
-
.select(["chunk_id"])
|
|
234
|
-
.toArray();
|
|
235
|
-
return results.length;
|
|
236
|
-
}
|
|
237
|
-
if (typeof this.table.query === "function") {
|
|
238
|
-
logger.warn("[LanceDB] countRows not available, falling back to query-based count");
|
|
239
|
-
const query = this.table.query();
|
|
240
|
-
const results = prefilter
|
|
241
|
-
? await query
|
|
242
|
-
.where(prefilter)
|
|
243
|
-
.limit(COPY_VECTORS_QUERY_LIMIT)
|
|
244
|
-
.select(["chunk_id"])
|
|
245
|
-
.toArray({ batchSize: 1024 })
|
|
246
|
-
: await query
|
|
247
|
-
.limit(COPY_VECTORS_QUERY_LIMIT)
|
|
248
|
-
.select(["chunk_id"])
|
|
249
|
-
.toArray({
|
|
250
|
-
batchSize: 1024,
|
|
251
|
-
});
|
|
252
|
-
return results.length;
|
|
253
|
-
}
|
|
254
|
-
throw new Error("[LanceDB] countVectors requires countRows(), filter(), or query() support for exhaustive results");
|
|
255
|
-
});
|
|
256
|
-
});
|
|
185
|
+
if (!filters.projectId) {
|
|
186
|
+
throw new Error("projectId is required in filters for countVectors");
|
|
187
|
+
}
|
|
188
|
+
await this.initialize();
|
|
189
|
+
const db = this.getDb();
|
|
190
|
+
const conditions = ["project_id = ?"];
|
|
191
|
+
const values = [filters.projectId];
|
|
192
|
+
const prefilter = this.buildPrefilter(filters);
|
|
193
|
+
if (prefilter) {
|
|
194
|
+
conditions.push(prefilter);
|
|
195
|
+
}
|
|
196
|
+
const row = db
|
|
197
|
+
.prepare(`SELECT COUNT(*) AS count FROM vector_meta WHERE ${conditions.join(" AND ")}`)
|
|
198
|
+
.get(...values);
|
|
199
|
+
return row.count;
|
|
257
200
|
}
|
|
258
201
|
async deleteBySnapshot(projectId, snapshotId) {
|
|
259
|
-
await this.
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
202
|
+
await this.initialize();
|
|
203
|
+
const db = this.getDb();
|
|
204
|
+
db.transaction(() => {
|
|
205
|
+
db.prepare(`
|
|
206
|
+
DELETE FROM vec_chunks
|
|
207
|
+
WHERE chunk_id IN (
|
|
208
|
+
SELECT chunk_id FROM vector_meta
|
|
209
|
+
WHERE project_id = ? AND snapshot_id = ?
|
|
210
|
+
)
|
|
211
|
+
`).run(projectId, snapshotId);
|
|
212
|
+
db.prepare("DELETE FROM vector_meta WHERE project_id = ? AND snapshot_id = ?").run(projectId, snapshotId);
|
|
213
|
+
})();
|
|
268
214
|
}
|
|
269
215
|
async copyVectors(projectId, fromSnapshotId, toSnapshotId, excludeFilePaths) {
|
|
270
|
-
await this.
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
: await this.table
|
|
314
|
-
.search(Array(this.vectorSize).fill(0))
|
|
315
|
-
.limit(COPY_VECTORS_QUERY_LIMIT)
|
|
316
|
-
.where(filter)
|
|
317
|
-
.toArray();
|
|
318
|
-
const filtered = results.filter((row) => !excludeFilePaths.includes(String(row.file_path)));
|
|
319
|
-
if (filtered.length === 0) {
|
|
320
|
-
return;
|
|
321
|
-
}
|
|
322
|
-
await this.table.add(filtered.map((row) => ({
|
|
323
|
-
project_id: row.project_id ?? projectId.toString(),
|
|
324
|
-
chunk_id: row.chunk_id,
|
|
325
|
-
snapshot_id: toSnapshotId.toString(),
|
|
326
|
-
file_path: row.file_path,
|
|
327
|
-
start_line: row.start_line,
|
|
328
|
-
end_line: row.end_line,
|
|
329
|
-
content_hash: row.content_hash,
|
|
330
|
-
chunk_type: row.chunk_type ?? "",
|
|
331
|
-
primary_symbol: row.primary_symbol ?? "",
|
|
332
|
-
vector: row.vector,
|
|
333
|
-
})));
|
|
334
|
-
});
|
|
216
|
+
await this.initialize();
|
|
217
|
+
const db = this.getDb();
|
|
218
|
+
const conditions = ["vm.project_id = ?", "vm.snapshot_id = ?"];
|
|
219
|
+
const values = [projectId, fromSnapshotId];
|
|
220
|
+
if (excludeFilePaths.length > 0) {
|
|
221
|
+
const placeholders = excludeFilePaths.map(() => "?").join(", ");
|
|
222
|
+
conditions.push(`vm.file_path NOT IN (${placeholders})`);
|
|
223
|
+
values.push(...excludeFilePaths);
|
|
224
|
+
}
|
|
225
|
+
const rows = db
|
|
226
|
+
.prepare(`
|
|
227
|
+
SELECT vm.*, vc.embedding
|
|
228
|
+
FROM vector_meta vm
|
|
229
|
+
JOIN vec_chunks vc ON vc.chunk_id = vm.chunk_id
|
|
230
|
+
WHERE ${conditions.join(" AND ")}
|
|
231
|
+
`)
|
|
232
|
+
.all(...values);
|
|
233
|
+
if (rows.length === 0) {
|
|
234
|
+
return;
|
|
235
|
+
}
|
|
236
|
+
const deleteVectorStatement = db.prepare("DELETE FROM vec_chunks WHERE chunk_id = ?");
|
|
237
|
+
const deleteMetaStatement = db.prepare("DELETE FROM vector_meta WHERE chunk_id = ?");
|
|
238
|
+
const insertMetaStatement = db.prepare(`
|
|
239
|
+
INSERT INTO vector_meta (
|
|
240
|
+
chunk_id,
|
|
241
|
+
project_id,
|
|
242
|
+
snapshot_id,
|
|
243
|
+
file_path,
|
|
244
|
+
start_line,
|
|
245
|
+
end_line,
|
|
246
|
+
content_hash,
|
|
247
|
+
chunk_type,
|
|
248
|
+
primary_symbol
|
|
249
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
250
|
+
`);
|
|
251
|
+
const insertVectorStatement = db.prepare("INSERT INTO vec_chunks (chunk_id, embedding) VALUES (?, vec_f32(?))");
|
|
252
|
+
const copyBatch = db.transaction((batch) => {
|
|
253
|
+
for (const row of batch) {
|
|
254
|
+
deleteVectorStatement.run(row.chunk_id);
|
|
255
|
+
deleteMetaStatement.run(row.chunk_id);
|
|
256
|
+
insertMetaStatement.run(row.chunk_id, row.project_id, toSnapshotId, row.file_path, row.start_line, row.end_line, row.content_hash, row.chunk_type, row.primary_symbol);
|
|
257
|
+
insertVectorStatement.run(row.chunk_id, this.normalizeEmbeddingValue(row.embedding));
|
|
258
|
+
}
|
|
335
259
|
});
|
|
260
|
+
for (let index = 0; index < rows.length; index += UPSERT_BATCH_SIZE) {
|
|
261
|
+
const batch = rows.slice(index, index + UPSERT_BATCH_SIZE);
|
|
262
|
+
copyBatch(batch);
|
|
263
|
+
}
|
|
336
264
|
}
|
|
337
265
|
async deleteByProject(projectId) {
|
|
338
|
-
await this.
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
266
|
+
await this.initialize();
|
|
267
|
+
const db = this.getDb();
|
|
268
|
+
db.transaction(() => {
|
|
269
|
+
db.prepare(`
|
|
270
|
+
DELETE FROM vec_chunks
|
|
271
|
+
WHERE chunk_id IN (
|
|
272
|
+
SELECT chunk_id FROM vector_meta WHERE project_id = ?
|
|
273
|
+
)
|
|
274
|
+
`).run(projectId);
|
|
275
|
+
db.prepare("DELETE FROM vector_meta WHERE project_id = ?").run(projectId);
|
|
276
|
+
})();
|
|
277
|
+
const legacyVectorsPath = node_path_1.default.join(node_path_1.default.dirname(this.dbPath), "vectors");
|
|
278
|
+
if ((0, node_fs_1.existsSync)(legacyVectorsPath)) {
|
|
279
|
+
(0, node_fs_1.rmSync)(legacyVectorsPath, { recursive: true, force: true });
|
|
280
|
+
}
|
|
353
281
|
}
|
|
354
|
-
|
|
355
|
-
const
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
message.includes("Did not find any data files") ||
|
|
359
|
-
message.includes("/vectors.lance/data/")));
|
|
282
|
+
openDatabase() {
|
|
283
|
+
const db = new better_sqlite3_1.default(this.dbPath);
|
|
284
|
+
sqliteVec.load(db);
|
|
285
|
+
return db;
|
|
360
286
|
}
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
}
|
|
365
|
-
catch (error) {
|
|
366
|
-
if (!this.isTransientIoError(error)) {
|
|
367
|
-
throw error;
|
|
368
|
-
}
|
|
369
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
370
|
-
logger.warn(`[LanceDB] ${operationName} hit transient IO error, reopening and retrying once: ${message}`);
|
|
371
|
-
await this.close();
|
|
372
|
-
await this.initialize();
|
|
373
|
-
return operation();
|
|
287
|
+
getDb() {
|
|
288
|
+
if (!this.db) {
|
|
289
|
+
this.db = this.openDatabase();
|
|
374
290
|
}
|
|
291
|
+
return this.db;
|
|
375
292
|
}
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
this.lastCacheRefresh = now;
|
|
383
|
-
return;
|
|
293
|
+
embeddingToBuffer(embedding) {
|
|
294
|
+
return Buffer.from(new Float32Array(embedding).buffer);
|
|
295
|
+
}
|
|
296
|
+
normalizeEmbeddingValue(embedding) {
|
|
297
|
+
if (Buffer.isBuffer(embedding)) {
|
|
298
|
+
return embedding;
|
|
384
299
|
}
|
|
385
|
-
if (
|
|
386
|
-
|
|
387
|
-
this.lastCacheRefresh = now;
|
|
388
|
-
logger.debug(`[LanceDB] Cache refreshed (TTL: ${this.cacheTTL}ms)`);
|
|
300
|
+
if (embedding instanceof Uint8Array) {
|
|
301
|
+
return Buffer.from(embedding);
|
|
389
302
|
}
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
if (!this.table) {
|
|
393
|
-
this.table = await this.db.openTable(this.tableName);
|
|
303
|
+
if (embedding instanceof ArrayBuffer) {
|
|
304
|
+
return Buffer.from(embedding);
|
|
394
305
|
}
|
|
306
|
+
throw new Error("Unsupported sqlite-vec embedding value returned from database");
|
|
395
307
|
}
|
|
396
|
-
|
|
308
|
+
buildPrefilter(filters, alias) {
|
|
397
309
|
const conditions = [];
|
|
310
|
+
const prefix = alias ? `${alias}.` : "";
|
|
398
311
|
if (filters.snapshotId) {
|
|
399
|
-
conditions.push(
|
|
312
|
+
conditions.push(`${prefix}snapshot_id = '${this.escapeSqlLiteral(filters.snapshotId)}'`);
|
|
400
313
|
}
|
|
401
314
|
if (filters.filePath) {
|
|
402
|
-
conditions.push(
|
|
315
|
+
conditions.push(`${prefix}file_path = '${this.escapeSqlLiteral(filters.filePath)}'`);
|
|
403
316
|
}
|
|
404
317
|
else if (filters.pathPrefix) {
|
|
405
|
-
conditions.push(
|
|
318
|
+
conditions.push(`${prefix}file_path LIKE '${this.escapeSqlLike(filters.pathPrefix)}%'`);
|
|
406
319
|
}
|
|
407
320
|
if (filters.chunkTypes && filters.chunkTypes.length > 0) {
|
|
408
321
|
const normalizedChunkTypes = filters.chunkTypes
|
|
@@ -410,48 +323,11 @@ class LanceDbVectorStore {
|
|
|
410
323
|
.filter((chunkType) => chunkType.length > 0)
|
|
411
324
|
.map((chunkType) => `'${this.escapeSqlLiteral(chunkType)}'`);
|
|
412
325
|
if (normalizedChunkTypes.length > 0) {
|
|
413
|
-
conditions.push(
|
|
326
|
+
conditions.push(`${prefix}chunk_type IN (${normalizedChunkTypes.join(", ")})`);
|
|
414
327
|
}
|
|
415
328
|
}
|
|
416
329
|
return conditions.join(" AND ");
|
|
417
330
|
}
|
|
418
|
-
euclideanDistance(a, b) {
|
|
419
|
-
const length = Math.min(a.length, b.length);
|
|
420
|
-
let sum = 0;
|
|
421
|
-
for (let index = 0; index < length; index += 1) {
|
|
422
|
-
const delta = a[index] - b[index];
|
|
423
|
-
sum += delta * delta;
|
|
424
|
-
}
|
|
425
|
-
return Math.sqrt(sum);
|
|
426
|
-
}
|
|
427
|
-
async createTable() {
|
|
428
|
-
this.table = await this.db.createTable(this.tableName, [
|
|
429
|
-
{
|
|
430
|
-
project_id: "",
|
|
431
|
-
chunk_id: "",
|
|
432
|
-
snapshot_id: "",
|
|
433
|
-
file_path: "",
|
|
434
|
-
start_line: 0,
|
|
435
|
-
end_line: 0,
|
|
436
|
-
content_hash: "",
|
|
437
|
-
chunk_type: "",
|
|
438
|
-
primary_symbol: "",
|
|
439
|
-
vector: Array(this.vectorSize).fill(0),
|
|
440
|
-
},
|
|
441
|
-
]);
|
|
442
|
-
}
|
|
443
|
-
hasRequiredSchema(schema) {
|
|
444
|
-
if (!schema?.fields) {
|
|
445
|
-
return false;
|
|
446
|
-
}
|
|
447
|
-
const fields = new Set(schema.fields.map((field) => field.name));
|
|
448
|
-
for (const column of exports.REQUIRED_COLUMNS) {
|
|
449
|
-
if (!fields.has(column)) {
|
|
450
|
-
return false;
|
|
451
|
-
}
|
|
452
|
-
}
|
|
453
|
-
return true;
|
|
454
|
-
}
|
|
455
331
|
escapeSqlLiteral(value) {
|
|
456
332
|
return value.replace(/'/g, "''");
|
|
457
333
|
}
|
|
@@ -459,5 +335,6 @@ class LanceDbVectorStore {
|
|
|
459
335
|
return this.escapeSqlLiteral(value).replace(/[%_]/g, (char) => `\\${char}`);
|
|
460
336
|
}
|
|
461
337
|
}
|
|
462
|
-
exports.
|
|
338
|
+
exports.SqliteVecVectorStore = SqliteVecVectorStore;
|
|
339
|
+
exports.LanceDbVectorStore = SqliteVecVectorStore;
|
|
463
340
|
//# sourceMappingURL=vectors.js.map
|