@pleaseai/context-please-core 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -1
- package/dist/context.js +10 -10
- package/dist/context.js.map +1 -1
- package/dist/embedding/gemini-embedding.js +4 -4
- package/dist/embedding/gemini-embedding.js.map +1 -1
- package/dist/embedding/huggingface-embedding.js +3 -3
- package/dist/embedding/huggingface-embedding.js.map +1 -1
- package/dist/splitter/ast-splitter.d.ts.map +1 -1
- package/dist/splitter/ast-splitter.js.map +1 -1
- package/dist/vectordb/factory.d.ts +15 -1
- package/dist/vectordb/factory.d.ts.map +1 -1
- package/dist/vectordb/factory.js +17 -2
- package/dist/vectordb/factory.js.map +1 -1
- package/dist/vectordb/faiss-vectordb.d.ts.map +1 -1
- package/dist/vectordb/faiss-vectordb.js +18 -10
- package/dist/vectordb/faiss-vectordb.js.map +1 -1
- package/dist/vectordb/index.d.ts +1 -0
- package/dist/vectordb/index.d.ts.map +1 -1
- package/dist/vectordb/index.js +3 -1
- package/dist/vectordb/index.js.map +1 -1
- package/dist/vectordb/libsql-vectordb.d.ts +170 -0
- package/dist/vectordb/libsql-vectordb.d.ts.map +1 -0
- package/dist/vectordb/libsql-vectordb.js +803 -0
- package/dist/vectordb/libsql-vectordb.js.map +1 -0
- package/dist/vectordb/sparse/simple-bm25.d.ts.map +1 -1
- package/dist/vectordb/sparse/simple-bm25.js +7 -18
- package/dist/vectordb/sparse/simple-bm25.js.map +1 -1
- package/package.json +2 -1
|
@@ -0,0 +1,803 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript interop helper: re-exports property `key` of module `source` on
// `target` under the name `alias` (defaults to `key`). Where Object.create is
// available the export is a live getter; otherwise it is a plain copy.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Reuse the source descriptor only when it is an accessor on a real ES
        // module, or a frozen data property; otherwise bind through a getter.
        var mustWrap = !descriptor
            || ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (mustWrap) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        target[alias === undefined ? key : alias] = source[key];
    });
|
|
13
|
+
// TypeScript interop helper: attaches `value` as the enumerable `default`
// export of `target`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { value: value, enumerable: true });
    }
    : function (target, value) {
        target["default"] = value;
    });
|
|
18
|
+
// TypeScript interop helper behind `import * as x`: ES modules are returned
// untouched; anything else is wrapped in a namespace object that re-exports
// every own property except "default" and exposes the module itself as `default`.
var __importStar = (this && this.__importStar) || (function () {
    // Resolves the key-listing strategy on first use, then replaces itself.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            for (var index = 0; index < names.length; index++) {
                if (names[index] !== "default") {
                    __createBinding(namespace, mod, names[index]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.LibSQLVectorDatabase = void 0;
|
|
37
|
+
const os = __importStar(require("node:os"));
|
|
38
|
+
const path = __importStar(require("node:path"));
|
|
39
|
+
const client_1 = require("@libsql/client");
|
|
40
|
+
const fs = __importStar(require("fs-extra"));
|
|
41
|
+
const base_vector_database_1 = require("./base/base-vector-database");
|
|
42
|
+
const simple_bm25_1 = require("./sparse/simple-bm25");
|
|
43
|
+
/**
|
|
44
|
+
* LibSQL Vector Database implementation for local-only deployments
|
|
45
|
+
*
|
|
46
|
+
* Features:
|
|
47
|
+
* - Pure JavaScript SDK (no native bindings required)
|
|
48
|
+
* - Full document deletion support via SQL DELETE
|
|
49
|
+
* - Query filtering support via SQL WHERE clauses
|
|
50
|
+
* - Single SQLite file per collection
|
|
51
|
+
* - Hybrid search with BM25 sparse vectors
|
|
52
|
+
* - RRF (Reciprocal Rank Fusion) reranking
|
|
53
|
+
*
|
|
54
|
+
* Architecture:
|
|
55
|
+
* - Dense vectors: Stored in F32_BLOB columns with DiskANN indexing
|
|
56
|
+
* - Sparse vectors: Stored as JSON (indices/values) for BM25
|
|
57
|
+
* - Hybrid search: Combines both using RRF fusion
|
|
58
|
+
*
|
|
59
|
+
* Storage structure:
|
|
60
|
+
* ~/.context/libsql-indexes/
|
|
61
|
+
* └── {collection_name}.db # SQLite database file
|
|
62
|
+
*
|
|
63
|
+
* Key advantages over FAISS:
|
|
64
|
+
* - Document deletion IS supported (SQL DELETE)
|
|
65
|
+
* - Query filters ARE supported (SQL WHERE)
|
|
66
|
+
* - No native bindings required
|
|
67
|
+
*/
|
|
68
|
+
class LibSQLVectorDatabase extends base_vector_database_1.BaseVectorDatabase {
|
|
69
|
+
constructor(config) {
|
|
70
|
+
const configWithDefaults = {
|
|
71
|
+
...config,
|
|
72
|
+
storageDir: config.storageDir || path.join(os.homedir(), '.context', 'libsql-indexes'),
|
|
73
|
+
walMode: config.walMode !== false,
|
|
74
|
+
cacheSize: config.cacheSize || 2000,
|
|
75
|
+
};
|
|
76
|
+
super(configWithDefaults);
|
|
77
|
+
this.clients = new Map();
|
|
78
|
+
this.bm25Generators = new Map();
|
|
79
|
+
this.metadataCache = new Map();
|
|
80
|
+
}
|
|
81
|
+
get storageDir() {
|
|
82
|
+
return this.config.storageDir;
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Initialize LibSQL storage directory
|
|
86
|
+
*/
|
|
87
|
+
async initialize() {
|
|
88
|
+
try {
|
|
89
|
+
console.log('[LibSQLDB] Initializing LibSQL storage at:', this.storageDir);
|
|
90
|
+
await fs.ensureDir(this.storageDir);
|
|
91
|
+
console.log('[LibSQLDB] LibSQL storage initialized');
|
|
92
|
+
}
|
|
93
|
+
catch (error) {
|
|
94
|
+
const errorMsg = `Failed to initialize LibSQL storage at ${this.storageDir}: ${error.message}`;
|
|
95
|
+
console.error(`[LibSQLDB] ${errorMsg}`);
|
|
96
|
+
if (error.code === 'EACCES') {
|
|
97
|
+
throw new Error(`${errorMsg}\nPermission denied. Check directory permissions.`);
|
|
98
|
+
}
|
|
99
|
+
else if (error.code === 'ENOSPC') {
|
|
100
|
+
throw new Error(`${errorMsg}\nDisk space exhausted. Free up disk space and try again.`);
|
|
101
|
+
}
|
|
102
|
+
throw new Error(errorMsg);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* LibSQL collections are loaded on-demand when accessed
|
|
107
|
+
*/
|
|
108
|
+
async ensureLoaded(collectionName) {
|
|
109
|
+
if (this.clients.has(collectionName)) {
|
|
110
|
+
return;
|
|
111
|
+
}
|
|
112
|
+
const dbPath = this.getDbPath(collectionName);
|
|
113
|
+
if (!(await fs.pathExists(dbPath))) {
|
|
114
|
+
throw new Error(`Collection ${collectionName} does not exist`);
|
|
115
|
+
}
|
|
116
|
+
await this.loadCollection(collectionName);
|
|
117
|
+
}
|
|
118
|
+
/**
|
|
119
|
+
* Get database file path for a collection
|
|
120
|
+
*/
|
|
121
|
+
getDbPath(collectionName) {
|
|
122
|
+
return path.join(this.storageDir, `${collectionName}.db`);
|
|
123
|
+
}
|
|
124
|
+
/**
|
|
125
|
+
* Load collection from disk
|
|
126
|
+
*/
|
|
127
|
+
async loadCollection(collectionName) {
|
|
128
|
+
const dbPath = this.getDbPath(collectionName);
|
|
129
|
+
console.log('[LibSQLDB] Loading collection:', collectionName);
|
|
130
|
+
try {
|
|
131
|
+
const client = (0, client_1.createClient)({ url: `file:${dbPath}` });
|
|
132
|
+
this.clients.set(collectionName, client);
|
|
133
|
+
// Load metadata
|
|
134
|
+
const result = await client.execute('SELECT key, value FROM _metadata');
|
|
135
|
+
const metadata = {};
|
|
136
|
+
for (const row of result.rows) {
|
|
137
|
+
metadata[row.key] = row.value;
|
|
138
|
+
}
|
|
139
|
+
const collectionMetadata = {
|
|
140
|
+
dimension: Number.parseInt(metadata.dimension, 10),
|
|
141
|
+
isHybrid: metadata.isHybrid === 'true',
|
|
142
|
+
documentCount: Number.parseInt(metadata.documentCount || '0', 10),
|
|
143
|
+
createdAt: metadata.createdAt,
|
|
144
|
+
};
|
|
145
|
+
this.metadataCache.set(collectionName, collectionMetadata);
|
|
146
|
+
// Load BM25 if hybrid collection
|
|
147
|
+
if (collectionMetadata.isHybrid) {
|
|
148
|
+
const bm25Path = path.join(this.storageDir, `${collectionName}_bm25.json`);
|
|
149
|
+
if (await fs.pathExists(bm25Path)) {
|
|
150
|
+
const bm25Json = await fs.readFile(bm25Path, 'utf-8');
|
|
151
|
+
const bm25 = simple_bm25_1.SimpleBM25.fromJSON(bm25Json);
|
|
152
|
+
this.bm25Generators.set(collectionName, bm25);
|
|
153
|
+
}
|
|
154
|
+
else {
|
|
155
|
+
console.warn(`[LibSQLDB] BM25 model file missing for hybrid collection ${collectionName}. Sparse search will be unavailable until re-indexing.`);
|
|
156
|
+
this.bm25Generators.set(collectionName, new simple_bm25_1.SimpleBM25(this.config.bm25Config));
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
console.log('[LibSQLDB] Loaded collection:', collectionName);
|
|
160
|
+
}
|
|
161
|
+
catch (error) {
|
|
162
|
+
console.error(`[LibSQLDB] Failed to load collection ${collectionName}:`, error.message);
|
|
163
|
+
throw error;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* Get or create client for a collection
|
|
168
|
+
*/
|
|
169
|
+
async getClient(collectionName) {
|
|
170
|
+
await this.ensureLoaded(collectionName);
|
|
171
|
+
return this.clients.get(collectionName);
|
|
172
|
+
}
|
|
173
|
+
/**
|
|
174
|
+
* Save BM25 model for a collection
|
|
175
|
+
*/
|
|
176
|
+
async saveBM25(collectionName) {
|
|
177
|
+
const bm25 = this.bm25Generators.get(collectionName);
|
|
178
|
+
if (!bm25) {
|
|
179
|
+
return;
|
|
180
|
+
}
|
|
181
|
+
const bm25Path = path.join(this.storageDir, `${collectionName}_bm25.json`);
|
|
182
|
+
try {
|
|
183
|
+
await fs.writeFile(bm25Path, bm25.toJSON(), 'utf-8');
|
|
184
|
+
}
|
|
185
|
+
catch (error) {
|
|
186
|
+
console.error(`[LibSQLDB] Failed to save BM25 model for ${collectionName}:`, error.message);
|
|
187
|
+
throw new Error(`Failed to save BM25 model for ${collectionName}: ${error.message}`);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
/**
|
|
191
|
+
* Update document count in metadata
|
|
192
|
+
*/
|
|
193
|
+
async updateDocumentCount(collectionName) {
|
|
194
|
+
const client = this.clients.get(collectionName);
|
|
195
|
+
if (!client) {
|
|
196
|
+
console.warn(`[LibSQLDB] Cannot update document count: client not found for ${collectionName}`);
|
|
197
|
+
return;
|
|
198
|
+
}
|
|
199
|
+
try {
|
|
200
|
+
const result = await client.execute('SELECT COUNT(*) as count FROM documents');
|
|
201
|
+
const count = Number(result.rows[0].count);
|
|
202
|
+
if (Number.isNaN(count)) {
|
|
203
|
+
console.error(`[LibSQLDB] Invalid document count result for ${collectionName}`);
|
|
204
|
+
return;
|
|
205
|
+
}
|
|
206
|
+
await client.execute({
|
|
207
|
+
sql: 'INSERT OR REPLACE INTO _metadata (key, value) VALUES (?, ?)',
|
|
208
|
+
args: ['documentCount', String(count)],
|
|
209
|
+
});
|
|
210
|
+
const metadata = this.metadataCache.get(collectionName);
|
|
211
|
+
if (metadata) {
|
|
212
|
+
metadata.documentCount = count;
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
catch (error) {
|
|
216
|
+
console.error(`[LibSQLDB] Failed to update document count for ${collectionName}:`, error.message);
|
|
217
|
+
// Don't throw - this is a non-critical metadata update
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
/**
|
|
221
|
+
* Create collection with dense vectors only
|
|
222
|
+
*/
|
|
223
|
+
async createCollection(collectionName, dimension, _description) {
|
|
224
|
+
await this.ensureInitialized();
|
|
225
|
+
const dbPath = this.getDbPath(collectionName);
|
|
226
|
+
if (await fs.pathExists(dbPath)) {
|
|
227
|
+
throw new Error(`Collection ${collectionName} already exists`);
|
|
228
|
+
}
|
|
229
|
+
console.log('[LibSQLDB] Creating collection:', collectionName);
|
|
230
|
+
console.log('[LibSQLDB] Vector dimension:', dimension);
|
|
231
|
+
const client = (0, client_1.createClient)({ url: `file:${dbPath}` });
|
|
232
|
+
// Configure SQLite settings
|
|
233
|
+
if (this.config.walMode) {
|
|
234
|
+
await client.execute('PRAGMA journal_mode=WAL');
|
|
235
|
+
}
|
|
236
|
+
await client.execute(`PRAGMA cache_size=${this.config.cacheSize}`);
|
|
237
|
+
// Create metadata table
|
|
238
|
+
await client.execute(`
|
|
239
|
+
CREATE TABLE _metadata (
|
|
240
|
+
key TEXT PRIMARY KEY,
|
|
241
|
+
value TEXT NOT NULL
|
|
242
|
+
)
|
|
243
|
+
`);
|
|
244
|
+
// Create documents table with vector column
|
|
245
|
+
await client.execute(`
|
|
246
|
+
CREATE TABLE documents (
|
|
247
|
+
id TEXT PRIMARY KEY,
|
|
248
|
+
content TEXT NOT NULL,
|
|
249
|
+
relative_path TEXT NOT NULL,
|
|
250
|
+
start_line INTEGER NOT NULL,
|
|
251
|
+
end_line INTEGER NOT NULL,
|
|
252
|
+
file_extension TEXT NOT NULL,
|
|
253
|
+
metadata TEXT NOT NULL,
|
|
254
|
+
dense_vector F32_BLOB(${dimension})
|
|
255
|
+
)
|
|
256
|
+
`);
|
|
257
|
+
// Create vector index
|
|
258
|
+
await client.execute('CREATE INDEX idx_dense ON documents(libsql_vector_idx(dense_vector))');
|
|
259
|
+
// Insert metadata
|
|
260
|
+
await client.batch([
|
|
261
|
+
{ sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['dimension', String(dimension)] },
|
|
262
|
+
{ sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['isHybrid', 'false'] },
|
|
263
|
+
{ sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['createdAt', new Date().toISOString()] },
|
|
264
|
+
{ sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['documentCount', '0'] },
|
|
265
|
+
]);
|
|
266
|
+
this.clients.set(collectionName, client);
|
|
267
|
+
this.metadataCache.set(collectionName, {
|
|
268
|
+
dimension,
|
|
269
|
+
isHybrid: false,
|
|
270
|
+
documentCount: 0,
|
|
271
|
+
createdAt: new Date().toISOString(),
|
|
272
|
+
});
|
|
273
|
+
console.log('[LibSQLDB] Collection created:', collectionName);
|
|
274
|
+
}
|
|
275
|
+
/**
|
|
276
|
+
* Create collection with hybrid search support (dense + sparse vectors)
|
|
277
|
+
*/
|
|
278
|
+
async createHybridCollection(collectionName, dimension, _description) {
|
|
279
|
+
await this.ensureInitialized();
|
|
280
|
+
const dbPath = this.getDbPath(collectionName);
|
|
281
|
+
if (await fs.pathExists(dbPath)) {
|
|
282
|
+
throw new Error(`Collection ${collectionName} already exists`);
|
|
283
|
+
}
|
|
284
|
+
console.log('[LibSQLDB] Creating hybrid collection:', collectionName);
|
|
285
|
+
console.log('[LibSQLDB] Vector dimension:', dimension);
|
|
286
|
+
const client = (0, client_1.createClient)({ url: `file:${dbPath}` });
|
|
287
|
+
// Configure SQLite settings
|
|
288
|
+
if (this.config.walMode) {
|
|
289
|
+
await client.execute('PRAGMA journal_mode=WAL');
|
|
290
|
+
}
|
|
291
|
+
await client.execute(`PRAGMA cache_size=${this.config.cacheSize}`);
|
|
292
|
+
// Create metadata table
|
|
293
|
+
await client.execute(`
|
|
294
|
+
CREATE TABLE _metadata (
|
|
295
|
+
key TEXT PRIMARY KEY,
|
|
296
|
+
value TEXT NOT NULL
|
|
297
|
+
)
|
|
298
|
+
`);
|
|
299
|
+
// Create documents table with vector and sparse columns
|
|
300
|
+
await client.execute(`
|
|
301
|
+
CREATE TABLE documents (
|
|
302
|
+
id TEXT PRIMARY KEY,
|
|
303
|
+
content TEXT NOT NULL,
|
|
304
|
+
relative_path TEXT NOT NULL,
|
|
305
|
+
start_line INTEGER NOT NULL,
|
|
306
|
+
end_line INTEGER NOT NULL,
|
|
307
|
+
file_extension TEXT NOT NULL,
|
|
308
|
+
metadata TEXT NOT NULL,
|
|
309
|
+
dense_vector F32_BLOB(${dimension}),
|
|
310
|
+
sparse_indices TEXT,
|
|
311
|
+
sparse_values TEXT
|
|
312
|
+
)
|
|
313
|
+
`);
|
|
314
|
+
// Create vector index
|
|
315
|
+
await client.execute('CREATE INDEX idx_dense ON documents(libsql_vector_idx(dense_vector))');
|
|
316
|
+
// Insert metadata
|
|
317
|
+
await client.batch([
|
|
318
|
+
{ sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['dimension', String(dimension)] },
|
|
319
|
+
{ sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['isHybrid', 'true'] },
|
|
320
|
+
{ sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['createdAt', new Date().toISOString()] },
|
|
321
|
+
{ sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['documentCount', '0'] },
|
|
322
|
+
]);
|
|
323
|
+
this.clients.set(collectionName, client);
|
|
324
|
+
this.metadataCache.set(collectionName, {
|
|
325
|
+
dimension,
|
|
326
|
+
isHybrid: true,
|
|
327
|
+
documentCount: 0,
|
|
328
|
+
createdAt: new Date().toISOString(),
|
|
329
|
+
});
|
|
330
|
+
// Initialize BM25 generator
|
|
331
|
+
this.bm25Generators.set(collectionName, new simple_bm25_1.SimpleBM25(this.config.bm25Config));
|
|
332
|
+
console.log('[LibSQLDB] Hybrid collection created:', collectionName);
|
|
333
|
+
}
|
|
334
|
+
/**
|
|
335
|
+
* Drop collection
|
|
336
|
+
*/
|
|
337
|
+
async dropCollection(collectionName) {
|
|
338
|
+
await this.ensureInitialized();
|
|
339
|
+
console.log('[LibSQLDB] Dropping collection:', collectionName);
|
|
340
|
+
// Close client if exists
|
|
341
|
+
const client = this.clients.get(collectionName);
|
|
342
|
+
if (client) {
|
|
343
|
+
client.close();
|
|
344
|
+
this.clients.delete(collectionName);
|
|
345
|
+
}
|
|
346
|
+
// Remove from caches
|
|
347
|
+
this.metadataCache.delete(collectionName);
|
|
348
|
+
this.bm25Generators.delete(collectionName);
|
|
349
|
+
// Remove database file
|
|
350
|
+
const dbPath = this.getDbPath(collectionName);
|
|
351
|
+
if (await fs.pathExists(dbPath)) {
|
|
352
|
+
await fs.remove(dbPath);
|
|
353
|
+
}
|
|
354
|
+
// Remove BM25 file if exists
|
|
355
|
+
const bm25Path = path.join(this.storageDir, `${collectionName}_bm25.json`);
|
|
356
|
+
if (await fs.pathExists(bm25Path)) {
|
|
357
|
+
await fs.remove(bm25Path);
|
|
358
|
+
}
|
|
359
|
+
// Remove WAL files if they exist
|
|
360
|
+
const walPath = `${dbPath}-wal`;
|
|
361
|
+
const shmPath = `${dbPath}-shm`;
|
|
362
|
+
await fs.remove(walPath);
|
|
363
|
+
await fs.remove(shmPath);
|
|
364
|
+
console.log('[LibSQLDB] Collection dropped:', collectionName);
|
|
365
|
+
}
|
|
366
|
+
/**
|
|
367
|
+
* Check if collection exists
|
|
368
|
+
*/
|
|
369
|
+
async hasCollection(collectionName) {
|
|
370
|
+
await this.ensureInitialized();
|
|
371
|
+
if (this.clients.has(collectionName)) {
|
|
372
|
+
return true;
|
|
373
|
+
}
|
|
374
|
+
const dbPath = this.getDbPath(collectionName);
|
|
375
|
+
return await fs.pathExists(dbPath);
|
|
376
|
+
}
|
|
377
|
+
/**
|
|
378
|
+
* List all collections
|
|
379
|
+
*/
|
|
380
|
+
async listCollections() {
|
|
381
|
+
await this.ensureInitialized();
|
|
382
|
+
const collections = [];
|
|
383
|
+
if (await fs.pathExists(this.storageDir)) {
|
|
384
|
+
const entries = await fs.readdir(this.storageDir, { withFileTypes: true });
|
|
385
|
+
for (const entry of entries) {
|
|
386
|
+
if (entry.isFile() && entry.name.endsWith('.db')) {
|
|
387
|
+
collections.push(entry.name.replace('.db', ''));
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
return collections;
|
|
392
|
+
}
|
|
393
|
+
/**
|
|
394
|
+
* Insert vector documents (dense only)
|
|
395
|
+
*/
|
|
396
|
+
async insert(collectionName, documents) {
|
|
397
|
+
await this.ensureInitialized();
|
|
398
|
+
const client = await this.getClient(collectionName);
|
|
399
|
+
const metadata = this.metadataCache.get(collectionName);
|
|
400
|
+
if (!metadata) {
|
|
401
|
+
throw new Error(`Collection ${collectionName} metadata not found`);
|
|
402
|
+
}
|
|
403
|
+
console.log('[LibSQLDB] Inserting documents:', documents.length);
|
|
404
|
+
// Validate vector dimensions
|
|
405
|
+
for (const doc of documents) {
|
|
406
|
+
if (doc.vector.length !== metadata.dimension) {
|
|
407
|
+
throw new Error(`Vector dimension mismatch for document '${doc.id}': `
|
|
408
|
+
+ `expected ${metadata.dimension}, got ${doc.vector.length}`);
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
// Batch insert
|
|
412
|
+
const statements = documents.map((doc) => ({
|
|
413
|
+
sql: `INSERT OR REPLACE INTO documents
|
|
414
|
+
(id, content, relative_path, start_line, end_line, file_extension, metadata, dense_vector)
|
|
415
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, vector32(?))`,
|
|
416
|
+
args: [
|
|
417
|
+
doc.id,
|
|
418
|
+
doc.content,
|
|
419
|
+
doc.relativePath,
|
|
420
|
+
doc.startLine,
|
|
421
|
+
doc.endLine,
|
|
422
|
+
doc.fileExtension,
|
|
423
|
+
JSON.stringify(doc.metadata),
|
|
424
|
+
`[${doc.vector.join(',')}]`,
|
|
425
|
+
],
|
|
426
|
+
}));
|
|
427
|
+
await client.batch(statements);
|
|
428
|
+
await this.updateDocumentCount(collectionName);
|
|
429
|
+
console.log('[LibSQLDB] Inserted documents:', documents.length);
|
|
430
|
+
}
|
|
431
|
+
/**
|
|
432
|
+
* Insert hybrid vector documents (dense + sparse)
|
|
433
|
+
*/
|
|
434
|
+
async insertHybrid(collectionName, documents) {
|
|
435
|
+
await this.ensureInitialized();
|
|
436
|
+
const client = await this.getClient(collectionName);
|
|
437
|
+
const metadata = this.metadataCache.get(collectionName);
|
|
438
|
+
if (!metadata) {
|
|
439
|
+
throw new Error(`Collection ${collectionName} metadata not found`);
|
|
440
|
+
}
|
|
441
|
+
if (!metadata.isHybrid) {
|
|
442
|
+
throw new Error(`Collection ${collectionName} is not a hybrid collection`);
|
|
443
|
+
}
|
|
444
|
+
console.log('[LibSQLDB] Inserting hybrid documents:', documents.length);
|
|
445
|
+
// Validate vector dimensions
|
|
446
|
+
for (const doc of documents) {
|
|
447
|
+
if (doc.vector.length !== metadata.dimension) {
|
|
448
|
+
throw new Error(`Vector dimension mismatch for document '${doc.id}': `
|
|
449
|
+
+ `expected ${metadata.dimension}, got ${doc.vector.length}`);
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
// Get or create BM25 generator
|
|
453
|
+
let bm25 = this.bm25Generators.get(collectionName);
|
|
454
|
+
if (!bm25) {
|
|
455
|
+
bm25 = new simple_bm25_1.SimpleBM25(this.config.bm25Config);
|
|
456
|
+
this.bm25Generators.set(collectionName, bm25);
|
|
457
|
+
}
|
|
458
|
+
// Train BM25 on all documents (existing + new)
|
|
459
|
+
const existingResult = await client.execute('SELECT content FROM documents');
|
|
460
|
+
const existingContents = existingResult.rows.map((r) => r.content);
|
|
461
|
+
const allContents = [...existingContents, ...documents.map((d) => d.content)];
|
|
462
|
+
bm25.learn(allContents);
|
|
463
|
+
// Generate sparse vectors
|
|
464
|
+
const sparseVectors = documents.map((doc) => bm25.generate(doc.content));
|
|
465
|
+
// Batch insert
|
|
466
|
+
const statements = documents.map((doc, i) => ({
|
|
467
|
+
sql: `INSERT OR REPLACE INTO documents
|
|
468
|
+
(id, content, relative_path, start_line, end_line, file_extension, metadata,
|
|
469
|
+
dense_vector, sparse_indices, sparse_values)
|
|
470
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, vector32(?), ?, ?)`,
|
|
471
|
+
args: [
|
|
472
|
+
doc.id,
|
|
473
|
+
doc.content,
|
|
474
|
+
doc.relativePath,
|
|
475
|
+
doc.startLine,
|
|
476
|
+
doc.endLine,
|
|
477
|
+
doc.fileExtension,
|
|
478
|
+
JSON.stringify(doc.metadata),
|
|
479
|
+
`[${doc.vector.join(',')}]`,
|
|
480
|
+
JSON.stringify(sparseVectors[i].indices),
|
|
481
|
+
JSON.stringify(sparseVectors[i].values),
|
|
482
|
+
],
|
|
483
|
+
}));
|
|
484
|
+
await client.batch(statements);
|
|
485
|
+
await this.updateDocumentCount(collectionName);
|
|
486
|
+
await this.saveBM25(collectionName);
|
|
487
|
+
console.log('[LibSQLDB] Inserted hybrid documents:', documents.length);
|
|
488
|
+
}
|
|
489
|
+
/**
|
|
490
|
+
* Search similar vectors (dense search only)
|
|
491
|
+
*/
|
|
492
|
+
async search(collectionName, queryVector, options) {
|
|
493
|
+
await this.ensureInitialized();
|
|
494
|
+
const client = await this.getClient(collectionName);
|
|
495
|
+
const topK = options?.topK || 10;
|
|
496
|
+
const queryVectorStr = `[${queryVector.join(',')}]`;
|
|
497
|
+
console.log('[LibSQLDB] Searching vectors, topK:', topK);
|
|
498
|
+
// Build query with vector_top_k
|
|
499
|
+
// Note: vector_top_k returns 'id' (rowid), we calculate distance with vector_distance_cos
|
|
500
|
+
let sql = `
|
|
501
|
+
SELECT d.*, vector_distance_cos(d.dense_vector, vector32(?)) AS distance
|
|
502
|
+
FROM vector_top_k('idx_dense', vector32(?), ?) AS vt
|
|
503
|
+
JOIN documents d ON d.rowid = vt.id
|
|
504
|
+
`;
|
|
505
|
+
const args = [queryVectorStr, queryVectorStr, topK * 2];
|
|
506
|
+
// Apply filter if provided
|
|
507
|
+
if (options?.filterExpr) {
|
|
508
|
+
const whereClause = this.parseFilterExpression(options.filterExpr);
|
|
509
|
+
sql += ` WHERE ${whereClause}`;
|
|
510
|
+
}
|
|
511
|
+
sql += ' ORDER BY distance ASC LIMIT ?';
|
|
512
|
+
args.push(topK);
|
|
513
|
+
const result = await client.execute({ sql, args });
|
|
514
|
+
const searchResults = [];
|
|
515
|
+
for (const row of result.rows) {
|
|
516
|
+
const score = 1 / (1 + row.distance);
|
|
517
|
+
// Apply threshold filter
|
|
518
|
+
if (options?.threshold !== undefined && score < options.threshold) {
|
|
519
|
+
continue;
|
|
520
|
+
}
|
|
521
|
+
searchResults.push({
|
|
522
|
+
document: this.rowToDocument(row),
|
|
523
|
+
score,
|
|
524
|
+
});
|
|
525
|
+
}
|
|
526
|
+
console.log('[LibSQLDB] Found results:', searchResults.length);
|
|
527
|
+
return searchResults;
|
|
528
|
+
}
|
|
529
|
+
/**
|
|
530
|
+
* Hybrid search with multiple vector fields (dense + sparse)
|
|
531
|
+
*/
|
|
532
|
+
async hybridSearch(collectionName, searchRequests, options) {
|
|
533
|
+
await this.ensureInitialized();
|
|
534
|
+
const client = await this.getClient(collectionName);
|
|
535
|
+
const metadata = this.metadataCache.get(collectionName);
|
|
536
|
+
if (!metadata?.isHybrid) {
|
|
537
|
+
throw new Error(`Collection ${collectionName} is not a hybrid collection`);
|
|
538
|
+
}
|
|
539
|
+
const limit = options?.limit || 10;
|
|
540
|
+
console.log('[LibSQLDB] Hybrid search, requests:', searchRequests.length);
|
|
541
|
+
// Process search requests
|
|
542
|
+
const denseResults = new Map();
|
|
543
|
+
const sparseResults = new Map();
|
|
544
|
+
for (const request of searchRequests) {
|
|
545
|
+
if (request.anns_field === 'vector' || request.anns_field === 'dense') {
|
|
546
|
+
await this.performDenseSearch(client, request.data, limit, denseResults);
|
|
547
|
+
}
|
|
548
|
+
else if (request.anns_field === 'sparse' || request.anns_field === 'sparse_vector') {
|
|
549
|
+
await this.performSparseSearch(client, collectionName, request.data, sparseResults);
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
// Apply RRF reranking
|
|
553
|
+
const k = options?.rerank?.params?.k || 60;
|
|
554
|
+
const rrfScores = this.applyRRF(denseResults, sparseResults, k);
|
|
555
|
+
// Fetch full documents for top results
|
|
556
|
+
const topIds = Array.from(rrfScores.entries())
|
|
557
|
+
.sort((a, b) => b[1] - a[1])
|
|
558
|
+
.slice(0, limit)
|
|
559
|
+
.map(([id]) => id);
|
|
560
|
+
if (topIds.length === 0) {
|
|
561
|
+
return [];
|
|
562
|
+
}
|
|
563
|
+
const placeholders = topIds.map(() => '?').join(',');
|
|
564
|
+
const docsResult = await client.execute({
|
|
565
|
+
sql: `SELECT * FROM documents WHERE id IN (${placeholders})`,
|
|
566
|
+
args: topIds,
|
|
567
|
+
});
|
|
568
|
+
const results = [];
|
|
569
|
+
const docMap = new Map(docsResult.rows.map((row) => [row.id, row]));
|
|
570
|
+
for (const id of topIds) {
|
|
571
|
+
const row = docMap.get(id);
|
|
572
|
+
if (row) {
|
|
573
|
+
results.push({
|
|
574
|
+
document: this.rowToDocument(row),
|
|
575
|
+
score: rrfScores.get(id) || 0,
|
|
576
|
+
});
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
console.log('[LibSQLDB] Hybrid search results:', results.length);
|
|
580
|
+
return results;
|
|
581
|
+
}
|
|
582
|
+
/**
|
|
583
|
+
* Perform dense vector search
|
|
584
|
+
*/
|
|
585
|
+
async performDenseSearch(client, queryVector, limit, results) {
|
|
586
|
+
const queryVectorStr = `[${queryVector.join(',')}]`;
|
|
587
|
+
const sql = `
|
|
588
|
+
SELECT d.id, vector_distance_cos(d.dense_vector, vector32(?)) AS distance
|
|
589
|
+
FROM vector_top_k('idx_dense', vector32(?), ?) AS vt
|
|
590
|
+
JOIN documents d ON d.rowid = vt.id
|
|
591
|
+
`;
|
|
592
|
+
const result = await client.execute({
|
|
593
|
+
sql,
|
|
594
|
+
args: [queryVectorStr, queryVectorStr, limit * 2],
|
|
595
|
+
});
|
|
596
|
+
for (const row of result.rows) {
|
|
597
|
+
const score = 1 / (1 + row.distance);
|
|
598
|
+
results.set(row.id, score);
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
/**
|
|
602
|
+
* Perform sparse search using BM25
|
|
603
|
+
*/
|
|
604
|
+
async performSparseSearch(client, collectionName, queryText, results) {
|
|
605
|
+
const bm25 = this.bm25Generators.get(collectionName);
|
|
606
|
+
if (!bm25) {
|
|
607
|
+
console.warn(`[LibSQLDB] BM25 generator not found for collection ${collectionName}. Sparse search skipped.`);
|
|
608
|
+
return;
|
|
609
|
+
}
|
|
610
|
+
if (!bm25.isTrained()) {
|
|
611
|
+
console.warn(`[LibSQLDB] BM25 model not trained for collection ${collectionName}. Sparse search skipped.`);
|
|
612
|
+
return;
|
|
613
|
+
}
|
|
614
|
+
const queryVector = bm25.generate(queryText);
|
|
615
|
+
const queryMap = new Map();
|
|
616
|
+
for (let i = 0; i < queryVector.indices.length; i++) {
|
|
617
|
+
queryMap.set(queryVector.indices[i], queryVector.values[i]);
|
|
618
|
+
}
|
|
619
|
+
// Fetch all documents with sparse vectors and compute scores
|
|
620
|
+
const result = await client.execute(`
|
|
621
|
+
SELECT id, sparse_indices, sparse_values FROM documents
|
|
622
|
+
WHERE sparse_indices IS NOT NULL
|
|
623
|
+
`);
|
|
624
|
+
for (const row of result.rows) {
|
|
625
|
+
const indices = JSON.parse(row.sparse_indices);
|
|
626
|
+
const values = JSON.parse(row.sparse_values);
|
|
627
|
+
let score = 0;
|
|
628
|
+
for (let i = 0; i < indices.length; i++) {
|
|
629
|
+
const queryVal = queryMap.get(indices[i]);
|
|
630
|
+
if (queryVal !== undefined) {
|
|
631
|
+
score += values[i] * queryVal;
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
if (score > 0) {
|
|
635
|
+
results.set(row.id, score);
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
/**
|
|
640
|
+
* Apply Reciprocal Rank Fusion (RRF) reranking
|
|
641
|
+
*/
|
|
642
|
+
applyRRF(denseResults, sparseResults, k) {
|
|
643
|
+
const denseRanks = this.computeRanks(denseResults);
|
|
644
|
+
const sparseRanks = this.computeRanks(sparseResults);
|
|
645
|
+
const allIds = new Set([...denseResults.keys(), ...sparseResults.keys()]);
|
|
646
|
+
const rrfScores = new Map();
|
|
647
|
+
for (const id of allIds) {
|
|
648
|
+
let score = 0;
|
|
649
|
+
const denseRank = denseRanks.get(id);
|
|
650
|
+
const sparseRank = sparseRanks.get(id);
|
|
651
|
+
if (denseRank !== undefined) {
|
|
652
|
+
score += 1 / (k + denseRank);
|
|
653
|
+
}
|
|
654
|
+
if (sparseRank !== undefined) {
|
|
655
|
+
score += 1 / (k + sparseRank);
|
|
656
|
+
}
|
|
657
|
+
rrfScores.set(id, score);
|
|
658
|
+
}
|
|
659
|
+
return rrfScores;
|
|
660
|
+
}
|
|
661
|
+
/**
|
|
662
|
+
* Compute ranks from scores
|
|
663
|
+
*/
|
|
664
|
+
computeRanks(scores) {
|
|
665
|
+
const ranks = new Map();
|
|
666
|
+
const sorted = Array.from(scores.entries()).sort((a, b) => b[1] - a[1]);
|
|
667
|
+
sorted.forEach(([id], index) => ranks.set(id, index + 1));
|
|
668
|
+
return ranks;
|
|
669
|
+
}
|
|
670
|
+
/**
|
|
671
|
+
* Convert database row to VectorDocument
|
|
672
|
+
*/
|
|
673
|
+
rowToDocument(row) {
|
|
674
|
+
return {
|
|
675
|
+
id: row.id,
|
|
676
|
+
vector: [],
|
|
677
|
+
content: row.content,
|
|
678
|
+
relativePath: row.relative_path,
|
|
679
|
+
startLine: row.start_line,
|
|
680
|
+
endLine: row.end_line,
|
|
681
|
+
fileExtension: row.file_extension,
|
|
682
|
+
metadata: JSON.parse(row.metadata),
|
|
683
|
+
};
|
|
684
|
+
}
|
|
685
|
+
/**
|
|
686
|
+
* Parse filter expression to SQL WHERE clause
|
|
687
|
+
*/
|
|
688
|
+
parseFilterExpression(expr) {
|
|
689
|
+
// Convert Milvus-style filters to SQL WHERE clause
|
|
690
|
+
// "fileExtension == '.ts'" -> "file_extension = '.ts'"
|
|
691
|
+
// "fileExtension in ['.ts', '.js']" -> "file_extension IN ('.ts', '.js')"
|
|
692
|
+
if (expr.includes(' in ')) {
|
|
693
|
+
const match = expr.match(/(\w+)\s+in\s+\[(.*)\]/);
|
|
694
|
+
if (match) {
|
|
695
|
+
const field = this.mapFieldName(match[1]);
|
|
696
|
+
const values = match[2].split(',').map((v) => v.trim());
|
|
697
|
+
return `${field} IN (${values.join(',')})`;
|
|
698
|
+
}
|
|
699
|
+
}
|
|
700
|
+
if (expr.includes('==')) {
|
|
701
|
+
const match = expr.match(/(\w+)\s*==\s*(.+)/);
|
|
702
|
+
if (match) {
|
|
703
|
+
const field = this.mapFieldName(match[1]);
|
|
704
|
+
return `${field} = ${match[2].trim()}`;
|
|
705
|
+
}
|
|
706
|
+
}
|
|
707
|
+
// Return as-is if not recognized
|
|
708
|
+
console.warn(`[LibSQLDB] Unrecognized filter expression: ${expr}`);
|
|
709
|
+
return expr;
|
|
710
|
+
}
|
|
711
|
+
/**
|
|
712
|
+
* Map field names to database column names
|
|
713
|
+
*/
|
|
714
|
+
mapFieldName(field) {
|
|
715
|
+
const mapping = {
|
|
716
|
+
relativePath: 'relative_path',
|
|
717
|
+
startLine: 'start_line',
|
|
718
|
+
endLine: 'end_line',
|
|
719
|
+
fileExtension: 'file_extension',
|
|
720
|
+
};
|
|
721
|
+
return mapping[field] || field;
|
|
722
|
+
}
|
|
723
|
+
/**
|
|
724
|
+
* Delete documents by IDs
|
|
725
|
+
*
|
|
726
|
+
* Key advantage over FAISS: LibSQL supports document deletion via SQL DELETE
|
|
727
|
+
*/
|
|
728
|
+
async delete(collectionName, ids) {
|
|
729
|
+
await this.ensureInitialized();
|
|
730
|
+
const client = await this.getClient(collectionName);
|
|
731
|
+
console.log(`[LibSQLDB] Deleting ${ids.length} documents from ${collectionName}`);
|
|
732
|
+
const placeholders = ids.map(() => '?').join(',');
|
|
733
|
+
await client.execute({
|
|
734
|
+
sql: `DELETE FROM documents WHERE id IN (${placeholders})`,
|
|
735
|
+
args: ids,
|
|
736
|
+
});
|
|
737
|
+
await this.updateDocumentCount(collectionName);
|
|
738
|
+
// Re-train BM25 if hybrid collection
|
|
739
|
+
const metadata = this.metadataCache.get(collectionName);
|
|
740
|
+
if (metadata?.isHybrid) {
|
|
741
|
+
const bm25 = this.bm25Generators.get(collectionName);
|
|
742
|
+
if (bm25) {
|
|
743
|
+
const result = await client.execute('SELECT content FROM documents');
|
|
744
|
+
const contents = result.rows.map((r) => r.content);
|
|
745
|
+
if (contents.length > 0) {
|
|
746
|
+
bm25.learn(contents);
|
|
747
|
+
await this.saveBM25(collectionName);
|
|
748
|
+
}
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
console.log(`[LibSQLDB] Deleted ${ids.length} documents`);
|
|
752
|
+
}
|
|
753
|
+
/**
|
|
754
|
+
* Query documents with filter conditions
|
|
755
|
+
*
|
|
756
|
+
* Key advantage over FAISS: LibSQL supports SQL WHERE clauses
|
|
757
|
+
*/
|
|
758
|
+
async query(collectionName, filter, outputFields, limit) {
|
|
759
|
+
await this.ensureInitialized();
|
|
760
|
+
const client = await this.getClient(collectionName);
|
|
761
|
+
console.log('[LibSQLDB] Querying documents');
|
|
762
|
+
// Build SELECT clause
|
|
763
|
+
const fields = outputFields.length > 0
|
|
764
|
+
? outputFields.map((f) => this.mapFieldName(f)).join(', ')
|
|
765
|
+
: '*';
|
|
766
|
+
let sql = `SELECT ${fields} FROM documents`;
|
|
767
|
+
const args = [];
|
|
768
|
+
// Apply filter
|
|
769
|
+
if (filter && filter.trim()) {
|
|
770
|
+
const whereClause = this.parseFilterExpression(filter);
|
|
771
|
+
sql += ` WHERE ${whereClause}`;
|
|
772
|
+
}
|
|
773
|
+
sql += ' LIMIT ?';
|
|
774
|
+
args.push(limit || 100);
|
|
775
|
+
const result = await client.execute({ sql, args });
|
|
776
|
+
return result.rows.map((row) => this.rowToResult(row, outputFields));
|
|
777
|
+
}
|
|
778
|
+
/**
|
|
779
|
+
* Convert row to result object
|
|
780
|
+
*/
|
|
781
|
+
rowToResult(row, outputFields) {
|
|
782
|
+
const result = {};
|
|
783
|
+
for (const field of outputFields) {
|
|
784
|
+
const dbField = this.mapFieldName(field);
|
|
785
|
+
if (row[dbField] !== undefined) {
|
|
786
|
+
result[field] = row[dbField];
|
|
787
|
+
}
|
|
788
|
+
else if (row[field] !== undefined) {
|
|
789
|
+
result[field] = row[field];
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
return result;
|
|
793
|
+
}
|
|
794
|
+
/**
|
|
795
|
+
* Check collection limit
|
|
796
|
+
* LibSQL has no inherent collection limit (only limited by disk space)
|
|
797
|
+
*/
|
|
798
|
+
async checkCollectionLimit() {
|
|
799
|
+
return true;
|
|
800
|
+
}
|
|
801
|
+
}
|
|
802
|
+
exports.LibSQLVectorDatabase = LibSQLVectorDatabase;
|
|
803
|
+
//# sourceMappingURL=libsql-vectordb.js.map
|