@pleaseai/context-please-core 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,803 @@
1
+ "use strict";
2
// TypeScript interop helper: exposes property `key` of `source` on `target`
// (under `alias` when given). Reuses an already-installed helper if present.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Fall back to a live getter when the property is missing, is a plain
        // writable/configurable value, or is an accessor on a non-ES module.
        var needsGetter = !descriptor
            || ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        target[alias] = source[key];
    });
13
// TypeScript interop helper: installs `value` as the `default` export of `target`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        target["default"] = value;
    });
18
// TypeScript interop helper: wraps a CommonJS module in a namespace-like object.
// ES modules (flagged with __esModule) are returned untouched.
var __importStar = (this && this.__importStar) || (function () {
    // Picks the key-enumeration strategy on first use, then reuses it.
    var listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (target) {
            var names = [];
            for (var name in target) {
                if (Object.prototype.hasOwnProperty.call(target, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var i = 0; i < keys.length; i++) {
                // "default" is installed separately below.
                if (keys[i] !== "default") {
                    __createBinding(namespace, mod, keys[i]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.LibSQLVectorDatabase = void 0;
37
+ const os = __importStar(require("node:os"));
38
+ const path = __importStar(require("node:path"));
39
+ const client_1 = require("@libsql/client");
40
+ const fs = __importStar(require("fs-extra"));
41
+ const base_vector_database_1 = require("./base/base-vector-database");
42
+ const simple_bm25_1 = require("./sparse/simple-bm25");
43
+ /**
44
+ * LibSQL Vector Database implementation for local-only deployments
45
+ *
46
+ * Features:
47
+ * - Pure JavaScript SDK (no native bindings required)
48
+ * - Full document deletion support via SQL DELETE
49
+ * - Query filtering support via SQL WHERE clauses
50
+ * - Single SQLite file per collection
51
+ * - Hybrid search with BM25 sparse vectors
52
+ * - RRF (Reciprocal Rank Fusion) reranking
53
+ *
54
+ * Architecture:
55
+ * - Dense vectors: Stored in F32_BLOB columns with DiskANN indexing
56
+ * - Sparse vectors: Stored as JSON (indices/values) for BM25
57
+ * - Hybrid search: Combines both using RRF fusion
58
+ *
59
+ * Storage structure:
60
+ * ~/.context/libsql-indexes/
61
+ * └── {collection_name}.db # SQLite database file
62
+ *
63
+ * Key advantages over FAISS:
64
+ * - Document deletion IS supported (SQL DELETE)
65
+ * - Query filters ARE supported (SQL WHERE)
66
+ * - No native bindings required
67
+ */
68
+ class LibSQLVectorDatabase extends base_vector_database_1.BaseVectorDatabase {
69
+ constructor(config) {
70
+ const configWithDefaults = {
71
+ ...config,
72
+ storageDir: config.storageDir || path.join(os.homedir(), '.context', 'libsql-indexes'),
73
+ walMode: config.walMode !== false,
74
+ cacheSize: config.cacheSize || 2000,
75
+ };
76
+ super(configWithDefaults);
77
+ this.clients = new Map();
78
+ this.bm25Generators = new Map();
79
+ this.metadataCache = new Map();
80
+ }
81
+ get storageDir() {
82
+ return this.config.storageDir;
83
+ }
84
+ /**
85
+ * Initialize LibSQL storage directory
86
+ */
87
+ async initialize() {
88
+ try {
89
+ console.log('[LibSQLDB] Initializing LibSQL storage at:', this.storageDir);
90
+ await fs.ensureDir(this.storageDir);
91
+ console.log('[LibSQLDB] LibSQL storage initialized');
92
+ }
93
+ catch (error) {
94
+ const errorMsg = `Failed to initialize LibSQL storage at ${this.storageDir}: ${error.message}`;
95
+ console.error(`[LibSQLDB] ${errorMsg}`);
96
+ if (error.code === 'EACCES') {
97
+ throw new Error(`${errorMsg}\nPermission denied. Check directory permissions.`);
98
+ }
99
+ else if (error.code === 'ENOSPC') {
100
+ throw new Error(`${errorMsg}\nDisk space exhausted. Free up disk space and try again.`);
101
+ }
102
+ throw new Error(errorMsg);
103
+ }
104
+ }
105
+ /**
106
+ * LibSQL collections are loaded on-demand when accessed
107
+ */
108
+ async ensureLoaded(collectionName) {
109
+ if (this.clients.has(collectionName)) {
110
+ return;
111
+ }
112
+ const dbPath = this.getDbPath(collectionName);
113
+ if (!(await fs.pathExists(dbPath))) {
114
+ throw new Error(`Collection ${collectionName} does not exist`);
115
+ }
116
+ await this.loadCollection(collectionName);
117
+ }
118
+ /**
119
+ * Get database file path for a collection
120
+ */
121
+ getDbPath(collectionName) {
122
+ return path.join(this.storageDir, `${collectionName}.db`);
123
+ }
124
+ /**
125
+ * Load collection from disk
126
+ */
127
+ async loadCollection(collectionName) {
128
+ const dbPath = this.getDbPath(collectionName);
129
+ console.log('[LibSQLDB] Loading collection:', collectionName);
130
+ try {
131
+ const client = (0, client_1.createClient)({ url: `file:${dbPath}` });
132
+ this.clients.set(collectionName, client);
133
+ // Load metadata
134
+ const result = await client.execute('SELECT key, value FROM _metadata');
135
+ const metadata = {};
136
+ for (const row of result.rows) {
137
+ metadata[row.key] = row.value;
138
+ }
139
+ const collectionMetadata = {
140
+ dimension: Number.parseInt(metadata.dimension, 10),
141
+ isHybrid: metadata.isHybrid === 'true',
142
+ documentCount: Number.parseInt(metadata.documentCount || '0', 10),
143
+ createdAt: metadata.createdAt,
144
+ };
145
+ this.metadataCache.set(collectionName, collectionMetadata);
146
+ // Load BM25 if hybrid collection
147
+ if (collectionMetadata.isHybrid) {
148
+ const bm25Path = path.join(this.storageDir, `${collectionName}_bm25.json`);
149
+ if (await fs.pathExists(bm25Path)) {
150
+ const bm25Json = await fs.readFile(bm25Path, 'utf-8');
151
+ const bm25 = simple_bm25_1.SimpleBM25.fromJSON(bm25Json);
152
+ this.bm25Generators.set(collectionName, bm25);
153
+ }
154
+ else {
155
+ console.warn(`[LibSQLDB] BM25 model file missing for hybrid collection ${collectionName}. Sparse search will be unavailable until re-indexing.`);
156
+ this.bm25Generators.set(collectionName, new simple_bm25_1.SimpleBM25(this.config.bm25Config));
157
+ }
158
+ }
159
+ console.log('[LibSQLDB] Loaded collection:', collectionName);
160
+ }
161
+ catch (error) {
162
+ console.error(`[LibSQLDB] Failed to load collection ${collectionName}:`, error.message);
163
+ throw error;
164
+ }
165
+ }
166
+ /**
167
+ * Get or create client for a collection
168
+ */
169
+ async getClient(collectionName) {
170
+ await this.ensureLoaded(collectionName);
171
+ return this.clients.get(collectionName);
172
+ }
173
+ /**
174
+ * Save BM25 model for a collection
175
+ */
176
+ async saveBM25(collectionName) {
177
+ const bm25 = this.bm25Generators.get(collectionName);
178
+ if (!bm25) {
179
+ return;
180
+ }
181
+ const bm25Path = path.join(this.storageDir, `${collectionName}_bm25.json`);
182
+ try {
183
+ await fs.writeFile(bm25Path, bm25.toJSON(), 'utf-8');
184
+ }
185
+ catch (error) {
186
+ console.error(`[LibSQLDB] Failed to save BM25 model for ${collectionName}:`, error.message);
187
+ throw new Error(`Failed to save BM25 model for ${collectionName}: ${error.message}`);
188
+ }
189
+ }
190
+ /**
191
+ * Update document count in metadata
192
+ */
193
+ async updateDocumentCount(collectionName) {
194
+ const client = this.clients.get(collectionName);
195
+ if (!client) {
196
+ console.warn(`[LibSQLDB] Cannot update document count: client not found for ${collectionName}`);
197
+ return;
198
+ }
199
+ try {
200
+ const result = await client.execute('SELECT COUNT(*) as count FROM documents');
201
+ const count = Number(result.rows[0].count);
202
+ if (Number.isNaN(count)) {
203
+ console.error(`[LibSQLDB] Invalid document count result for ${collectionName}`);
204
+ return;
205
+ }
206
+ await client.execute({
207
+ sql: 'INSERT OR REPLACE INTO _metadata (key, value) VALUES (?, ?)',
208
+ args: ['documentCount', String(count)],
209
+ });
210
+ const metadata = this.metadataCache.get(collectionName);
211
+ if (metadata) {
212
+ metadata.documentCount = count;
213
+ }
214
+ }
215
+ catch (error) {
216
+ console.error(`[LibSQLDB] Failed to update document count for ${collectionName}:`, error.message);
217
+ // Don't throw - this is a non-critical metadata update
218
+ }
219
+ }
220
+ /**
221
+ * Create collection with dense vectors only
222
+ */
223
+ async createCollection(collectionName, dimension, _description) {
224
+ await this.ensureInitialized();
225
+ const dbPath = this.getDbPath(collectionName);
226
+ if (await fs.pathExists(dbPath)) {
227
+ throw new Error(`Collection ${collectionName} already exists`);
228
+ }
229
+ console.log('[LibSQLDB] Creating collection:', collectionName);
230
+ console.log('[LibSQLDB] Vector dimension:', dimension);
231
+ const client = (0, client_1.createClient)({ url: `file:${dbPath}` });
232
+ // Configure SQLite settings
233
+ if (this.config.walMode) {
234
+ await client.execute('PRAGMA journal_mode=WAL');
235
+ }
236
+ await client.execute(`PRAGMA cache_size=${this.config.cacheSize}`);
237
+ // Create metadata table
238
+ await client.execute(`
239
+ CREATE TABLE _metadata (
240
+ key TEXT PRIMARY KEY,
241
+ value TEXT NOT NULL
242
+ )
243
+ `);
244
+ // Create documents table with vector column
245
+ await client.execute(`
246
+ CREATE TABLE documents (
247
+ id TEXT PRIMARY KEY,
248
+ content TEXT NOT NULL,
249
+ relative_path TEXT NOT NULL,
250
+ start_line INTEGER NOT NULL,
251
+ end_line INTEGER NOT NULL,
252
+ file_extension TEXT NOT NULL,
253
+ metadata TEXT NOT NULL,
254
+ dense_vector F32_BLOB(${dimension})
255
+ )
256
+ `);
257
+ // Create vector index
258
+ await client.execute('CREATE INDEX idx_dense ON documents(libsql_vector_idx(dense_vector))');
259
+ // Insert metadata
260
+ await client.batch([
261
+ { sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['dimension', String(dimension)] },
262
+ { sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['isHybrid', 'false'] },
263
+ { sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['createdAt', new Date().toISOString()] },
264
+ { sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['documentCount', '0'] },
265
+ ]);
266
+ this.clients.set(collectionName, client);
267
+ this.metadataCache.set(collectionName, {
268
+ dimension,
269
+ isHybrid: false,
270
+ documentCount: 0,
271
+ createdAt: new Date().toISOString(),
272
+ });
273
+ console.log('[LibSQLDB] Collection created:', collectionName);
274
+ }
275
+ /**
276
+ * Create collection with hybrid search support (dense + sparse vectors)
277
+ */
278
+ async createHybridCollection(collectionName, dimension, _description) {
279
+ await this.ensureInitialized();
280
+ const dbPath = this.getDbPath(collectionName);
281
+ if (await fs.pathExists(dbPath)) {
282
+ throw new Error(`Collection ${collectionName} already exists`);
283
+ }
284
+ console.log('[LibSQLDB] Creating hybrid collection:', collectionName);
285
+ console.log('[LibSQLDB] Vector dimension:', dimension);
286
+ const client = (0, client_1.createClient)({ url: `file:${dbPath}` });
287
+ // Configure SQLite settings
288
+ if (this.config.walMode) {
289
+ await client.execute('PRAGMA journal_mode=WAL');
290
+ }
291
+ await client.execute(`PRAGMA cache_size=${this.config.cacheSize}`);
292
+ // Create metadata table
293
+ await client.execute(`
294
+ CREATE TABLE _metadata (
295
+ key TEXT PRIMARY KEY,
296
+ value TEXT NOT NULL
297
+ )
298
+ `);
299
+ // Create documents table with vector and sparse columns
300
+ await client.execute(`
301
+ CREATE TABLE documents (
302
+ id TEXT PRIMARY KEY,
303
+ content TEXT NOT NULL,
304
+ relative_path TEXT NOT NULL,
305
+ start_line INTEGER NOT NULL,
306
+ end_line INTEGER NOT NULL,
307
+ file_extension TEXT NOT NULL,
308
+ metadata TEXT NOT NULL,
309
+ dense_vector F32_BLOB(${dimension}),
310
+ sparse_indices TEXT,
311
+ sparse_values TEXT
312
+ )
313
+ `);
314
+ // Create vector index
315
+ await client.execute('CREATE INDEX idx_dense ON documents(libsql_vector_idx(dense_vector))');
316
+ // Insert metadata
317
+ await client.batch([
318
+ { sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['dimension', String(dimension)] },
319
+ { sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['isHybrid', 'true'] },
320
+ { sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['createdAt', new Date().toISOString()] },
321
+ { sql: 'INSERT INTO _metadata VALUES (?, ?)', args: ['documentCount', '0'] },
322
+ ]);
323
+ this.clients.set(collectionName, client);
324
+ this.metadataCache.set(collectionName, {
325
+ dimension,
326
+ isHybrid: true,
327
+ documentCount: 0,
328
+ createdAt: new Date().toISOString(),
329
+ });
330
+ // Initialize BM25 generator
331
+ this.bm25Generators.set(collectionName, new simple_bm25_1.SimpleBM25(this.config.bm25Config));
332
+ console.log('[LibSQLDB] Hybrid collection created:', collectionName);
333
+ }
334
+ /**
335
+ * Drop collection
336
+ */
337
+ async dropCollection(collectionName) {
338
+ await this.ensureInitialized();
339
+ console.log('[LibSQLDB] Dropping collection:', collectionName);
340
+ // Close client if exists
341
+ const client = this.clients.get(collectionName);
342
+ if (client) {
343
+ client.close();
344
+ this.clients.delete(collectionName);
345
+ }
346
+ // Remove from caches
347
+ this.metadataCache.delete(collectionName);
348
+ this.bm25Generators.delete(collectionName);
349
+ // Remove database file
350
+ const dbPath = this.getDbPath(collectionName);
351
+ if (await fs.pathExists(dbPath)) {
352
+ await fs.remove(dbPath);
353
+ }
354
+ // Remove BM25 file if exists
355
+ const bm25Path = path.join(this.storageDir, `${collectionName}_bm25.json`);
356
+ if (await fs.pathExists(bm25Path)) {
357
+ await fs.remove(bm25Path);
358
+ }
359
+ // Remove WAL files if they exist
360
+ const walPath = `${dbPath}-wal`;
361
+ const shmPath = `${dbPath}-shm`;
362
+ await fs.remove(walPath);
363
+ await fs.remove(shmPath);
364
+ console.log('[LibSQLDB] Collection dropped:', collectionName);
365
+ }
366
+ /**
367
+ * Check if collection exists
368
+ */
369
+ async hasCollection(collectionName) {
370
+ await this.ensureInitialized();
371
+ if (this.clients.has(collectionName)) {
372
+ return true;
373
+ }
374
+ const dbPath = this.getDbPath(collectionName);
375
+ return await fs.pathExists(dbPath);
376
+ }
377
+ /**
378
+ * List all collections
379
+ */
380
+ async listCollections() {
381
+ await this.ensureInitialized();
382
+ const collections = [];
383
+ if (await fs.pathExists(this.storageDir)) {
384
+ const entries = await fs.readdir(this.storageDir, { withFileTypes: true });
385
+ for (const entry of entries) {
386
+ if (entry.isFile() && entry.name.endsWith('.db')) {
387
+ collections.push(entry.name.replace('.db', ''));
388
+ }
389
+ }
390
+ }
391
+ return collections;
392
+ }
393
+ /**
394
+ * Insert vector documents (dense only)
395
+ */
396
+ async insert(collectionName, documents) {
397
+ await this.ensureInitialized();
398
+ const client = await this.getClient(collectionName);
399
+ const metadata = this.metadataCache.get(collectionName);
400
+ if (!metadata) {
401
+ throw new Error(`Collection ${collectionName} metadata not found`);
402
+ }
403
+ console.log('[LibSQLDB] Inserting documents:', documents.length);
404
+ // Validate vector dimensions
405
+ for (const doc of documents) {
406
+ if (doc.vector.length !== metadata.dimension) {
407
+ throw new Error(`Vector dimension mismatch for document '${doc.id}': `
408
+ + `expected ${metadata.dimension}, got ${doc.vector.length}`);
409
+ }
410
+ }
411
+ // Batch insert
412
+ const statements = documents.map((doc) => ({
413
+ sql: `INSERT OR REPLACE INTO documents
414
+ (id, content, relative_path, start_line, end_line, file_extension, metadata, dense_vector)
415
+ VALUES (?, ?, ?, ?, ?, ?, ?, vector32(?))`,
416
+ args: [
417
+ doc.id,
418
+ doc.content,
419
+ doc.relativePath,
420
+ doc.startLine,
421
+ doc.endLine,
422
+ doc.fileExtension,
423
+ JSON.stringify(doc.metadata),
424
+ `[${doc.vector.join(',')}]`,
425
+ ],
426
+ }));
427
+ await client.batch(statements);
428
+ await this.updateDocumentCount(collectionName);
429
+ console.log('[LibSQLDB] Inserted documents:', documents.length);
430
+ }
431
+ /**
432
+ * Insert hybrid vector documents (dense + sparse)
433
+ */
434
+ async insertHybrid(collectionName, documents) {
435
+ await this.ensureInitialized();
436
+ const client = await this.getClient(collectionName);
437
+ const metadata = this.metadataCache.get(collectionName);
438
+ if (!metadata) {
439
+ throw new Error(`Collection ${collectionName} metadata not found`);
440
+ }
441
+ if (!metadata.isHybrid) {
442
+ throw new Error(`Collection ${collectionName} is not a hybrid collection`);
443
+ }
444
+ console.log('[LibSQLDB] Inserting hybrid documents:', documents.length);
445
+ // Validate vector dimensions
446
+ for (const doc of documents) {
447
+ if (doc.vector.length !== metadata.dimension) {
448
+ throw new Error(`Vector dimension mismatch for document '${doc.id}': `
449
+ + `expected ${metadata.dimension}, got ${doc.vector.length}`);
450
+ }
451
+ }
452
+ // Get or create BM25 generator
453
+ let bm25 = this.bm25Generators.get(collectionName);
454
+ if (!bm25) {
455
+ bm25 = new simple_bm25_1.SimpleBM25(this.config.bm25Config);
456
+ this.bm25Generators.set(collectionName, bm25);
457
+ }
458
+ // Train BM25 on all documents (existing + new)
459
+ const existingResult = await client.execute('SELECT content FROM documents');
460
+ const existingContents = existingResult.rows.map((r) => r.content);
461
+ const allContents = [...existingContents, ...documents.map((d) => d.content)];
462
+ bm25.learn(allContents);
463
+ // Generate sparse vectors
464
+ const sparseVectors = documents.map((doc) => bm25.generate(doc.content));
465
+ // Batch insert
466
+ const statements = documents.map((doc, i) => ({
467
+ sql: `INSERT OR REPLACE INTO documents
468
+ (id, content, relative_path, start_line, end_line, file_extension, metadata,
469
+ dense_vector, sparse_indices, sparse_values)
470
+ VALUES (?, ?, ?, ?, ?, ?, ?, vector32(?), ?, ?)`,
471
+ args: [
472
+ doc.id,
473
+ doc.content,
474
+ doc.relativePath,
475
+ doc.startLine,
476
+ doc.endLine,
477
+ doc.fileExtension,
478
+ JSON.stringify(doc.metadata),
479
+ `[${doc.vector.join(',')}]`,
480
+ JSON.stringify(sparseVectors[i].indices),
481
+ JSON.stringify(sparseVectors[i].values),
482
+ ],
483
+ }));
484
+ await client.batch(statements);
485
+ await this.updateDocumentCount(collectionName);
486
+ await this.saveBM25(collectionName);
487
+ console.log('[LibSQLDB] Inserted hybrid documents:', documents.length);
488
+ }
489
+ /**
490
+ * Search similar vectors (dense search only)
491
+ */
492
+ async search(collectionName, queryVector, options) {
493
+ await this.ensureInitialized();
494
+ const client = await this.getClient(collectionName);
495
+ const topK = options?.topK || 10;
496
+ const queryVectorStr = `[${queryVector.join(',')}]`;
497
+ console.log('[LibSQLDB] Searching vectors, topK:', topK);
498
+ // Build query with vector_top_k
499
+ // Note: vector_top_k returns 'id' (rowid), we calculate distance with vector_distance_cos
500
+ let sql = `
501
+ SELECT d.*, vector_distance_cos(d.dense_vector, vector32(?)) AS distance
502
+ FROM vector_top_k('idx_dense', vector32(?), ?) AS vt
503
+ JOIN documents d ON d.rowid = vt.id
504
+ `;
505
+ const args = [queryVectorStr, queryVectorStr, topK * 2];
506
+ // Apply filter if provided
507
+ if (options?.filterExpr) {
508
+ const whereClause = this.parseFilterExpression(options.filterExpr);
509
+ sql += ` WHERE ${whereClause}`;
510
+ }
511
+ sql += ' ORDER BY distance ASC LIMIT ?';
512
+ args.push(topK);
513
+ const result = await client.execute({ sql, args });
514
+ const searchResults = [];
515
+ for (const row of result.rows) {
516
+ const score = 1 / (1 + row.distance);
517
+ // Apply threshold filter
518
+ if (options?.threshold !== undefined && score < options.threshold) {
519
+ continue;
520
+ }
521
+ searchResults.push({
522
+ document: this.rowToDocument(row),
523
+ score,
524
+ });
525
+ }
526
+ console.log('[LibSQLDB] Found results:', searchResults.length);
527
+ return searchResults;
528
+ }
529
+ /**
530
+ * Hybrid search with multiple vector fields (dense + sparse)
531
+ */
532
+ async hybridSearch(collectionName, searchRequests, options) {
533
+ await this.ensureInitialized();
534
+ const client = await this.getClient(collectionName);
535
+ const metadata = this.metadataCache.get(collectionName);
536
+ if (!metadata?.isHybrid) {
537
+ throw new Error(`Collection ${collectionName} is not a hybrid collection`);
538
+ }
539
+ const limit = options?.limit || 10;
540
+ console.log('[LibSQLDB] Hybrid search, requests:', searchRequests.length);
541
+ // Process search requests
542
+ const denseResults = new Map();
543
+ const sparseResults = new Map();
544
+ for (const request of searchRequests) {
545
+ if (request.anns_field === 'vector' || request.anns_field === 'dense') {
546
+ await this.performDenseSearch(client, request.data, limit, denseResults);
547
+ }
548
+ else if (request.anns_field === 'sparse' || request.anns_field === 'sparse_vector') {
549
+ await this.performSparseSearch(client, collectionName, request.data, sparseResults);
550
+ }
551
+ }
552
+ // Apply RRF reranking
553
+ const k = options?.rerank?.params?.k || 60;
554
+ const rrfScores = this.applyRRF(denseResults, sparseResults, k);
555
+ // Fetch full documents for top results
556
+ const topIds = Array.from(rrfScores.entries())
557
+ .sort((a, b) => b[1] - a[1])
558
+ .slice(0, limit)
559
+ .map(([id]) => id);
560
+ if (topIds.length === 0) {
561
+ return [];
562
+ }
563
+ const placeholders = topIds.map(() => '?').join(',');
564
+ const docsResult = await client.execute({
565
+ sql: `SELECT * FROM documents WHERE id IN (${placeholders})`,
566
+ args: topIds,
567
+ });
568
+ const results = [];
569
+ const docMap = new Map(docsResult.rows.map((row) => [row.id, row]));
570
+ for (const id of topIds) {
571
+ const row = docMap.get(id);
572
+ if (row) {
573
+ results.push({
574
+ document: this.rowToDocument(row),
575
+ score: rrfScores.get(id) || 0,
576
+ });
577
+ }
578
+ }
579
+ console.log('[LibSQLDB] Hybrid search results:', results.length);
580
+ return results;
581
+ }
582
+ /**
583
+ * Perform dense vector search
584
+ */
585
+ async performDenseSearch(client, queryVector, limit, results) {
586
+ const queryVectorStr = `[${queryVector.join(',')}]`;
587
+ const sql = `
588
+ SELECT d.id, vector_distance_cos(d.dense_vector, vector32(?)) AS distance
589
+ FROM vector_top_k('idx_dense', vector32(?), ?) AS vt
590
+ JOIN documents d ON d.rowid = vt.id
591
+ `;
592
+ const result = await client.execute({
593
+ sql,
594
+ args: [queryVectorStr, queryVectorStr, limit * 2],
595
+ });
596
+ for (const row of result.rows) {
597
+ const score = 1 / (1 + row.distance);
598
+ results.set(row.id, score);
599
+ }
600
+ }
601
+ /**
602
+ * Perform sparse search using BM25
603
+ */
604
+ async performSparseSearch(client, collectionName, queryText, results) {
605
+ const bm25 = this.bm25Generators.get(collectionName);
606
+ if (!bm25) {
607
+ console.warn(`[LibSQLDB] BM25 generator not found for collection ${collectionName}. Sparse search skipped.`);
608
+ return;
609
+ }
610
+ if (!bm25.isTrained()) {
611
+ console.warn(`[LibSQLDB] BM25 model not trained for collection ${collectionName}. Sparse search skipped.`);
612
+ return;
613
+ }
614
+ const queryVector = bm25.generate(queryText);
615
+ const queryMap = new Map();
616
+ for (let i = 0; i < queryVector.indices.length; i++) {
617
+ queryMap.set(queryVector.indices[i], queryVector.values[i]);
618
+ }
619
+ // Fetch all documents with sparse vectors and compute scores
620
+ const result = await client.execute(`
621
+ SELECT id, sparse_indices, sparse_values FROM documents
622
+ WHERE sparse_indices IS NOT NULL
623
+ `);
624
+ for (const row of result.rows) {
625
+ const indices = JSON.parse(row.sparse_indices);
626
+ const values = JSON.parse(row.sparse_values);
627
+ let score = 0;
628
+ for (let i = 0; i < indices.length; i++) {
629
+ const queryVal = queryMap.get(indices[i]);
630
+ if (queryVal !== undefined) {
631
+ score += values[i] * queryVal;
632
+ }
633
+ }
634
+ if (score > 0) {
635
+ results.set(row.id, score);
636
+ }
637
+ }
638
+ }
639
+ /**
640
+ * Apply Reciprocal Rank Fusion (RRF) reranking
641
+ */
642
+ applyRRF(denseResults, sparseResults, k) {
643
+ const denseRanks = this.computeRanks(denseResults);
644
+ const sparseRanks = this.computeRanks(sparseResults);
645
+ const allIds = new Set([...denseResults.keys(), ...sparseResults.keys()]);
646
+ const rrfScores = new Map();
647
+ for (const id of allIds) {
648
+ let score = 0;
649
+ const denseRank = denseRanks.get(id);
650
+ const sparseRank = sparseRanks.get(id);
651
+ if (denseRank !== undefined) {
652
+ score += 1 / (k + denseRank);
653
+ }
654
+ if (sparseRank !== undefined) {
655
+ score += 1 / (k + sparseRank);
656
+ }
657
+ rrfScores.set(id, score);
658
+ }
659
+ return rrfScores;
660
+ }
661
+ /**
662
+ * Compute ranks from scores
663
+ */
664
+ computeRanks(scores) {
665
+ const ranks = new Map();
666
+ const sorted = Array.from(scores.entries()).sort((a, b) => b[1] - a[1]);
667
+ sorted.forEach(([id], index) => ranks.set(id, index + 1));
668
+ return ranks;
669
+ }
670
+ /**
671
+ * Convert database row to VectorDocument
672
+ */
673
+ rowToDocument(row) {
674
+ return {
675
+ id: row.id,
676
+ vector: [],
677
+ content: row.content,
678
+ relativePath: row.relative_path,
679
+ startLine: row.start_line,
680
+ endLine: row.end_line,
681
+ fileExtension: row.file_extension,
682
+ metadata: JSON.parse(row.metadata),
683
+ };
684
+ }
685
+ /**
686
+ * Parse filter expression to SQL WHERE clause
687
+ */
688
+ parseFilterExpression(expr) {
689
+ // Convert Milvus-style filters to SQL WHERE clause
690
+ // "fileExtension == '.ts'" -> "file_extension = '.ts'"
691
+ // "fileExtension in ['.ts', '.js']" -> "file_extension IN ('.ts', '.js')"
692
+ if (expr.includes(' in ')) {
693
+ const match = expr.match(/(\w+)\s+in\s+\[(.*)\]/);
694
+ if (match) {
695
+ const field = this.mapFieldName(match[1]);
696
+ const values = match[2].split(',').map((v) => v.trim());
697
+ return `${field} IN (${values.join(',')})`;
698
+ }
699
+ }
700
+ if (expr.includes('==')) {
701
+ const match = expr.match(/(\w+)\s*==\s*(.+)/);
702
+ if (match) {
703
+ const field = this.mapFieldName(match[1]);
704
+ return `${field} = ${match[2].trim()}`;
705
+ }
706
+ }
707
+ // Return as-is if not recognized
708
+ console.warn(`[LibSQLDB] Unrecognized filter expression: ${expr}`);
709
+ return expr;
710
+ }
711
+ /**
712
+ * Map field names to database column names
713
+ */
714
+ mapFieldName(field) {
715
+ const mapping = {
716
+ relativePath: 'relative_path',
717
+ startLine: 'start_line',
718
+ endLine: 'end_line',
719
+ fileExtension: 'file_extension',
720
+ };
721
+ return mapping[field] || field;
722
+ }
723
+ /**
724
+ * Delete documents by IDs
725
+ *
726
+ * Key advantage over FAISS: LibSQL supports document deletion via SQL DELETE
727
+ */
728
+ async delete(collectionName, ids) {
729
+ await this.ensureInitialized();
730
+ const client = await this.getClient(collectionName);
731
+ console.log(`[LibSQLDB] Deleting ${ids.length} documents from ${collectionName}`);
732
+ const placeholders = ids.map(() => '?').join(',');
733
+ await client.execute({
734
+ sql: `DELETE FROM documents WHERE id IN (${placeholders})`,
735
+ args: ids,
736
+ });
737
+ await this.updateDocumentCount(collectionName);
738
+ // Re-train BM25 if hybrid collection
739
+ const metadata = this.metadataCache.get(collectionName);
740
+ if (metadata?.isHybrid) {
741
+ const bm25 = this.bm25Generators.get(collectionName);
742
+ if (bm25) {
743
+ const result = await client.execute('SELECT content FROM documents');
744
+ const contents = result.rows.map((r) => r.content);
745
+ if (contents.length > 0) {
746
+ bm25.learn(contents);
747
+ await this.saveBM25(collectionName);
748
+ }
749
+ }
750
+ }
751
+ console.log(`[LibSQLDB] Deleted ${ids.length} documents`);
752
+ }
753
+ /**
754
+ * Query documents with filter conditions
755
+ *
756
+ * Key advantage over FAISS: LibSQL supports SQL WHERE clauses
757
+ */
758
+ async query(collectionName, filter, outputFields, limit) {
759
+ await this.ensureInitialized();
760
+ const client = await this.getClient(collectionName);
761
+ console.log('[LibSQLDB] Querying documents');
762
+ // Build SELECT clause
763
+ const fields = outputFields.length > 0
764
+ ? outputFields.map((f) => this.mapFieldName(f)).join(', ')
765
+ : '*';
766
+ let sql = `SELECT ${fields} FROM documents`;
767
+ const args = [];
768
+ // Apply filter
769
+ if (filter && filter.trim()) {
770
+ const whereClause = this.parseFilterExpression(filter);
771
+ sql += ` WHERE ${whereClause}`;
772
+ }
773
+ sql += ' LIMIT ?';
774
+ args.push(limit || 100);
775
+ const result = await client.execute({ sql, args });
776
+ return result.rows.map((row) => this.rowToResult(row, outputFields));
777
+ }
778
+ /**
779
+ * Convert row to result object
780
+ */
781
+ rowToResult(row, outputFields) {
782
+ const result = {};
783
+ for (const field of outputFields) {
784
+ const dbField = this.mapFieldName(field);
785
+ if (row[dbField] !== undefined) {
786
+ result[field] = row[dbField];
787
+ }
788
+ else if (row[field] !== undefined) {
789
+ result[field] = row[field];
790
+ }
791
+ }
792
+ return result;
793
+ }
794
+ /**
795
+ * Check collection limit
796
+ * LibSQL has no inherent collection limit (only limited by disk space)
797
+ */
798
+ async checkCollectionLimit() {
799
+ return true;
800
+ }
801
+ }
802
+ exports.LibSQLVectorDatabase = LibSQLVectorDatabase;
803
+ //# sourceMappingURL=libsql-vectordb.js.map