@pleaseai/context-please-core 0.2.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/README.md +113 -46
  2. package/dist/.tsbuildinfo +1 -1
  3. package/dist/context.d.ts +37 -12
  4. package/dist/context.d.ts.map +1 -1
  5. package/dist/context.js +199 -73
  6. package/dist/context.js.map +1 -1
  7. package/dist/embedding/base-embedding.d.ts.map +1 -1
  8. package/dist/embedding/base-embedding.js +5 -1
  9. package/dist/embedding/base-embedding.js.map +1 -1
  10. package/dist/embedding/gemini-embedding.d.ts +43 -1
  11. package/dist/embedding/gemini-embedding.d.ts.map +1 -1
  12. package/dist/embedding/gemini-embedding.js +160 -31
  13. package/dist/embedding/gemini-embedding.js.map +1 -1
  14. package/dist/embedding/huggingface-embedding.d.ts +70 -0
  15. package/dist/embedding/huggingface-embedding.d.ts.map +1 -0
  16. package/dist/embedding/huggingface-embedding.js +270 -0
  17. package/dist/embedding/huggingface-embedding.js.map +1 -0
  18. package/dist/embedding/index.d.ts +3 -2
  19. package/dist/embedding/index.d.ts.map +1 -1
  20. package/dist/embedding/index.js +3 -2
  21. package/dist/embedding/index.js.map +1 -1
  22. package/dist/embedding/ollama-embedding.d.ts +2 -1
  23. package/dist/embedding/ollama-embedding.d.ts.map +1 -1
  24. package/dist/embedding/ollama-embedding.js +5 -5
  25. package/dist/embedding/ollama-embedding.js.map +1 -1
  26. package/dist/embedding/openai-embedding.d.ts +2 -1
  27. package/dist/embedding/openai-embedding.d.ts.map +1 -1
  28. package/dist/embedding/openai-embedding.js +10 -10
  29. package/dist/embedding/openai-embedding.js.map +1 -1
  30. package/dist/embedding/voyageai-embedding.d.ts +2 -1
  31. package/dist/embedding/voyageai-embedding.d.ts.map +1 -1
  32. package/dist/embedding/voyageai-embedding.js +23 -23
  33. package/dist/embedding/voyageai-embedding.js.map +1 -1
  34. package/dist/index.d.ts +4 -4
  35. package/dist/index.d.ts.map +1 -1
  36. package/dist/index.js +4 -4
  37. package/dist/index.js.map +1 -1
  38. package/dist/splitter/ast-splitter.d.ts +1 -1
  39. package/dist/splitter/ast-splitter.d.ts.map +1 -1
  40. package/dist/splitter/ast-splitter.js +29 -15
  41. package/dist/splitter/ast-splitter.js.map +1 -1
  42. package/dist/splitter/index.d.ts +4 -4
  43. package/dist/splitter/index.d.ts.map +1 -1
  44. package/dist/splitter/index.js +1 -1
  45. package/dist/splitter/index.js.map +1 -1
  46. package/dist/splitter/langchain-splitter.d.ts +1 -1
  47. package/dist/splitter/langchain-splitter.d.ts.map +1 -1
  48. package/dist/splitter/langchain-splitter.js.map +1 -1
  49. package/dist/sync/merkle.d.ts.map +1 -1
  50. package/dist/sync/merkle.js +9 -9
  51. package/dist/sync/merkle.js.map +1 -1
  52. package/dist/sync/synchronizer.d.ts.map +1 -1
  53. package/dist/sync/synchronizer.js +15 -15
  54. package/dist/sync/synchronizer.js.map +1 -1
  55. package/dist/types.d.ts.map +1 -1
  56. package/dist/utils/env-manager.d.ts.map +1 -1
  57. package/dist/utils/env-manager.js +3 -3
  58. package/dist/utils/env-manager.js.map +1 -1
  59. package/dist/utils/index.d.ts.map +1 -1
  60. package/dist/utils/index.js.map +1 -1
  61. package/dist/vectordb/base/base-vector-database.d.ts +1 -1
  62. package/dist/vectordb/base/base-vector-database.d.ts.map +1 -1
  63. package/dist/vectordb/base/base-vector-database.js.map +1 -1
  64. package/dist/vectordb/factory.d.ts +26 -7
  65. package/dist/vectordb/factory.d.ts.map +1 -1
  66. package/dist/vectordb/factory.js +68 -2
  67. package/dist/vectordb/factory.js.map +1 -1
  68. package/dist/vectordb/faiss-vectordb.d.ts +162 -0
  69. package/dist/vectordb/faiss-vectordb.d.ts.map +1 -0
  70. package/dist/vectordb/faiss-vectordb.js +762 -0
  71. package/dist/vectordb/faiss-vectordb.js.map +1 -0
  72. package/dist/vectordb/index.d.ts +10 -9
  73. package/dist/vectordb/index.d.ts.map +1 -1
  74. package/dist/vectordb/index.js +28 -9
  75. package/dist/vectordb/index.js.map +1 -1
  76. package/dist/vectordb/milvus-restful-vectordb.d.ts +6 -5
  77. package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -1
  78. package/dist/vectordb/milvus-restful-vectordb.js +136 -136
  79. package/dist/vectordb/milvus-restful-vectordb.js.map +1 -1
  80. package/dist/vectordb/milvus-vectordb.d.ts +5 -4
  81. package/dist/vectordb/milvus-vectordb.d.ts.map +1 -1
  82. package/dist/vectordb/milvus-vectordb.js +31 -31
  83. package/dist/vectordb/milvus-vectordb.js.map +1 -1
  84. package/dist/vectordb/qdrant-vectordb.d.ts +28 -3
  85. package/dist/vectordb/qdrant-vectordb.d.ts.map +1 -1
  86. package/dist/vectordb/qdrant-vectordb.js +298 -73
  87. package/dist/vectordb/qdrant-vectordb.js.map +1 -1
  88. package/dist/vectordb/sparse/index.d.ts +2 -2
  89. package/dist/vectordb/sparse/index.d.ts.map +1 -1
  90. package/dist/vectordb/sparse/index.js +4 -4
  91. package/dist/vectordb/sparse/index.js.map +1 -1
  92. package/dist/vectordb/sparse/simple-bm25.d.ts +13 -2
  93. package/dist/vectordb/sparse/simple-bm25.d.ts.map +1 -1
  94. package/dist/vectordb/sparse/simple-bm25.js +80 -9
  95. package/dist/vectordb/sparse/simple-bm25.js.map +1 -1
  96. package/dist/vectordb/sparse/sparse-vector-generator.d.ts +7 -7
  97. package/dist/vectordb/sparse/sparse-vector-generator.d.ts.map +1 -1
  98. package/dist/vectordb/sparse/types.d.ts.map +1 -1
  99. package/dist/vectordb/types.d.ts +12 -12
  100. package/dist/vectordb/types.d.ts.map +1 -1
  101. package/dist/vectordb/types.js +1 -1
  102. package/dist/vectordb/types.js.map +1 -1
  103. package/dist/vectordb/zilliz-utils.d.ts +10 -10
  104. package/dist/vectordb/zilliz-utils.d.ts.map +1 -1
  105. package/dist/vectordb/zilliz-utils.js +16 -17
  106. package/dist/vectordb/zilliz-utils.js.map +1 -1
  107. package/package.json +16 -13
@@ -0,0 +1,762 @@
1
+ "use strict";
2
// Interop helpers emitted by the TypeScript compiler for `import * as x` under
// CommonJS. Each helper reuses a copy already present on `this` when one exists.

// Re-exports property `key` of `source` on `target` under the name `alias`
// (defaults to `key`). Where Object.create exists the binding is a live getter
// unless the source already exposes a suitable accessor descriptor.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter = !descriptor
            || ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        target[alias] = source[key];
    });

// Installs `value` as the `default` export of the namespace object `target`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        target["default"] = value;
    });

// Wraps a CommonJS module in a namespace object: modules flagged `__esModule`
// are returned untouched; otherwise every own key except "default" is bound
// onto a fresh object whose `default` is the module itself.
var __importStar = (this && this.__importStar) || (function () {
    // Self-replacing function: the first call picks the key-listing strategy.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var position = 0; position < keys.length; position++) {
                if (keys[position] !== "default") {
                    __createBinding(namespace, mod, keys[position]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.FaissVectorDatabase = void 0;
37
+ const faiss_node_1 = require("faiss-node");
38
+ const fs = __importStar(require("fs-extra"));
39
+ const os = __importStar(require("os"));
40
+ const path = __importStar(require("path"));
41
+ const base_vector_database_1 = require("./base/base-vector-database");
42
+ const simple_bm25_1 = require("./sparse/simple-bm25");
43
/**
 * FAISS Vector Database implementation for local-only deployments
 *
 * Features:
 * - Zero-configuration file-based storage
 * - Hybrid search with BM25 sparse vectors
 * - RRF (Reciprocal Rank Fusion) reranking
 * - Perfect for local development and small-to-medium codebases
 *
 * Architecture:
 * - Dense vectors: Stored in FAISS IndexFlatL2 (L2 distance)
 * - Sparse vectors: Generated using SimpleBM25 for keyword matching
 * - Hybrid search: Combines both using RRF fusion
 *
 * Storage structure:
 * ~/.context/faiss-indexes/
 * └── {collection_name}/
 *     ├── dense.index     # FAISS index file
 *     ├── metadata.json   # Collection metadata (name, dimension, isHybrid, documentCount, createdAt)
 *     ├── documents.json  # Stored documents as an array, in insertion order
 *     └── sparse.json     # BM25 model (hybrid collections only)
 *
 * Invariant:
 * - The n-th vector added to the FAISS index belongs to the n-th entry of the
 *   `documents` Map (insertion order). Search maps FAISS labels back to
 *   documents through this ordering, so every insert must add exactly one
 *   vector per newly stored document.
 *
 * Limitations:
 * - Document deletion is NOT supported (FAISS IndexFlatL2 limitation)
 * - Documents cannot be updated: inserting an id that is already stored is skipped
 * - Query filters are NOT supported (returns all documents)
 * - To remove documents, you must drop and recreate the collection
 */
class FaissVectorDatabase extends base_vector_database_1.BaseVectorDatabase {
    /**
     * @param config Database configuration. `storageDir` falls back to
     *   `~/.context/faiss-indexes`; `bm25Config` is handed to SimpleBM25 when a
     *   hybrid collection is created.
     */
    constructor(config) {
        // Set storageDir default before calling super(), which triggers initialize()
        const configWithDefaults = {
            ...config,
            storageDir: config.storageDir || path.join(os.homedir(), '.context', 'faiss-indexes'),
        };
        super(configWithDefaults);
        // In-memory cache: collection name -> { index, metadata, documents, bm25? }
        this.collections = new Map();
    }
    /**
     * Get storage directory (lazily computed from config)
     */
    get storageDir() {
        return this.config.storageDir;
    }
    /**
     * Initialize FAISS storage directory
     *
     * @throws Error with a hint for EACCES / ENOSPC / ENOENT; the original
     *   filesystem error is attached as `cause`.
     */
    async initialize() {
        try {
            console.log('[FaissDB] 🔧 Initializing FAISS storage at:', this.storageDir);
            await fs.ensureDir(this.storageDir);
            console.log('[FaissDB] ✅ FAISS storage initialized');
        }
        catch (error) {
            const errorMsg = `Failed to initialize FAISS storage at ${this.storageDir}: ${error.message}`;
            console.error(`[FaissDB] ❌ ${errorMsg}`);
            console.error(`[FaissDB] Error code: ${error.code || 'UNKNOWN'}`);
            if (error.code === 'EACCES') {
                throw new Error(`${errorMsg}\nPermission denied. Check directory permissions.`, { cause: error });
            }
            else if (error.code === 'ENOSPC') {
                throw new Error(`${errorMsg}\nDisk space exhausted. Free up disk space and try again.`, { cause: error });
            }
            else if (error.code === 'ENOENT') {
                throw new Error(`${errorMsg}\nParent directory does not exist.`, { cause: error });
            }
            else {
                throw new Error(errorMsg, { cause: error });
            }
        }
    }
    /**
     * FAISS indexes are loaded on-demand when accessed
     *
     * @throws Error if the collection is neither cached nor present on disk
     */
    async ensureLoaded(collectionName) {
        if (this.collections.has(collectionName)) {
            return;
        }
        const collectionPath = this.getCollectionPath(collectionName);
        if (!(await fs.pathExists(collectionPath))) {
            throw new Error(`Collection ${collectionName} does not exist`);
        }
        await this.loadCollection(collectionName);
    }
    /**
     * Get collection storage path
     *
     * The name must resolve to a location strictly inside the storage
     * directory. Without this check an empty name, '.' or '../x' would point at
     * the storage root or outside it, and dropCollection() would remove it.
     *
     * @throws Error if the name is empty, not a string, or escapes storageDir
     */
    getCollectionPath(collectionName) {
        if (typeof collectionName !== 'string' || collectionName.trim() === '') {
            throw new Error(`Invalid collection name: ${JSON.stringify(collectionName)}`);
        }
        const collectionPath = path.join(this.storageDir, collectionName);
        const relative = path.relative(this.storageDir, collectionPath);
        const escapesStorage = relative === ''
            || relative === '..'
            || relative.startsWith(`..${path.sep}`)
            || path.isAbsolute(relative);
        if (escapesStorage) {
            throw new Error(`Invalid collection name '${collectionName}': it must resolve to a path inside the storage directory`);
        }
        return collectionPath;
    }
    /**
     * Load collection from disk
     *
     * Reads metadata.json, dense.index, documents.json and (for hybrid
     * collections) sparse.json, then caches the collection in memory.
     *
     * @throws Error naming the file that failed to load (original error in `cause`)
     */
    async loadCollection(collectionName) {
        const collectionPath = this.getCollectionPath(collectionName);
        console.log('[FaissDB] 📂 Loading collection:', collectionName);
        try {
            // Load metadata
            const metadataPath = path.join(collectionPath, 'metadata.json');
            let metadata;
            try {
                metadata = await fs.readJson(metadataPath);
            }
            catch (error) {
                throw new Error(`Failed to load collection metadata from ${metadataPath}: ${error.message}. `
                    + `The metadata file may be corrupted. Try re-indexing the collection.`, { cause: error });
            }
            // Load FAISS index
            const indexPath = path.join(collectionPath, 'dense.index');
            let index;
            try {
                index = faiss_node_1.IndexFlatL2.read(indexPath);
            }
            catch (error) {
                throw new Error(`Failed to load FAISS index from ${indexPath}: ${error.message}. `
                    + `The index file may be corrupted. Try re-indexing the collection.`, { cause: error });
            }
            // Load documents
            const documentsPath = path.join(collectionPath, 'documents.json');
            let documentsArray;
            try {
                documentsArray = await fs.readJson(documentsPath);
            }
            catch (error) {
                throw new Error(`Failed to load documents metadata from ${documentsPath}: ${error.message}. `
                    + `The documents file may be corrupted. Try re-indexing the collection.`, { cause: error });
            }
            // Array order is preserved by the Map; search relies on it to map FAISS labels to documents
            const documents = new Map(documentsArray.map((doc) => [doc.id, doc]));
            // Load BM25 model if hybrid collection
            let bm25;
            if (metadata.isHybrid) {
                const bm25Path = path.join(collectionPath, 'sparse.json');
                try {
                    const bm25Json = await fs.readFile(bm25Path, 'utf-8');
                    bm25 = simple_bm25_1.SimpleBM25.fromJSON(bm25Json);
                }
                catch (error) {
                    throw new Error(`Failed to load BM25 model from ${bm25Path}: ${error.message}. `
                        + `The BM25 file may be corrupted. Try re-indexing the collection.`, { cause: error });
                }
            }
            // A vector/document count mismatch means labels no longer line up with documents
            const vectorCount = index.ntotal();
            if (vectorCount !== documents.size) {
                console.warn(`[FaissDB] ⚠️ Collection ${collectionName} holds ${vectorCount} vectors but ${documents.size} documents. `
                    + `Search results may reference the wrong documents. Consider re-indexing.`);
            }
            this.collections.set(collectionName, {
                index,
                metadata,
                documents,
                bm25,
            });
            console.log('[FaissDB] ✅ Loaded collection:', collectionName);
            console.log('[FaissDB] 📊 Document count:', documents.size);
        }
        catch (error) {
            console.error(`[FaissDB] ❌ Failed to load collection ${collectionName}:`, error.message);
            throw error;
        }
    }
    /**
     * Save collection to disk
     *
     * Writes the index, metadata, documents and (for hybrid collections) the
     * BM25 model. The files are written one after another, so a failure part-way
     * can leave the on-disk collection inconsistent.
     *
     * @throws Error if the collection is not in memory or any file cannot be written
     */
    async saveCollection(collectionName) {
        const collection = this.collections.get(collectionName);
        if (!collection) {
            throw new Error(`Collection ${collectionName} not found in memory`);
        }
        const collectionPath = this.getCollectionPath(collectionName);
        try {
            await fs.ensureDir(collectionPath);
        }
        catch (error) {
            const errorMsg = `Failed to create collection directory ${collectionPath}: ${error.message}`;
            console.error(`[FaissDB] ❌ ${errorMsg}`);
            throw new Error(errorMsg, { cause: error });
        }
        try {
            // Save FAISS index
            const indexPath = path.join(collectionPath, 'dense.index');
            try {
                collection.index.write(indexPath);
            }
            catch (error) {
                throw new Error(`Failed to write FAISS index to ${indexPath}: ${error.message}`, { cause: error });
            }
            // Save metadata
            const metadataPath = path.join(collectionPath, 'metadata.json');
            try {
                await fs.writeJson(metadataPath, collection.metadata, { spaces: 2 });
            }
            catch (error) {
                throw new Error(`Failed to write metadata to ${metadataPath}: ${error.message}`, { cause: error });
            }
            // Save documents
            const documentsPath = path.join(collectionPath, 'documents.json');
            const documentsArray = Array.from(collection.documents.values());
            try {
                await fs.writeJson(documentsPath, documentsArray, { spaces: 2 });
            }
            catch (error) {
                throw new Error(`Failed to write documents to ${documentsPath}: ${error.message}`, { cause: error });
            }
            // Save BM25 model if hybrid collection
            if (collection.bm25 && collection.metadata.isHybrid) {
                const bm25Path = path.join(collectionPath, 'sparse.json');
                try {
                    const bm25Json = collection.bm25.toJSON();
                    await fs.writeFile(bm25Path, bm25Json, 'utf-8');
                }
                catch (error) {
                    throw new Error(`Failed to write BM25 model to ${bm25Path}: ${error.message}`, { cause: error });
                }
            }
            console.log('[FaissDB] 💾 Saved collection:', collectionName);
        }
        catch (error) {
            console.error(`[FaissDB] ❌ Failed to save collection ${collectionName}:`, error.message);
            console.error(`[FaissDB] Collection may be in an inconsistent state. Consider re-indexing.`);
            throw error;
        }
    }
    /**
     * Shared implementation of createCollection() / createHybridCollection().
     *
     * @param collectionName Collection name
     * @param dimension Dense vector dimension
     * @param isHybrid Whether to attach a BM25 model for sparse search
     * @throws Error if the collection already exists in memory or on disk
     */
    async createCollectionInternal(collectionName, dimension, isHybrid) {
        await this.ensureInitialized();
        if (this.collections.has(collectionName)) {
            throw new Error(`Collection ${collectionName} already exists`);
        }
        const collectionPath = this.getCollectionPath(collectionName);
        if (await fs.pathExists(collectionPath)) {
            throw new Error(`Collection ${collectionName} already exists on disk`);
        }
        console.log(isHybrid ? '[FaissDB] 🔧 Creating hybrid collection:' : '[FaissDB] 🔧 Creating collection:', collectionName);
        console.log('[FaissDB] 📏 Vector dimension:', dimension);
        const collection = {
            index: new faiss_node_1.IndexFlatL2(dimension),
            metadata: {
                name: collectionName,
                dimension,
                isHybrid,
                documentCount: 0,
                createdAt: new Date().toISOString(),
            },
            documents: new Map(),
        };
        if (isHybrid) {
            collection.bm25 = new simple_bm25_1.SimpleBM25(this.config.bm25Config);
        }
        this.collections.set(collectionName, collection);
        try {
            await this.saveCollection(collectionName);
        }
        catch (error) {
            // Do not keep a phantom in-memory collection that never reached disk
            this.collections.delete(collectionName);
            throw error;
        }
        console.log(isHybrid ? '[FaissDB] ✅ Hybrid collection created:' : '[FaissDB] ✅ Collection created:', collectionName);
    }
    /**
     * Create collection with dense vectors only
     *
     * @param collectionName Collection name
     * @param dimension Dense vector dimension
     * @param description Currently unused
     */
    async createCollection(collectionName, dimension, description) {
        await this.createCollectionInternal(collectionName, dimension, false);
    }
    /**
     * Create collection with hybrid search support (dense + sparse vectors)
     *
     * @param collectionName Collection name
     * @param dimension Dense vector dimension
     * @param description Currently unused
     */
    async createHybridCollection(collectionName, dimension, description) {
        await this.createCollectionInternal(collectionName, dimension, true);
    }
    /**
     * Drop collection
     *
     * Removes the collection from memory and deletes its directory. If the
     * directory cannot be removed, the in-memory entry is restored.
     */
    async dropCollection(collectionName) {
        await this.ensureInitialized();
        console.log('[FaissDB] 🗑️ Dropping collection:', collectionName);
        // Validate the name before touching any state
        const collectionPath = this.getCollectionPath(collectionName);
        // Store reference in case we need to restore on disk error
        const collectionBackup = this.collections.get(collectionName);
        // Remove from memory first
        this.collections.delete(collectionName);
        // Remove from disk
        try {
            if (await fs.pathExists(collectionPath)) {
                await fs.remove(collectionPath);
            }
        }
        catch (error) {
            // Restore in-memory state to maintain consistency
            if (collectionBackup) {
                this.collections.set(collectionName, collectionBackup);
            }
            const errorMsg = `Failed to remove collection '${collectionName}' from disk: ${error.message}`;
            console.error(`[FaissDB] ❌ ${errorMsg}`);
            if (error.code === 'EACCES') {
                throw new Error(`${errorMsg}\nPermission denied. Check file permissions.`, { cause: error });
            }
            else if (error.code === 'EBUSY') {
                throw new Error(`${errorMsg}\nFiles are in use by another process.`, { cause: error });
            }
            throw new Error(errorMsg, { cause: error });
        }
        console.log('[FaissDB] ✅ Collection dropped:', collectionName);
    }
    /**
     * Check if collection exists
     *
     * @returns true if the collection is cached in memory or has a directory on disk
     */
    async hasCollection(collectionName) {
        await this.ensureInitialized();
        // Check memory first
        if (this.collections.has(collectionName)) {
            return true;
        }
        // Check disk
        const collectionPath = this.getCollectionPath(collectionName);
        return await fs.pathExists(collectionPath);
    }
    /**
     * List all collections
     *
     * @returns Names of all sub-directories of the storage directory
     */
    async listCollections() {
        await this.ensureInitialized();
        const collections = [];
        // Read from storage directory
        if (await fs.pathExists(this.storageDir)) {
            const entries = await fs.readdir(this.storageDir, { withFileTypes: true });
            for (const entry of entries) {
                if (entry.isDirectory()) {
                    collections.push(entry.name);
                }
            }
        }
        return collections;
    }
    /**
     * Fetch a loaded collection or throw.
     */
    getLoadedCollection(collectionName) {
        const collection = this.collections.get(collectionName);
        if (!collection) {
            throw new Error(`Collection ${collectionName} not found`);
        }
        return collection;
    }
    /**
     * Verify that every document's vector matches the collection dimension.
     *
     * @throws Error naming the first offending document
     */
    assertVectorDimensions(collection, documents) {
        const expectedDim = collection.metadata.dimension;
        for (const doc of documents) {
            if (doc.vector.length !== expectedDim) {
                throw new Error(`Vector dimension mismatch for document '${doc.id}': `
                    + `expected ${expectedDim}, got ${doc.vector.length}`);
            }
        }
    }
    /**
     * Drop documents whose id is already stored or repeated within the batch.
     *
     * Stored vectors cannot be replaced, and adding a second vector for an
     * existing id would grow the index without growing the documents Map,
     * shifting every later label onto the wrong document.
     *
     * @returns The documents that still need to be inserted, in input order
     */
    selectNewDocuments(collection, documents) {
        const batchIds = new Set();
        const fresh = [];
        for (const doc of documents) {
            if (collection.documents.has(doc.id) || batchIds.has(doc.id)) {
                continue;
            }
            batchIds.add(doc.id);
            fresh.push(doc);
        }
        const skipped = documents.length - fresh.length;
        if (skipped > 0) {
            console.warn(`[FaissDB] ⚠️ Skipped ${skipped} document(s) whose id is already stored; FAISS collections do not support updates.`);
        }
        return fresh;
    }
    /**
     * Project an incoming document onto the fields persisted in documents.json
     * (everything except the dense vector, which lives in the FAISS index).
     */
    toStoredDocument(doc) {
        return {
            id: doc.id,
            content: doc.content,
            relativePath: doc.relativePath,
            startLine: doc.startLine,
            endLine: doc.endLine,
            fileExtension: doc.fileExtension,
            metadata: doc.metadata,
        };
    }
    /**
     * Build the `document` part of a search result from a stored document.
     */
    toResultDocument(doc) {
        return {
            id: doc.id,
            vector: [], // Vector not needed in results
            content: doc.content,
            relativePath: doc.relativePath,
            startLine: doc.startLine,
            endLine: doc.endLine,
            fileExtension: doc.fileExtension,
            metadata: doc.metadata,
        };
    }
    /**
     * Add vectors to the FAISS index and record their documents, one pair at a
     * time so the label/document ordering stays aligned even if an add fails.
     */
    appendDocuments(collection, documents) {
        for (const doc of documents) {
            collection.index.add(doc.vector);
            collection.documents.set(doc.id, this.toStoredDocument(doc));
        }
        // Update metadata
        collection.metadata.documentCount = collection.documents.size;
    }
    /**
     * Insert vector documents (dense only)
     *
     * Documents whose id is already stored are skipped with a warning.
     *
     * @throws Error if the collection is missing or a vector has the wrong dimension
     */
    async insert(collectionName, documents) {
        await this.ensureInitialized();
        await this.ensureLoaded(collectionName);
        const collection = this.getLoadedCollection(collectionName);
        console.log('[FaissDB] 📝 Inserting documents:', documents.length);
        // Validate vector dimensions
        this.assertVectorDimensions(collection, documents);
        const fresh = this.selectNewDocuments(collection, documents);
        this.appendDocuments(collection, fresh);
        await this.saveCollection(collectionName);
        console.log('[FaissDB] ✅ Inserted documents:', fresh.length);
    }
    /**
     * Insert hybrid vector documents (dense + sparse)
     *
     * The BM25 model is retrained on the content of all stored documents plus
     * the new ones. Documents whose id is already stored are skipped with a warning.
     *
     * @throws Error if the collection is missing, not hybrid, or a vector has the wrong dimension
     */
    async insertHybrid(collectionName, documents) {
        await this.ensureInitialized();
        await this.ensureLoaded(collectionName);
        const collection = this.getLoadedCollection(collectionName);
        if (!collection.metadata.isHybrid || !collection.bm25) {
            throw new Error(`Collection ${collectionName} is not a hybrid collection`);
        }
        console.log('[FaissDB] 📝 Inserting hybrid documents:', documents.length);
        // Validate vector dimensions
        this.assertVectorDimensions(collection, documents);
        const fresh = this.selectNewDocuments(collection, documents);
        // Train BM25 on all documents (including new ones)
        const allContents = [...collection.documents.values(), ...fresh].map((doc) => doc.content);
        collection.bm25.learn(allContents);
        this.appendDocuments(collection, fresh);
        await this.saveCollection(collectionName);
        console.log('[FaissDB] ✅ Inserted hybrid documents:', fresh.length);
    }
    /**
     * Search similar vectors (dense search only)
     *
     * @param collectionName Collection name
     * @param queryVector Dense query vector
     * @param options Optional `topK` (default 10) and `threshold` (minimum score)
     * @returns Results with `score = 1 / (1 + distance)`, in the order FAISS returns them
     */
    async search(collectionName, queryVector, options) {
        await this.ensureInitialized();
        await this.ensureLoaded(collectionName);
        const collection = this.getLoadedCollection(collectionName);
        // FAISS requires topK <= ntotal (number of vectors in index)
        const ntotal = collection.index.ntotal();
        if (ntotal === 0) {
            console.log('[FaissDB] 🔍 Empty collection, returning no results');
            return [];
        }
        const requestedTopK = options?.topK || 10;
        const topK = Math.min(requestedTopK, ntotal);
        console.log('[FaissDB] 🔍 Searching vectors, topK:', topK, '(requested:', requestedTopK, ', ntotal:', ntotal, ')');
        // Search FAISS index
        const results = collection.index.search(queryVector, topK);
        // Convert to VectorSearchResult; labels are positions in insertion order
        const searchResults = [];
        const documentsArray = Array.from(collection.documents.values());
        for (let i = 0; i < results.labels.length; i++) {
            const idx = results.labels[i];
            const distance = results.distances[i];
            if (idx >= 0 && idx < documentsArray.length) {
                const doc = documentsArray[idx];
                // Map the L2 distance onto (0, 1]: distance 0 -> score 1, larger
                // distance -> lower score. This is a distance-derived score, not
                // a cosine similarity.
                const score = 1 / (1 + distance);
                // Apply threshold filter if specified
                if (options?.threshold !== undefined && score < options.threshold) {
                    continue;
                }
                searchResults.push({
                    document: this.toResultDocument(doc),
                    score,
                });
            }
        }
        console.log('[FaissDB] ✅ Found results:', searchResults.length);
        return searchResults;
    }
    /**
     * Hybrid search with multiple vector fields (dense + sparse)
     *
     * Requests with `anns_field` 'vector'/'dense' run a FAISS search; requests
     * with 'sparse'/'sparse_vector' run a BM25 search using `request.data` as
     * the query text. Other fields are ignored. Results are fused with RRF.
     *
     * @param options Optional `limit` (default 10) and `rerank.params.k` (default 60)
     */
    async hybridSearch(collectionName, searchRequests, options) {
        await this.ensureInitialized();
        await this.ensureLoaded(collectionName);
        const collection = this.getLoadedCollection(collectionName);
        if (!collection.metadata.isHybrid || !collection.bm25) {
            throw new Error(`Collection ${collectionName} is not a hybrid collection`);
        }
        const limit = options?.limit || 10;
        console.log('[FaissDB] 🔍 Hybrid search, requests:', searchRequests.length);
        // Process search requests and collect results
        const denseResults = new Map();
        const sparseResults = new Map();
        for (const request of searchRequests) {
            if (request.anns_field === 'vector' || request.anns_field === 'dense') {
                this.performDenseSearch(collection, request.data, limit, denseResults);
            }
            else if (request.anns_field === 'sparse' || request.anns_field === 'sparse_vector') {
                this.performSparseSearch(collection, request.data, sparseResults);
            }
        }
        // Apply RRF reranking
        const rrfResults = this.applyRRF(collection, denseResults, sparseResults, options);
        console.log('[FaissDB] ✅ Hybrid search results:', rrfResults.length);
        return rrfResults.slice(0, limit);
    }
    /**
     * Perform dense vector search using FAISS index
     *
     * Fetches up to `limit * 2` candidates and writes `id -> score` into `results`.
     */
    performDenseSearch(collection, queryVector, limit, results) {
        const ntotal = collection.index.ntotal();
        if (ntotal === 0) {
            return;
        }
        const topK = Math.min(limit * 2, ntotal);
        const searchResults = collection.index.search(queryVector, topK);
        const documentsArray = Array.from(collection.documents.values());
        for (let i = 0; i < searchResults.labels.length; i++) {
            const idx = searchResults.labels[i];
            const distance = searchResults.distances[i];
            if (idx >= 0 && idx < documentsArray.length) {
                const doc = documentsArray[idx];
                const score = 1 / (1 + distance);
                results.set(doc.id, score);
            }
        }
    }
    /**
     * Perform sparse search using BM25
     *
     * Scores every stored document against the query and writes `id -> score`
     * into `results` for documents with a positive score.
     */
    performSparseSearch(collection, queryText, results) {
        if (!collection.bm25) {
            return;
        }
        // Generate query vector once (outside the loop)
        const queryVector = collection.bm25.generate(queryText);
        const queryMap = new Map();
        for (let i = 0; i < queryVector.indices.length; i++) {
            queryMap.set(queryVector.indices[i], queryVector.values[i]);
        }
        // Score all documents
        for (const doc of collection.documents.values()) {
            const score = this.calculateSparseScore(collection.bm25, doc.content, queryMap);
            if (score > 0) {
                results.set(doc.id, score);
            }
        }
    }
    /**
     * Calculate sparse vector dot product score
     *
     * @param bm25 BM25 model used to generate the document's sparse vector
     * @param content Document text
     * @param queryMap Query sparse vector as `index -> value`
     */
    calculateSparseScore(bm25, content, queryMap) {
        const sparseVector = bm25.generate(content);
        let score = 0;
        for (let i = 0; i < sparseVector.indices.length; i++) {
            const queryVal = queryMap.get(sparseVector.indices[i]);
            if (queryVal !== undefined) {
                score += sparseVector.values[i] * queryVal;
            }
        }
        return score;
    }
    /**
     * Pre-compute ranks from scores (O(n log n) instead of O(n²))
     *
     * @param scores Map of `id -> score`
     * @returns Map of `id -> rank`, where rank 1 is the highest score
     */
    computeRanks(scores) {
        const ranks = new Map();
        const sorted = Array.from(scores.entries()).sort((a, b) => b[1] - a[1]);
        sorted.forEach(([id], index) => ranks.set(id, index + 1));
        return ranks;
    }
    /**
     * Apply Reciprocal Rank Fusion (RRF) reranking
     *
     * Each document scores `sum(1 / (k + rank))` over the result lists it
     * appears in; `k` comes from `options.rerank.params.k` (default 60).
     *
     * @returns Results sorted by descending RRF score
     */
    applyRRF(collection, denseResults, sparseResults, options) {
        const k = options?.rerank?.params?.k || 60;
        // Pre-compute ranks once (O(n log n) total instead of O(n²))
        const denseRanks = this.computeRanks(denseResults);
        const sparseRanks = this.computeRanks(sparseResults);
        // Combine all document IDs and calculate RRF scores
        const allDocIds = new Set([...denseResults.keys(), ...sparseResults.keys()]);
        const rrfScores = [];
        for (const docId of allDocIds) {
            let rrfScore = 0;
            const denseRank = denseRanks.get(docId);
            const sparseRank = sparseRanks.get(docId);
            if (denseRank !== undefined) {
                rrfScore += 1 / (k + denseRank);
            }
            if (sparseRank !== undefined) {
                rrfScore += 1 / (k + sparseRank);
            }
            rrfScores.push([docId, rrfScore]);
        }
        // Sort by RRF score and convert to results
        rrfScores.sort((a, b) => b[1] - a[1]);
        const results = [];
        for (const [docId, score] of rrfScores) {
            const doc = collection.documents.get(docId);
            if (doc) {
                results.push({
                    document: this.toResultDocument(doc),
                    score,
                });
            }
        }
        return results;
    }
    /**
     * Delete documents by IDs
     *
     * ⚠️ NOT IMPLEMENTED: FAISS does not support document deletion
     *
     * The FAISS IndexFlatL2 library does not provide a way to remove vectors
     * from an existing index. To fully remove documents, you must:
     *
     * 1. Drop the collection using dropCollection()
     * 2. Recreate it using createCollection() or createHybridCollection()
     * 3. Re-insert all documents except the ones you want to delete
     *
     * @throws Error Always throws - deletion is not supported
     * @param collectionName Collection name
     * @param ids Document IDs to delete (not used)
     */
    async delete(collectionName, ids) {
        await this.ensureInitialized();
        await this.ensureLoaded(collectionName);
        console.error(`[FaissDB] ❌ FAISS does not support document deletion`);
        console.error(`[FaissDB] ❌ Attempted to delete ${ids.length} document(s) from collection '${collectionName}'`);
        throw new Error(`FAISS does not support document deletion. `
            + `To remove documents from collection '${collectionName}', you must:\n`
            + ` 1. Drop the collection using dropCollection()\n`
            + ` 2. Recreate it using createCollection() or createHybridCollection()\n`
            + ` 3. Re-insert all documents except the ones you want to delete\n\n`
            + `Attempted to delete document IDs: ${ids.join(', ')}`);
    }
    /**
     * Query documents with filter conditions
     *
     * ⚠️ LIMITATION: Filter parameter is currently ignored
     *
     * This method returns ALL documents in the collection (up to limit),
     * not filtered results. Filter parsing is not yet implemented for FAISS.
     *
     * @param collectionName Collection name
     * @param filter Filter expression (currently ignored - returns all documents)
     * @param outputFields Fields to return in results; unknown names are looked up in each document's metadata
     * @param limit Maximum number of results (only limit is enforced; a falsy limit means no limit)
     * @returns All documents with specified fields (up to limit)
     */
    async query(collectionName, filter, outputFields, limit) {
        await this.ensureInitialized();
        await this.ensureLoaded(collectionName);
        const collection = this.getLoadedCollection(collectionName);
        if (filter && filter.trim() !== '') {
            console.warn(`[FaissDB] ⚠️ Query filters are not implemented. Filter '${filter}' will be ignored.`);
            console.warn(`[FaissDB] ⚠️ All documents will be returned (up to limit). Consider using another vector database if filtering is required.`);
        }
        console.log('[FaissDB] 🔍 Querying documents (no filter support)');
        const results = [];
        for (const doc of collection.documents.values()) {
            const result = {};
            for (const field of outputFields) {
                if (field === 'id') {
                    result.id = doc.id;
                }
                else if (field === 'content') {
                    result.content = doc.content;
                }
                else if (field === 'relativePath') {
                    result.relativePath = doc.relativePath;
                }
                else if (field === 'startLine') {
                    result.startLine = doc.startLine;
                }
                else if (field === 'endLine') {
                    result.endLine = doc.endLine;
                }
                else if (field === 'fileExtension') {
                    result.fileExtension = doc.fileExtension;
                }
                else if (doc.metadata?.[field] !== undefined) {
                    // Documents stored without metadata simply have no extra fields
                    result[field] = doc.metadata[field];
                }
            }
            results.push(result);
            if (limit && results.length >= limit) {
                break;
            }
        }
        return results;
    }
    /**
     * Check collection limit
     * FAISS has no inherent collection limit (only limited by disk space)
     */
    async checkCollectionLimit() {
        return true;
    }
}
761
+ exports.FaissVectorDatabase = FaissVectorDatabase;
762
+ //# sourceMappingURL=faiss-vectordb.js.map