@pleaseai/context-please-core 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -0
- package/dist/.tsbuildinfo +1 -1
- package/dist/context.d.ts +25 -0
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +74 -0
- package/dist/context.js.map +1 -1
- package/dist/embedding/base-embedding.d.ts.map +1 -1
- package/dist/embedding/base-embedding.js +4 -0
- package/dist/embedding/base-embedding.js.map +1 -1
- package/dist/embedding/gemini-embedding.d.ts +41 -0
- package/dist/embedding/gemini-embedding.d.ts.map +1 -1
- package/dist/embedding/gemini-embedding.js +154 -25
- package/dist/embedding/gemini-embedding.js.map +1 -1
- package/dist/embedding/huggingface-embedding.d.ts +70 -0
- package/dist/embedding/huggingface-embedding.d.ts.map +1 -0
- package/dist/embedding/huggingface-embedding.js +270 -0
- package/dist/embedding/huggingface-embedding.js.map +1 -0
- package/dist/embedding/index.d.ts +1 -0
- package/dist/embedding/index.d.ts.map +1 -1
- package/dist/embedding/index.js +1 -0
- package/dist/embedding/index.js.map +1 -1
- package/dist/splitter/ast-splitter.d.ts.map +1 -1
- package/dist/splitter/ast-splitter.js.map +1 -1
- package/dist/vectordb/factory.d.ts +20 -1
- package/dist/vectordb/factory.d.ts.map +1 -1
- package/dist/vectordb/factory.js +67 -1
- package/dist/vectordb/factory.js.map +1 -1
- package/dist/vectordb/faiss-vectordb.d.ts +162 -0
- package/dist/vectordb/faiss-vectordb.d.ts.map +1 -0
- package/dist/vectordb/faiss-vectordb.js +762 -0
- package/dist/vectordb/faiss-vectordb.js.map +1 -0
- package/dist/vectordb/index.d.ts +1 -0
- package/dist/vectordb/index.d.ts.map +1 -1
- package/dist/vectordb/index.js +20 -1
- package/dist/vectordb/index.js.map +1 -1
- package/dist/vectordb/sparse/simple-bm25.d.ts +1 -0
- package/dist/vectordb/sparse/simple-bm25.d.ts.map +1 -1
- package/dist/vectordb/sparse/simple-bm25.js +1 -3
- package/dist/vectordb/sparse/simple-bm25.js.map +1 -1
- package/package.json +3 -1
|
@@ -0,0 +1,762 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript-emitted CommonJS interop helpers. Each one reuses an
// implementation already attached to the module-level `this` (when present)
// and otherwise defines its own. `var` is kept because these are the names the
// rest of the compiled module refers to.

// Exposes property `key` of `source` on `target` under `alias` (defaults to
// `key`). With Object.create available, the property is defined through a
// descriptor (a live getter in most cases); otherwise it is copied by value.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor on a non-ES module: wrap it so reads go through `source`.
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        target[alias] = source[key];
    });

// Attaches `value` as the enumerable `default` member of `target`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        target["default"] = value;
    });

// Builds a namespace-style object for `mod`: ES modules are returned as-is;
// anything else gets its own non-"default" keys re-bound onto a fresh object
// whose `default` is the module itself.
var __importStar = (this && this.__importStar) || (function () {
    // Replaces itself on first use with Object.getOwnPropertyNames, or with a
    // for-in based fallback when that function is unavailable.
    var listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (candidate) {
            var names = [];
            for (var name in candidate) {
                if (Object.prototype.hasOwnProperty.call(candidate, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var i = 0; i < keys.length; i++) {
                if (keys[i] !== "default") {
                    __createBinding(namespace, mod, keys[i]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.FaissVectorDatabase = void 0;
|
|
37
|
+
const faiss_node_1 = require("faiss-node");
|
|
38
|
+
const fs = __importStar(require("fs-extra"));
|
|
39
|
+
const os = __importStar(require("os"));
|
|
40
|
+
const path = __importStar(require("path"));
|
|
41
|
+
const base_vector_database_1 = require("./base/base-vector-database");
|
|
42
|
+
const simple_bm25_1 = require("./sparse/simple-bm25");
|
|
43
|
+
/**
|
|
44
|
+
* FAISS Vector Database implementation for local-only deployments
|
|
45
|
+
*
|
|
46
|
+
* Features:
|
|
47
|
+
* - Zero-configuration file-based storage
|
|
48
|
+
* - Hybrid search with BM25 sparse vectors
|
|
49
|
+
* - RRF (Reciprocal Rank Fusion) reranking
|
|
50
|
+
* - Perfect for local development and small-to-medium codebases
|
|
51
|
+
*
|
|
52
|
+
* Architecture:
|
|
53
|
+
* - Dense vectors: Stored in FAISS IndexFlatL2 (L2 distance)
|
|
54
|
+
* - Sparse vectors: Generated using SimpleBM25 for keyword matching
|
|
55
|
+
* - Hybrid search: Combines both using RRF fusion
|
|
56
|
+
*
|
|
57
|
+
* Storage structure:
|
|
58
|
+
* ~/.context/faiss-indexes/
|
|
59
|
+
* └── {collection_name}/
|
|
60
|
+
* ├── dense.index # FAISS index file
|
|
61
|
+
* ├── sparse.json # BM25 model (vocabulary, IDF)
|
|
62
|
+
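* ├── metadata.json # Collection metadata (name, dimension, isHybrid, documentCount, createdAt)
* └── documents.json # Stored document records (content, path, line range, metadata)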
|
|
63
|
+
*
|
|
64
|
+
* Limitations:
|
|
65
|
+
* - Document deletion is NOT supported (FAISS IndexFlatL2 limitation)
|
|
66
|
+
* - Query filters are NOT supported (returns all documents)
|
|
67
|
+
* - To remove documents, you must drop and recreate the collection
|
|
68
|
+
*/
|
|
69
|
+
class FaissVectorDatabase extends base_vector_database_1.BaseVectorDatabase {
|
|
70
|
+
constructor(config) {
|
|
71
|
+
// Set storageDir default before calling super(), which triggers initialize()
|
|
72
|
+
const configWithDefaults = {
|
|
73
|
+
...config,
|
|
74
|
+
storageDir: config.storageDir || path.join(os.homedir(), '.context', 'faiss-indexes'),
|
|
75
|
+
};
|
|
76
|
+
super(configWithDefaults);
|
|
77
|
+
this.collections = new Map();
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Get storage directory (lazily computed from config)
|
|
81
|
+
*/
|
|
82
|
+
get storageDir() {
|
|
83
|
+
return this.config.storageDir;
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Initialize FAISS storage directory
|
|
87
|
+
*/
|
|
88
|
+
async initialize() {
|
|
89
|
+
try {
|
|
90
|
+
console.log('[FaissDB] 🔧 Initializing FAISS storage at:', this.storageDir);
|
|
91
|
+
await fs.ensureDir(this.storageDir);
|
|
92
|
+
console.log('[FaissDB] ✅ FAISS storage initialized');
|
|
93
|
+
}
|
|
94
|
+
catch (error) {
|
|
95
|
+
const errorMsg = `Failed to initialize FAISS storage at ${this.storageDir}: ${error.message}`;
|
|
96
|
+
console.error(`[FaissDB] ❌ ${errorMsg}`);
|
|
97
|
+
console.error(`[FaissDB] Error code: ${error.code || 'UNKNOWN'}`);
|
|
98
|
+
if (error.code === 'EACCES') {
|
|
99
|
+
throw new Error(`${errorMsg}\nPermission denied. Check directory permissions.`);
|
|
100
|
+
}
|
|
101
|
+
else if (error.code === 'ENOSPC') {
|
|
102
|
+
throw new Error(`${errorMsg}\nDisk space exhausted. Free up disk space and try again.`);
|
|
103
|
+
}
|
|
104
|
+
else if (error.code === 'ENOENT') {
|
|
105
|
+
throw new Error(`${errorMsg}\nParent directory does not exist.`);
|
|
106
|
+
}
|
|
107
|
+
else {
|
|
108
|
+
throw new Error(errorMsg);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
/**
|
|
113
|
+
* FAISS indexes are loaded on-demand when accessed
|
|
114
|
+
*/
|
|
115
|
+
async ensureLoaded(collectionName) {
|
|
116
|
+
if (this.collections.has(collectionName)) {
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
119
|
+
const collectionPath = this.getCollectionPath(collectionName);
|
|
120
|
+
if (!(await fs.pathExists(collectionPath))) {
|
|
121
|
+
throw new Error(`Collection ${collectionName} does not exist`);
|
|
122
|
+
}
|
|
123
|
+
await this.loadCollection(collectionName);
|
|
124
|
+
}
|
|
125
|
+
/**
|
|
126
|
+
* Get collection storage path
|
|
127
|
+
*/
|
|
128
|
+
getCollectionPath(collectionName) {
|
|
129
|
+
return path.join(this.storageDir, collectionName);
|
|
130
|
+
}
|
|
131
|
+
/**
|
|
132
|
+
* Load collection from disk
|
|
133
|
+
*/
|
|
134
|
+
async loadCollection(collectionName) {
|
|
135
|
+
const collectionPath = this.getCollectionPath(collectionName);
|
|
136
|
+
console.log('[FaissDB] 📂 Loading collection:', collectionName);
|
|
137
|
+
try {
|
|
138
|
+
// Load metadata
|
|
139
|
+
const metadataPath = path.join(collectionPath, 'metadata.json');
|
|
140
|
+
let metadata;
|
|
141
|
+
try {
|
|
142
|
+
metadata = await fs.readJson(metadataPath);
|
|
143
|
+
}
|
|
144
|
+
catch (error) {
|
|
145
|
+
throw new Error(`Failed to load collection metadata from ${metadataPath}: ${error.message}. `
|
|
146
|
+
+ `The metadata file may be corrupted. Try re-indexing the collection.`);
|
|
147
|
+
}
|
|
148
|
+
// Load FAISS index
|
|
149
|
+
const indexPath = path.join(collectionPath, 'dense.index');
|
|
150
|
+
let index;
|
|
151
|
+
try {
|
|
152
|
+
index = faiss_node_1.IndexFlatL2.read(indexPath);
|
|
153
|
+
}
|
|
154
|
+
catch (error) {
|
|
155
|
+
throw new Error(`Failed to load FAISS index from ${indexPath}: ${error.message}. `
|
|
156
|
+
+ `The index file may be corrupted. Try re-indexing the collection.`);
|
|
157
|
+
}
|
|
158
|
+
// Load documents
|
|
159
|
+
const documentsPath = path.join(collectionPath, 'documents.json');
|
|
160
|
+
let documentsArray;
|
|
161
|
+
try {
|
|
162
|
+
documentsArray = await fs.readJson(documentsPath);
|
|
163
|
+
}
|
|
164
|
+
catch (error) {
|
|
165
|
+
throw new Error(`Failed to load documents metadata from ${documentsPath}: ${error.message}. `
|
|
166
|
+
+ `The documents file may be corrupted. Try re-indexing the collection.`);
|
|
167
|
+
}
|
|
168
|
+
const documents = new Map(documentsArray.map((doc) => [doc.id, doc]));
|
|
169
|
+
// Load BM25 model if hybrid collection
|
|
170
|
+
let bm25;
|
|
171
|
+
if (metadata.isHybrid) {
|
|
172
|
+
const bm25Path = path.join(collectionPath, 'sparse.json');
|
|
173
|
+
try {
|
|
174
|
+
const bm25Json = await fs.readFile(bm25Path, 'utf-8');
|
|
175
|
+
bm25 = simple_bm25_1.SimpleBM25.fromJSON(bm25Json);
|
|
176
|
+
}
|
|
177
|
+
catch (error) {
|
|
178
|
+
throw new Error(`Failed to load BM25 model from ${bm25Path}: ${error.message}. `
|
|
179
|
+
+ `The BM25 file may be corrupted. Try re-indexing the collection.`);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
this.collections.set(collectionName, {
|
|
183
|
+
index,
|
|
184
|
+
metadata,
|
|
185
|
+
documents,
|
|
186
|
+
bm25,
|
|
187
|
+
});
|
|
188
|
+
console.log('[FaissDB] ✅ Loaded collection:', collectionName);
|
|
189
|
+
console.log('[FaissDB] 📊 Document count:', documents.size);
|
|
190
|
+
}
|
|
191
|
+
catch (error) {
|
|
192
|
+
console.error(`[FaissDB] ❌ Failed to load collection ${collectionName}:`, error.message);
|
|
193
|
+
throw error;
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
/**
|
|
197
|
+
* Save collection to disk
|
|
198
|
+
*/
|
|
199
|
+
async saveCollection(collectionName) {
|
|
200
|
+
const collection = this.collections.get(collectionName);
|
|
201
|
+
if (!collection) {
|
|
202
|
+
throw new Error(`Collection ${collectionName} not found in memory`);
|
|
203
|
+
}
|
|
204
|
+
const collectionPath = this.getCollectionPath(collectionName);
|
|
205
|
+
try {
|
|
206
|
+
await fs.ensureDir(collectionPath);
|
|
207
|
+
}
|
|
208
|
+
catch (error) {
|
|
209
|
+
const errorMsg = `Failed to create collection directory ${collectionPath}: ${error.message}`;
|
|
210
|
+
console.error(`[FaissDB] ❌ ${errorMsg}`);
|
|
211
|
+
throw new Error(errorMsg);
|
|
212
|
+
}
|
|
213
|
+
try {
|
|
214
|
+
// Save FAISS index
|
|
215
|
+
const indexPath = path.join(collectionPath, 'dense.index');
|
|
216
|
+
try {
|
|
217
|
+
collection.index.write(indexPath);
|
|
218
|
+
}
|
|
219
|
+
catch (error) {
|
|
220
|
+
throw new Error(`Failed to write FAISS index to ${indexPath}: ${error.message}`);
|
|
221
|
+
}
|
|
222
|
+
// Save metadata
|
|
223
|
+
const metadataPath = path.join(collectionPath, 'metadata.json');
|
|
224
|
+
try {
|
|
225
|
+
await fs.writeJson(metadataPath, collection.metadata, { spaces: 2 });
|
|
226
|
+
}
|
|
227
|
+
catch (error) {
|
|
228
|
+
throw new Error(`Failed to write metadata to ${metadataPath}: ${error.message}`);
|
|
229
|
+
}
|
|
230
|
+
// Save documents
|
|
231
|
+
const documentsPath = path.join(collectionPath, 'documents.json');
|
|
232
|
+
const documentsArray = Array.from(collection.documents.values());
|
|
233
|
+
try {
|
|
234
|
+
await fs.writeJson(documentsPath, documentsArray, { spaces: 2 });
|
|
235
|
+
}
|
|
236
|
+
catch (error) {
|
|
237
|
+
throw new Error(`Failed to write documents to ${documentsPath}: ${error.message}`);
|
|
238
|
+
}
|
|
239
|
+
// Save BM25 model if hybrid collection
|
|
240
|
+
if (collection.bm25 && collection.metadata.isHybrid) {
|
|
241
|
+
const bm25Path = path.join(collectionPath, 'sparse.json');
|
|
242
|
+
try {
|
|
243
|
+
const bm25Json = collection.bm25.toJSON();
|
|
244
|
+
await fs.writeFile(bm25Path, bm25Json, 'utf-8');
|
|
245
|
+
}
|
|
246
|
+
catch (error) {
|
|
247
|
+
throw new Error(`Failed to write BM25 model to ${bm25Path}: ${error.message}`);
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
console.log('[FaissDB] 💾 Saved collection:', collectionName);
|
|
251
|
+
}
|
|
252
|
+
catch (error) {
|
|
253
|
+
console.error(`[FaissDB] ❌ Failed to save collection ${collectionName}:`, error.message);
|
|
254
|
+
console.error(`[FaissDB] Collection may be in an inconsistent state. Consider re-indexing.`);
|
|
255
|
+
throw error;
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
/**
|
|
259
|
+
* Create collection with dense vectors only
|
|
260
|
+
*/
|
|
261
|
+
async createCollection(collectionName, dimension, description) {
|
|
262
|
+
await this.ensureInitialized();
|
|
263
|
+
if (this.collections.has(collectionName)) {
|
|
264
|
+
throw new Error(`Collection ${collectionName} already exists`);
|
|
265
|
+
}
|
|
266
|
+
const collectionPath = this.getCollectionPath(collectionName);
|
|
267
|
+
if (await fs.pathExists(collectionPath)) {
|
|
268
|
+
throw new Error(`Collection ${collectionName} already exists on disk`);
|
|
269
|
+
}
|
|
270
|
+
console.log('[FaissDB] 🔧 Creating collection:', collectionName);
|
|
271
|
+
console.log('[FaissDB] 📏 Vector dimension:', dimension);
|
|
272
|
+
// Create FAISS index
|
|
273
|
+
const index = new faiss_node_1.IndexFlatL2(dimension);
|
|
274
|
+
// Create metadata
|
|
275
|
+
const metadata = {
|
|
276
|
+
name: collectionName,
|
|
277
|
+
dimension,
|
|
278
|
+
isHybrid: false,
|
|
279
|
+
documentCount: 0,
|
|
280
|
+
createdAt: new Date().toISOString(),
|
|
281
|
+
};
|
|
282
|
+
this.collections.set(collectionName, {
|
|
283
|
+
index,
|
|
284
|
+
metadata,
|
|
285
|
+
documents: new Map(),
|
|
286
|
+
});
|
|
287
|
+
await this.saveCollection(collectionName);
|
|
288
|
+
console.log('[FaissDB] ✅ Collection created:', collectionName);
|
|
289
|
+
}
|
|
290
|
+
/**
|
|
291
|
+
* Create collection with hybrid search support (dense + sparse vectors)
|
|
292
|
+
*/
|
|
293
|
+
async createHybridCollection(collectionName, dimension, description) {
|
|
294
|
+
await this.ensureInitialized();
|
|
295
|
+
if (this.collections.has(collectionName)) {
|
|
296
|
+
throw new Error(`Collection ${collectionName} already exists`);
|
|
297
|
+
}
|
|
298
|
+
const collectionPath = this.getCollectionPath(collectionName);
|
|
299
|
+
if (await fs.pathExists(collectionPath)) {
|
|
300
|
+
throw new Error(`Collection ${collectionName} already exists on disk`);
|
|
301
|
+
}
|
|
302
|
+
console.log('[FaissDB] 🔧 Creating hybrid collection:', collectionName);
|
|
303
|
+
console.log('[FaissDB] 📏 Vector dimension:', dimension);
|
|
304
|
+
// Create FAISS index
|
|
305
|
+
const index = new faiss_node_1.IndexFlatL2(dimension);
|
|
306
|
+
// Create BM25 generator
|
|
307
|
+
const bm25 = new simple_bm25_1.SimpleBM25(this.config.bm25Config);
|
|
308
|
+
// Create metadata
|
|
309
|
+
const metadata = {
|
|
310
|
+
name: collectionName,
|
|
311
|
+
dimension,
|
|
312
|
+
isHybrid: true,
|
|
313
|
+
documentCount: 0,
|
|
314
|
+
createdAt: new Date().toISOString(),
|
|
315
|
+
};
|
|
316
|
+
this.collections.set(collectionName, {
|
|
317
|
+
index,
|
|
318
|
+
metadata,
|
|
319
|
+
documents: new Map(),
|
|
320
|
+
bm25,
|
|
321
|
+
});
|
|
322
|
+
await this.saveCollection(collectionName);
|
|
323
|
+
console.log('[FaissDB] ✅ Hybrid collection created:', collectionName);
|
|
324
|
+
}
|
|
325
|
+
/**
|
|
326
|
+
* Drop collection
|
|
327
|
+
*/
|
|
328
|
+
async dropCollection(collectionName) {
|
|
329
|
+
await this.ensureInitialized();
|
|
330
|
+
console.log('[FaissDB] 🗑️ Dropping collection:', collectionName);
|
|
331
|
+
// Store reference in case we need to restore on disk error
|
|
332
|
+
const collectionBackup = this.collections.get(collectionName);
|
|
333
|
+
// Remove from memory first
|
|
334
|
+
this.collections.delete(collectionName);
|
|
335
|
+
// Remove from disk
|
|
336
|
+
const collectionPath = this.getCollectionPath(collectionName);
|
|
337
|
+
try {
|
|
338
|
+
if (await fs.pathExists(collectionPath)) {
|
|
339
|
+
await fs.remove(collectionPath);
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
catch (error) {
|
|
343
|
+
// Restore in-memory state to maintain consistency
|
|
344
|
+
if (collectionBackup) {
|
|
345
|
+
this.collections.set(collectionName, collectionBackup);
|
|
346
|
+
}
|
|
347
|
+
const errorMsg = `Failed to remove collection '${collectionName}' from disk: ${error.message}`;
|
|
348
|
+
console.error(`[FaissDB] ❌ ${errorMsg}`);
|
|
349
|
+
if (error.code === 'EACCES') {
|
|
350
|
+
throw new Error(`${errorMsg}\nPermission denied. Check file permissions.`);
|
|
351
|
+
}
|
|
352
|
+
else if (error.code === 'EBUSY') {
|
|
353
|
+
throw new Error(`${errorMsg}\nFiles are in use by another process.`);
|
|
354
|
+
}
|
|
355
|
+
throw new Error(errorMsg);
|
|
356
|
+
}
|
|
357
|
+
console.log('[FaissDB] ✅ Collection dropped:', collectionName);
|
|
358
|
+
}
|
|
359
|
+
/**
|
|
360
|
+
* Check if collection exists
|
|
361
|
+
*/
|
|
362
|
+
async hasCollection(collectionName) {
|
|
363
|
+
await this.ensureInitialized();
|
|
364
|
+
// Check memory first
|
|
365
|
+
if (this.collections.has(collectionName)) {
|
|
366
|
+
return true;
|
|
367
|
+
}
|
|
368
|
+
// Check disk
|
|
369
|
+
const collectionPath = this.getCollectionPath(collectionName);
|
|
370
|
+
return await fs.pathExists(collectionPath);
|
|
371
|
+
}
|
|
372
|
+
/**
|
|
373
|
+
* List all collections
|
|
374
|
+
*/
|
|
375
|
+
async listCollections() {
|
|
376
|
+
await this.ensureInitialized();
|
|
377
|
+
const collections = [];
|
|
378
|
+
// Read from storage directory
|
|
379
|
+
if (await fs.pathExists(this.storageDir)) {
|
|
380
|
+
const entries = await fs.readdir(this.storageDir, { withFileTypes: true });
|
|
381
|
+
for (const entry of entries) {
|
|
382
|
+
if (entry.isDirectory()) {
|
|
383
|
+
collections.push(entry.name);
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
return collections;
|
|
388
|
+
}
|
|
389
|
+
/**
|
|
390
|
+
* Insert vector documents (dense only)
|
|
391
|
+
*/
|
|
392
|
+
async insert(collectionName, documents) {
|
|
393
|
+
await this.ensureInitialized();
|
|
394
|
+
await this.ensureLoaded(collectionName);
|
|
395
|
+
const collection = this.collections.get(collectionName);
|
|
396
|
+
if (!collection) {
|
|
397
|
+
throw new Error(`Collection ${collectionName} not found`);
|
|
398
|
+
}
|
|
399
|
+
console.log('[FaissDB] 📝 Inserting documents:', documents.length);
|
|
400
|
+
// Validate vector dimensions
|
|
401
|
+
const expectedDim = collection.metadata.dimension;
|
|
402
|
+
for (const doc of documents) {
|
|
403
|
+
if (doc.vector.length !== expectedDim) {
|
|
404
|
+
throw new Error(`Vector dimension mismatch for document '${doc.id}': `
|
|
405
|
+
+ `expected ${expectedDim}, got ${doc.vector.length}`);
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
// Add vectors to FAISS index one at a time
|
|
409
|
+
documents.forEach((doc) => {
|
|
410
|
+
collection.index.add(doc.vector);
|
|
411
|
+
});
|
|
412
|
+
// Store document metadata
|
|
413
|
+
documents.forEach((doc) => {
|
|
414
|
+
collection.documents.set(doc.id, {
|
|
415
|
+
id: doc.id,
|
|
416
|
+
content: doc.content,
|
|
417
|
+
relativePath: doc.relativePath,
|
|
418
|
+
startLine: doc.startLine,
|
|
419
|
+
endLine: doc.endLine,
|
|
420
|
+
fileExtension: doc.fileExtension,
|
|
421
|
+
metadata: doc.metadata,
|
|
422
|
+
});
|
|
423
|
+
});
|
|
424
|
+
// Update metadata
|
|
425
|
+
collection.metadata.documentCount = collection.documents.size;
|
|
426
|
+
await this.saveCollection(collectionName);
|
|
427
|
+
console.log('[FaissDB] ✅ Inserted documents:', documents.length);
|
|
428
|
+
}
|
|
429
|
+
/**
|
|
430
|
+
* Insert hybrid vector documents (dense + sparse)
|
|
431
|
+
*/
|
|
432
|
+
async insertHybrid(collectionName, documents) {
|
|
433
|
+
await this.ensureInitialized();
|
|
434
|
+
await this.ensureLoaded(collectionName);
|
|
435
|
+
const collection = this.collections.get(collectionName);
|
|
436
|
+
if (!collection) {
|
|
437
|
+
throw new Error(`Collection ${collectionName} not found`);
|
|
438
|
+
}
|
|
439
|
+
if (!collection.metadata.isHybrid || !collection.bm25) {
|
|
440
|
+
throw new Error(`Collection ${collectionName} is not a hybrid collection`);
|
|
441
|
+
}
|
|
442
|
+
console.log('[FaissDB] 📝 Inserting hybrid documents:', documents.length);
|
|
443
|
+
// Validate vector dimensions
|
|
444
|
+
const expectedDim = collection.metadata.dimension;
|
|
445
|
+
for (const doc of documents) {
|
|
446
|
+
if (doc.vector.length !== expectedDim) {
|
|
447
|
+
throw new Error(`Vector dimension mismatch for document '${doc.id}': `
|
|
448
|
+
+ `expected ${expectedDim}, got ${doc.vector.length}`);
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
// Train BM25 on all documents (including new ones)
|
|
452
|
+
const allDocuments = [...collection.documents.values(), ...documents];
|
|
453
|
+
const allContents = allDocuments.map((doc) => doc.content);
|
|
454
|
+
collection.bm25.learn(allContents);
|
|
455
|
+
// Add vectors to FAISS index one at a time
|
|
456
|
+
documents.forEach((doc) => {
|
|
457
|
+
collection.index.add(doc.vector);
|
|
458
|
+
});
|
|
459
|
+
// Store document metadata
|
|
460
|
+
documents.forEach((doc) => {
|
|
461
|
+
collection.documents.set(doc.id, {
|
|
462
|
+
id: doc.id,
|
|
463
|
+
content: doc.content,
|
|
464
|
+
relativePath: doc.relativePath,
|
|
465
|
+
startLine: doc.startLine,
|
|
466
|
+
endLine: doc.endLine,
|
|
467
|
+
fileExtension: doc.fileExtension,
|
|
468
|
+
metadata: doc.metadata,
|
|
469
|
+
});
|
|
470
|
+
});
|
|
471
|
+
// Update metadata
|
|
472
|
+
collection.metadata.documentCount = collection.documents.size;
|
|
473
|
+
await this.saveCollection(collectionName);
|
|
474
|
+
console.log('[FaissDB] ✅ Inserted hybrid documents:', documents.length);
|
|
475
|
+
}
|
|
476
|
+
/**
|
|
477
|
+
* Search similar vectors (dense search only)
|
|
478
|
+
*/
|
|
479
|
+
async search(collectionName, queryVector, options) {
|
|
480
|
+
await this.ensureInitialized();
|
|
481
|
+
await this.ensureLoaded(collectionName);
|
|
482
|
+
const collection = this.collections.get(collectionName);
|
|
483
|
+
if (!collection) {
|
|
484
|
+
throw new Error(`Collection ${collectionName} not found`);
|
|
485
|
+
}
|
|
486
|
+
// FAISS requires topK <= ntotal (number of vectors in index)
|
|
487
|
+
const ntotal = collection.index.ntotal();
|
|
488
|
+
if (ntotal === 0) {
|
|
489
|
+
console.log('[FaissDB] 🔍 Empty collection, returning no results');
|
|
490
|
+
return [];
|
|
491
|
+
}
|
|
492
|
+
const requestedTopK = options?.topK || 10;
|
|
493
|
+
const topK = Math.min(requestedTopK, ntotal);
|
|
494
|
+
console.log('[FaissDB] 🔍 Searching vectors, topK:', topK, '(requested:', requestedTopK, ', ntotal:', ntotal, ')');
|
|
495
|
+
// Search FAISS index
|
|
496
|
+
const results = collection.index.search(queryVector, topK);
|
|
497
|
+
// Convert to VectorSearchResult
|
|
498
|
+
const searchResults = [];
|
|
499
|
+
const documentsArray = Array.from(collection.documents.values());
|
|
500
|
+
for (let i = 0; i < results.labels.length; i++) {
|
|
501
|
+
const idx = results.labels[i];
|
|
502
|
+
const distance = results.distances[i];
|
|
503
|
+
if (idx >= 0 && idx < documentsArray.length) {
|
|
504
|
+
const doc = documentsArray[idx];
|
|
505
|
+
// Convert L2 distance to cosine similarity score
|
|
506
|
+
// Lower distance = higher similarity
|
|
507
|
+
const score = 1 / (1 + distance);
|
|
508
|
+
// Apply threshold filter if specified
|
|
509
|
+
if (options?.threshold !== undefined && score < options.threshold) {
|
|
510
|
+
continue;
|
|
511
|
+
}
|
|
512
|
+
searchResults.push({
|
|
513
|
+
document: {
|
|
514
|
+
id: doc.id,
|
|
515
|
+
vector: [], // Vector not needed in results
|
|
516
|
+
content: doc.content,
|
|
517
|
+
relativePath: doc.relativePath,
|
|
518
|
+
startLine: doc.startLine,
|
|
519
|
+
endLine: doc.endLine,
|
|
520
|
+
fileExtension: doc.fileExtension,
|
|
521
|
+
metadata: doc.metadata,
|
|
522
|
+
},
|
|
523
|
+
score,
|
|
524
|
+
});
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
console.log('[FaissDB] ✅ Found results:', searchResults.length);
|
|
528
|
+
return searchResults;
|
|
529
|
+
}
|
|
530
|
+
/**
|
|
531
|
+
* Hybrid search with multiple vector fields (dense + sparse)
|
|
532
|
+
*/
|
|
533
|
+
async hybridSearch(collectionName, searchRequests, options) {
|
|
534
|
+
await this.ensureInitialized();
|
|
535
|
+
await this.ensureLoaded(collectionName);
|
|
536
|
+
const collection = this.collections.get(collectionName);
|
|
537
|
+
if (!collection) {
|
|
538
|
+
throw new Error(`Collection ${collectionName} not found`);
|
|
539
|
+
}
|
|
540
|
+
if (!collection.metadata.isHybrid || !collection.bm25) {
|
|
541
|
+
throw new Error(`Collection ${collectionName} is not a hybrid collection`);
|
|
542
|
+
}
|
|
543
|
+
const limit = options?.limit || 10;
|
|
544
|
+
console.log('[FaissDB] 🔍 Hybrid search, requests:', searchRequests.length);
|
|
545
|
+
// Process search requests and collect results
|
|
546
|
+
const denseResults = new Map();
|
|
547
|
+
const sparseResults = new Map();
|
|
548
|
+
for (const request of searchRequests) {
|
|
549
|
+
if (request.anns_field === 'vector' || request.anns_field === 'dense') {
|
|
550
|
+
this.performDenseSearch(collection, request.data, limit, denseResults);
|
|
551
|
+
}
|
|
552
|
+
else if (request.anns_field === 'sparse' || request.anns_field === 'sparse_vector') {
|
|
553
|
+
this.performSparseSearch(collection, request.data, sparseResults);
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
// Apply RRF reranking
|
|
557
|
+
const rrfResults = this.applyRRF(collection, denseResults, sparseResults, options);
|
|
558
|
+
console.log('[FaissDB] ✅ Hybrid search results:', rrfResults.length);
|
|
559
|
+
return rrfResults.slice(0, limit);
|
|
560
|
+
}
|
|
561
|
+
/**
|
|
562
|
+
* Perform dense vector search using FAISS index
|
|
563
|
+
*/
|
|
564
|
+
performDenseSearch(collection, queryVector, limit, results) {
|
|
565
|
+
const ntotal = collection.index.ntotal();
|
|
566
|
+
if (ntotal === 0)
|
|
567
|
+
return;
|
|
568
|
+
const topK = Math.min(limit * 2, ntotal);
|
|
569
|
+
const searchResults = collection.index.search(queryVector, topK);
|
|
570
|
+
const documentsArray = Array.from(collection.documents.values());
|
|
571
|
+
for (let i = 0; i < searchResults.labels.length; i++) {
|
|
572
|
+
const idx = searchResults.labels[i];
|
|
573
|
+
const distance = searchResults.distances[i];
|
|
574
|
+
if (idx >= 0 && idx < documentsArray.length) {
|
|
575
|
+
const doc = documentsArray[idx];
|
|
576
|
+
const score = 1 / (1 + distance);
|
|
577
|
+
results.set(doc.id, score);
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
/**
|
|
582
|
+
* Perform sparse search using BM25
|
|
583
|
+
*/
|
|
584
|
+
performSparseSearch(collection, queryText, results) {
|
|
585
|
+
if (!collection.bm25)
|
|
586
|
+
return;
|
|
587
|
+
// Generate query vector once (outside the loop)
|
|
588
|
+
const queryVector = collection.bm25.generate(queryText);
|
|
589
|
+
const queryMap = new Map();
|
|
590
|
+
for (let i = 0; i < queryVector.indices.length; i++) {
|
|
591
|
+
queryMap.set(queryVector.indices[i], queryVector.values[i]);
|
|
592
|
+
}
|
|
593
|
+
// Score all documents
|
|
594
|
+
for (const doc of collection.documents.values()) {
|
|
595
|
+
const score = this.calculateSparseScore(collection.bm25, doc.content, queryMap);
|
|
596
|
+
if (score > 0) {
|
|
597
|
+
results.set(doc.id, score);
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
/**
|
|
602
|
+
* Calculate sparse vector dot product score
|
|
603
|
+
*/
|
|
604
|
+
calculateSparseScore(bm25, content, queryMap) {
|
|
605
|
+
const sparseVector = bm25.generate(content);
|
|
606
|
+
let score = 0;
|
|
607
|
+
for (let i = 0; i < sparseVector.indices.length; i++) {
|
|
608
|
+
const idx = sparseVector.indices[i];
|
|
609
|
+
const val = sparseVector.values[i];
|
|
610
|
+
const queryVal = queryMap.get(idx);
|
|
611
|
+
if (queryVal !== undefined) {
|
|
612
|
+
score += val * queryVal;
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
return score;
|
|
616
|
+
}
|
|
617
|
+
/**
|
|
618
|
+
* Pre-compute ranks from scores (O(n log n) instead of O(n²))
|
|
619
|
+
*/
|
|
620
|
+
computeRanks(scores) {
|
|
621
|
+
const ranks = new Map();
|
|
622
|
+
const sorted = Array.from(scores.entries()).sort((a, b) => b[1] - a[1]);
|
|
623
|
+
sorted.forEach(([id], index) => ranks.set(id, index + 1));
|
|
624
|
+
return ranks;
|
|
625
|
+
}
|
|
626
|
+
/**
|
|
627
|
+
* Apply Reciprocal Rank Fusion (RRF) reranking
|
|
628
|
+
*/
|
|
629
|
+
applyRRF(collection, denseResults, sparseResults, options) {
|
|
630
|
+
const k = options?.rerank?.params?.k || 60;
|
|
631
|
+
// Pre-compute ranks once (O(n log n) total instead of O(n²))
|
|
632
|
+
const denseRanks = this.computeRanks(denseResults);
|
|
633
|
+
const sparseRanks = this.computeRanks(sparseResults);
|
|
634
|
+
// Combine all document IDs and calculate RRF scores
|
|
635
|
+
const allDocIds = new Set([...denseResults.keys(), ...sparseResults.keys()]);
|
|
636
|
+
const rrfScores = [];
|
|
637
|
+
for (const docId of allDocIds) {
|
|
638
|
+
let rrfScore = 0;
|
|
639
|
+
const denseRank = denseRanks.get(docId);
|
|
640
|
+
const sparseRank = sparseRanks.get(docId);
|
|
641
|
+
if (denseRank !== undefined) {
|
|
642
|
+
rrfScore += 1 / (k + denseRank);
|
|
643
|
+
}
|
|
644
|
+
if (sparseRank !== undefined) {
|
|
645
|
+
rrfScore += 1 / (k + sparseRank);
|
|
646
|
+
}
|
|
647
|
+
rrfScores.push([docId, rrfScore]);
|
|
648
|
+
}
|
|
649
|
+
// Sort by RRF score and convert to results
|
|
650
|
+
rrfScores.sort((a, b) => b[1] - a[1]);
|
|
651
|
+
const results = [];
|
|
652
|
+
for (const [docId, score] of rrfScores) {
|
|
653
|
+
const doc = collection.documents.get(docId);
|
|
654
|
+
if (doc) {
|
|
655
|
+
results.push({
|
|
656
|
+
document: {
|
|
657
|
+
id: doc.id,
|
|
658
|
+
vector: [],
|
|
659
|
+
content: doc.content,
|
|
660
|
+
relativePath: doc.relativePath,
|
|
661
|
+
startLine: doc.startLine,
|
|
662
|
+
endLine: doc.endLine,
|
|
663
|
+
fileExtension: doc.fileExtension,
|
|
664
|
+
metadata: doc.metadata,
|
|
665
|
+
},
|
|
666
|
+
score,
|
|
667
|
+
});
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
return results;
|
|
671
|
+
}
|
|
672
|
+
/**
|
|
673
|
+
* Delete documents by IDs
|
|
674
|
+
*
|
|
675
|
+
* ⚠️ NOT IMPLEMENTED: FAISS does not support document deletion
|
|
676
|
+
*
|
|
677
|
+
* The FAISS IndexFlatL2 library does not provide a way to remove vectors
|
|
678
|
+
* from an existing index. To fully remove documents, you must:
|
|
679
|
+
*
|
|
680
|
+
* 1. Drop the collection using dropCollection()
|
|
681
|
+
* 2. Recreate it using createCollection() or createHybridCollection()
|
|
682
|
+
* 3. Re-insert all documents except the ones you want to delete
|
|
683
|
+
*
|
|
684
|
+
* @throws Error Always throws - deletion is not supported
|
|
685
|
+
* @param collectionName Collection name
|
|
686
|
+
* @param ids Document IDs to delete (not used)
|
|
687
|
+
*/
|
|
688
|
+
async delete(collectionName, ids) {
|
|
689
|
+
await this.ensureInitialized();
|
|
690
|
+
await this.ensureLoaded(collectionName);
|
|
691
|
+
console.error(`[FaissDB] ❌ FAISS does not support document deletion`);
|
|
692
|
+
console.error(`[FaissDB] ❌ Attempted to delete ${ids.length} document(s) from collection '${collectionName}'`);
|
|
693
|
+
throw new Error(`FAISS does not support document deletion. `
|
|
694
|
+
+ `To remove documents from collection '${collectionName}', you must:\n`
|
|
695
|
+
+ ` 1. Drop the collection using dropCollection()\n`
|
|
696
|
+
+ ` 2. Recreate it using createCollection() or createHybridCollection()\n`
|
|
697
|
+
+ ` 3. Re-insert all documents except the ones you want to delete\n\n`
|
|
698
|
+
+ `Attempted to delete document IDs: ${ids.join(', ')}`);
|
|
699
|
+
}
|
|
700
|
+
/**
|
|
701
|
+
* Query documents with filter conditions
|
|
702
|
+
*
|
|
703
|
+
* ⚠️ LIMITATION: Filter parameter is currently ignored
|
|
704
|
+
*
|
|
705
|
+
* This method returns ALL documents in the collection (up to limit),
|
|
706
|
+
* not filtered results. Filter parsing is not yet implemented for FAISS.
|
|
707
|
+
*
|
|
708
|
+
* @param collectionName Collection name
|
|
709
|
+
* @param filter Filter expression (currently ignored - returns all documents)
|
|
710
|
+
* @param outputFields Fields to return in results
|
|
711
|
+
* @param limit Maximum number of results (only limit is enforced)
|
|
712
|
+
* @returns All documents with specified fields (up to limit)
|
|
713
|
+
*/
|
|
714
|
+
async query(collectionName, filter, outputFields, limit) {
|
|
715
|
+
await this.ensureInitialized();
|
|
716
|
+
await this.ensureLoaded(collectionName);
|
|
717
|
+
const collection = this.collections.get(collectionName);
|
|
718
|
+
if (!collection) {
|
|
719
|
+
throw new Error(`Collection ${collectionName} not found`);
|
|
720
|
+
}
|
|
721
|
+
if (filter && filter.trim() !== '') {
|
|
722
|
+
console.warn(`[FaissDB] ⚠️ Query filters are not implemented. Filter '${filter}' will be ignored.`);
|
|
723
|
+
console.warn(`[FaissDB] ⚠️ All documents will be returned (up to limit). Consider using another vector database if filtering is required.`);
|
|
724
|
+
}
|
|
725
|
+
console.log('[FaissDB] 🔍 Querying documents (no filter support)');
|
|
726
|
+
const results = [];
|
|
727
|
+
for (const doc of collection.documents.values()) {
|
|
728
|
+
const result = {};
|
|
729
|
+
for (const field of outputFields) {
|
|
730
|
+
if (field === 'id')
|
|
731
|
+
result.id = doc.id;
|
|
732
|
+
else if (field === 'content')
|
|
733
|
+
result.content = doc.content;
|
|
734
|
+
else if (field === 'relativePath')
|
|
735
|
+
result.relativePath = doc.relativePath;
|
|
736
|
+
else if (field === 'startLine')
|
|
737
|
+
result.startLine = doc.startLine;
|
|
738
|
+
else if (field === 'endLine')
|
|
739
|
+
result.endLine = doc.endLine;
|
|
740
|
+
else if (field === 'fileExtension')
|
|
741
|
+
result.fileExtension = doc.fileExtension;
|
|
742
|
+
else if (doc.metadata[field] !== undefined) {
|
|
743
|
+
result[field] = doc.metadata[field];
|
|
744
|
+
}
|
|
745
|
+
}
|
|
746
|
+
results.push(result);
|
|
747
|
+
if (limit && results.length >= limit) {
|
|
748
|
+
break;
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
return results;
|
|
752
|
+
}
|
|
753
|
+
/**
|
|
754
|
+
* Check collection limit
|
|
755
|
+
* FAISS has no inherent collection limit (only limited by disk space)
|
|
756
|
+
*/
|
|
757
|
+
async checkCollectionLimit() {
|
|
758
|
+
return true;
|
|
759
|
+
}
|
|
760
|
+
}
|
|
761
|
+
exports.FaissVectorDatabase = FaissVectorDatabase;
|
|
762
|
+
//# sourceMappingURL=faiss-vectordb.js.map
|