@mcampa/ai-context-core 0.0.2 → 0.1.0-beta.ff0e631
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -47
- package/dist/.tsbuildinfo +1 -1
- package/dist/context.d.ts +30 -5
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +160 -16
- package/dist/context.js.map +1 -1
- package/dist/embedding/base-embedding.d.ts.map +1 -1
- package/dist/embedding/base-embedding.js +4 -0
- package/dist/embedding/base-embedding.js.map +1 -1
- package/dist/embedding/gemini-embedding.d.ts +43 -1
- package/dist/embedding/gemini-embedding.d.ts.map +1 -1
- package/dist/embedding/gemini-embedding.js +164 -26
- package/dist/embedding/gemini-embedding.js.map +1 -1
- package/dist/embedding/huggingface-embedding.d.ts +70 -0
- package/dist/embedding/huggingface-embedding.d.ts.map +1 -0
- package/dist/embedding/huggingface-embedding.js +270 -0
- package/dist/embedding/huggingface-embedding.js.map +1 -0
- package/dist/embedding/index.d.ts +3 -2
- package/dist/embedding/index.d.ts.map +1 -1
- package/dist/embedding/index.js +3 -2
- package/dist/embedding/index.js.map +1 -1
- package/dist/embedding/ollama-embedding.d.ts +2 -1
- package/dist/embedding/ollama-embedding.d.ts.map +1 -1
- package/dist/embedding/ollama-embedding.js +2 -3
- package/dist/embedding/ollama-embedding.js.map +1 -1
- package/dist/embedding/openai-embedding.d.ts +2 -1
- package/dist/embedding/openai-embedding.d.ts.map +1 -1
- package/dist/embedding/openai-embedding.js +3 -3
- package/dist/embedding/openai-embedding.js.map +1 -1
- package/dist/embedding/voyageai-embedding.d.ts +2 -1
- package/dist/embedding/voyageai-embedding.d.ts.map +1 -1
- package/dist/embedding/voyageai-embedding.js +2 -2
- package/dist/embedding/voyageai-embedding.js.map +1 -1
- package/dist/index.d.ts +4 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -4
- package/dist/index.js.map +1 -1
- package/dist/splitter/ast-splitter.d.ts +1 -1
- package/dist/splitter/ast-splitter.d.ts.map +1 -1
- package/dist/splitter/ast-splitter.js +7 -7
- package/dist/splitter/ast-splitter.js.map +1 -1
- package/dist/splitter/index.d.ts +4 -4
- package/dist/splitter/index.d.ts.map +1 -1
- package/dist/splitter/index.js +1 -1
- package/dist/splitter/index.js.map +1 -1
- package/dist/splitter/langchain-splitter.d.ts +1 -1
- package/dist/splitter/langchain-splitter.d.ts.map +1 -1
- package/dist/splitter/langchain-splitter.js +2 -2
- package/dist/splitter/langchain-splitter.js.map +1 -1
- package/dist/sync/merkle.js +1 -1
- package/dist/sync/merkle.js.map +1 -1
- package/dist/sync/synchronizer.js +6 -6
- package/dist/sync/synchronizer.js.map +1 -1
- package/dist/utils/env-manager.js +4 -4
- package/dist/utils/env-manager.js.map +1 -1
- package/dist/vectordb/base/base-vector-database.d.ts +58 -0
- package/dist/vectordb/base/base-vector-database.d.ts.map +1 -0
- package/dist/vectordb/base/base-vector-database.js +32 -0
- package/dist/vectordb/base/base-vector-database.js.map +1 -0
- package/dist/vectordb/factory.d.ts +113 -0
- package/dist/vectordb/factory.d.ts.map +1 -0
- package/dist/vectordb/factory.js +170 -0
- package/dist/vectordb/factory.js.map +1 -0
- package/dist/vectordb/faiss-vectordb.d.ts +162 -0
- package/dist/vectordb/faiss-vectordb.d.ts.map +1 -0
- package/dist/vectordb/faiss-vectordb.js +777 -0
- package/dist/vectordb/faiss-vectordb.js.map +1 -0
- package/dist/vectordb/index.d.ts +13 -4
- package/dist/vectordb/index.d.ts.map +1 -1
- package/dist/vectordb/index.js +39 -5
- package/dist/vectordb/index.js.map +1 -1
- package/dist/vectordb/libsql-vectordb.d.ts +170 -0
- package/dist/vectordb/libsql-vectordb.d.ts.map +1 -0
- package/dist/vectordb/libsql-vectordb.js +837 -0
- package/dist/vectordb/libsql-vectordb.js.map +1 -0
- package/dist/vectordb/milvus-restful-vectordb.d.ts +12 -11
- package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -1
- package/dist/vectordb/milvus-restful-vectordb.js +29 -31
- package/dist/vectordb/milvus-restful-vectordb.js.map +1 -1
- package/dist/vectordb/milvus-vectordb.d.ts +12 -12
- package/dist/vectordb/milvus-vectordb.d.ts.map +1 -1
- package/dist/vectordb/milvus-vectordb.js +31 -28
- package/dist/vectordb/milvus-vectordb.js.map +1 -1
- package/dist/vectordb/qdrant-vectordb.d.ts +149 -0
- package/dist/vectordb/qdrant-vectordb.d.ts.map +1 -0
- package/dist/vectordb/qdrant-vectordb.js +856 -0
- package/dist/vectordb/qdrant-vectordb.js.map +1 -0
- package/dist/vectordb/sparse/index.d.ts +4 -0
- package/dist/vectordb/sparse/index.d.ts.map +1 -0
- package/dist/vectordb/sparse/index.js +23 -0
- package/dist/vectordb/sparse/index.js.map +1 -0
- package/dist/vectordb/sparse/simple-bm25.d.ts +115 -0
- package/dist/vectordb/sparse/simple-bm25.d.ts.map +1 -0
- package/dist/vectordb/sparse/simple-bm25.js +249 -0
- package/dist/vectordb/sparse/simple-bm25.js.map +1 -0
- package/dist/vectordb/sparse/sparse-vector-generator.d.ts +54 -0
- package/dist/vectordb/sparse/sparse-vector-generator.d.ts.map +1 -0
- package/dist/vectordb/sparse/sparse-vector-generator.js +3 -0
- package/dist/vectordb/sparse/sparse-vector-generator.js.map +1 -0
- package/dist/vectordb/sparse/types.d.ts +38 -0
- package/dist/vectordb/sparse/types.d.ts.map +1 -0
- package/dist/vectordb/sparse/types.js +3 -0
- package/dist/vectordb/sparse/types.js.map +1 -0
- package/dist/vectordb/types.d.ts +16 -16
- package/dist/vectordb/types.d.ts.map +1 -1
- package/dist/vectordb/types.js.map +1 -1
- package/dist/vectordb/zilliz-utils.js +3 -3
- package/dist/vectordb/zilliz-utils.js.map +1 -1
- package/package.json +32 -22
|
@@ -0,0 +1,856 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Re-exports property `key` of `source` on `target` (optionally under `alias`).
// Reuses a helper already present on `this` when one exists.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    if (alias === undefined) alias = key;
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    // Decide whether the original descriptor has to be swapped for a forwarding getter.
    var needsGetter;
    if (!descriptor) {
        needsGetter = true;
    }
    else if ("get" in descriptor) {
        needsGetter = !source.__esModule;
    }
    else {
        needsGetter = descriptor.writable || descriptor.configurable;
    }
    if (needsGetter) {
        descriptor = { enumerable: true, get: function () { return source[key]; } };
    }
    Object.defineProperty(target, alias, descriptor);
}) : (function (target, source, key, alias) {
    // Fallback when Object.create is unavailable: plain value copy.
    if (alias === undefined) alias = key;
    target[alias] = source[key];
}));
|
|
13
|
+
// Attaches `value` as the "default" member of `target`.
// Reuses a helper already present on `this` when one exists.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (target, value) {
    var descriptor = { enumerable: true, value: value };
    Object.defineProperty(target, "default", descriptor);
}) : function (target, value) {
    // Fallback when Object.create is unavailable: plain assignment.
    target["default"] = value;
});
|
|
18
|
+
// Wraps a required module in a namespace-like object: modules flagged with
// __esModule are returned untouched; otherwise every own key except "default"
// is bound onto a fresh object and the module itself becomes its "default".
var __importStar = (this && this.__importStar) || (function () {
    // Replaces itself on first call with the key-listing strategy to use from then on.
    var ownKeys = function (subject) {
        ownKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var key in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, key)) {
                    names[names.length] = key;
                }
            }
            return names;
        };
        return ownKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var result = {};
        if (mod != null) {
            var keys = ownKeys(mod);
            for (var i = 0; i < keys.length; i++) {
                if (keys[i] !== "default") {
                    __createBinding(result, mod, keys[i]);
                }
            }
        }
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.QdrantVectorDatabase = void 0;
|
|
37
|
+
const fs = __importStar(require("node:fs/promises"));
|
|
38
|
+
const os = __importStar(require("node:os"));
|
|
39
|
+
const path = __importStar(require("node:path"));
|
|
40
|
+
const js_client_grpc_1 = require("@qdrant/js-client-grpc");
|
|
41
|
+
const base_vector_database_1 = require("./base/base-vector-database");
|
|
42
|
+
const simple_bm25_1 = require("./sparse/simple-bm25");
|
|
43
|
+
/**
|
|
44
|
+
* Qdrant Vector Database implementation using gRPC client
|
|
45
|
+
*
|
|
46
|
+
* Features:
|
|
47
|
+
* - Named vectors (dense + sparse)
|
|
48
|
+
* - Hybrid search with RRF fusion
|
|
49
|
+
* - BM25 sparse vector generation
|
|
50
|
+
* - Self-hosted and cloud support
|
|
51
|
+
*
|
|
52
|
+
* Architecture:
|
|
53
|
+
* - Dense vectors: From embedding providers (OpenAI, VoyageAI, etc.)
|
|
54
|
+
* - Sparse vectors: Generated using SimpleBM25 for keyword matching
|
|
55
|
+
* - Hybrid search: Combines both using Qdrant's prefetch + RRF
|
|
56
|
+
*/
|
|
57
|
+
class QdrantVectorDatabase extends base_vector_database_1.BaseVectorDatabase {
|
|
58
|
+
constructor(config) {
|
|
59
|
+
super(config);
|
|
60
|
+
this.client = null;
|
|
61
|
+
// Named vector configurations
|
|
62
|
+
this.DENSE_VECTOR_NAME = "dense";
|
|
63
|
+
this.SPARSE_VECTOR_NAME = "sparse";
|
|
64
|
+
this.bm25Generator = new simple_bm25_1.SimpleBM25(config.bm25Config);
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Initialize Qdrant client connection
|
|
68
|
+
*/
|
|
69
|
+
async initialize() {
|
|
70
|
+
const resolvedAddress = await this.resolveAddress();
|
|
71
|
+
await this.initializeClient(resolvedAddress);
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Create Qdrant client instance
|
|
75
|
+
*/
|
|
76
|
+
async initializeClient(address) {
|
|
77
|
+
console.log("[QdrantDB] 🔌 Connecting to Qdrant at:", address);
|
|
78
|
+
// Parse address to extract host and port
|
|
79
|
+
const url = new URL(address.startsWith("http") ? address : `http://${address}`);
|
|
80
|
+
const host = url.hostname;
|
|
81
|
+
const port = url.port ? Number.parseInt(url.port) : 6334;
|
|
82
|
+
this.client = new js_client_grpc_1.QdrantClient({
|
|
83
|
+
host,
|
|
84
|
+
port,
|
|
85
|
+
apiKey: this.config.apiKey,
|
|
86
|
+
timeout: this.config.timeout || 10000,
|
|
87
|
+
});
|
|
88
|
+
// Suppress MaxListenersExceededWarning for gRPC connections
|
|
89
|
+
// Multiple operations on the same collection trigger multiple listeners
|
|
90
|
+
// This is normal for gRPC HTTP/2 multiplexing and not a memory leak
|
|
91
|
+
if (this.client &&
|
|
92
|
+
typeof this.client.setMaxListeners === "function") {
|
|
93
|
+
this.client.setMaxListeners(50);
|
|
94
|
+
}
|
|
95
|
+
console.log("[QdrantDB] ✅ Connected to Qdrant successfully");
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Resolve address from config
|
|
99
|
+
* Unlike Milvus, Qdrant doesn't have auto-provisioning
|
|
100
|
+
*/
|
|
101
|
+
async resolveAddress() {
|
|
102
|
+
if (!this.config.address) {
|
|
103
|
+
throw new Error("Qdrant address is required. Set QDRANT_URL environment variable.");
|
|
104
|
+
}
|
|
105
|
+
return this.config.address;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Override to add client null check
|
|
109
|
+
*/
|
|
110
|
+
async ensureInitialized() {
|
|
111
|
+
await super.ensureInitialized();
|
|
112
|
+
if (!this.client) {
|
|
113
|
+
throw new Error("QdrantClient is not initialized");
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Qdrant doesn't require explicit collection loading
|
|
118
|
+
* Collections are loaded on-demand automatically
|
|
119
|
+
*/
|
|
120
|
+
async ensureLoaded(_collectionName) {
|
|
121
|
+
// No-op for Qdrant - collections are loaded automatically
|
|
122
|
+
return Promise.resolve();
|
|
123
|
+
}
|
|
124
|
+
/**
|
|
125
|
+
* Create collection with dense vectors only
|
|
126
|
+
*/
|
|
127
|
+
async createCollection(collectionName, dimension, _description) {
|
|
128
|
+
await this.ensureInitialized();
|
|
129
|
+
console.log("[QdrantDB] 🔧 Creating collection:", collectionName);
|
|
130
|
+
console.log("[QdrantDB] 📏 Vector dimension:", dimension);
|
|
131
|
+
await this.client.api("collections").create({
|
|
132
|
+
collectionName,
|
|
133
|
+
vectorsConfig: {
|
|
134
|
+
config: {
|
|
135
|
+
case: "paramsMap",
|
|
136
|
+
value: {
|
|
137
|
+
map: {
|
|
138
|
+
[this.DENSE_VECTOR_NAME]: {
|
|
139
|
+
size: BigInt(dimension),
|
|
140
|
+
distance: 1, // Cosine = 1
|
|
141
|
+
},
|
|
142
|
+
},
|
|
143
|
+
},
|
|
144
|
+
},
|
|
145
|
+
},
|
|
146
|
+
});
|
|
147
|
+
console.log("[QdrantDB] ✅ Collection created successfully");
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Create collection with hybrid search support (dense + sparse vectors)
|
|
151
|
+
*/
|
|
152
|
+
async createHybridCollection(collectionName, dimension, _description) {
|
|
153
|
+
await this.ensureInitialized();
|
|
154
|
+
console.log("[QdrantDB] 🔧 Creating hybrid collection:", collectionName);
|
|
155
|
+
console.log("[QdrantDB] 📏 Dense vector dimension:", dimension);
|
|
156
|
+
await this.client.api("collections").create({
|
|
157
|
+
collectionName,
|
|
158
|
+
vectorsConfig: {
|
|
159
|
+
config: {
|
|
160
|
+
case: "paramsMap",
|
|
161
|
+
value: {
|
|
162
|
+
map: {
|
|
163
|
+
[this.DENSE_VECTOR_NAME]: {
|
|
164
|
+
size: BigInt(dimension),
|
|
165
|
+
distance: 1, // Cosine = 1
|
|
166
|
+
},
|
|
167
|
+
},
|
|
168
|
+
},
|
|
169
|
+
},
|
|
170
|
+
},
|
|
171
|
+
sparseVectorsConfig: {
|
|
172
|
+
map: {
|
|
173
|
+
[this.SPARSE_VECTOR_NAME]: {
|
|
174
|
+
modifier: js_client_grpc_1.Modifier.Idf,
|
|
175
|
+
},
|
|
176
|
+
},
|
|
177
|
+
},
|
|
178
|
+
});
|
|
179
|
+
console.log("[QdrantDB] ✅ Hybrid collection created successfully");
|
|
180
|
+
}
|
|
181
|
+
/**
|
|
182
|
+
* Drop collection
|
|
183
|
+
*/
|
|
184
|
+
async dropCollection(collectionName) {
|
|
185
|
+
await this.ensureInitialized();
|
|
186
|
+
console.log("[QdrantDB] 🗑️ Dropping collection:", collectionName);
|
|
187
|
+
await this.client.api("collections").delete({
|
|
188
|
+
collectionName,
|
|
189
|
+
});
|
|
190
|
+
console.log("[QdrantDB] ✅ Collection dropped successfully");
|
|
191
|
+
}
|
|
192
|
+
/**
|
|
193
|
+
* Check if collection exists
|
|
194
|
+
*/
|
|
195
|
+
async hasCollection(collectionName) {
|
|
196
|
+
await this.ensureInitialized();
|
|
197
|
+
try {
|
|
198
|
+
const response = await this.client.api("collections").get({
|
|
199
|
+
collectionName,
|
|
200
|
+
});
|
|
201
|
+
return response.result !== undefined;
|
|
202
|
+
}
|
|
203
|
+
catch (error) {
|
|
204
|
+
// Handle gRPC NOT_FOUND error (code 5) or check error messages
|
|
205
|
+
if (error.code === 5 || // gRPC NOT_FOUND status code
|
|
206
|
+
error.rawMessage?.includes("not found") ||
|
|
207
|
+
error.rawMessage?.includes("does not exist") ||
|
|
208
|
+
error.message?.includes("not found") ||
|
|
209
|
+
error.message?.includes("does not exist")) {
|
|
210
|
+
return false;
|
|
211
|
+
}
|
|
212
|
+
throw error;
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
/**
|
|
216
|
+
* List all collections
|
|
217
|
+
*/
|
|
218
|
+
async listCollections() {
|
|
219
|
+
await this.ensureInitialized();
|
|
220
|
+
const response = await this.client.api("collections").list({});
|
|
221
|
+
return response.collections.map((c) => c.name);
|
|
222
|
+
}
|
|
223
|
+
/**
|
|
224
|
+
* Insert documents with dense vectors only
|
|
225
|
+
*/
|
|
226
|
+
async insert(collectionName, documents) {
|
|
227
|
+
await this.ensureInitialized();
|
|
228
|
+
await this.ensureLoaded(collectionName);
|
|
229
|
+
console.log("[QdrantDB] 📝 Inserting", documents.length, "documents into:", collectionName);
|
|
230
|
+
const points = documents.map((doc) => ({
|
|
231
|
+
id: {
|
|
232
|
+
pointIdOptions: {
|
|
233
|
+
case: "num",
|
|
234
|
+
value: this.convertToNumericId(doc.id),
|
|
235
|
+
},
|
|
236
|
+
},
|
|
237
|
+
vectors: {
|
|
238
|
+
vectorsOptions: {
|
|
239
|
+
case: "vectors",
|
|
240
|
+
value: {
|
|
241
|
+
vectors: {
|
|
242
|
+
[this.DENSE_VECTOR_NAME]: {
|
|
243
|
+
vector: {
|
|
244
|
+
case: "dense",
|
|
245
|
+
value: {
|
|
246
|
+
data: doc.vector,
|
|
247
|
+
},
|
|
248
|
+
},
|
|
249
|
+
},
|
|
250
|
+
},
|
|
251
|
+
},
|
|
252
|
+
},
|
|
253
|
+
},
|
|
254
|
+
payload: {
|
|
255
|
+
content: { kind: { case: "stringValue", value: doc.content } },
|
|
256
|
+
relativePath: {
|
|
257
|
+
kind: { case: "stringValue", value: doc.relativePath },
|
|
258
|
+
},
|
|
259
|
+
startLine: {
|
|
260
|
+
kind: { case: "integerValue", value: BigInt(doc.startLine) },
|
|
261
|
+
},
|
|
262
|
+
endLine: {
|
|
263
|
+
kind: { case: "integerValue", value: BigInt(doc.endLine) },
|
|
264
|
+
},
|
|
265
|
+
fileExtension: {
|
|
266
|
+
kind: { case: "stringValue", value: doc.fileExtension },
|
|
267
|
+
},
|
|
268
|
+
metadata: {
|
|
269
|
+
kind: {
|
|
270
|
+
case: "stringValue",
|
|
271
|
+
value: JSON.stringify(doc.metadata),
|
|
272
|
+
},
|
|
273
|
+
},
|
|
274
|
+
},
|
|
275
|
+
}));
|
|
276
|
+
await this.client.api("points").upsert({
|
|
277
|
+
collectionName,
|
|
278
|
+
wait: true,
|
|
279
|
+
points,
|
|
280
|
+
});
|
|
281
|
+
console.log("[QdrantDB] ✅ Documents inserted successfully");
|
|
282
|
+
}
|
|
283
|
+
/**
|
|
284
|
+
* Insert documents with hybrid vectors (dense + sparse)
|
|
285
|
+
*/
|
|
286
|
+
async insertHybrid(collectionName, documents) {
|
|
287
|
+
await this.ensureInitialized();
|
|
288
|
+
await this.ensureLoaded(collectionName);
|
|
289
|
+
console.log("[QdrantDB] 📝 Inserting", documents.length, "hybrid documents into:", collectionName);
|
|
290
|
+
// Ensure BM25 is trained before insertion
|
|
291
|
+
if (!this.bm25Generator.isTrained()) {
|
|
292
|
+
// The BM25 model must be trained on the full corpus before insertion for accurate sparse vectors.
|
|
293
|
+
// Training on a single batch leads to incorrect IDF scores and poor search quality.
|
|
294
|
+
throw new Error("BM25 generator is not trained. The caller must explicitly train it via `getBM25Generator().learn(corpus)` before calling `insertHybrid`.");
|
|
295
|
+
}
|
|
296
|
+
// Generate sparse vectors for all documents
|
|
297
|
+
const sparseVectors = documents.map((doc) => this.bm25Generator.generate(doc.content));
|
|
298
|
+
const points = documents.map((doc, index) => ({
|
|
299
|
+
id: {
|
|
300
|
+
pointIdOptions: {
|
|
301
|
+
case: "num",
|
|
302
|
+
value: this.convertToNumericId(doc.id),
|
|
303
|
+
},
|
|
304
|
+
},
|
|
305
|
+
vectors: {
|
|
306
|
+
vectorsOptions: {
|
|
307
|
+
case: "vectors",
|
|
308
|
+
value: {
|
|
309
|
+
vectors: {
|
|
310
|
+
[this.DENSE_VECTOR_NAME]: {
|
|
311
|
+
vector: {
|
|
312
|
+
case: "dense",
|
|
313
|
+
value: {
|
|
314
|
+
data: doc.vector,
|
|
315
|
+
},
|
|
316
|
+
},
|
|
317
|
+
},
|
|
318
|
+
[this.SPARSE_VECTOR_NAME]: {
|
|
319
|
+
vector: {
|
|
320
|
+
case: "sparse",
|
|
321
|
+
value: {
|
|
322
|
+
indices: sparseVectors[index].indices,
|
|
323
|
+
values: sparseVectors[index].values,
|
|
324
|
+
},
|
|
325
|
+
},
|
|
326
|
+
},
|
|
327
|
+
},
|
|
328
|
+
},
|
|
329
|
+
},
|
|
330
|
+
},
|
|
331
|
+
payload: {
|
|
332
|
+
content: { kind: { case: "stringValue", value: doc.content } },
|
|
333
|
+
relativePath: {
|
|
334
|
+
kind: { case: "stringValue", value: doc.relativePath },
|
|
335
|
+
},
|
|
336
|
+
startLine: {
|
|
337
|
+
kind: { case: "integerValue", value: BigInt(doc.startLine) },
|
|
338
|
+
},
|
|
339
|
+
endLine: {
|
|
340
|
+
kind: { case: "integerValue", value: BigInt(doc.endLine) },
|
|
341
|
+
},
|
|
342
|
+
fileExtension: {
|
|
343
|
+
kind: { case: "stringValue", value: doc.fileExtension },
|
|
344
|
+
},
|
|
345
|
+
metadata: {
|
|
346
|
+
kind: {
|
|
347
|
+
case: "stringValue",
|
|
348
|
+
value: JSON.stringify(doc.metadata),
|
|
349
|
+
},
|
|
350
|
+
},
|
|
351
|
+
},
|
|
352
|
+
}));
|
|
353
|
+
await this.client.api("points").upsert({
|
|
354
|
+
collectionName,
|
|
355
|
+
wait: true,
|
|
356
|
+
points,
|
|
357
|
+
});
|
|
358
|
+
console.log("[QdrantDB] ✅ Hybrid documents inserted successfully");
|
|
359
|
+
}
|
|
360
|
+
/**
|
|
361
|
+
* Search with dense vectors only
|
|
362
|
+
*/
|
|
363
|
+
async search(collectionName, queryVector, options) {
|
|
364
|
+
await this.ensureInitialized();
|
|
365
|
+
await this.ensureLoaded(collectionName);
|
|
366
|
+
console.log("[QdrantDB] 🔍 Searching in collection:", collectionName);
|
|
367
|
+
const searchParams = {
|
|
368
|
+
collectionName,
|
|
369
|
+
vector: queryVector,
|
|
370
|
+
vectorName: this.DENSE_VECTOR_NAME,
|
|
371
|
+
limit: BigInt(options?.topK || 10),
|
|
372
|
+
// For gRPC API, omitting withPayload returns all payload fields
|
|
373
|
+
// Using withPayload causes "No PayloadSelector" error
|
|
374
|
+
};
|
|
375
|
+
// Apply filter if provided
|
|
376
|
+
if (options?.filterExpr && options.filterExpr.trim().length > 0) {
|
|
377
|
+
searchParams.filter = this.parseFilterExpression(options.filterExpr);
|
|
378
|
+
}
|
|
379
|
+
const results = await this.client.api("points").search(searchParams);
|
|
380
|
+
return results.result.map((result) => ({
|
|
381
|
+
document: {
|
|
382
|
+
id: result.id?.str || result.id?.num?.toString() || "",
|
|
383
|
+
vector: queryVector,
|
|
384
|
+
content: result.payload?.content?.stringValue || "",
|
|
385
|
+
relativePath: result.payload?.relativePath?.stringValue || "",
|
|
386
|
+
startLine: Number(result.payload?.startLine?.integerValue || 0),
|
|
387
|
+
endLine: Number(result.payload?.endLine?.integerValue || 0),
|
|
388
|
+
fileExtension: result.payload?.fileExtension?.stringValue || "",
|
|
389
|
+
metadata: JSON.parse(result.payload?.metadata?.stringValue || "{}"),
|
|
390
|
+
},
|
|
391
|
+
score: result.score,
|
|
392
|
+
}));
|
|
393
|
+
}
|
|
394
|
+
/**
|
|
395
|
+
* Hybrid search with dense + sparse vectors using RRF fusion
|
|
396
|
+
*/
|
|
397
|
+
async hybridSearch(collectionName, searchRequests, options) {
|
|
398
|
+
await this.ensureInitialized();
|
|
399
|
+
await this.ensureLoaded(collectionName);
|
|
400
|
+
console.log("[QdrantDB] 🔍 Performing hybrid search in collection:", collectionName);
|
|
401
|
+
// Extract dense vector and query text from search requests by inspecting data types
|
|
402
|
+
const denseQueryReq = searchRequests.find((req) => Array.isArray(req.data));
|
|
403
|
+
const textQueryReq = searchRequests.find((req) => typeof req.data === "string");
|
|
404
|
+
if (!denseQueryReq || !textQueryReq) {
|
|
405
|
+
throw new Error("Hybrid search requires one dense vector request (number[] data) and one text request (string data).");
|
|
406
|
+
}
|
|
407
|
+
const denseQuery = denseQueryReq.data;
|
|
408
|
+
const textQuery = textQueryReq.data;
|
|
409
|
+
// Generate sparse vector using BM25
|
|
410
|
+
if (!this.bm25Generator.isTrained()) {
|
|
411
|
+
console.warn("[QdrantDB] ⚠️ BM25 generator not trained. Hybrid search may have reduced quality.");
|
|
412
|
+
}
|
|
413
|
+
const sparseQuery = this.bm25Generator.isTrained()
|
|
414
|
+
? this.bm25Generator.generate(textQuery)
|
|
415
|
+
: { indices: [], values: [] };
|
|
416
|
+
console.log("[QdrantDB] 🔍 Dense query vector length:", denseQuery.length);
|
|
417
|
+
console.log("[QdrantDB] 🔍 Sparse query terms:", sparseQuery.indices.length);
|
|
418
|
+
console.log("[QdrantDB] 🔍 Sparse query indices:", sparseQuery.indices.slice(0, 5));
|
|
419
|
+
console.log("[QdrantDB] 🔍 Sparse query values:", sparseQuery.values.slice(0, 5));
|
|
420
|
+
// Validate sparse query has valid data
|
|
421
|
+
if (sparseQuery.indices.length === 0 ||
|
|
422
|
+
sparseQuery.values.length === 0 ||
|
|
423
|
+
sparseQuery.indices.length !== sparseQuery.values.length) {
|
|
424
|
+
console.warn("[QdrantDB] ⚠️ Invalid or empty sparse query. Falling back to dense-only search.");
|
|
425
|
+
console.warn(`[QdrantDB] ⚠️ indices.length=${sparseQuery.indices.length}, values.length=${sparseQuery.values.length}`);
|
|
426
|
+
return await this.search(collectionName, denseQuery, {
|
|
427
|
+
topK: options?.limit || 10,
|
|
428
|
+
filterExpr: options?.filterExpr,
|
|
429
|
+
});
|
|
430
|
+
}
|
|
431
|
+
// Validate all values are positive (Qdrant requirement for sparse vectors)
|
|
432
|
+
const hasNegativeValues = sparseQuery.values.some((v) => v <= 0);
|
|
433
|
+
if (hasNegativeValues) {
|
|
434
|
+
console.error("[QdrantDB] ❌ Sparse query contains non-positive values! This should not happen.");
|
|
435
|
+
console.error("[QdrantDB] ❌ Falling back to dense-only search.");
|
|
436
|
+
return await this.search(collectionName, denseQuery, {
|
|
437
|
+
topK: options?.limit || 10,
|
|
438
|
+
filterExpr: options?.filterExpr,
|
|
439
|
+
});
|
|
440
|
+
}
|
|
441
|
+
console.log("[QdrantDB] ✅ Sparse query validated, proceeding with hybrid search");
|
|
442
|
+
// Qdrant query API with nested prefetch for hybrid search
|
|
443
|
+
// Using RRF (Reciprocal Rank Fusion) to combine sparse and dense results
|
|
444
|
+
// Structure: prefetch contains one item with nested prefetch for dense/sparse, then fusion
|
|
445
|
+
//
|
|
446
|
+
// Note: Using plain objects that match the protobuf structure defined in:
|
|
447
|
+
// @qdrant/js-client-grpc/dist/types/proto/points_pb.d.ts
|
|
448
|
+
//
|
|
449
|
+
// QueryPoints structure:
|
|
450
|
+
// - collectionName: string
|
|
451
|
+
// - prefetch: PrefetchQuery[]
|
|
452
|
+
// - limit: bigint
|
|
453
|
+
//
|
|
454
|
+
// PrefetchQuery structure:
|
|
455
|
+
// - prefetch?: PrefetchQuery[] (nested prefetches)
|
|
456
|
+
// - query?: Query (query to apply)
|
|
457
|
+
// - using?: string (vector name)
|
|
458
|
+
// - limit?: bigint
|
|
459
|
+
//
|
|
460
|
+
// Query structure (oneof variant):
|
|
461
|
+
// - variant: { case: 'nearest', value: VectorInput } | { case: 'fusion', value: Fusion } | ...
|
|
462
|
+
//
|
|
463
|
+
// VectorInput structure (oneof variant):
|
|
464
|
+
// - variant: { case: 'dense', value: DenseVector } | { case: 'sparse', value: SparseVector } | ...
|
|
465
|
+
//
|
|
466
|
+
// DenseVector: { data: number[] }
|
|
467
|
+
// SparseVector: { indices: number[], values: number[] }
|
|
468
|
+
// Fusion enum: RRF = 0, DBSF = 1
|
|
469
|
+
const queryParams = {
|
|
470
|
+
collectionName,
|
|
471
|
+
prefetch: [
|
|
472
|
+
{
|
|
473
|
+
// Dense vector prefetch
|
|
474
|
+
query: {
|
|
475
|
+
variant: {
|
|
476
|
+
case: "nearest",
|
|
477
|
+
value: {
|
|
478
|
+
variant: {
|
|
479
|
+
case: "dense",
|
|
480
|
+
value: {
|
|
481
|
+
data: denseQuery,
|
|
482
|
+
},
|
|
483
|
+
},
|
|
484
|
+
},
|
|
485
|
+
},
|
|
486
|
+
},
|
|
487
|
+
using: this.DENSE_VECTOR_NAME,
|
|
488
|
+
limit: BigInt(denseQueryReq.limit || 25),
|
|
489
|
+
},
|
|
490
|
+
{
|
|
491
|
+
// Sparse vector prefetch
|
|
492
|
+
query: {
|
|
493
|
+
variant: {
|
|
494
|
+
case: "nearest",
|
|
495
|
+
value: {
|
|
496
|
+
variant: {
|
|
497
|
+
case: "sparse",
|
|
498
|
+
value: {
|
|
499
|
+
indices: sparseQuery.indices.map((i) => Number(i)),
|
|
500
|
+
values: sparseQuery.values,
|
|
501
|
+
},
|
|
502
|
+
},
|
|
503
|
+
},
|
|
504
|
+
},
|
|
505
|
+
},
|
|
506
|
+
using: this.SPARSE_VECTOR_NAME,
|
|
507
|
+
limit: BigInt(textQueryReq.limit || 25),
|
|
508
|
+
},
|
|
509
|
+
],
|
|
510
|
+
// Fusion query to combine results from prefetches
|
|
511
|
+
query: {
|
|
512
|
+
variant: {
|
|
513
|
+
case: "fusion",
|
|
514
|
+
value: 0, // Fusion.RRF = 0
|
|
515
|
+
},
|
|
516
|
+
},
|
|
517
|
+
limit: BigInt(options?.limit || 10),
|
|
518
|
+
withPayload: {
|
|
519
|
+
selectorOptions: {
|
|
520
|
+
case: "enable",
|
|
521
|
+
value: true,
|
|
522
|
+
},
|
|
523
|
+
},
|
|
524
|
+
};
|
|
525
|
+
// Apply filter if provided
|
|
526
|
+
if (options?.filterExpr && options.filterExpr.trim().length > 0) {
|
|
527
|
+
queryParams.filter = this.parseFilterExpression(options.filterExpr);
|
|
528
|
+
}
|
|
529
|
+
const results = await this.client.api("points").query(queryParams);
|
|
530
|
+
console.log("[QdrantDB] ✅ Found", results.result.length, "results from hybrid search");
|
|
531
|
+
return results.result.map((result) => ({
|
|
532
|
+
document: {
|
|
533
|
+
id: result.id?.str || result.id?.num?.toString() || "",
|
|
534
|
+
content: result.payload?.content?.kind?.value || "",
|
|
535
|
+
vector: [],
|
|
536
|
+
relativePath: result.payload?.relativePath?.kind?.value || "",
|
|
537
|
+
startLine: Number(result.payload?.startLine?.kind?.value || 0),
|
|
538
|
+
endLine: Number(result.payload?.endLine?.kind?.value || 0),
|
|
539
|
+
fileExtension: result.payload?.fileExtension?.kind?.value || "",
|
|
540
|
+
metadata: JSON.parse(result.payload?.metadata?.kind?.value || "{}"),
|
|
541
|
+
},
|
|
542
|
+
score: result.score,
|
|
543
|
+
}));
|
|
544
|
+
}
|
|
545
|
+
/**
|
|
546
|
+
* Delete documents by IDs
|
|
547
|
+
*/
|
|
548
|
+
async delete(collectionName, ids) {
|
|
549
|
+
await this.ensureInitialized();
|
|
550
|
+
await this.ensureLoaded(collectionName);
|
|
551
|
+
console.log("[QdrantDB] 🗑️ Deleting", ids.length, "documents from:", collectionName);
|
|
552
|
+
await this.client.api("points").delete({
|
|
553
|
+
collectionName,
|
|
554
|
+
wait: true,
|
|
555
|
+
points: {
|
|
556
|
+
pointsSelectorOneOf: {
|
|
557
|
+
case: "points",
|
|
558
|
+
value: {
|
|
559
|
+
ids: ids.map((id) => ({
|
|
560
|
+
pointIdOptions: {
|
|
561
|
+
case: "num",
|
|
562
|
+
value: this.convertToNumericId(id),
|
|
563
|
+
},
|
|
564
|
+
})),
|
|
565
|
+
},
|
|
566
|
+
},
|
|
567
|
+
},
|
|
568
|
+
});
|
|
569
|
+
console.log("[QdrantDB] ✅ Documents deleted successfully");
|
|
570
|
+
}
|
|
571
|
+
/**
|
|
572
|
+
* Query documents with filter conditions
|
|
573
|
+
*/
|
|
574
|
+
async query(collectionName, filter, outputFields, limit) {
|
|
575
|
+
await this.ensureInitialized();
|
|
576
|
+
await this.ensureLoaded(collectionName);
|
|
577
|
+
console.log("[QdrantDB] 📋 Querying collection:", collectionName);
|
|
578
|
+
// Build scroll parameters
|
|
579
|
+
// For gRPC API, omitting withPayload returns all payload fields
|
|
580
|
+
// Using withPayload: true causes "No PayloadSelector" error
|
|
581
|
+
const scrollParams = {
|
|
582
|
+
collectionName,
|
|
583
|
+
limit: limit || 100,
|
|
584
|
+
withVector: false,
|
|
585
|
+
};
|
|
586
|
+
// Parse filter expression if provided
|
|
587
|
+
if (filter && filter.trim().length > 0) {
|
|
588
|
+
scrollParams.filter = this.parseFilterExpression(filter);
|
|
589
|
+
}
|
|
590
|
+
const results = await this.client.api("points").scroll(scrollParams);
|
|
591
|
+
// Dynamically map results based on requested outputFields
|
|
592
|
+
return results.result.map((point) => {
|
|
593
|
+
// Extract ID from protobuf structure
|
|
594
|
+
// In gRPC API, id can be: {pointIdOptions: {case: 'num', value: bigint}} or {case: 'str', value: string}
|
|
595
|
+
let idValue = "";
|
|
596
|
+
if (point.id?.pointIdOptions?.case === "num") {
|
|
597
|
+
idValue = point.id.pointIdOptions.value.toString();
|
|
598
|
+
}
|
|
599
|
+
else if (point.id?.pointIdOptions?.case === "str") {
|
|
600
|
+
idValue = point.id.pointIdOptions.value;
|
|
601
|
+
}
|
|
602
|
+
else if (point.id?.num !== undefined) {
|
|
603
|
+
// Fallback for backward compatibility
|
|
604
|
+
idValue = point.id.num.toString();
|
|
605
|
+
}
|
|
606
|
+
else if (point.id?.str !== undefined) {
|
|
607
|
+
idValue = point.id.str;
|
|
608
|
+
}
|
|
609
|
+
const result = {
|
|
610
|
+
id: idValue,
|
|
611
|
+
};
|
|
612
|
+
// If no specific fields requested, return all known fields
|
|
613
|
+
if (outputFields.length === 0) {
|
|
614
|
+
// In gRPC client, payload values are wrapped in {kind: {case: 'stringValue', value: '...'}}
|
|
615
|
+
result.content =
|
|
616
|
+
point.payload?.content?.kind?.value ||
|
|
617
|
+
point.payload?.content?.stringValue;
|
|
618
|
+
result.relativePath =
|
|
619
|
+
point.payload?.relativePath?.kind?.value ||
|
|
620
|
+
point.payload?.relativePath?.stringValue;
|
|
621
|
+
result.startLine = Number(point.payload?.startLine?.kind?.value ||
|
|
622
|
+
point.payload?.startLine?.integerValue ||
|
|
623
|
+
0);
|
|
624
|
+
result.endLine = Number(point.payload?.endLine?.kind?.value ||
|
|
625
|
+
point.payload?.endLine?.integerValue ||
|
|
626
|
+
0);
|
|
627
|
+
result.fileExtension =
|
|
628
|
+
point.payload?.fileExtension?.kind?.value ||
|
|
629
|
+
point.payload?.fileExtension?.stringValue;
|
|
630
|
+
const metadataStr = point.payload?.metadata?.kind?.value ||
|
|
631
|
+
point.payload?.metadata?.stringValue;
|
|
632
|
+
result.metadata = JSON.parse(metadataStr || "{}");
|
|
633
|
+
}
|
|
634
|
+
else {
|
|
635
|
+
// Only include requested fields
|
|
636
|
+
for (const field of outputFields) {
|
|
637
|
+
if (point.payload?.[field]) {
|
|
638
|
+
const value = point.payload[field];
|
|
639
|
+
// Handle different value types based on protobuf structure
|
|
640
|
+
// In gRPC client, value is wrapped in {kind: {case: 'stringValue', value: '...'}}
|
|
641
|
+
if (value.kind?.case === "stringValue") {
|
|
642
|
+
result[field] =
|
|
643
|
+
field === "metadata"
|
|
644
|
+
? JSON.parse(value.kind.value || "{}")
|
|
645
|
+
: value.kind.value;
|
|
646
|
+
}
|
|
647
|
+
else if (value.kind?.case === "integerValue") {
|
|
648
|
+
result[field] = Number(value.kind.value);
|
|
649
|
+
}
|
|
650
|
+
else if (value.kind?.case === "doubleValue") {
|
|
651
|
+
result[field] = value.kind.value;
|
|
652
|
+
}
|
|
653
|
+
else if (value.kind?.case === "boolValue") {
|
|
654
|
+
result[field] = value.kind.value;
|
|
655
|
+
}
|
|
656
|
+
// Fallback for direct value access (backward compatibility)
|
|
657
|
+
else if (value.stringValue !== undefined) {
|
|
658
|
+
result[field] =
|
|
659
|
+
field === "metadata"
|
|
660
|
+
? JSON.parse(value.stringValue || "{}")
|
|
661
|
+
: value.stringValue;
|
|
662
|
+
}
|
|
663
|
+
else if (value.integerValue !== undefined) {
|
|
664
|
+
result[field] = Number(value.integerValue);
|
|
665
|
+
}
|
|
666
|
+
else if (value.doubleValue !== undefined) {
|
|
667
|
+
result[field] = value.doubleValue;
|
|
668
|
+
}
|
|
669
|
+
else if (value.boolValue !== undefined) {
|
|
670
|
+
result[field] = value.boolValue;
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
return result;
|
|
676
|
+
});
|
|
677
|
+
}
|
|
678
|
+
/**
|
|
679
|
+
* Check collection limit
|
|
680
|
+
* Qdrant doesn't have hard collection limits like Zilliz Cloud
|
|
681
|
+
*/
|
|
682
|
+
async checkCollectionLimit() {
|
|
683
|
+
// Qdrant (self-hosted or cloud) doesn't have hard collection limits
|
|
684
|
+
return Promise.resolve(true);
|
|
685
|
+
}
|
|
686
|
+
/**
|
|
687
|
+
* Parse Milvus-style filter expression to Qdrant filter format
|
|
688
|
+
*
|
|
689
|
+
* Example:
|
|
690
|
+
* - "fileExtension == '.ts'" -> { must: [{ key: 'fileExtension', match: { value: '.ts' } }] }
|
|
691
|
+
* - "fileExtension in ['.ts', '.js']" -> { must: [{ key: 'fileExtension', match: { any: ['.ts', '.js'] } }] }
|
|
692
|
+
*/
|
|
693
|
+
parseFilterExpression(expr) {
|
|
694
|
+
// Simple parser for common filter patterns
|
|
695
|
+
// Format: "field == 'value'" or "field in ['val1', 'val2']"
|
|
696
|
+
if (expr.includes(" in ")) {
|
|
697
|
+
// Handle "field in [...]" pattern
|
|
698
|
+
const match = expr.match(/(\w+)\s+in\s+\[(.*)\]/);
|
|
699
|
+
if (match) {
|
|
700
|
+
const field = match[1];
|
|
701
|
+
const values = match[2]
|
|
702
|
+
.split(",")
|
|
703
|
+
.map((v) => v.trim().replace(/['"]/g, ""));
|
|
704
|
+
// For "IN" operator, use a "must" clause with "any" match for better performance
|
|
705
|
+
return {
|
|
706
|
+
must: [
|
|
707
|
+
{
|
|
708
|
+
conditionOneOf: {
|
|
709
|
+
case: "field",
|
|
710
|
+
value: {
|
|
711
|
+
key: field,
|
|
712
|
+
match: {
|
|
713
|
+
matchValue: {
|
|
714
|
+
case: "any",
|
|
715
|
+
value: {
|
|
716
|
+
values: values.map((value) => ({
|
|
717
|
+
kind: { case: "stringValue", value },
|
|
718
|
+
})),
|
|
719
|
+
},
|
|
720
|
+
},
|
|
721
|
+
},
|
|
722
|
+
},
|
|
723
|
+
},
|
|
724
|
+
},
|
|
725
|
+
],
|
|
726
|
+
};
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
else if (expr.includes("==")) {
|
|
730
|
+
// Handle "field == value" pattern
|
|
731
|
+
const match = expr.match(/(\w+)\s*==\s*['"]?([^'"]+)['"]?/);
|
|
732
|
+
if (match) {
|
|
733
|
+
const field = match[1];
|
|
734
|
+
const value = match[2].trim();
|
|
735
|
+
return {
|
|
736
|
+
must: [
|
|
737
|
+
{
|
|
738
|
+
conditionOneOf: {
|
|
739
|
+
case: "field",
|
|
740
|
+
value: {
|
|
741
|
+
key: field,
|
|
742
|
+
match: {
|
|
743
|
+
matchValue: {
|
|
744
|
+
case: "keyword",
|
|
745
|
+
value,
|
|
746
|
+
},
|
|
747
|
+
},
|
|
748
|
+
},
|
|
749
|
+
},
|
|
750
|
+
},
|
|
751
|
+
],
|
|
752
|
+
};
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
// If parsing fails, return undefined (no filtering)
|
|
756
|
+
console.warn("[QdrantDB] ⚠️ Could not parse filter expression:", expr);
|
|
757
|
+
return undefined;
|
|
758
|
+
}
|
|
759
|
+
/**
|
|
760
|
+
* Convert chunk ID to numeric ID for Qdrant
|
|
761
|
+
* Extracts the hex hash from chunk_XXXXXXXXXXXXXXXX and converts to bigint
|
|
762
|
+
*
|
|
763
|
+
* Example: chunk_edf5558e3dbbf10b -> 17141645883789484811n
|
|
764
|
+
*/
|
|
765
|
+
convertToNumericId(chunkId) {
|
|
766
|
+
// Extract hex portion from chunk_XXXXXXXXXXXXXXXX format
|
|
767
|
+
const hex = chunkId.replace("chunk_", "");
|
|
768
|
+
// Convert hex string to bigint (16 hex chars = 64 bits)
|
|
769
|
+
return BigInt(`0x${hex}`);
|
|
770
|
+
}
|
|
771
|
+
/**
|
|
772
|
+
* Get BM25 generator (for testing/debugging)
|
|
773
|
+
*/
|
|
774
|
+
getBM25Generator() {
|
|
775
|
+
return this.bm25Generator;
|
|
776
|
+
}
|
|
777
|
+
/**
|
|
778
|
+
* Get BM25 model file path for a collection
|
|
779
|
+
*/
|
|
780
|
+
getBM25ModelPath(collectionName) {
|
|
781
|
+
const homeDir = os.homedir();
|
|
782
|
+
const modelDir = path.join(homeDir, ".context", "bm25");
|
|
783
|
+
return path.join(modelDir, `${collectionName}.json`);
|
|
784
|
+
}
|
|
785
|
+
/**
|
|
786
|
+
* Save BM25 model to disk
|
|
787
|
+
*/
|
|
788
|
+
async saveBM25Model(collectionName) {
|
|
789
|
+
if (!this.bm25Generator.isTrained()) {
|
|
790
|
+
console.log("[QdrantDB] ⚠️ BM25 model is not trained, skipping save");
|
|
791
|
+
return;
|
|
792
|
+
}
|
|
793
|
+
try {
|
|
794
|
+
const modelPath = this.getBM25ModelPath(collectionName);
|
|
795
|
+
const modelDir = path.dirname(modelPath);
|
|
796
|
+
// Ensure directory exists
|
|
797
|
+
await fs.mkdir(modelDir, { recursive: true });
|
|
798
|
+
// Serialize and save BM25 model
|
|
799
|
+
const modelJson = this.bm25Generator.toJSON();
|
|
800
|
+
await fs.writeFile(modelPath, modelJson, "utf-8");
|
|
801
|
+
console.log(`[QdrantDB] 💾 Saved BM25 model to: ${modelPath}`);
|
|
802
|
+
}
|
|
803
|
+
catch (error) {
|
|
804
|
+
console.error(`[QdrantDB] ❌ Failed to save BM25 model:`, error);
|
|
805
|
+
throw error;
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
/**
|
|
809
|
+
* Load BM25 model from disk
|
|
810
|
+
*/
|
|
811
|
+
async loadBM25Model(collectionName) {
|
|
812
|
+
try {
|
|
813
|
+
const modelPath = this.getBM25ModelPath(collectionName);
|
|
814
|
+
// Check if model file exists
|
|
815
|
+
try {
|
|
816
|
+
await fs.access(modelPath);
|
|
817
|
+
}
|
|
818
|
+
catch {
|
|
819
|
+
console.log(`[QdrantDB] ℹ️ No saved BM25 model found at: ${modelPath}`);
|
|
820
|
+
return false;
|
|
821
|
+
}
|
|
822
|
+
// Load and deserialize BM25 model
|
|
823
|
+
const modelJson = await fs.readFile(modelPath, "utf-8");
|
|
824
|
+
this.bm25Generator = simple_bm25_1.SimpleBM25.fromJSON(modelJson);
|
|
825
|
+
console.log(`[QdrantDB] 📂 Loaded BM25 model from: ${modelPath}`);
|
|
826
|
+
return true;
|
|
827
|
+
}
|
|
828
|
+
catch (error) {
|
|
829
|
+
console.error(`[QdrantDB] ❌ Failed to load BM25 model:`, error);
|
|
830
|
+
return false;
|
|
831
|
+
}
|
|
832
|
+
}
|
|
833
|
+
/**
|
|
834
|
+
* Delete saved BM25 model
|
|
835
|
+
*/
|
|
836
|
+
async deleteBM25Model(collectionName) {
|
|
837
|
+
try {
|
|
838
|
+
const modelPath = this.getBM25ModelPath(collectionName);
|
|
839
|
+
// Check if model file exists
|
|
840
|
+
try {
|
|
841
|
+
await fs.access(modelPath);
|
|
842
|
+
}
|
|
843
|
+
catch {
|
|
844
|
+
// File doesn't exist, nothing to delete
|
|
845
|
+
return;
|
|
846
|
+
}
|
|
847
|
+
await fs.unlink(modelPath);
|
|
848
|
+
console.log(`[QdrantDB] 🗑️ Deleted BM25 model at: ${modelPath}`);
|
|
849
|
+
}
|
|
850
|
+
catch (error) {
|
|
851
|
+
console.warn(`[QdrantDB] ⚠️ Failed to delete BM25 model:`, error);
|
|
852
|
+
}
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
exports.QdrantVectorDatabase = QdrantVectorDatabase;
|
|
856
|
+
//# sourceMappingURL=qdrant-vectordb.js.map
|