@mcampa/ai-context-core 0.0.2 → 0.1.0-beta.ff0e631

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/README.md +55 -47
  2. package/dist/.tsbuildinfo +1 -1
  3. package/dist/context.d.ts +30 -5
  4. package/dist/context.d.ts.map +1 -1
  5. package/dist/context.js +160 -16
  6. package/dist/context.js.map +1 -1
  7. package/dist/embedding/base-embedding.d.ts.map +1 -1
  8. package/dist/embedding/base-embedding.js +4 -0
  9. package/dist/embedding/base-embedding.js.map +1 -1
  10. package/dist/embedding/gemini-embedding.d.ts +43 -1
  11. package/dist/embedding/gemini-embedding.d.ts.map +1 -1
  12. package/dist/embedding/gemini-embedding.js +164 -26
  13. package/dist/embedding/gemini-embedding.js.map +1 -1
  14. package/dist/embedding/huggingface-embedding.d.ts +70 -0
  15. package/dist/embedding/huggingface-embedding.d.ts.map +1 -0
  16. package/dist/embedding/huggingface-embedding.js +270 -0
  17. package/dist/embedding/huggingface-embedding.js.map +1 -0
  18. package/dist/embedding/index.d.ts +3 -2
  19. package/dist/embedding/index.d.ts.map +1 -1
  20. package/dist/embedding/index.js +3 -2
  21. package/dist/embedding/index.js.map +1 -1
  22. package/dist/embedding/ollama-embedding.d.ts +2 -1
  23. package/dist/embedding/ollama-embedding.d.ts.map +1 -1
  24. package/dist/embedding/ollama-embedding.js +2 -3
  25. package/dist/embedding/ollama-embedding.js.map +1 -1
  26. package/dist/embedding/openai-embedding.d.ts +2 -1
  27. package/dist/embedding/openai-embedding.d.ts.map +1 -1
  28. package/dist/embedding/openai-embedding.js +3 -3
  29. package/dist/embedding/openai-embedding.js.map +1 -1
  30. package/dist/embedding/voyageai-embedding.d.ts +2 -1
  31. package/dist/embedding/voyageai-embedding.d.ts.map +1 -1
  32. package/dist/embedding/voyageai-embedding.js +2 -2
  33. package/dist/embedding/voyageai-embedding.js.map +1 -1
  34. package/dist/index.d.ts +4 -4
  35. package/dist/index.d.ts.map +1 -1
  36. package/dist/index.js +4 -4
  37. package/dist/index.js.map +1 -1
  38. package/dist/splitter/ast-splitter.d.ts +1 -1
  39. package/dist/splitter/ast-splitter.d.ts.map +1 -1
  40. package/dist/splitter/ast-splitter.js +7 -7
  41. package/dist/splitter/ast-splitter.js.map +1 -1
  42. package/dist/splitter/index.d.ts +4 -4
  43. package/dist/splitter/index.d.ts.map +1 -1
  44. package/dist/splitter/index.js +1 -1
  45. package/dist/splitter/index.js.map +1 -1
  46. package/dist/splitter/langchain-splitter.d.ts +1 -1
  47. package/dist/splitter/langchain-splitter.d.ts.map +1 -1
  48. package/dist/splitter/langchain-splitter.js +2 -2
  49. package/dist/splitter/langchain-splitter.js.map +1 -1
  50. package/dist/sync/merkle.js +1 -1
  51. package/dist/sync/merkle.js.map +1 -1
  52. package/dist/sync/synchronizer.js +6 -6
  53. package/dist/sync/synchronizer.js.map +1 -1
  54. package/dist/utils/env-manager.js +4 -4
  55. package/dist/utils/env-manager.js.map +1 -1
  56. package/dist/vectordb/base/base-vector-database.d.ts +58 -0
  57. package/dist/vectordb/base/base-vector-database.d.ts.map +1 -0
  58. package/dist/vectordb/base/base-vector-database.js +32 -0
  59. package/dist/vectordb/base/base-vector-database.js.map +1 -0
  60. package/dist/vectordb/factory.d.ts +113 -0
  61. package/dist/vectordb/factory.d.ts.map +1 -0
  62. package/dist/vectordb/factory.js +170 -0
  63. package/dist/vectordb/factory.js.map +1 -0
  64. package/dist/vectordb/faiss-vectordb.d.ts +162 -0
  65. package/dist/vectordb/faiss-vectordb.d.ts.map +1 -0
  66. package/dist/vectordb/faiss-vectordb.js +777 -0
  67. package/dist/vectordb/faiss-vectordb.js.map +1 -0
  68. package/dist/vectordb/index.d.ts +13 -4
  69. package/dist/vectordb/index.d.ts.map +1 -1
  70. package/dist/vectordb/index.js +39 -5
  71. package/dist/vectordb/index.js.map +1 -1
  72. package/dist/vectordb/libsql-vectordb.d.ts +170 -0
  73. package/dist/vectordb/libsql-vectordb.d.ts.map +1 -0
  74. package/dist/vectordb/libsql-vectordb.js +837 -0
  75. package/dist/vectordb/libsql-vectordb.js.map +1 -0
  76. package/dist/vectordb/milvus-restful-vectordb.d.ts +12 -11
  77. package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -1
  78. package/dist/vectordb/milvus-restful-vectordb.js +29 -31
  79. package/dist/vectordb/milvus-restful-vectordb.js.map +1 -1
  80. package/dist/vectordb/milvus-vectordb.d.ts +12 -12
  81. package/dist/vectordb/milvus-vectordb.d.ts.map +1 -1
  82. package/dist/vectordb/milvus-vectordb.js +31 -28
  83. package/dist/vectordb/milvus-vectordb.js.map +1 -1
  84. package/dist/vectordb/qdrant-vectordb.d.ts +149 -0
  85. package/dist/vectordb/qdrant-vectordb.d.ts.map +1 -0
  86. package/dist/vectordb/qdrant-vectordb.js +856 -0
  87. package/dist/vectordb/qdrant-vectordb.js.map +1 -0
  88. package/dist/vectordb/sparse/index.d.ts +4 -0
  89. package/dist/vectordb/sparse/index.d.ts.map +1 -0
  90. package/dist/vectordb/sparse/index.js +23 -0
  91. package/dist/vectordb/sparse/index.js.map +1 -0
  92. package/dist/vectordb/sparse/simple-bm25.d.ts +115 -0
  93. package/dist/vectordb/sparse/simple-bm25.d.ts.map +1 -0
  94. package/dist/vectordb/sparse/simple-bm25.js +249 -0
  95. package/dist/vectordb/sparse/simple-bm25.js.map +1 -0
  96. package/dist/vectordb/sparse/sparse-vector-generator.d.ts +54 -0
  97. package/dist/vectordb/sparse/sparse-vector-generator.d.ts.map +1 -0
  98. package/dist/vectordb/sparse/sparse-vector-generator.js +3 -0
  99. package/dist/vectordb/sparse/sparse-vector-generator.js.map +1 -0
  100. package/dist/vectordb/sparse/types.d.ts +38 -0
  101. package/dist/vectordb/sparse/types.d.ts.map +1 -0
  102. package/dist/vectordb/sparse/types.js +3 -0
  103. package/dist/vectordb/sparse/types.js.map +1 -0
  104. package/dist/vectordb/types.d.ts +16 -16
  105. package/dist/vectordb/types.d.ts.map +1 -1
  106. package/dist/vectordb/types.js.map +1 -1
  107. package/dist/vectordb/zilliz-utils.js +3 -3
  108. package/dist/vectordb/zilliz-utils.js.map +1 -1
  109. package/package.json +32 -22
@@ -0,0 +1,856 @@
1
+ "use strict";
2
// Compiler-emitted interop helper: exposes property `k` of module `m` on object `o`
// under the name `k2` (defaults to `k`). An existing `__createBinding` found on `this`
// is reused when present.
// When Object.create exists, the property is installed with Object.defineProperty:
// the source property's own descriptor is used as-is, unless it is missing, or it is an
// accessor on a module that is not flagged `__esModule`, or it is a writable/configurable
// data property. In those cases an enumerable getter that reads `m[k]` on each access
// is used instead. Without Object.create the value is simply copied.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// Compiler-emitted interop helper: sets `o.default` to `v`. An existing
// `__setModuleDefault` found on `this` is reused when present.
// With Object.create available the property is defined as enumerable via
// Object.defineProperty (no `writable` flag is passed); otherwise it is a plain assignment.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// Compiler-emitted interop helper used for the `require(...)` calls below it.
// An existing `__importStar` found on `this` is reused when present.
// The returned function:
//   - returns `mod` unchanged when it is truthy and flagged `__esModule`;
//   - otherwise builds a fresh object, binds every own key of `mod` except "default"
//     onto it via __createBinding, and sets its `default` to `mod` itself.
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in/hasOwnProperty fallback when that function is unavailable.
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.QdrantVectorDatabase = void 0;
37
+ const fs = __importStar(require("node:fs/promises"));
38
+ const os = __importStar(require("node:os"));
39
+ const path = __importStar(require("node:path"));
40
+ const js_client_grpc_1 = require("@qdrant/js-client-grpc");
41
+ const base_vector_database_1 = require("./base/base-vector-database");
42
+ const simple_bm25_1 = require("./sparse/simple-bm25");
43
+ /**
44
+ * Qdrant Vector Database implementation using gRPC client
45
+ *
46
+ * Features:
47
+ * - Named vectors (dense + sparse)
48
+ * - Hybrid search with RRF fusion
49
+ * - BM25 sparse vector generation
50
+ * - Self-hosted and cloud support
51
+ *
52
+ * Architecture:
53
+ * - Dense vectors: From embedding providers (OpenAI, VoyageAI, etc.)
54
+ * - Sparse vectors: Generated using SimpleBM25 for keyword matching
55
+ * - Hybrid search: Combines both using Qdrant's prefetch + RRF
56
+ */
57
+ class QdrantVectorDatabase extends base_vector_database_1.BaseVectorDatabase {
58
+ constructor(config) {
59
+ super(config);
60
+ this.client = null;
61
+ // Named vector configurations
62
+ this.DENSE_VECTOR_NAME = "dense";
63
+ this.SPARSE_VECTOR_NAME = "sparse";
64
+ this.bm25Generator = new simple_bm25_1.SimpleBM25(config.bm25Config);
65
+ }
66
+ /**
67
+ * Initialize Qdrant client connection
68
+ */
69
+ async initialize() {
70
+ const resolvedAddress = await this.resolveAddress();
71
+ await this.initializeClient(resolvedAddress);
72
+ }
73
+ /**
74
+ * Create Qdrant client instance
75
+ */
76
+ async initializeClient(address) {
77
+ console.log("[QdrantDB] 🔌 Connecting to Qdrant at:", address);
78
+ // Parse address to extract host and port
79
+ const url = new URL(address.startsWith("http") ? address : `http://${address}`);
80
+ const host = url.hostname;
81
+ const port = url.port ? Number.parseInt(url.port) : 6334;
82
+ this.client = new js_client_grpc_1.QdrantClient({
83
+ host,
84
+ port,
85
+ apiKey: this.config.apiKey,
86
+ timeout: this.config.timeout || 10000,
87
+ });
88
+ // Suppress MaxListenersExceededWarning for gRPC connections
89
+ // Multiple operations on the same collection trigger multiple listeners
90
+ // This is normal for gRPC HTTP/2 multiplexing and not a memory leak
91
+ if (this.client &&
92
+ typeof this.client.setMaxListeners === "function") {
93
+ this.client.setMaxListeners(50);
94
+ }
95
+ console.log("[QdrantDB] ✅ Connected to Qdrant successfully");
96
+ }
97
+ /**
98
+ * Resolve address from config
99
+ * Unlike Milvus, Qdrant doesn't have auto-provisioning
100
+ */
101
+ async resolveAddress() {
102
+ if (!this.config.address) {
103
+ throw new Error("Qdrant address is required. Set QDRANT_URL environment variable.");
104
+ }
105
+ return this.config.address;
106
+ }
107
+ /**
108
+ * Override to add client null check
109
+ */
110
+ async ensureInitialized() {
111
+ await super.ensureInitialized();
112
+ if (!this.client) {
113
+ throw new Error("QdrantClient is not initialized");
114
+ }
115
+ }
116
+ /**
117
+ * Qdrant doesn't require explicit collection loading
118
+ * Collections are loaded on-demand automatically
119
+ */
120
+ async ensureLoaded(_collectionName) {
121
+ // No-op for Qdrant - collections are loaded automatically
122
+ return Promise.resolve();
123
+ }
124
+ /**
125
+ * Create collection with dense vectors only
126
+ */
127
+ async createCollection(collectionName, dimension, _description) {
128
+ await this.ensureInitialized();
129
+ console.log("[QdrantDB] 🔧 Creating collection:", collectionName);
130
+ console.log("[QdrantDB] 📏 Vector dimension:", dimension);
131
+ await this.client.api("collections").create({
132
+ collectionName,
133
+ vectorsConfig: {
134
+ config: {
135
+ case: "paramsMap",
136
+ value: {
137
+ map: {
138
+ [this.DENSE_VECTOR_NAME]: {
139
+ size: BigInt(dimension),
140
+ distance: 1, // Cosine = 1
141
+ },
142
+ },
143
+ },
144
+ },
145
+ },
146
+ });
147
+ console.log("[QdrantDB] ✅ Collection created successfully");
148
+ }
149
+ /**
150
+ * Create collection with hybrid search support (dense + sparse vectors)
151
+ */
152
+ async createHybridCollection(collectionName, dimension, _description) {
153
+ await this.ensureInitialized();
154
+ console.log("[QdrantDB] 🔧 Creating hybrid collection:", collectionName);
155
+ console.log("[QdrantDB] 📏 Dense vector dimension:", dimension);
156
+ await this.client.api("collections").create({
157
+ collectionName,
158
+ vectorsConfig: {
159
+ config: {
160
+ case: "paramsMap",
161
+ value: {
162
+ map: {
163
+ [this.DENSE_VECTOR_NAME]: {
164
+ size: BigInt(dimension),
165
+ distance: 1, // Cosine = 1
166
+ },
167
+ },
168
+ },
169
+ },
170
+ },
171
+ sparseVectorsConfig: {
172
+ map: {
173
+ [this.SPARSE_VECTOR_NAME]: {
174
+ modifier: js_client_grpc_1.Modifier.Idf,
175
+ },
176
+ },
177
+ },
178
+ });
179
+ console.log("[QdrantDB] ✅ Hybrid collection created successfully");
180
+ }
181
+ /**
182
+ * Drop collection
183
+ */
184
+ async dropCollection(collectionName) {
185
+ await this.ensureInitialized();
186
+ console.log("[QdrantDB] 🗑️ Dropping collection:", collectionName);
187
+ await this.client.api("collections").delete({
188
+ collectionName,
189
+ });
190
+ console.log("[QdrantDB] ✅ Collection dropped successfully");
191
+ }
192
+ /**
193
+ * Check if collection exists
194
+ */
195
+ async hasCollection(collectionName) {
196
+ await this.ensureInitialized();
197
+ try {
198
+ const response = await this.client.api("collections").get({
199
+ collectionName,
200
+ });
201
+ return response.result !== undefined;
202
+ }
203
+ catch (error) {
204
+ // Handle gRPC NOT_FOUND error (code 5) or check error messages
205
+ if (error.code === 5 || // gRPC NOT_FOUND status code
206
+ error.rawMessage?.includes("not found") ||
207
+ error.rawMessage?.includes("does not exist") ||
208
+ error.message?.includes("not found") ||
209
+ error.message?.includes("does not exist")) {
210
+ return false;
211
+ }
212
+ throw error;
213
+ }
214
+ }
215
+ /**
216
+ * List all collections
217
+ */
218
+ async listCollections() {
219
+ await this.ensureInitialized();
220
+ const response = await this.client.api("collections").list({});
221
+ return response.collections.map((c) => c.name);
222
+ }
223
+ /**
224
+ * Insert documents with dense vectors only
225
+ */
226
+ async insert(collectionName, documents) {
227
+ await this.ensureInitialized();
228
+ await this.ensureLoaded(collectionName);
229
+ console.log("[QdrantDB] 📝 Inserting", documents.length, "documents into:", collectionName);
230
+ const points = documents.map((doc) => ({
231
+ id: {
232
+ pointIdOptions: {
233
+ case: "num",
234
+ value: this.convertToNumericId(doc.id),
235
+ },
236
+ },
237
+ vectors: {
238
+ vectorsOptions: {
239
+ case: "vectors",
240
+ value: {
241
+ vectors: {
242
+ [this.DENSE_VECTOR_NAME]: {
243
+ vector: {
244
+ case: "dense",
245
+ value: {
246
+ data: doc.vector,
247
+ },
248
+ },
249
+ },
250
+ },
251
+ },
252
+ },
253
+ },
254
+ payload: {
255
+ content: { kind: { case: "stringValue", value: doc.content } },
256
+ relativePath: {
257
+ kind: { case: "stringValue", value: doc.relativePath },
258
+ },
259
+ startLine: {
260
+ kind: { case: "integerValue", value: BigInt(doc.startLine) },
261
+ },
262
+ endLine: {
263
+ kind: { case: "integerValue", value: BigInt(doc.endLine) },
264
+ },
265
+ fileExtension: {
266
+ kind: { case: "stringValue", value: doc.fileExtension },
267
+ },
268
+ metadata: {
269
+ kind: {
270
+ case: "stringValue",
271
+ value: JSON.stringify(doc.metadata),
272
+ },
273
+ },
274
+ },
275
+ }));
276
+ await this.client.api("points").upsert({
277
+ collectionName,
278
+ wait: true,
279
+ points,
280
+ });
281
+ console.log("[QdrantDB] ✅ Documents inserted successfully");
282
+ }
283
+ /**
284
+ * Insert documents with hybrid vectors (dense + sparse)
285
+ */
286
+ async insertHybrid(collectionName, documents) {
287
+ await this.ensureInitialized();
288
+ await this.ensureLoaded(collectionName);
289
+ console.log("[QdrantDB] 📝 Inserting", documents.length, "hybrid documents into:", collectionName);
290
+ // Ensure BM25 is trained before insertion
291
+ if (!this.bm25Generator.isTrained()) {
292
+ // The BM25 model must be trained on the full corpus before insertion for accurate sparse vectors.
293
+ // Training on a single batch leads to incorrect IDF scores and poor search quality.
294
+ throw new Error("BM25 generator is not trained. The caller must explicitly train it via `getBM25Generator().learn(corpus)` before calling `insertHybrid`.");
295
+ }
296
+ // Generate sparse vectors for all documents
297
+ const sparseVectors = documents.map((doc) => this.bm25Generator.generate(doc.content));
298
+ const points = documents.map((doc, index) => ({
299
+ id: {
300
+ pointIdOptions: {
301
+ case: "num",
302
+ value: this.convertToNumericId(doc.id),
303
+ },
304
+ },
305
+ vectors: {
306
+ vectorsOptions: {
307
+ case: "vectors",
308
+ value: {
309
+ vectors: {
310
+ [this.DENSE_VECTOR_NAME]: {
311
+ vector: {
312
+ case: "dense",
313
+ value: {
314
+ data: doc.vector,
315
+ },
316
+ },
317
+ },
318
+ [this.SPARSE_VECTOR_NAME]: {
319
+ vector: {
320
+ case: "sparse",
321
+ value: {
322
+ indices: sparseVectors[index].indices,
323
+ values: sparseVectors[index].values,
324
+ },
325
+ },
326
+ },
327
+ },
328
+ },
329
+ },
330
+ },
331
+ payload: {
332
+ content: { kind: { case: "stringValue", value: doc.content } },
333
+ relativePath: {
334
+ kind: { case: "stringValue", value: doc.relativePath },
335
+ },
336
+ startLine: {
337
+ kind: { case: "integerValue", value: BigInt(doc.startLine) },
338
+ },
339
+ endLine: {
340
+ kind: { case: "integerValue", value: BigInt(doc.endLine) },
341
+ },
342
+ fileExtension: {
343
+ kind: { case: "stringValue", value: doc.fileExtension },
344
+ },
345
+ metadata: {
346
+ kind: {
347
+ case: "stringValue",
348
+ value: JSON.stringify(doc.metadata),
349
+ },
350
+ },
351
+ },
352
+ }));
353
+ await this.client.api("points").upsert({
354
+ collectionName,
355
+ wait: true,
356
+ points,
357
+ });
358
+ console.log("[QdrantDB] ✅ Hybrid documents inserted successfully");
359
+ }
360
+ /**
361
+ * Search with dense vectors only
362
+ */
363
+ async search(collectionName, queryVector, options) {
364
+ await this.ensureInitialized();
365
+ await this.ensureLoaded(collectionName);
366
+ console.log("[QdrantDB] 🔍 Searching in collection:", collectionName);
367
+ const searchParams = {
368
+ collectionName,
369
+ vector: queryVector,
370
+ vectorName: this.DENSE_VECTOR_NAME,
371
+ limit: BigInt(options?.topK || 10),
372
+ // For gRPC API, omitting withPayload returns all payload fields
373
+ // Using withPayload causes "No PayloadSelector" error
374
+ };
375
+ // Apply filter if provided
376
+ if (options?.filterExpr && options.filterExpr.trim().length > 0) {
377
+ searchParams.filter = this.parseFilterExpression(options.filterExpr);
378
+ }
379
+ const results = await this.client.api("points").search(searchParams);
380
+ return results.result.map((result) => ({
381
+ document: {
382
+ id: result.id?.str || result.id?.num?.toString() || "",
383
+ vector: queryVector,
384
+ content: result.payload?.content?.stringValue || "",
385
+ relativePath: result.payload?.relativePath?.stringValue || "",
386
+ startLine: Number(result.payload?.startLine?.integerValue || 0),
387
+ endLine: Number(result.payload?.endLine?.integerValue || 0),
388
+ fileExtension: result.payload?.fileExtension?.stringValue || "",
389
+ metadata: JSON.parse(result.payload?.metadata?.stringValue || "{}"),
390
+ },
391
+ score: result.score,
392
+ }));
393
+ }
394
+ /**
395
+ * Hybrid search with dense + sparse vectors using RRF fusion
396
+ */
397
+ async hybridSearch(collectionName, searchRequests, options) {
398
+ await this.ensureInitialized();
399
+ await this.ensureLoaded(collectionName);
400
+ console.log("[QdrantDB] 🔍 Performing hybrid search in collection:", collectionName);
401
+ // Extract dense vector and query text from search requests by inspecting data types
402
+ const denseQueryReq = searchRequests.find((req) => Array.isArray(req.data));
403
+ const textQueryReq = searchRequests.find((req) => typeof req.data === "string");
404
+ if (!denseQueryReq || !textQueryReq) {
405
+ throw new Error("Hybrid search requires one dense vector request (number[] data) and one text request (string data).");
406
+ }
407
+ const denseQuery = denseQueryReq.data;
408
+ const textQuery = textQueryReq.data;
409
+ // Generate sparse vector using BM25
410
+ if (!this.bm25Generator.isTrained()) {
411
+ console.warn("[QdrantDB] ⚠️ BM25 generator not trained. Hybrid search may have reduced quality.");
412
+ }
413
+ const sparseQuery = this.bm25Generator.isTrained()
414
+ ? this.bm25Generator.generate(textQuery)
415
+ : { indices: [], values: [] };
416
+ console.log("[QdrantDB] 🔍 Dense query vector length:", denseQuery.length);
417
+ console.log("[QdrantDB] 🔍 Sparse query terms:", sparseQuery.indices.length);
418
+ console.log("[QdrantDB] 🔍 Sparse query indices:", sparseQuery.indices.slice(0, 5));
419
+ console.log("[QdrantDB] 🔍 Sparse query values:", sparseQuery.values.slice(0, 5));
420
+ // Validate sparse query has valid data
421
+ if (sparseQuery.indices.length === 0 ||
422
+ sparseQuery.values.length === 0 ||
423
+ sparseQuery.indices.length !== sparseQuery.values.length) {
424
+ console.warn("[QdrantDB] ⚠️ Invalid or empty sparse query. Falling back to dense-only search.");
425
+ console.warn(`[QdrantDB] ⚠️ indices.length=${sparseQuery.indices.length}, values.length=${sparseQuery.values.length}`);
426
+ return await this.search(collectionName, denseQuery, {
427
+ topK: options?.limit || 10,
428
+ filterExpr: options?.filterExpr,
429
+ });
430
+ }
431
+ // Validate all values are positive (Qdrant requirement for sparse vectors)
432
+ const hasNegativeValues = sparseQuery.values.some((v) => v <= 0);
433
+ if (hasNegativeValues) {
434
+ console.error("[QdrantDB] ❌ Sparse query contains non-positive values! This should not happen.");
435
+ console.error("[QdrantDB] ❌ Falling back to dense-only search.");
436
+ return await this.search(collectionName, denseQuery, {
437
+ topK: options?.limit || 10,
438
+ filterExpr: options?.filterExpr,
439
+ });
440
+ }
441
+ console.log("[QdrantDB] ✅ Sparse query validated, proceeding with hybrid search");
442
+ // Qdrant query API with nested prefetch for hybrid search
443
+ // Using RRF (Reciprocal Rank Fusion) to combine sparse and dense results
444
+ // Structure: prefetch contains one item with nested prefetch for dense/sparse, then fusion
445
+ //
446
+ // Note: Using plain objects that match the protobuf structure defined in:
447
+ // @qdrant/js-client-grpc/dist/types/proto/points_pb.d.ts
448
+ //
449
+ // QueryPoints structure:
450
+ // - collectionName: string
451
+ // - prefetch: PrefetchQuery[]
452
+ // - limit: bigint
453
+ //
454
+ // PrefetchQuery structure:
455
+ // - prefetch?: PrefetchQuery[] (nested prefetches)
456
+ // - query?: Query (query to apply)
457
+ // - using?: string (vector name)
458
+ // - limit?: bigint
459
+ //
460
+ // Query structure (oneof variant):
461
+ // - variant: { case: 'nearest', value: VectorInput } | { case: 'fusion', value: Fusion } | ...
462
+ //
463
+ // VectorInput structure (oneof variant):
464
+ // - variant: { case: 'dense', value: DenseVector } | { case: 'sparse', value: SparseVector } | ...
465
+ //
466
+ // DenseVector: { data: number[] }
467
+ // SparseVector: { indices: number[], values: number[] }
468
+ // Fusion enum: RRF = 0, DBSF = 1
469
+ const queryParams = {
470
+ collectionName,
471
+ prefetch: [
472
+ {
473
+ // Dense vector prefetch
474
+ query: {
475
+ variant: {
476
+ case: "nearest",
477
+ value: {
478
+ variant: {
479
+ case: "dense",
480
+ value: {
481
+ data: denseQuery,
482
+ },
483
+ },
484
+ },
485
+ },
486
+ },
487
+ using: this.DENSE_VECTOR_NAME,
488
+ limit: BigInt(denseQueryReq.limit || 25),
489
+ },
490
+ {
491
+ // Sparse vector prefetch
492
+ query: {
493
+ variant: {
494
+ case: "nearest",
495
+ value: {
496
+ variant: {
497
+ case: "sparse",
498
+ value: {
499
+ indices: sparseQuery.indices.map((i) => Number(i)),
500
+ values: sparseQuery.values,
501
+ },
502
+ },
503
+ },
504
+ },
505
+ },
506
+ using: this.SPARSE_VECTOR_NAME,
507
+ limit: BigInt(textQueryReq.limit || 25),
508
+ },
509
+ ],
510
+ // Fusion query to combine results from prefetches
511
+ query: {
512
+ variant: {
513
+ case: "fusion",
514
+ value: 0, // Fusion.RRF = 0
515
+ },
516
+ },
517
+ limit: BigInt(options?.limit || 10),
518
+ withPayload: {
519
+ selectorOptions: {
520
+ case: "enable",
521
+ value: true,
522
+ },
523
+ },
524
+ };
525
+ // Apply filter if provided
526
+ if (options?.filterExpr && options.filterExpr.trim().length > 0) {
527
+ queryParams.filter = this.parseFilterExpression(options.filterExpr);
528
+ }
529
+ const results = await this.client.api("points").query(queryParams);
530
+ console.log("[QdrantDB] ✅ Found", results.result.length, "results from hybrid search");
531
+ return results.result.map((result) => ({
532
+ document: {
533
+ id: result.id?.str || result.id?.num?.toString() || "",
534
+ content: result.payload?.content?.kind?.value || "",
535
+ vector: [],
536
+ relativePath: result.payload?.relativePath?.kind?.value || "",
537
+ startLine: Number(result.payload?.startLine?.kind?.value || 0),
538
+ endLine: Number(result.payload?.endLine?.kind?.value || 0),
539
+ fileExtension: result.payload?.fileExtension?.kind?.value || "",
540
+ metadata: JSON.parse(result.payload?.metadata?.kind?.value || "{}"),
541
+ },
542
+ score: result.score,
543
+ }));
544
+ }
545
+ /**
546
+ * Delete documents by IDs
547
+ */
548
+ async delete(collectionName, ids) {
549
+ await this.ensureInitialized();
550
+ await this.ensureLoaded(collectionName);
551
+ console.log("[QdrantDB] 🗑️ Deleting", ids.length, "documents from:", collectionName);
552
+ await this.client.api("points").delete({
553
+ collectionName,
554
+ wait: true,
555
+ points: {
556
+ pointsSelectorOneOf: {
557
+ case: "points",
558
+ value: {
559
+ ids: ids.map((id) => ({
560
+ pointIdOptions: {
561
+ case: "num",
562
+ value: this.convertToNumericId(id),
563
+ },
564
+ })),
565
+ },
566
+ },
567
+ },
568
+ });
569
+ console.log("[QdrantDB] ✅ Documents deleted successfully");
570
+ }
571
+ /**
572
+ * Query documents with filter conditions
573
+ */
574
+ async query(collectionName, filter, outputFields, limit) {
575
+ await this.ensureInitialized();
576
+ await this.ensureLoaded(collectionName);
577
+ console.log("[QdrantDB] 📋 Querying collection:", collectionName);
578
+ // Build scroll parameters
579
+ // For gRPC API, omitting withPayload returns all payload fields
580
+ // Using withPayload: true causes "No PayloadSelector" error
581
+ const scrollParams = {
582
+ collectionName,
583
+ limit: limit || 100,
584
+ withVector: false,
585
+ };
586
+ // Parse filter expression if provided
587
+ if (filter && filter.trim().length > 0) {
588
+ scrollParams.filter = this.parseFilterExpression(filter);
589
+ }
590
+ const results = await this.client.api("points").scroll(scrollParams);
591
+ // Dynamically map results based on requested outputFields
592
+ return results.result.map((point) => {
593
+ // Extract ID from protobuf structure
594
+ // In gRPC API, id can be: {pointIdOptions: {case: 'num', value: bigint}} or {case: 'str', value: string}
595
+ let idValue = "";
596
+ if (point.id?.pointIdOptions?.case === "num") {
597
+ idValue = point.id.pointIdOptions.value.toString();
598
+ }
599
+ else if (point.id?.pointIdOptions?.case === "str") {
600
+ idValue = point.id.pointIdOptions.value;
601
+ }
602
+ else if (point.id?.num !== undefined) {
603
+ // Fallback for backward compatibility
604
+ idValue = point.id.num.toString();
605
+ }
606
+ else if (point.id?.str !== undefined) {
607
+ idValue = point.id.str;
608
+ }
609
+ const result = {
610
+ id: idValue,
611
+ };
612
+ // If no specific fields requested, return all known fields
613
+ if (outputFields.length === 0) {
614
+ // In gRPC client, payload values are wrapped in {kind: {case: 'stringValue', value: '...'}}
615
+ result.content =
616
+ point.payload?.content?.kind?.value ||
617
+ point.payload?.content?.stringValue;
618
+ result.relativePath =
619
+ point.payload?.relativePath?.kind?.value ||
620
+ point.payload?.relativePath?.stringValue;
621
+ result.startLine = Number(point.payload?.startLine?.kind?.value ||
622
+ point.payload?.startLine?.integerValue ||
623
+ 0);
624
+ result.endLine = Number(point.payload?.endLine?.kind?.value ||
625
+ point.payload?.endLine?.integerValue ||
626
+ 0);
627
+ result.fileExtension =
628
+ point.payload?.fileExtension?.kind?.value ||
629
+ point.payload?.fileExtension?.stringValue;
630
+ const metadataStr = point.payload?.metadata?.kind?.value ||
631
+ point.payload?.metadata?.stringValue;
632
+ result.metadata = JSON.parse(metadataStr || "{}");
633
+ }
634
+ else {
635
+ // Only include requested fields
636
+ for (const field of outputFields) {
637
+ if (point.payload?.[field]) {
638
+ const value = point.payload[field];
639
+ // Handle different value types based on protobuf structure
640
+ // In gRPC client, value is wrapped in {kind: {case: 'stringValue', value: '...'}}
641
+ if (value.kind?.case === "stringValue") {
642
+ result[field] =
643
+ field === "metadata"
644
+ ? JSON.parse(value.kind.value || "{}")
645
+ : value.kind.value;
646
+ }
647
+ else if (value.kind?.case === "integerValue") {
648
+ result[field] = Number(value.kind.value);
649
+ }
650
+ else if (value.kind?.case === "doubleValue") {
651
+ result[field] = value.kind.value;
652
+ }
653
+ else if (value.kind?.case === "boolValue") {
654
+ result[field] = value.kind.value;
655
+ }
656
+ // Fallback for direct value access (backward compatibility)
657
+ else if (value.stringValue !== undefined) {
658
+ result[field] =
659
+ field === "metadata"
660
+ ? JSON.parse(value.stringValue || "{}")
661
+ : value.stringValue;
662
+ }
663
+ else if (value.integerValue !== undefined) {
664
+ result[field] = Number(value.integerValue);
665
+ }
666
+ else if (value.doubleValue !== undefined) {
667
+ result[field] = value.doubleValue;
668
+ }
669
+ else if (value.boolValue !== undefined) {
670
+ result[field] = value.boolValue;
671
+ }
672
+ }
673
+ }
674
+ }
675
+ return result;
676
+ });
677
+ }
678
+ /**
679
+ * Check collection limit
680
+ * Qdrant doesn't have hard collection limits like Zilliz Cloud
681
+ */
682
+ async checkCollectionLimit() {
683
+ // Qdrant (self-hosted or cloud) doesn't have hard collection limits
684
+ return Promise.resolve(true);
685
+ }
686
+ /**
687
+ * Parse Milvus-style filter expression to Qdrant filter format
688
+ *
689
+ * Example:
690
+ * - "fileExtension == '.ts'" -> { must: [{ key: 'fileExtension', match: { value: '.ts' } }] }
691
+ * - "fileExtension in ['.ts', '.js']" -> { must: [{ key: 'fileExtension', match: { any: ['.ts', '.js'] } }] }
692
+ */
693
+ parseFilterExpression(expr) {
694
+ // Simple parser for common filter patterns
695
+ // Format: "field == 'value'" or "field in ['val1', 'val2']"
696
+ if (expr.includes(" in ")) {
697
+ // Handle "field in [...]" pattern
698
+ const match = expr.match(/(\w+)\s+in\s+\[(.*)\]/);
699
+ if (match) {
700
+ const field = match[1];
701
+ const values = match[2]
702
+ .split(",")
703
+ .map((v) => v.trim().replace(/['"]/g, ""));
704
+ // For "IN" operator, use a "must" clause with "any" match for better performance
705
+ return {
706
+ must: [
707
+ {
708
+ conditionOneOf: {
709
+ case: "field",
710
+ value: {
711
+ key: field,
712
+ match: {
713
+ matchValue: {
714
+ case: "any",
715
+ value: {
716
+ values: values.map((value) => ({
717
+ kind: { case: "stringValue", value },
718
+ })),
719
+ },
720
+ },
721
+ },
722
+ },
723
+ },
724
+ },
725
+ ],
726
+ };
727
+ }
728
+ }
729
+ else if (expr.includes("==")) {
730
+ // Handle "field == value" pattern
731
+ const match = expr.match(/(\w+)\s*==\s*['"]?([^'"]+)['"]?/);
732
+ if (match) {
733
+ const field = match[1];
734
+ const value = match[2].trim();
735
+ return {
736
+ must: [
737
+ {
738
+ conditionOneOf: {
739
+ case: "field",
740
+ value: {
741
+ key: field,
742
+ match: {
743
+ matchValue: {
744
+ case: "keyword",
745
+ value,
746
+ },
747
+ },
748
+ },
749
+ },
750
+ },
751
+ ],
752
+ };
753
+ }
754
+ }
755
+ // If parsing fails, return undefined (no filtering)
756
+ console.warn("[QdrantDB] ⚠️ Could not parse filter expression:", expr);
757
+ return undefined;
758
+ }
759
+ /**
760
+ * Convert chunk ID to numeric ID for Qdrant
761
+ * Extracts the hex hash from chunk_XXXXXXXXXXXXXXXX and converts to bigint
762
+ *
763
+ * Example: chunk_edf5558e3dbbf10b -> 17141645883789484811n
764
+ */
765
+ convertToNumericId(chunkId) {
766
+ // Extract hex portion from chunk_XXXXXXXXXXXXXXXX format
767
+ const hex = chunkId.replace("chunk_", "");
768
+ // Convert hex string to bigint (16 hex chars = 64 bits)
769
+ return BigInt(`0x${hex}`);
770
+ }
771
+ /**
772
+ * Get BM25 generator (for testing/debugging)
773
+ */
774
+ getBM25Generator() {
775
+ return this.bm25Generator;
776
+ }
777
+ /**
778
+ * Get BM25 model file path for a collection
779
+ */
780
+ getBM25ModelPath(collectionName) {
781
+ const homeDir = os.homedir();
782
+ const modelDir = path.join(homeDir, ".context", "bm25");
783
+ return path.join(modelDir, `${collectionName}.json`);
784
+ }
785
+ /**
786
+ * Save BM25 model to disk
787
+ */
788
+ async saveBM25Model(collectionName) {
789
+ if (!this.bm25Generator.isTrained()) {
790
+ console.log("[QdrantDB] ⚠️ BM25 model is not trained, skipping save");
791
+ return;
792
+ }
793
+ try {
794
+ const modelPath = this.getBM25ModelPath(collectionName);
795
+ const modelDir = path.dirname(modelPath);
796
+ // Ensure directory exists
797
+ await fs.mkdir(modelDir, { recursive: true });
798
+ // Serialize and save BM25 model
799
+ const modelJson = this.bm25Generator.toJSON();
800
+ await fs.writeFile(modelPath, modelJson, "utf-8");
801
+ console.log(`[QdrantDB] 💾 Saved BM25 model to: ${modelPath}`);
802
+ }
803
+ catch (error) {
804
+ console.error(`[QdrantDB] ❌ Failed to save BM25 model:`, error);
805
+ throw error;
806
+ }
807
+ }
808
+ /**
809
+ * Load BM25 model from disk
810
+ */
811
+ async loadBM25Model(collectionName) {
812
+ try {
813
+ const modelPath = this.getBM25ModelPath(collectionName);
814
+ // Check if model file exists
815
+ try {
816
+ await fs.access(modelPath);
817
+ }
818
+ catch {
819
+ console.log(`[QdrantDB] ℹ️ No saved BM25 model found at: ${modelPath}`);
820
+ return false;
821
+ }
822
+ // Load and deserialize BM25 model
823
+ const modelJson = await fs.readFile(modelPath, "utf-8");
824
+ this.bm25Generator = simple_bm25_1.SimpleBM25.fromJSON(modelJson);
825
+ console.log(`[QdrantDB] 📂 Loaded BM25 model from: ${modelPath}`);
826
+ return true;
827
+ }
828
+ catch (error) {
829
+ console.error(`[QdrantDB] ❌ Failed to load BM25 model:`, error);
830
+ return false;
831
+ }
832
+ }
833
+ /**
834
+ * Delete saved BM25 model
835
+ */
836
+ async deleteBM25Model(collectionName) {
837
+ try {
838
+ const modelPath = this.getBM25ModelPath(collectionName);
839
+ // Check if model file exists
840
+ try {
841
+ await fs.access(modelPath);
842
+ }
843
+ catch {
844
+ // File doesn't exist, nothing to delete
845
+ return;
846
+ }
847
+ await fs.unlink(modelPath);
848
+ console.log(`[QdrantDB] 🗑️ Deleted BM25 model at: ${modelPath}`);
849
+ }
850
+ catch (error) {
851
+ console.warn(`[QdrantDB] ⚠️ Failed to delete BM25 model:`, error);
852
+ }
853
+ }
854
+ }
855
+ exports.QdrantVectorDatabase = QdrantVectorDatabase;
856
+ //# sourceMappingURL=qdrant-vectordb.js.map