@mhalder/qdrant-mcp-server 2.1.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/README.md +5 -4
- package/build/code/indexer.d.ts +5 -0
- package/build/code/indexer.d.ts.map +1 -1
- package/build/code/indexer.js +137 -32
- package/build/code/indexer.js.map +1 -1
- package/build/code/types.d.ts +4 -0
- package/build/code/types.d.ts.map +1 -1
- package/build/qdrant/client.d.ts +5 -0
- package/build/qdrant/client.d.ts.map +1 -1
- package/build/qdrant/client.js +10 -0
- package/build/qdrant/client.js.map +1 -1
- package/build/qdrant/client.test.js +25 -0
- package/build/qdrant/client.test.js.map +1 -1
- package/build/tools/code.d.ts.map +1 -1
- package/build/tools/code.js +11 -1
- package/build/tools/code.js.map +1 -1
- package/examples/code-search/README.md +19 -4
- package/package.json +1 -1
- package/src/code/indexer.ts +210 -55
- package/src/code/types.ts +5 -0
- package/src/qdrant/client.test.ts +29 -0
- package/src/qdrant/client.ts +14 -0
- package/src/tools/code.ts +12 -1
- package/tests/code/chunker/tree-sitter-chunker.test.ts +87 -5
- package/tests/code/indexer.test.ts +533 -74
- package/tests/code/integration.test.ts +253 -54
- package/tests/code/scanner.test.ts +81 -6
- package/tests/code/sync/snapshot.test.ts +55 -4
- package/tests/code/sync/synchronizer.test.ts +86 -10
package/src/code/indexer.ts
CHANGED
|
@@ -24,11 +24,14 @@ import type {
|
|
|
24
24
|
SearchOptions,
|
|
25
25
|
} from "./types.js";
|
|
26
26
|
|
|
27
|
+
/** Reserved ID for storing indexing metadata in the collection */
|
|
28
|
+
const INDEXING_METADATA_ID = "__indexing_metadata__";
|
|
29
|
+
|
|
27
30
|
export class CodeIndexer {
|
|
28
31
|
constructor(
|
|
29
32
|
private qdrant: QdrantManager,
|
|
30
33
|
private embeddings: EmbeddingProvider,
|
|
31
|
-
private config: CodeConfig
|
|
34
|
+
private config: CodeConfig,
|
|
32
35
|
) {}
|
|
33
36
|
|
|
34
37
|
/**
|
|
@@ -59,7 +62,7 @@ export class CodeIndexer {
|
|
|
59
62
|
async indexCodebase(
|
|
60
63
|
path: string,
|
|
61
64
|
options?: IndexOptions,
|
|
62
|
-
progressCallback?: ProgressCallback
|
|
65
|
+
progressCallback?: ProgressCallback,
|
|
63
66
|
): Promise<IndexStats> {
|
|
64
67
|
const startTime = Date.now();
|
|
65
68
|
const stats: IndexStats = {
|
|
@@ -71,9 +74,10 @@ export class CodeIndexer {
|
|
|
71
74
|
errors: [],
|
|
72
75
|
};
|
|
73
76
|
|
|
74
|
-
|
|
75
|
-
|
|
77
|
+
const absolutePath = await this.validatePath(path);
|
|
78
|
+
const collectionName = this.getCollectionName(absolutePath);
|
|
76
79
|
|
|
80
|
+
try {
|
|
77
81
|
// 1. Scan files
|
|
78
82
|
progressCallback?.({
|
|
79
83
|
phase: "scanning",
|
|
@@ -84,9 +88,11 @@ export class CodeIndexer {
|
|
|
84
88
|
});
|
|
85
89
|
|
|
86
90
|
const scanner = new FileScanner({
|
|
87
|
-
supportedExtensions:
|
|
91
|
+
supportedExtensions:
|
|
92
|
+
options?.extensions || this.config.supportedExtensions,
|
|
88
93
|
ignorePatterns: this.config.ignorePatterns,
|
|
89
|
-
customIgnorePatterns:
|
|
94
|
+
customIgnorePatterns:
|
|
95
|
+
options?.ignorePatterns || this.config.customIgnorePatterns,
|
|
90
96
|
});
|
|
91
97
|
|
|
92
98
|
await scanner.loadIgnorePatterns(absolutePath);
|
|
@@ -101,8 +107,8 @@ export class CodeIndexer {
|
|
|
101
107
|
}
|
|
102
108
|
|
|
103
109
|
// 2. Create or verify collection
|
|
104
|
-
const
|
|
105
|
-
|
|
110
|
+
const collectionExists =
|
|
111
|
+
await this.qdrant.collectionExists(collectionName);
|
|
106
112
|
|
|
107
113
|
if (options?.forceReindex && collectionExists) {
|
|
108
114
|
await this.qdrant.deleteCollection(collectionName);
|
|
@@ -114,10 +120,13 @@ export class CodeIndexer {
|
|
|
114
120
|
collectionName,
|
|
115
121
|
vectorSize,
|
|
116
122
|
"Cosine",
|
|
117
|
-
this.config.enableHybridSearch
|
|
123
|
+
this.config.enableHybridSearch,
|
|
118
124
|
);
|
|
119
125
|
}
|
|
120
126
|
|
|
127
|
+
// Store "indexing in progress" marker immediately after collection is ready
|
|
128
|
+
await this.storeIndexingMarker(collectionName, false);
|
|
129
|
+
|
|
121
130
|
// 3. Process files and create chunks
|
|
122
131
|
const chunker = new TreeSitterChunker({
|
|
123
132
|
chunkSize: this.config.chunkSize,
|
|
@@ -141,7 +150,9 @@ export class CodeIndexer {
|
|
|
141
150
|
|
|
142
151
|
// Check for secrets (basic detection)
|
|
143
152
|
if (metadataExtractor.containsSecrets(code)) {
|
|
144
|
-
stats.errors?.push(
|
|
153
|
+
stats.errors?.push(
|
|
154
|
+
`Skipped ${filePath}: potential secrets detected`,
|
|
155
|
+
);
|
|
145
156
|
continue;
|
|
146
157
|
}
|
|
147
158
|
|
|
@@ -158,7 +169,10 @@ export class CodeIndexer {
|
|
|
158
169
|
allChunks.push({ chunk, id });
|
|
159
170
|
|
|
160
171
|
// Check total chunk limit
|
|
161
|
-
if (
|
|
172
|
+
if (
|
|
173
|
+
this.config.maxTotalChunks &&
|
|
174
|
+
allChunks.length >= this.config.maxTotalChunks
|
|
175
|
+
) {
|
|
162
176
|
break;
|
|
163
177
|
}
|
|
164
178
|
}
|
|
@@ -166,11 +180,15 @@ export class CodeIndexer {
|
|
|
166
180
|
stats.filesIndexed++;
|
|
167
181
|
|
|
168
182
|
// Check total chunk limit
|
|
169
|
-
if (
|
|
183
|
+
if (
|
|
184
|
+
this.config.maxTotalChunks &&
|
|
185
|
+
allChunks.length >= this.config.maxTotalChunks
|
|
186
|
+
) {
|
|
170
187
|
break;
|
|
171
188
|
}
|
|
172
189
|
} catch (error) {
|
|
173
|
-
const errorMessage =
|
|
190
|
+
const errorMessage =
|
|
191
|
+
error instanceof Error ? error.message : String(error);
|
|
174
192
|
stats.errors?.push(`Failed to process ${filePath}: ${errorMessage}`);
|
|
175
193
|
}
|
|
176
194
|
}
|
|
@@ -183,12 +201,15 @@ export class CodeIndexer {
|
|
|
183
201
|
await synchronizer.updateSnapshot(files);
|
|
184
202
|
} catch (error) {
|
|
185
203
|
// Snapshot failure shouldn't fail the entire indexing
|
|
186
|
-
const errorMessage =
|
|
204
|
+
const errorMessage =
|
|
205
|
+
error instanceof Error ? error.message : String(error);
|
|
187
206
|
console.error("Failed to save snapshot:", errorMessage);
|
|
188
207
|
stats.errors?.push(`Snapshot save failed: ${errorMessage}`);
|
|
189
208
|
}
|
|
190
209
|
|
|
191
210
|
if (allChunks.length === 0) {
|
|
211
|
+
// Still store completion marker even with no chunks
|
|
212
|
+
await this.storeIndexingMarker(collectionName, true);
|
|
192
213
|
stats.status = "completed";
|
|
193
214
|
stats.durationMs = Date.now() - startTime;
|
|
194
215
|
return stats;
|
|
@@ -203,7 +224,8 @@ export class CodeIndexer {
|
|
|
203
224
|
phase: "embedding",
|
|
204
225
|
current: i + batch.length,
|
|
205
226
|
total: allChunks.length,
|
|
206
|
-
percentage:
|
|
227
|
+
percentage:
|
|
228
|
+
40 + Math.round(((i + batch.length) / allChunks.length) * 30), // 40-70%
|
|
207
229
|
message: `Generating embeddings ${i + batch.length}/${allChunks.length}`,
|
|
208
230
|
});
|
|
209
231
|
|
|
@@ -225,7 +247,9 @@ export class CodeIndexer {
|
|
|
225
247
|
codebasePath: absolutePath,
|
|
226
248
|
chunkIndex: b.chunk.metadata.chunkIndex,
|
|
227
249
|
...(b.chunk.metadata.name && { name: b.chunk.metadata.name }),
|
|
228
|
-
...(b.chunk.metadata.chunkType && {
|
|
250
|
+
...(b.chunk.metadata.chunkType && {
|
|
251
|
+
chunkType: b.chunk.metadata.chunkType,
|
|
252
|
+
}),
|
|
229
253
|
},
|
|
230
254
|
}));
|
|
231
255
|
|
|
@@ -233,7 +257,8 @@ export class CodeIndexer {
|
|
|
233
257
|
phase: "storing",
|
|
234
258
|
current: i + batch.length,
|
|
235
259
|
total: allChunks.length,
|
|
236
|
-
percentage:
|
|
260
|
+
percentage:
|
|
261
|
+
70 + Math.round(((i + batch.length) / allChunks.length) * 30), // 70-100%
|
|
237
262
|
message: `Storing chunks ${i + batch.length}/${allChunks.length}`,
|
|
238
263
|
});
|
|
239
264
|
|
|
@@ -254,7 +279,9 @@ export class CodeIndexer {
|
|
|
254
279
|
codebasePath: absolutePath,
|
|
255
280
|
chunkIndex: b.chunk.metadata.chunkIndex,
|
|
256
281
|
...(b.chunk.metadata.name && { name: b.chunk.metadata.name }),
|
|
257
|
-
...(b.chunk.metadata.chunkType && {
|
|
282
|
+
...(b.chunk.metadata.chunkType && {
|
|
283
|
+
chunkType: b.chunk.metadata.chunkType,
|
|
284
|
+
}),
|
|
258
285
|
},
|
|
259
286
|
}));
|
|
260
287
|
|
|
@@ -263,16 +290,23 @@ export class CodeIndexer {
|
|
|
263
290
|
await this.qdrant.addPoints(collectionName, points);
|
|
264
291
|
}
|
|
265
292
|
} catch (error) {
|
|
266
|
-
const errorMessage =
|
|
267
|
-
|
|
293
|
+
const errorMessage =
|
|
294
|
+
error instanceof Error ? error.message : String(error);
|
|
295
|
+
stats.errors?.push(
|
|
296
|
+
`Failed to process batch at index ${i}: ${errorMessage}`,
|
|
297
|
+
);
|
|
268
298
|
stats.status = "partial";
|
|
269
299
|
}
|
|
270
300
|
}
|
|
271
301
|
|
|
302
|
+
// Store completion marker to indicate indexing is complete
|
|
303
|
+
await this.storeIndexingMarker(collectionName, true);
|
|
304
|
+
|
|
272
305
|
stats.durationMs = Date.now() - startTime;
|
|
273
306
|
return stats;
|
|
274
307
|
} catch (error) {
|
|
275
|
-
const errorMessage =
|
|
308
|
+
const errorMessage =
|
|
309
|
+
error instanceof Error ? error.message : String(error);
|
|
276
310
|
stats.status = "failed";
|
|
277
311
|
stats.errors?.push(`Indexing failed: ${errorMessage}`);
|
|
278
312
|
stats.durationMs = Date.now() - startTime;
|
|
@@ -280,13 +314,62 @@ export class CodeIndexer {
|
|
|
280
314
|
}
|
|
281
315
|
}
|
|
282
316
|
|
|
317
|
+
/**
 * Writes the reserved indexing-metadata point into a collection.
 *
 * The point is stored under INDEXING_METADATA_ID with an all-zero vector and a
 * payload recording whether indexing has finished. A `startedAt` timestamp is
 * recorded when `complete` is false and a `completedAt` timestamp when it is
 * true. Any failure is logged and swallowed so that marker bookkeeping never
 * aborts an indexing run.
 *
 * @param collectionName - Collection that receives the marker point.
 * @param complete - false when indexing begins, true once it has finished.
 */
private async storeIndexingMarker(
  collectionName: string,
  complete: boolean,
): Promise<void> {
  try {
    // Every point needs a vector, so the marker carries a zero-filled placeholder
    // sized to the embedding dimensions.
    const placeholderVector = new Array(this.embeddings.getDimensions()).fill(
      0,
    );

    // The collection's hybrid flag decides which write path is used below.
    const info = await this.qdrant.getCollectionInfo(collectionName);

    const timestamp = new Date().toISOString();
    const timing = complete
      ? { completedAt: timestamp }
      : { startedAt: timestamp };
    const payload = {
      _type: "indexing_metadata",
      indexingComplete: complete,
      ...timing,
    };

    if (!info.hybridEnabled) {
      await this.qdrant.addPoints(collectionName, [
        {
          id: INDEXING_METADATA_ID,
          vector: placeholderVector,
          payload,
        },
      ]);
      return;
    }

    // Hybrid collections get the sparse write path with an empty sparse vector.
    await this.qdrant.addPointsWithSparse(collectionName, [
      {
        id: INDEXING_METADATA_ID,
        vector: placeholderVector,
        sparseVector: { indices: [], values: [] },
        payload,
      },
    ]);
  } catch (error) {
    // Non-fatal: log but don't fail the indexing
    console.error("Failed to store indexing marker:", error);
  }
}
|
|
365
|
+
|
|
283
366
|
/**
|
|
284
367
|
* Search code semantically
|
|
285
368
|
*/
|
|
286
369
|
async searchCode(
|
|
287
370
|
path: string,
|
|
288
371
|
query: string,
|
|
289
|
-
options?: SearchOptions
|
|
372
|
+
options?: SearchOptions,
|
|
290
373
|
): Promise<CodeSearchResult[]> {
|
|
291
374
|
const absolutePath = await this.validatePath(path);
|
|
292
375
|
const collectionName = this.getCollectionName(absolutePath);
|
|
@@ -300,7 +383,8 @@ export class CodeIndexer {
|
|
|
300
383
|
// Check if collection has hybrid search enabled
|
|
301
384
|
const collectionInfo = await this.qdrant.getCollectionInfo(collectionName);
|
|
302
385
|
const useHybrid =
|
|
303
|
-
(options?.useHybrid ?? this.config.enableHybridSearch) &&
|
|
386
|
+
(options?.useHybrid ?? this.config.enableHybridSearch) &&
|
|
387
|
+
collectionInfo.hybridEnabled;
|
|
304
388
|
|
|
305
389
|
// Generate query embedding
|
|
306
390
|
const { embedding } = await this.embeddings.embed(query);
|
|
@@ -342,14 +426,14 @@ export class CodeIndexer {
|
|
|
342
426
|
embedding,
|
|
343
427
|
sparseVector,
|
|
344
428
|
options?.limit || this.config.defaultSearchLimit,
|
|
345
|
-
filter
|
|
429
|
+
filter,
|
|
346
430
|
);
|
|
347
431
|
} else {
|
|
348
432
|
results = await this.qdrant.search(
|
|
349
433
|
collectionName,
|
|
350
434
|
embedding,
|
|
351
435
|
options?.limit || this.config.defaultSearchLimit,
|
|
352
|
-
filter
|
|
436
|
+
filter,
|
|
353
437
|
);
|
|
354
438
|
}
|
|
355
439
|
|
|
@@ -379,24 +463,75 @@ export class CodeIndexer {
|
|
|
379
463
|
const exists = await this.qdrant.collectionExists(collectionName);
|
|
380
464
|
|
|
381
465
|
if (!exists) {
|
|
382
|
-
return { isIndexed: false };
|
|
466
|
+
return { isIndexed: false, status: "not_indexed" };
|
|
383
467
|
}
|
|
384
468
|
|
|
469
|
+
// Check for indexing marker in Qdrant (persisted across instances)
|
|
470
|
+
const indexingMarker = await this.qdrant.getPoint(
|
|
471
|
+
collectionName,
|
|
472
|
+
INDEXING_METADATA_ID,
|
|
473
|
+
);
|
|
385
474
|
const info = await this.qdrant.getCollectionInfo(collectionName);
|
|
386
475
|
|
|
476
|
+
// Check marker status
|
|
477
|
+
const isComplete = indexingMarker?.payload?.indexingComplete === true;
|
|
478
|
+
const isInProgress = indexingMarker?.payload?.indexingComplete === false;
|
|
479
|
+
|
|
480
|
+
// Subtract 1 from points count if marker exists (metadata point doesn't count as a chunk)
|
|
481
|
+
const actualChunksCount = indexingMarker
|
|
482
|
+
? Math.max(0, info.pointsCount - 1)
|
|
483
|
+
: info.pointsCount;
|
|
484
|
+
|
|
485
|
+
if (isInProgress) {
|
|
486
|
+
// Indexing in progress - marker exists with indexingComplete=false
|
|
487
|
+
return {
|
|
488
|
+
isIndexed: false,
|
|
489
|
+
status: "indexing",
|
|
490
|
+
collectionName,
|
|
491
|
+
chunksCount: actualChunksCount,
|
|
492
|
+
};
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
if (isComplete) {
|
|
496
|
+
// Indexing completed - marker exists with indexingComplete=true
|
|
497
|
+
return {
|
|
498
|
+
isIndexed: true,
|
|
499
|
+
status: "indexed",
|
|
500
|
+
collectionName,
|
|
501
|
+
chunksCount: actualChunksCount,
|
|
502
|
+
lastUpdated: indexingMarker.payload?.completedAt
|
|
503
|
+
? new Date(indexingMarker.payload.completedAt)
|
|
504
|
+
: undefined,
|
|
505
|
+
};
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
// Legacy collection (no marker) - check if it has content
|
|
509
|
+
// If it has chunks, assume it's indexed (backwards compatibility)
|
|
510
|
+
if (actualChunksCount > 0) {
|
|
511
|
+
return {
|
|
512
|
+
isIndexed: true,
|
|
513
|
+
status: "indexed",
|
|
514
|
+
collectionName,
|
|
515
|
+
chunksCount: actualChunksCount,
|
|
516
|
+
};
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
// Collection exists but no chunks and no marker - not indexed
|
|
387
520
|
return {
|
|
388
|
-
isIndexed:
|
|
521
|
+
isIndexed: false,
|
|
522
|
+
status: "not_indexed",
|
|
389
523
|
collectionName,
|
|
390
|
-
chunksCount:
|
|
391
|
-
// TODO: Extract unique languages and file count from collection
|
|
392
|
-
// This would require scrolling through points or maintaining separate metadata
|
|
524
|
+
chunksCount: 0,
|
|
393
525
|
};
|
|
394
526
|
}
|
|
395
527
|
|
|
396
528
|
/**
|
|
397
529
|
* Incrementally re-index only changed files
|
|
398
530
|
*/
|
|
399
|
-
async reindexChanges(
|
|
531
|
+
async reindexChanges(
|
|
532
|
+
path: string,
|
|
533
|
+
progressCallback?: ProgressCallback,
|
|
534
|
+
): Promise<ChangeStats> {
|
|
400
535
|
const startTime = Date.now();
|
|
401
536
|
const stats: ChangeStats = {
|
|
402
537
|
filesAdded: 0,
|
|
@@ -422,7 +557,9 @@ export class CodeIndexer {
|
|
|
422
557
|
const hasSnapshot = await synchronizer.initialize();
|
|
423
558
|
|
|
424
559
|
if (!hasSnapshot) {
|
|
425
|
-
throw new Error(
|
|
560
|
+
throw new Error(
|
|
561
|
+
"No previous snapshot found. Use index_codebase for initial indexing.",
|
|
562
|
+
);
|
|
426
563
|
}
|
|
427
564
|
|
|
428
565
|
// Scan current files
|
|
@@ -449,7 +586,11 @@ export class CodeIndexer {
|
|
|
449
586
|
stats.filesModified = changes.modified.length;
|
|
450
587
|
stats.filesDeleted = changes.deleted.length;
|
|
451
588
|
|
|
452
|
-
if (
|
|
589
|
+
if (
|
|
590
|
+
stats.filesAdded === 0 &&
|
|
591
|
+
stats.filesModified === 0 &&
|
|
592
|
+
stats.filesDeleted === 0
|
|
593
|
+
) {
|
|
453
594
|
stats.durationMs = Date.now() - startTime;
|
|
454
595
|
return stats;
|
|
455
596
|
}
|
|
@@ -461,27 +602,34 @@ export class CodeIndexer {
|
|
|
461
602
|
});
|
|
462
603
|
const metadataExtractor = new MetadataExtractor();
|
|
463
604
|
|
|
464
|
-
//
|
|
465
|
-
const
|
|
466
|
-
const filesToReprocess = [...changes.modified, ...changes.deleted];
|
|
605
|
+
// Delete chunks for modified and deleted files BEFORE adding new ones
|
|
606
|
+
const filesToDelete = [...changes.modified, ...changes.deleted];
|
|
467
607
|
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
608
|
+
if (filesToDelete.length > 0) {
|
|
609
|
+
progressCallback?.({
|
|
610
|
+
phase: "scanning",
|
|
611
|
+
current: 0,
|
|
612
|
+
total: filesToDelete.length,
|
|
613
|
+
percentage: 5,
|
|
614
|
+
message: `Deleting old chunks for ${filesToDelete.length} files...`,
|
|
615
|
+
});
|
|
616
|
+
|
|
617
|
+
for (const relativePath of filesToDelete) {
|
|
618
|
+
try {
|
|
619
|
+
const filter = {
|
|
620
|
+
must: [{ key: "relativePath", match: { value: relativePath } }],
|
|
621
|
+
};
|
|
622
|
+
await this.qdrant.deletePointsByFilter(collectionName, filter);
|
|
623
|
+
} catch (error) {
|
|
624
|
+
// Log but don't fail - file might not have any chunks
|
|
625
|
+
console.error(
|
|
626
|
+
`Failed to delete chunks for ${relativePath}:`,
|
|
627
|
+
error,
|
|
628
|
+
);
|
|
629
|
+
}
|
|
480
630
|
}
|
|
481
631
|
}
|
|
482
632
|
|
|
483
|
-
// For Phase 2 MVP: Simply re-process all changed files
|
|
484
|
-
// TODO Phase 3: Implement proper chunk deletion by maintaining chunk ID mapping
|
|
485
633
|
const filesToIndex = [...changes.added, ...changes.modified];
|
|
486
634
|
const allChunks: Array<{ chunk: CodeChunk; id: string }> = [];
|
|
487
635
|
|
|
@@ -526,7 +674,8 @@ export class CodeIndexer {
|
|
|
526
674
|
phase: "embedding",
|
|
527
675
|
current: i + batch.length,
|
|
528
676
|
total: allChunks.length,
|
|
529
|
-
percentage:
|
|
677
|
+
percentage:
|
|
678
|
+
40 + Math.round(((i + batch.length) / allChunks.length) * 30),
|
|
530
679
|
message: `Generating embeddings ${i + batch.length}/${allChunks.length}`,
|
|
531
680
|
});
|
|
532
681
|
|
|
@@ -546,7 +695,9 @@ export class CodeIndexer {
|
|
|
546
695
|
codebasePath: absolutePath,
|
|
547
696
|
chunkIndex: b.chunk.metadata.chunkIndex,
|
|
548
697
|
...(b.chunk.metadata.name && { name: b.chunk.metadata.name }),
|
|
549
|
-
...(b.chunk.metadata.chunkType && {
|
|
698
|
+
...(b.chunk.metadata.chunkType && {
|
|
699
|
+
chunkType: b.chunk.metadata.chunkType,
|
|
700
|
+
}),
|
|
550
701
|
},
|
|
551
702
|
}));
|
|
552
703
|
|
|
@@ -554,7 +705,8 @@ export class CodeIndexer {
|
|
|
554
705
|
phase: "storing",
|
|
555
706
|
current: i + batch.length,
|
|
556
707
|
total: allChunks.length,
|
|
557
|
-
percentage:
|
|
708
|
+
percentage:
|
|
709
|
+
70 + Math.round(((i + batch.length) / allChunks.length) * 30),
|
|
558
710
|
message: `Storing chunks ${i + batch.length}/${allChunks.length}`,
|
|
559
711
|
});
|
|
560
712
|
|
|
@@ -562,7 +714,9 @@ export class CodeIndexer {
|
|
|
562
714
|
const sparseGenerator = new BM25SparseVectorGenerator();
|
|
563
715
|
const hybridPoints = points.map((point, idx) => ({
|
|
564
716
|
...point,
|
|
565
|
-
sparseVector: sparseGenerator.generate(
|
|
717
|
+
sparseVector: sparseGenerator.generate(
|
|
718
|
+
allChunks[i + idx].chunk.content,
|
|
719
|
+
),
|
|
566
720
|
}));
|
|
567
721
|
await this.qdrant.addPointsWithSparse(collectionName, hybridPoints);
|
|
568
722
|
} else {
|
|
@@ -576,7 +730,8 @@ export class CodeIndexer {
|
|
|
576
730
|
stats.durationMs = Date.now() - startTime;
|
|
577
731
|
return stats;
|
|
578
732
|
} catch (error) {
|
|
579
|
-
const errorMessage =
|
|
733
|
+
const errorMessage =
|
|
734
|
+
error instanceof Error ? error.message : String(error);
|
|
580
735
|
throw new Error(`Incremental re-indexing failed: ${errorMessage}`);
|
|
581
736
|
}
|
|
582
737
|
}
|
package/src/code/types.ts
CHANGED
|
@@ -78,8 +78,13 @@ export interface SearchOptions {
|
|
|
78
78
|
scoreThreshold?: number;
|
|
79
79
|
}
|
|
80
80
|
|
|
81
|
+
/**
 * Indexing state of a codebase, exposed through `IndexStatus.status`.
 *
 * - "not_indexed": no usable index has been recorded.
 * - "indexing": an indexing run has started and has not yet recorded completion.
 * - "indexed": indexing has completed.
 */
export type IndexingStatus = "not_indexed" | "indexing" | "indexed";
|
|
82
|
+
|
|
81
83
|
export interface IndexStatus {
|
|
84
|
+
/** @deprecated Use `status` instead. True only when status is 'indexed'. */
|
|
82
85
|
isIndexed: boolean;
|
|
86
|
+
/** Current indexing status: 'not_indexed', 'indexing', or 'indexed' */
|
|
87
|
+
status: IndexingStatus;
|
|
83
88
|
collectionName?: string;
|
|
84
89
|
filesCount?: number;
|
|
85
90
|
chunksCount?: number;
|
|
@@ -634,6 +634,35 @@ describe("QdrantManager", () => {
|
|
|
634
634
|
});
|
|
635
635
|
});
|
|
636
636
|
|
|
637
|
+
describe("deletePointsByFilter", () => {
  // Both cases verify that the filter is forwarded unchanged to the client's
  // delete call together with wait: true.
  it("should delete points matching filter", async () => {
    const pathFilter = {
      must: [{ key: "relativePath", match: { value: "src/test.ts" } }],
    };

    await manager.deletePointsByFilter("test-collection", pathFilter);

    const expectedRequest = { wait: true, filter: pathFilter };
    expect(mockClient.delete).toHaveBeenCalledWith(
      "test-collection",
      expectedRequest,
    );
  });

  it("should delete points with complex filter", async () => {
    const pathCondition = {
      key: "relativePath",
      match: { value: "src/utils.ts" },
    };
    const languageCondition = {
      key: "language",
      match: { value: "typescript" },
    };
    const combinedFilter = { must: [pathCondition, languageCondition] };

    await manager.deletePointsByFilter("test-collection", combinedFilter);

    const expectedRequest = { wait: true, filter: combinedFilter };
    expect(mockClient.delete).toHaveBeenCalledWith(
      "test-collection",
      expectedRequest,
    );
  });
});
|
|
665
|
+
|
|
637
666
|
describe("hybridSearch", () => {
|
|
638
667
|
beforeEach(() => {
|
|
639
668
|
mockClient.query = vi.fn();
|
package/src/qdrant/client.ts
CHANGED
|
@@ -240,6 +240,20 @@ export class QdrantManager {
|
|
|
240
240
|
});
|
|
241
241
|
}
|
|
242
242
|
|
|
243
|
+
/**
 * Removes every point in a collection that satisfies the supplied filter.
 * Handy for dropping all chunks that belong to a single file path.
 *
 * @param collectionName - Collection to delete from.
 * @param filter - Filter object passed through unchanged to the client.
 */
async deletePointsByFilter(
  collectionName: string,
  filter: Record<string, any>,
): Promise<void> {
  // The request is issued with wait: true, exactly as the filter was given.
  const request = { wait: true, filter };
  await this.client.delete(collectionName, request);
}
|
|
256
|
+
|
|
243
257
|
/**
|
|
244
258
|
* Performs hybrid search combining semantic vector search with sparse vector (keyword) search
|
|
245
259
|
* using Reciprocal Rank Fusion (RRF) to combine results
|
package/src/tools/code.ts
CHANGED
|
@@ -146,7 +146,7 @@ export function registerCodeTools(
|
|
|
146
146
|
async ({ path }) => {
|
|
147
147
|
const status = await codeIndexer.getIndexStatus(path);
|
|
148
148
|
|
|
149
|
-
if (
|
|
149
|
+
if (status.status === "not_indexed") {
|
|
150
150
|
return {
|
|
151
151
|
content: [
|
|
152
152
|
{
|
|
@@ -157,6 +157,17 @@ export function registerCodeTools(
|
|
|
157
157
|
};
|
|
158
158
|
}
|
|
159
159
|
|
|
160
|
+
if (status.status === "indexing") {
|
|
161
|
+
return {
|
|
162
|
+
content: [
|
|
163
|
+
{
|
|
164
|
+
type: "text",
|
|
165
|
+
text: `Codebase at "${path}" is currently being indexed. ${status.chunksCount || 0} chunks processed so far.`,
|
|
166
|
+
},
|
|
167
|
+
],
|
|
168
|
+
};
|
|
169
|
+
}
|
|
170
|
+
|
|
160
171
|
return {
|
|
161
172
|
content: [{ type: "text", text: JSON.stringify(status, null, 2) }],
|
|
162
173
|
};
|