@mhalder/qdrant-mcp-server 2.1.1 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,11 +24,14 @@ import type {
24
24
  SearchOptions,
25
25
  } from "./types.js";
26
26
 
27
+ /** Reserved ID for storing indexing metadata in the collection */
28
+ const INDEXING_METADATA_ID = "__indexing_metadata__";
29
+
27
30
  export class CodeIndexer {
28
31
  constructor(
29
32
  private qdrant: QdrantManager,
30
33
  private embeddings: EmbeddingProvider,
31
- private config: CodeConfig
34
+ private config: CodeConfig,
32
35
  ) {}
33
36
 
34
37
  /**
@@ -59,7 +62,7 @@ export class CodeIndexer {
59
62
  async indexCodebase(
60
63
  path: string,
61
64
  options?: IndexOptions,
62
- progressCallback?: ProgressCallback
65
+ progressCallback?: ProgressCallback,
63
66
  ): Promise<IndexStats> {
64
67
  const startTime = Date.now();
65
68
  const stats: IndexStats = {
@@ -71,9 +74,10 @@ export class CodeIndexer {
71
74
  errors: [],
72
75
  };
73
76
 
74
- try {
75
- const absolutePath = await this.validatePath(path);
77
+ const absolutePath = await this.validatePath(path);
78
+ const collectionName = this.getCollectionName(absolutePath);
76
79
 
80
+ try {
77
81
  // 1. Scan files
78
82
  progressCallback?.({
79
83
  phase: "scanning",
@@ -84,9 +88,11 @@ export class CodeIndexer {
84
88
  });
85
89
 
86
90
  const scanner = new FileScanner({
87
- supportedExtensions: options?.extensions || this.config.supportedExtensions,
91
+ supportedExtensions:
92
+ options?.extensions || this.config.supportedExtensions,
88
93
  ignorePatterns: this.config.ignorePatterns,
89
- customIgnorePatterns: options?.ignorePatterns || this.config.customIgnorePatterns,
94
+ customIgnorePatterns:
95
+ options?.ignorePatterns || this.config.customIgnorePatterns,
90
96
  });
91
97
 
92
98
  await scanner.loadIgnorePatterns(absolutePath);
@@ -101,8 +107,8 @@ export class CodeIndexer {
101
107
  }
102
108
 
103
109
  // 2. Create or verify collection
104
- const collectionName = this.getCollectionName(absolutePath);
105
- const collectionExists = await this.qdrant.collectionExists(collectionName);
110
+ const collectionExists =
111
+ await this.qdrant.collectionExists(collectionName);
106
112
 
107
113
  if (options?.forceReindex && collectionExists) {
108
114
  await this.qdrant.deleteCollection(collectionName);
@@ -114,10 +120,13 @@ export class CodeIndexer {
114
120
  collectionName,
115
121
  vectorSize,
116
122
  "Cosine",
117
- this.config.enableHybridSearch
123
+ this.config.enableHybridSearch,
118
124
  );
119
125
  }
120
126
 
127
+ // Store "indexing in progress" marker immediately after collection is ready
128
+ await this.storeIndexingMarker(collectionName, false);
129
+
121
130
  // 3. Process files and create chunks
122
131
  const chunker = new TreeSitterChunker({
123
132
  chunkSize: this.config.chunkSize,
@@ -141,7 +150,9 @@ export class CodeIndexer {
141
150
 
142
151
  // Check for secrets (basic detection)
143
152
  if (metadataExtractor.containsSecrets(code)) {
144
- stats.errors?.push(`Skipped ${filePath}: potential secrets detected`);
153
+ stats.errors?.push(
154
+ `Skipped ${filePath}: potential secrets detected`,
155
+ );
145
156
  continue;
146
157
  }
147
158
 
@@ -158,7 +169,10 @@ export class CodeIndexer {
158
169
  allChunks.push({ chunk, id });
159
170
 
160
171
  // Check total chunk limit
161
- if (this.config.maxTotalChunks && allChunks.length >= this.config.maxTotalChunks) {
172
+ if (
173
+ this.config.maxTotalChunks &&
174
+ allChunks.length >= this.config.maxTotalChunks
175
+ ) {
162
176
  break;
163
177
  }
164
178
  }
@@ -166,11 +180,15 @@ export class CodeIndexer {
166
180
  stats.filesIndexed++;
167
181
 
168
182
  // Check total chunk limit
169
- if (this.config.maxTotalChunks && allChunks.length >= this.config.maxTotalChunks) {
183
+ if (
184
+ this.config.maxTotalChunks &&
185
+ allChunks.length >= this.config.maxTotalChunks
186
+ ) {
170
187
  break;
171
188
  }
172
189
  } catch (error) {
173
- const errorMessage = error instanceof Error ? error.message : String(error);
190
+ const errorMessage =
191
+ error instanceof Error ? error.message : String(error);
174
192
  stats.errors?.push(`Failed to process ${filePath}: ${errorMessage}`);
175
193
  }
176
194
  }
@@ -183,12 +201,15 @@ export class CodeIndexer {
183
201
  await synchronizer.updateSnapshot(files);
184
202
  } catch (error) {
185
203
  // Snapshot failure shouldn't fail the entire indexing
186
- const errorMessage = error instanceof Error ? error.message : String(error);
204
+ const errorMessage =
205
+ error instanceof Error ? error.message : String(error);
187
206
  console.error("Failed to save snapshot:", errorMessage);
188
207
  stats.errors?.push(`Snapshot save failed: ${errorMessage}`);
189
208
  }
190
209
 
191
210
  if (allChunks.length === 0) {
211
+ // Still store completion marker even with no chunks
212
+ await this.storeIndexingMarker(collectionName, true);
192
213
  stats.status = "completed";
193
214
  stats.durationMs = Date.now() - startTime;
194
215
  return stats;
@@ -203,7 +224,8 @@ export class CodeIndexer {
203
224
  phase: "embedding",
204
225
  current: i + batch.length,
205
226
  total: allChunks.length,
206
- percentage: 40 + Math.round(((i + batch.length) / allChunks.length) * 30), // 40-70%
227
+ percentage:
228
+ 40 + Math.round(((i + batch.length) / allChunks.length) * 30), // 40-70%
207
229
  message: `Generating embeddings ${i + batch.length}/${allChunks.length}`,
208
230
  });
209
231
 
@@ -225,7 +247,9 @@ export class CodeIndexer {
225
247
  codebasePath: absolutePath,
226
248
  chunkIndex: b.chunk.metadata.chunkIndex,
227
249
  ...(b.chunk.metadata.name && { name: b.chunk.metadata.name }),
228
- ...(b.chunk.metadata.chunkType && { chunkType: b.chunk.metadata.chunkType }),
250
+ ...(b.chunk.metadata.chunkType && {
251
+ chunkType: b.chunk.metadata.chunkType,
252
+ }),
229
253
  },
230
254
  }));
231
255
 
@@ -233,7 +257,8 @@ export class CodeIndexer {
233
257
  phase: "storing",
234
258
  current: i + batch.length,
235
259
  total: allChunks.length,
236
- percentage: 70 + Math.round(((i + batch.length) / allChunks.length) * 30), // 70-100%
260
+ percentage:
261
+ 70 + Math.round(((i + batch.length) / allChunks.length) * 30), // 70-100%
237
262
  message: `Storing chunks ${i + batch.length}/${allChunks.length}`,
238
263
  });
239
264
 
@@ -254,7 +279,9 @@ export class CodeIndexer {
254
279
  codebasePath: absolutePath,
255
280
  chunkIndex: b.chunk.metadata.chunkIndex,
256
281
  ...(b.chunk.metadata.name && { name: b.chunk.metadata.name }),
257
- ...(b.chunk.metadata.chunkType && { chunkType: b.chunk.metadata.chunkType }),
282
+ ...(b.chunk.metadata.chunkType && {
283
+ chunkType: b.chunk.metadata.chunkType,
284
+ }),
258
285
  },
259
286
  }));
260
287
 
@@ -263,16 +290,23 @@ export class CodeIndexer {
263
290
  await this.qdrant.addPoints(collectionName, points);
264
291
  }
265
292
  } catch (error) {
266
- const errorMessage = error instanceof Error ? error.message : String(error);
267
- stats.errors?.push(`Failed to process batch at index ${i}: ${errorMessage}`);
293
+ const errorMessage =
294
+ error instanceof Error ? error.message : String(error);
295
+ stats.errors?.push(
296
+ `Failed to process batch at index ${i}: ${errorMessage}`,
297
+ );
268
298
  stats.status = "partial";
269
299
  }
270
300
  }
271
301
 
302
+ // Store completion marker to indicate indexing is complete
303
+ await this.storeIndexingMarker(collectionName, true);
304
+
272
305
  stats.durationMs = Date.now() - startTime;
273
306
  return stats;
274
307
  } catch (error) {
275
- const errorMessage = error instanceof Error ? error.message : String(error);
308
+ const errorMessage =
309
+ error instanceof Error ? error.message : String(error);
276
310
  stats.status = "failed";
277
311
  stats.errors?.push(`Indexing failed: ${errorMessage}`);
278
312
  stats.durationMs = Date.now() - startTime;
@@ -280,13 +314,62 @@ export class CodeIndexer {
280
314
  }
281
315
  }
282
316
 
317
+ /**
318
+ * Store an indexing status marker in the collection.
319
+ * Called at the start of indexing with complete=false, and at the end with complete=true.
320
+ */
321
+ private async storeIndexingMarker(
322
+ collectionName: string,
323
+ complete: boolean,
324
+ ): Promise<void> {
325
+ try {
326
+ // Create a dummy vector of zeros (required by Qdrant)
327
+ const vectorSize = this.embeddings.getDimensions();
328
+ const zeroVector = new Array(vectorSize).fill(0);
329
+
330
+ // Check if collection uses hybrid mode
331
+ const collectionInfo =
332
+ await this.qdrant.getCollectionInfo(collectionName);
333
+
334
+ const payload = {
335
+ _type: "indexing_metadata",
336
+ indexingComplete: complete,
337
+ ...(complete
338
+ ? { completedAt: new Date().toISOString() }
339
+ : { startedAt: new Date().toISOString() }),
340
+ };
341
+
342
+ if (collectionInfo.hybridEnabled) {
343
+ await this.qdrant.addPointsWithSparse(collectionName, [
344
+ {
345
+ id: INDEXING_METADATA_ID,
346
+ vector: zeroVector,
347
+ sparseVector: { indices: [], values: [] },
348
+ payload,
349
+ },
350
+ ]);
351
+ } else {
352
+ await this.qdrant.addPoints(collectionName, [
353
+ {
354
+ id: INDEXING_METADATA_ID,
355
+ vector: zeroVector,
356
+ payload,
357
+ },
358
+ ]);
359
+ }
360
+ } catch (error) {
361
+ // Non-fatal: log but don't fail the indexing
362
+ console.error("Failed to store indexing marker:", error);
363
+ }
364
+ }
365
+
283
366
  /**
284
367
  * Search code semantically
285
368
  */
286
369
  async searchCode(
287
370
  path: string,
288
371
  query: string,
289
- options?: SearchOptions
372
+ options?: SearchOptions,
290
373
  ): Promise<CodeSearchResult[]> {
291
374
  const absolutePath = await this.validatePath(path);
292
375
  const collectionName = this.getCollectionName(absolutePath);
@@ -300,7 +383,8 @@ export class CodeIndexer {
300
383
  // Check if collection has hybrid search enabled
301
384
  const collectionInfo = await this.qdrant.getCollectionInfo(collectionName);
302
385
  const useHybrid =
303
- (options?.useHybrid ?? this.config.enableHybridSearch) && collectionInfo.hybridEnabled;
386
+ (options?.useHybrid ?? this.config.enableHybridSearch) &&
387
+ collectionInfo.hybridEnabled;
304
388
 
305
389
  // Generate query embedding
306
390
  const { embedding } = await this.embeddings.embed(query);
@@ -342,14 +426,14 @@ export class CodeIndexer {
342
426
  embedding,
343
427
  sparseVector,
344
428
  options?.limit || this.config.defaultSearchLimit,
345
- filter
429
+ filter,
346
430
  );
347
431
  } else {
348
432
  results = await this.qdrant.search(
349
433
  collectionName,
350
434
  embedding,
351
435
  options?.limit || this.config.defaultSearchLimit,
352
- filter
436
+ filter,
353
437
  );
354
438
  }
355
439
 
@@ -379,24 +463,75 @@ export class CodeIndexer {
379
463
  const exists = await this.qdrant.collectionExists(collectionName);
380
464
 
381
465
  if (!exists) {
382
- return { isIndexed: false };
466
+ return { isIndexed: false, status: "not_indexed" };
383
467
  }
384
468
 
469
+ // Check for indexing marker in Qdrant (persisted across instances)
470
+ const indexingMarker = await this.qdrant.getPoint(
471
+ collectionName,
472
+ INDEXING_METADATA_ID,
473
+ );
385
474
  const info = await this.qdrant.getCollectionInfo(collectionName);
386
475
 
476
+ // Check marker status
477
+ const isComplete = indexingMarker?.payload?.indexingComplete === true;
478
+ const isInProgress = indexingMarker?.payload?.indexingComplete === false;
479
+
480
+ // Subtract 1 from points count if marker exists (metadata point doesn't count as a chunk)
481
+ const actualChunksCount = indexingMarker
482
+ ? Math.max(0, info.pointsCount - 1)
483
+ : info.pointsCount;
484
+
485
+ if (isInProgress) {
486
+ // Indexing in progress - marker exists with indexingComplete=false
487
+ return {
488
+ isIndexed: false,
489
+ status: "indexing",
490
+ collectionName,
491
+ chunksCount: actualChunksCount,
492
+ };
493
+ }
494
+
495
+ if (isComplete) {
496
+ // Indexing completed - marker exists with indexingComplete=true
497
+ return {
498
+ isIndexed: true,
499
+ status: "indexed",
500
+ collectionName,
501
+ chunksCount: actualChunksCount,
502
+ lastUpdated: indexingMarker.payload?.completedAt
503
+ ? new Date(indexingMarker.payload.completedAt)
504
+ : undefined,
505
+ };
506
+ }
507
+
508
+ // Legacy collection (no marker) - check if it has content
509
+ // If it has chunks, assume it's indexed (backwards compatibility)
510
+ if (actualChunksCount > 0) {
511
+ return {
512
+ isIndexed: true,
513
+ status: "indexed",
514
+ collectionName,
515
+ chunksCount: actualChunksCount,
516
+ };
517
+ }
518
+
519
+ // Collection exists but no chunks and no marker - not indexed
387
520
  return {
388
- isIndexed: true,
521
+ isIndexed: false,
522
+ status: "not_indexed",
389
523
  collectionName,
390
- chunksCount: info.pointsCount,
391
- // TODO: Extract unique languages and file count from collection
392
- // This would require scrolling through points or maintaining separate metadata
524
+ chunksCount: 0,
393
525
  };
394
526
  }
395
527
 
396
528
  /**
397
529
  * Incrementally re-index only changed files
398
530
  */
399
- async reindexChanges(path: string, progressCallback?: ProgressCallback): Promise<ChangeStats> {
531
+ async reindexChanges(
532
+ path: string,
533
+ progressCallback?: ProgressCallback,
534
+ ): Promise<ChangeStats> {
400
535
  const startTime = Date.now();
401
536
  const stats: ChangeStats = {
402
537
  filesAdded: 0,
@@ -422,7 +557,9 @@ export class CodeIndexer {
422
557
  const hasSnapshot = await synchronizer.initialize();
423
558
 
424
559
  if (!hasSnapshot) {
425
- throw new Error("No previous snapshot found. Use index_codebase for initial indexing.");
560
+ throw new Error(
561
+ "No previous snapshot found. Use index_codebase for initial indexing.",
562
+ );
426
563
  }
427
564
 
428
565
  // Scan current files
@@ -449,7 +586,11 @@ export class CodeIndexer {
449
586
  stats.filesModified = changes.modified.length;
450
587
  stats.filesDeleted = changes.deleted.length;
451
588
 
452
- if (stats.filesAdded === 0 && stats.filesModified === 0 && stats.filesDeleted === 0) {
589
+ if (
590
+ stats.filesAdded === 0 &&
591
+ stats.filesModified === 0 &&
592
+ stats.filesDeleted === 0
593
+ ) {
453
594
  stats.durationMs = Date.now() - startTime;
454
595
  return stats;
455
596
  }
@@ -526,7 +667,8 @@ export class CodeIndexer {
526
667
  phase: "embedding",
527
668
  current: i + batch.length,
528
669
  total: allChunks.length,
529
- percentage: 40 + Math.round(((i + batch.length) / allChunks.length) * 30),
670
+ percentage:
671
+ 40 + Math.round(((i + batch.length) / allChunks.length) * 30),
530
672
  message: `Generating embeddings ${i + batch.length}/${allChunks.length}`,
531
673
  });
532
674
 
@@ -546,7 +688,9 @@ export class CodeIndexer {
546
688
  codebasePath: absolutePath,
547
689
  chunkIndex: b.chunk.metadata.chunkIndex,
548
690
  ...(b.chunk.metadata.name && { name: b.chunk.metadata.name }),
549
- ...(b.chunk.metadata.chunkType && { chunkType: b.chunk.metadata.chunkType }),
691
+ ...(b.chunk.metadata.chunkType && {
692
+ chunkType: b.chunk.metadata.chunkType,
693
+ }),
550
694
  },
551
695
  }));
552
696
 
@@ -554,7 +698,8 @@ export class CodeIndexer {
554
698
  phase: "storing",
555
699
  current: i + batch.length,
556
700
  total: allChunks.length,
557
- percentage: 70 + Math.round(((i + batch.length) / allChunks.length) * 30),
701
+ percentage:
702
+ 70 + Math.round(((i + batch.length) / allChunks.length) * 30),
558
703
  message: `Storing chunks ${i + batch.length}/${allChunks.length}`,
559
704
  });
560
705
 
@@ -562,7 +707,9 @@ export class CodeIndexer {
562
707
  const sparseGenerator = new BM25SparseVectorGenerator();
563
708
  const hybridPoints = points.map((point, idx) => ({
564
709
  ...point,
565
- sparseVector: sparseGenerator.generate(allChunks[i + idx].chunk.content),
710
+ sparseVector: sparseGenerator.generate(
711
+ allChunks[i + idx].chunk.content,
712
+ ),
566
713
  }));
567
714
  await this.qdrant.addPointsWithSparse(collectionName, hybridPoints);
568
715
  } else {
@@ -576,7 +723,8 @@ export class CodeIndexer {
576
723
  stats.durationMs = Date.now() - startTime;
577
724
  return stats;
578
725
  } catch (error) {
579
- const errorMessage = error instanceof Error ? error.message : String(error);
726
+ const errorMessage =
727
+ error instanceof Error ? error.message : String(error);
580
728
  throw new Error(`Incremental re-indexing failed: ${errorMessage}`);
581
729
  }
582
730
  }
package/src/code/types.ts CHANGED
@@ -78,8 +78,13 @@ export interface SearchOptions {
78
78
  scoreThreshold?: number;
79
79
  }
80
80
 
81
+ export type IndexingStatus = "not_indexed" | "indexing" | "indexed";
82
+
81
83
  export interface IndexStatus {
84
+ /** @deprecated Use `status` instead. True only when status is 'indexed'. */
82
85
  isIndexed: boolean;
86
+ /** Current indexing status: 'not_indexed', 'indexing', or 'indexed' */
87
+ status: IndexingStatus;
83
88
  collectionName?: string;
84
89
  filesCount?: number;
85
90
  chunksCount?: number;
package/src/tools/code.ts CHANGED
@@ -146,7 +146,7 @@ export function registerCodeTools(
146
146
  async ({ path }) => {
147
147
  const status = await codeIndexer.getIndexStatus(path);
148
148
 
149
- if (!status.isIndexed) {
149
+ if (status.status === "not_indexed") {
150
150
  return {
151
151
  content: [
152
152
  {
@@ -157,6 +157,17 @@ export function registerCodeTools(
157
157
  };
158
158
  }
159
159
 
160
+ if (status.status === "indexing") {
161
+ return {
162
+ content: [
163
+ {
164
+ type: "text",
165
+ text: `Codebase at "${path}" is currently being indexed. ${status.chunksCount || 0} chunks processed so far.`,
166
+ },
167
+ ],
168
+ };
169
+ }
170
+
160
171
  return {
161
172
  content: [{ type: "text", text: JSON.stringify(status, null, 2) }],
162
173
  };