@mhalder/qdrant-mcp-server 3.3.2 → 3.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +0 -2
- package/.github/workflows/claude-code-review.yml +1 -1
- package/CHANGELOG.md +12 -0
- package/README.md +1 -1
- package/biome.json +3 -2
- package/build/code/chunker/tree-sitter-chunker.d.ts.map +1 -1
- package/build/code/chunker/tree-sitter-chunker.js +2 -12
- package/build/code/chunker/tree-sitter-chunker.js.map +1 -1
- package/build/code/indexer.d.ts.map +1 -1
- package/build/code/indexer.js +12 -18
- package/build/code/indexer.js.map +1 -1
- package/build/code/scanner.js +1 -1
- package/build/code/scanner.js.map +1 -1
- package/build/embeddings/cohere.d.ts +1 -1
- package/build/embeddings/cohere.d.ts.map +1 -1
- package/build/embeddings/cohere.js +2 -2
- package/build/embeddings/cohere.js.map +1 -1
- package/build/embeddings/cohere.test.js +1 -5
- package/build/embeddings/cohere.test.js.map +1 -1
- package/build/embeddings/factory.d.ts +1 -1
- package/build/embeddings/factory.d.ts.map +1 -1
- package/build/embeddings/factory.js +7 -9
- package/build/embeddings/factory.js.map +1 -1
- package/build/embeddings/factory.test.js +3 -3
- package/build/embeddings/factory.test.js.map +1 -1
- package/build/embeddings/ollama.d.ts +1 -1
- package/build/embeddings/ollama.d.ts.map +1 -1
- package/build/embeddings/ollama.js +6 -8
- package/build/embeddings/ollama.js.map +1 -1
- package/build/embeddings/ollama.test.js +2 -6
- package/build/embeddings/ollama.test.js.map +1 -1
- package/build/embeddings/openai.d.ts +1 -1
- package/build/embeddings/openai.d.ts.map +1 -1
- package/build/embeddings/openai.js +4 -7
- package/build/embeddings/openai.js.map +1 -1
- package/build/embeddings/openai.test.js +3 -12
- package/build/embeddings/openai.test.js.map +1 -1
- package/build/embeddings/sparse.test.js +12 -2
- package/build/embeddings/sparse.test.js.map +1 -1
- package/build/embeddings/voyage.d.ts +1 -1
- package/build/embeddings/voyage.d.ts.map +1 -1
- package/build/embeddings/voyage.js +2 -3
- package/build/embeddings/voyage.js.map +1 -1
- package/build/embeddings/voyage.test.js +2 -6
- package/build/embeddings/voyage.test.js.map +1 -1
- package/build/git/chunker.d.ts.map +1 -1
- package/build/git/chunker.js +2 -2
- package/build/git/chunker.js.map +1 -1
- package/build/git/chunker.test.js +1 -1
- package/build/git/chunker.test.js.map +1 -1
- package/build/git/extractor.d.ts.map +1 -1
- package/build/git/extractor.integration.test.js +9 -5
- package/build/git/extractor.integration.test.js.map +1 -1
- package/build/git/extractor.js +1 -1
- package/build/git/extractor.js.map +1 -1
- package/build/git/extractor.test.js +2 -2
- package/build/git/extractor.test.js.map +1 -1
- package/build/git/index.d.ts +4 -4
- package/build/git/index.d.ts.map +1 -1
- package/build/git/index.js +3 -3
- package/build/git/index.js.map +1 -1
- package/build/git/indexer.d.ts.map +1 -1
- package/build/git/indexer.js +9 -21
- package/build/git/indexer.js.map +1 -1
- package/build/git/indexer.test.js +4 -8
- package/build/git/indexer.test.js.map +1 -1
- package/build/git/sync/synchronizer.d.ts.map +1 -1
- package/build/git/sync/synchronizer.js.map +1 -1
- package/build/git/sync/synchronizer.test.js +4 -2
- package/build/git/sync/synchronizer.test.js.map +1 -1
- package/build/index.js +5 -9
- package/build/index.js.map +1 -1
- package/build/index.test.js +3 -3
- package/build/index.test.js.map +1 -1
- package/build/logger.d.ts.map +1 -1
- package/build/logger.js +1 -9
- package/build/logger.js.map +1 -1
- package/build/prompts/register.d.ts.map +1 -1
- package/build/prompts/register.js.map +1 -1
- package/build/qdrant/client.d.ts.map +1 -1
- package/build/qdrant/client.js.map +1 -1
- package/build/qdrant/client.test.js +10 -34
- package/build/qdrant/client.test.js.map +1 -1
- package/build/resources/index.d.ts +1 -1
- package/build/resources/index.d.ts.map +1 -1
- package/build/resources/index.js +1 -1
- package/build/resources/index.js.map +1 -1
- package/build/tools/code.d.ts.map +1 -1
- package/build/tools/code.js +3 -9
- package/build/tools/code.js.map +1 -1
- package/build/tools/collection.d.ts.map +1 -1
- package/build/tools/collection.js +1 -3
- package/build/tools/collection.js.map +1 -1
- package/build/tools/document.d.ts.map +1 -1
- package/build/tools/document.js +1 -1
- package/build/tools/document.js.map +1 -1
- package/build/tools/federated.d.ts.map +1 -1
- package/build/tools/federated.js +15 -6
- package/build/tools/federated.js.map +1 -1
- package/build/tools/federated.test.js +18 -22
- package/build/tools/federated.test.js.map +1 -1
- package/build/tools/git-history.d.ts.map +1 -1
- package/build/tools/git-history.js +3 -7
- package/build/tools/git-history.js.map +1 -1
- package/build/tools/index.d.ts.map +1 -1
- package/build/tools/index.js.map +1 -1
- package/build/tools/logging.d.ts.map +1 -1
- package/build/tools/logging.js +1 -3
- package/build/tools/logging.js.map +1 -1
- package/build/tools/logging.test.js +1 -1
- package/build/tools/logging.test.js.map +1 -1
- package/build/tools/schemas.d.ts.map +1 -1
- package/build/tools/schemas.js +17 -64
- package/build/tools/schemas.js.map +1 -1
- package/build/tools/search.d.ts.map +1 -1
- package/build/tools/search.js +1 -1
- package/build/tools/search.js.map +1 -1
- package/commitlint.config.js +12 -23
- package/package.json +1 -1
- package/scripts/verify-providers.js +12 -32
- package/src/code/chunker/tree-sitter-chunker.ts +9 -35
- package/src/code/indexer.ts +45 -107
- package/src/code/scanner.ts +1 -1
- package/src/embeddings/cohere.test.ts +17 -45
- package/src/embeddings/cohere.ts +10 -17
- package/src/embeddings/factory.test.ts +18 -18
- package/src/embeddings/factory.ts +18 -25
- package/src/embeddings/ollama.test.ts +38 -67
- package/src/embeddings/ollama.ts +15 -27
- package/src/embeddings/openai.test.ts +17 -53
- package/src/embeddings/openai.ts +11 -22
- package/src/embeddings/sparse.test.ts +12 -2
- package/src/embeddings/voyage.test.ts +39 -80
- package/src/embeddings/voyage.ts +9 -13
- package/src/git/chunker.test.ts +1 -1
- package/src/git/chunker.ts +6 -22
- package/src/git/extractor.integration.test.ts +12 -16
- package/src/git/extractor.test.ts +21 -35
- package/src/git/extractor.ts +14 -36
- package/src/git/index.ts +9 -10
- package/src/git/indexer.test.ts +29 -57
- package/src/git/indexer.ts +38 -86
- package/src/git/sync/synchronizer.test.ts +6 -9
- package/src/git/sync/synchronizer.ts +2 -5
- package/src/index.test.ts +7 -9
- package/src/index.ts +34 -80
- package/src/logger.ts +3 -14
- package/src/prompts/register.ts +3 -10
- package/src/qdrant/client.test.ts +63 -169
- package/src/qdrant/client.ts +19 -45
- package/src/resources/index.ts +4 -10
- package/src/tools/code.ts +43 -66
- package/src/tools/collection.ts +19 -38
- package/src/tools/document.ts +10 -19
- package/src/tools/federated.test.ts +34 -57
- package/src/tools/federated.ts +88 -108
- package/src/tools/git-history.ts +32 -60
- package/src/tools/index.ts +1 -4
- package/src/tools/logging.test.ts +10 -10
- package/src/tools/logging.ts +8 -18
- package/src/tools/schemas.ts +23 -78
- package/src/tools/search.ts +77 -94
- package/tests/code/chunker/tree-sitter-chunker.test.ts +6 -19
- package/tests/code/indexer.test.ts +100 -192
- package/tests/code/integration.test.ts +61 -117
- package/tests/code/scanner.test.ts +12 -39
- package/tests/code/sync/snapshot.test.ts +4 -14
- package/tests/code/sync/synchronizer.test.ts +10 -40
package/src/code/indexer.ts
CHANGED
|
@@ -8,10 +8,10 @@ import { promises as fs } from "node:fs";
|
|
|
8
8
|
import { extname, join, relative, resolve } from "node:path";
|
|
9
9
|
import { promisify } from "node:util";
|
|
10
10
|
import picomatch from "picomatch";
|
|
11
|
-
import logger from "../logger.js";
|
|
12
11
|
import type { EmbeddingProvider } from "../embeddings/base.js";
|
|
13
12
|
import { BM25SparseVectorGenerator } from "../embeddings/sparse.js";
|
|
14
13
|
import { normalizeRemoteUrl } from "../git/extractor.js";
|
|
14
|
+
import logger from "../logger.js";
|
|
15
15
|
import type { QdrantManager } from "../qdrant/client.js";
|
|
16
16
|
import { TreeSitterChunker } from "./chunker/tree-sitter-chunker.js";
|
|
17
17
|
import { MetadataExtractor } from "./metadata.js";
|
|
@@ -40,7 +40,7 @@ export class CodeIndexer {
|
|
|
40
40
|
constructor(
|
|
41
41
|
private qdrant: QdrantManager,
|
|
42
42
|
private embeddings: EmbeddingProvider,
|
|
43
|
-
private config: CodeConfig
|
|
43
|
+
private config: CodeConfig
|
|
44
44
|
) {}
|
|
45
45
|
|
|
46
46
|
/**
|
|
@@ -57,7 +57,7 @@ export class CodeIndexer {
|
|
|
57
57
|
// For now, we just ensure the path exists and is resolved
|
|
58
58
|
// In a more restrictive environment, you could check against an allowlist
|
|
59
59
|
return realPath;
|
|
60
|
-
} catch (
|
|
60
|
+
} catch (_error) {
|
|
61
61
|
// If realpath fails, the path doesn't exist yet or is invalid
|
|
62
62
|
// For operations like indexing, we still need to accept non-existent paths
|
|
63
63
|
// so we just return the resolved absolute path
|
|
@@ -71,7 +71,7 @@ export class CodeIndexer {
|
|
|
71
71
|
async indexCodebase(
|
|
72
72
|
path: string,
|
|
73
73
|
options?: IndexOptions,
|
|
74
|
-
progressCallback?: ProgressCallback
|
|
74
|
+
progressCallback?: ProgressCallback
|
|
75
75
|
): Promise<IndexStats> {
|
|
76
76
|
const startTime = Date.now();
|
|
77
77
|
const stats: IndexStats = {
|
|
@@ -99,11 +99,9 @@ export class CodeIndexer {
|
|
|
99
99
|
});
|
|
100
100
|
|
|
101
101
|
const scanner = new FileScanner({
|
|
102
|
-
supportedExtensions:
|
|
103
|
-
options?.extensions || this.config.supportedExtensions,
|
|
102
|
+
supportedExtensions: options?.extensions || this.config.supportedExtensions,
|
|
104
103
|
ignorePatterns: this.config.ignorePatterns,
|
|
105
|
-
customIgnorePatterns:
|
|
106
|
-
options?.ignorePatterns || this.config.customIgnorePatterns,
|
|
104
|
+
customIgnorePatterns: options?.ignorePatterns || this.config.customIgnorePatterns,
|
|
107
105
|
});
|
|
108
106
|
|
|
109
107
|
await scanner.loadIgnorePatterns(absolutePath);
|
|
@@ -119,8 +117,7 @@ export class CodeIndexer {
|
|
|
119
117
|
}
|
|
120
118
|
|
|
121
119
|
// 2. Create or verify collection
|
|
122
|
-
const collectionExists =
|
|
123
|
-
await this.qdrant.collectionExists(collectionName);
|
|
120
|
+
const collectionExists = await this.qdrant.collectionExists(collectionName);
|
|
124
121
|
|
|
125
122
|
if (options?.forceReindex && collectionExists) {
|
|
126
123
|
await this.qdrant.deleteCollection(collectionName);
|
|
@@ -132,7 +129,7 @@ export class CodeIndexer {
|
|
|
132
129
|
collectionName,
|
|
133
130
|
vectorSize,
|
|
134
131
|
"Cosine",
|
|
135
|
-
this.config.enableHybridSearch
|
|
132
|
+
this.config.enableHybridSearch
|
|
136
133
|
);
|
|
137
134
|
this.log.debug({ collectionName, vectorSize }, "Collection created");
|
|
138
135
|
}
|
|
@@ -163,9 +160,7 @@ export class CodeIndexer {
|
|
|
163
160
|
|
|
164
161
|
// Check for secrets (basic detection)
|
|
165
162
|
if (metadataExtractor.containsSecrets(code)) {
|
|
166
|
-
stats.errors?.push(
|
|
167
|
-
`Skipped ${filePath}: potential secrets detected`,
|
|
168
|
-
);
|
|
163
|
+
stats.errors?.push(`Skipped ${filePath}: potential secrets detected`);
|
|
169
164
|
continue;
|
|
170
165
|
}
|
|
171
166
|
|
|
@@ -182,10 +177,7 @@ export class CodeIndexer {
|
|
|
182
177
|
allChunks.push({ chunk, id });
|
|
183
178
|
|
|
184
179
|
// Check total chunk limit
|
|
185
|
-
if (
|
|
186
|
-
this.config.maxTotalChunks &&
|
|
187
|
-
allChunks.length >= this.config.maxTotalChunks
|
|
188
|
-
) {
|
|
180
|
+
if (this.config.maxTotalChunks && allChunks.length >= this.config.maxTotalChunks) {
|
|
189
181
|
break;
|
|
190
182
|
}
|
|
191
183
|
}
|
|
@@ -193,15 +185,11 @@ export class CodeIndexer {
|
|
|
193
185
|
stats.filesIndexed++;
|
|
194
186
|
|
|
195
187
|
// Check total chunk limit
|
|
196
|
-
if (
|
|
197
|
-
this.config.maxTotalChunks &&
|
|
198
|
-
allChunks.length >= this.config.maxTotalChunks
|
|
199
|
-
) {
|
|
188
|
+
if (this.config.maxTotalChunks && allChunks.length >= this.config.maxTotalChunks) {
|
|
200
189
|
break;
|
|
201
190
|
}
|
|
202
191
|
} catch (error) {
|
|
203
|
-
const errorMessage =
|
|
204
|
-
error instanceof Error ? error.message : String(error);
|
|
192
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
205
193
|
stats.errors?.push(`Failed to process ${filePath}: ${errorMessage}`);
|
|
206
194
|
}
|
|
207
195
|
}
|
|
@@ -214,8 +202,7 @@ export class CodeIndexer {
|
|
|
214
202
|
await synchronizer.updateSnapshot(files);
|
|
215
203
|
} catch (error) {
|
|
216
204
|
// Snapshot failure shouldn't fail the entire indexing
|
|
217
|
-
const errorMessage =
|
|
218
|
-
error instanceof Error ? error.message : String(error);
|
|
205
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
219
206
|
this.log.error({ err: error }, "Failed to save snapshot");
|
|
220
207
|
stats.errors?.push(`Snapshot save failed: ${errorMessage}`);
|
|
221
208
|
}
|
|
@@ -230,10 +217,7 @@ export class CodeIndexer {
|
|
|
230
217
|
|
|
231
218
|
// 4. Generate embeddings and store in batches
|
|
232
219
|
const batchSize = this.config.batchSize;
|
|
233
|
-
this.log.debug(
|
|
234
|
-
{ totalChunks: allChunks.length, batchSize },
|
|
235
|
-
"Starting embedding generation",
|
|
236
|
-
);
|
|
220
|
+
this.log.debug({ totalChunks: allChunks.length, batchSize }, "Starting embedding generation");
|
|
237
221
|
for (let i = 0; i < allChunks.length; i += batchSize) {
|
|
238
222
|
const batch = allChunks.slice(i, i + batchSize);
|
|
239
223
|
|
|
@@ -241,8 +225,7 @@ export class CodeIndexer {
|
|
|
241
225
|
phase: "embedding",
|
|
242
226
|
current: i + batch.length,
|
|
243
227
|
total: allChunks.length,
|
|
244
|
-
percentage:
|
|
245
|
-
40 + Math.round(((i + batch.length) / allChunks.length) * 30), // 40-70%
|
|
228
|
+
percentage: 40 + Math.round(((i + batch.length) / allChunks.length) * 30), // 40-70%
|
|
246
229
|
message: `Generating embeddings ${i + batch.length}/${allChunks.length}`,
|
|
247
230
|
});
|
|
248
231
|
|
|
@@ -274,8 +257,7 @@ export class CodeIndexer {
|
|
|
274
257
|
phase: "storing",
|
|
275
258
|
current: i + batch.length,
|
|
276
259
|
total: allChunks.length,
|
|
277
|
-
percentage:
|
|
278
|
-
70 + Math.round(((i + batch.length) / allChunks.length) * 30), // 70-100%
|
|
260
|
+
percentage: 70 + Math.round(((i + batch.length) / allChunks.length) * 30), // 70-100%
|
|
279
261
|
message: `Storing chunks ${i + batch.length}/${allChunks.length}`,
|
|
280
262
|
});
|
|
281
263
|
|
|
@@ -307,11 +289,8 @@ export class CodeIndexer {
|
|
|
307
289
|
await this.qdrant.addPoints(collectionName, points);
|
|
308
290
|
}
|
|
309
291
|
} catch (error) {
|
|
310
|
-
const errorMessage =
|
|
311
|
-
|
|
312
|
-
stats.errors?.push(
|
|
313
|
-
`Failed to process batch at index ${i}: ${errorMessage}`,
|
|
314
|
-
);
|
|
292
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
293
|
+
stats.errors?.push(`Failed to process batch at index ${i}: ${errorMessage}`);
|
|
315
294
|
stats.status = "partial";
|
|
316
295
|
}
|
|
317
296
|
}
|
|
@@ -326,12 +305,11 @@ export class CodeIndexer {
|
|
|
326
305
|
chunksCreated: stats.chunksCreated,
|
|
327
306
|
durationMs: stats.durationMs,
|
|
328
307
|
},
|
|
329
|
-
"Indexing complete"
|
|
308
|
+
"Indexing complete"
|
|
330
309
|
);
|
|
331
310
|
return stats;
|
|
332
311
|
} catch (error) {
|
|
333
|
-
const errorMessage =
|
|
334
|
-
error instanceof Error ? error.message : String(error);
|
|
312
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
335
313
|
stats.status = "failed";
|
|
336
314
|
stats.errors?.push(`Indexing failed: ${errorMessage}`);
|
|
337
315
|
stats.durationMs = Date.now() - startTime;
|
|
@@ -343,18 +321,14 @@ export class CodeIndexer {
|
|
|
343
321
|
* Store an indexing status marker in the collection.
|
|
344
322
|
* Called at the start of indexing with complete=false, and at the end with complete=true.
|
|
345
323
|
*/
|
|
346
|
-
private async storeIndexingMarker(
|
|
347
|
-
collectionName: string,
|
|
348
|
-
complete: boolean,
|
|
349
|
-
): Promise<void> {
|
|
324
|
+
private async storeIndexingMarker(collectionName: string, complete: boolean): Promise<void> {
|
|
350
325
|
try {
|
|
351
326
|
// Create a dummy vector of zeros (required by Qdrant)
|
|
352
327
|
const vectorSize = this.embeddings.getDimensions();
|
|
353
328
|
const zeroVector = new Array(vectorSize).fill(0);
|
|
354
329
|
|
|
355
330
|
// Check if collection uses hybrid mode
|
|
356
|
-
const collectionInfo =
|
|
357
|
-
await this.qdrant.getCollectionInfo(collectionName);
|
|
331
|
+
const collectionInfo = await this.qdrant.getCollectionInfo(collectionName);
|
|
358
332
|
|
|
359
333
|
const payload = {
|
|
360
334
|
_type: "indexing_metadata",
|
|
@@ -394,7 +368,7 @@ export class CodeIndexer {
|
|
|
394
368
|
async searchCode(
|
|
395
369
|
path: string,
|
|
396
370
|
query: string,
|
|
397
|
-
options?: SearchOptions
|
|
371
|
+
options?: SearchOptions
|
|
398
372
|
): Promise<CodeSearchResult[]> {
|
|
399
373
|
const absolutePath = await this.validatePath(path);
|
|
400
374
|
const collectionName = await this.getCollectionName(absolutePath);
|
|
@@ -408,8 +382,7 @@ export class CodeIndexer {
|
|
|
408
382
|
// Check if collection has hybrid search enabled
|
|
409
383
|
const collectionInfo = await this.qdrant.getCollectionInfo(collectionName);
|
|
410
384
|
const useHybrid =
|
|
411
|
-
(options?.useHybrid ?? this.config.enableHybridSearch) &&
|
|
412
|
-
collectionInfo.hybridEnabled;
|
|
385
|
+
(options?.useHybrid ?? this.config.enableHybridSearch) && collectionInfo.hybridEnabled;
|
|
413
386
|
|
|
414
387
|
// Generate query embedding
|
|
415
388
|
const { embedding } = await this.embeddings.embed(query);
|
|
@@ -429,9 +402,7 @@ export class CodeIndexer {
|
|
|
429
402
|
|
|
430
403
|
// Prepare pathPattern matcher for post-filtering
|
|
431
404
|
// Qdrant doesn't support regex/glob filtering, so we filter results in JS
|
|
432
|
-
const pathMatcher = options?.pathPattern
|
|
433
|
-
? picomatch(options.pathPattern, { dot: true })
|
|
434
|
-
: null;
|
|
405
|
+
const pathMatcher = options?.pathPattern ? picomatch(options.pathPattern, { dot: true }) : null;
|
|
435
406
|
|
|
436
407
|
// When using pathPattern, fetch more results to account for filtering
|
|
437
408
|
const fetchLimit = pathMatcher
|
|
@@ -448,15 +419,10 @@ export class CodeIndexer {
|
|
|
448
419
|
embedding,
|
|
449
420
|
sparseVector,
|
|
450
421
|
fetchLimit,
|
|
451
|
-
filter
|
|
422
|
+
filter
|
|
452
423
|
);
|
|
453
424
|
} else {
|
|
454
|
-
results = await this.qdrant.search(
|
|
455
|
-
collectionName,
|
|
456
|
-
embedding,
|
|
457
|
-
fetchLimit,
|
|
458
|
-
filter,
|
|
459
|
-
);
|
|
425
|
+
results = await this.qdrant.search(collectionName, embedding, fetchLimit, filter);
|
|
460
426
|
}
|
|
461
427
|
|
|
462
428
|
// Apply pathPattern post-filtering if specified
|
|
@@ -470,9 +436,7 @@ export class CodeIndexer {
|
|
|
470
436
|
|
|
471
437
|
// Apply score threshold if specified
|
|
472
438
|
if (options?.scoreThreshold) {
|
|
473
|
-
filteredResults = filteredResults.filter(
|
|
474
|
-
(r) => r.score >= (options.scoreThreshold || 0),
|
|
475
|
-
);
|
|
439
|
+
filteredResults = filteredResults.filter((r) => r.score >= (options.scoreThreshold || 0));
|
|
476
440
|
}
|
|
477
441
|
|
|
478
442
|
// Apply the requested limit after all filtering
|
|
@@ -504,10 +468,7 @@ export class CodeIndexer {
|
|
|
504
468
|
}
|
|
505
469
|
|
|
506
470
|
// Check for indexing marker in Qdrant (persisted across instances)
|
|
507
|
-
const indexingMarker = await this.qdrant.getPoint(
|
|
508
|
-
collectionName,
|
|
509
|
-
INDEXING_METADATA_ID,
|
|
510
|
-
);
|
|
471
|
+
const indexingMarker = await this.qdrant.getPoint(collectionName, INDEXING_METADATA_ID);
|
|
511
472
|
const info = await this.qdrant.getCollectionInfo(collectionName);
|
|
512
473
|
|
|
513
474
|
// Check marker status
|
|
@@ -515,9 +476,7 @@ export class CodeIndexer {
|
|
|
515
476
|
const isInProgress = indexingMarker?.payload?.indexingComplete === false;
|
|
516
477
|
|
|
517
478
|
// Subtract 1 from points count if marker exists (metadata point doesn't count as a chunk)
|
|
518
|
-
const actualChunksCount = indexingMarker
|
|
519
|
-
? Math.max(0, info.pointsCount - 1)
|
|
520
|
-
: info.pointsCount;
|
|
479
|
+
const actualChunksCount = indexingMarker ? Math.max(0, info.pointsCount - 1) : info.pointsCount;
|
|
521
480
|
|
|
522
481
|
if (isInProgress) {
|
|
523
482
|
// Indexing in progress - marker exists with indexingComplete=false
|
|
@@ -565,10 +524,7 @@ export class CodeIndexer {
|
|
|
565
524
|
/**
|
|
566
525
|
* Incrementally re-index only changed files
|
|
567
526
|
*/
|
|
568
|
-
async reindexChanges(
|
|
569
|
-
path: string,
|
|
570
|
-
progressCallback?: ProgressCallback,
|
|
571
|
-
): Promise<ChangeStats> {
|
|
527
|
+
async reindexChanges(path: string, progressCallback?: ProgressCallback): Promise<ChangeStats> {
|
|
572
528
|
const startTime = Date.now();
|
|
573
529
|
const stats: ChangeStats = {
|
|
574
530
|
filesAdded: 0,
|
|
@@ -596,9 +552,7 @@ export class CodeIndexer {
|
|
|
596
552
|
const hasSnapshot = await synchronizer.initialize();
|
|
597
553
|
|
|
598
554
|
if (!hasSnapshot) {
|
|
599
|
-
throw new Error(
|
|
600
|
-
"No previous snapshot found. Use index_codebase for initial indexing.",
|
|
601
|
-
);
|
|
555
|
+
throw new Error("No previous snapshot found. Use index_codebase for initial indexing.");
|
|
602
556
|
}
|
|
603
557
|
|
|
604
558
|
// Scan current files
|
|
@@ -625,11 +579,7 @@ export class CodeIndexer {
|
|
|
625
579
|
stats.filesModified = changes.modified.length;
|
|
626
580
|
stats.filesDeleted = changes.deleted.length;
|
|
627
581
|
|
|
628
|
-
if (
|
|
629
|
-
stats.filesAdded === 0 &&
|
|
630
|
-
stats.filesModified === 0 &&
|
|
631
|
-
stats.filesDeleted === 0
|
|
632
|
-
) {
|
|
582
|
+
if (stats.filesAdded === 0 && stats.filesModified === 0 && stats.filesDeleted === 0) {
|
|
633
583
|
stats.durationMs = Date.now() - startTime;
|
|
634
584
|
return stats;
|
|
635
585
|
}
|
|
@@ -661,10 +611,7 @@ export class CodeIndexer {
|
|
|
661
611
|
await this.qdrant.deletePointsByFilter(collectionName, filter);
|
|
662
612
|
} catch (error) {
|
|
663
613
|
// Log but don't fail - file might not have any chunks
|
|
664
|
-
this.log.error(
|
|
665
|
-
{ relativePath, err: error },
|
|
666
|
-
"Failed to delete chunks during reindex",
|
|
667
|
-
);
|
|
614
|
+
this.log.error({ relativePath, err: error }, "Failed to delete chunks during reindex");
|
|
668
615
|
}
|
|
669
616
|
}
|
|
670
617
|
}
|
|
@@ -698,10 +645,7 @@ export class CodeIndexer {
|
|
|
698
645
|
allChunks.push({ chunk, id });
|
|
699
646
|
}
|
|
700
647
|
} catch (error) {
|
|
701
|
-
this.log.error(
|
|
702
|
-
{ filePath, err: error },
|
|
703
|
-
"Failed to process file during reindex",
|
|
704
|
-
);
|
|
648
|
+
this.log.error({ filePath, err: error }, "Failed to process file during reindex");
|
|
705
649
|
}
|
|
706
650
|
}
|
|
707
651
|
|
|
@@ -716,8 +660,7 @@ export class CodeIndexer {
|
|
|
716
660
|
phase: "embedding",
|
|
717
661
|
current: i + batch.length,
|
|
718
662
|
total: allChunks.length,
|
|
719
|
-
percentage:
|
|
720
|
-
40 + Math.round(((i + batch.length) / allChunks.length) * 30),
|
|
663
|
+
percentage: 40 + Math.round(((i + batch.length) / allChunks.length) * 30),
|
|
721
664
|
message: `Generating embeddings ${i + batch.length}/${allChunks.length}`,
|
|
722
665
|
});
|
|
723
666
|
|
|
@@ -747,8 +690,7 @@ export class CodeIndexer {
|
|
|
747
690
|
phase: "storing",
|
|
748
691
|
current: i + batch.length,
|
|
749
692
|
total: allChunks.length,
|
|
750
|
-
percentage:
|
|
751
|
-
70 + Math.round(((i + batch.length) / allChunks.length) * 30),
|
|
693
|
+
percentage: 70 + Math.round(((i + batch.length) / allChunks.length) * 30),
|
|
752
694
|
message: `Storing chunks ${i + batch.length}/${allChunks.length}`,
|
|
753
695
|
});
|
|
754
696
|
|
|
@@ -756,9 +698,7 @@ export class CodeIndexer {
|
|
|
756
698
|
const sparseGenerator = new BM25SparseVectorGenerator();
|
|
757
699
|
const hybridPoints = points.map((point, idx) => ({
|
|
758
700
|
...point,
|
|
759
|
-
sparseVector: sparseGenerator.generate(
|
|
760
|
-
allChunks[i + idx].chunk.content,
|
|
761
|
-
),
|
|
701
|
+
sparseVector: sparseGenerator.generate(allChunks[i + idx].chunk.content),
|
|
762
702
|
}));
|
|
763
703
|
await this.qdrant.addPointsWithSparse(collectionName, hybridPoints);
|
|
764
704
|
} else {
|
|
@@ -778,12 +718,11 @@ export class CodeIndexer {
|
|
|
778
718
|
chunksAdded: stats.chunksAdded,
|
|
779
719
|
durationMs: stats.durationMs,
|
|
780
720
|
},
|
|
781
|
-
"Reindex complete"
|
|
721
|
+
"Reindex complete"
|
|
782
722
|
);
|
|
783
723
|
return stats;
|
|
784
724
|
} catch (error) {
|
|
785
|
-
const errorMessage =
|
|
786
|
-
error instanceof Error ? error.message : String(error);
|
|
725
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
787
726
|
throw new Error(`Incremental re-indexing failed: ${errorMessage}`);
|
|
788
727
|
}
|
|
789
728
|
}
|
|
@@ -830,17 +769,16 @@ export class CodeIndexer {
|
|
|
830
769
|
const { stdout: gitRootResult } = await execFileAsync(
|
|
831
770
|
"git",
|
|
832
771
|
["rev-parse", "--show-toplevel"],
|
|
833
|
-
{ cwd: absolutePath, env: cleanEnv }
|
|
772
|
+
{ cwd: absolutePath, env: cleanEnv }
|
|
834
773
|
);
|
|
835
774
|
const gitRoot = gitRootResult.trim();
|
|
836
775
|
|
|
837
776
|
// Only use git remote if this path IS the git root
|
|
838
777
|
if (gitRoot === absolutePath) {
|
|
839
|
-
const { stdout } = await execFileAsync(
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
);
|
|
778
|
+
const { stdout } = await execFileAsync("git", ["remote", "get-url", "origin"], {
|
|
779
|
+
cwd: absolutePath,
|
|
780
|
+
env: cleanEnv,
|
|
781
|
+
});
|
|
844
782
|
const normalized = normalizeRemoteUrl(stdout.trim());
|
|
845
783
|
if (normalized) {
|
|
846
784
|
const hash = createHash("md5").update(normalized).digest("hex");
|
package/src/code/scanner.ts
CHANGED
|
@@ -19,7 +19,7 @@ export class FileScanner {
|
|
|
19
19
|
* Load ignore patterns from .gitignore, .dockerignore, .npmignore, and .contextignore
|
|
20
20
|
*/
|
|
21
21
|
async loadIgnorePatterns(rootPath: string): Promise<void> {
|
|
22
|
-
const ignoreFiles = [".gitignore", ".dockerignore", ".
|
|
22
|
+
const ignoreFiles = [".gitignore", ".dockerignore", ".contextignore"];
|
|
23
23
|
|
|
24
24
|
for (const ignoreFile of ignoreFiles) {
|
|
25
25
|
const ignorePath = join(rootPath, ignoreFile);
|
|
@@ -40,36 +40,23 @@ describe("CohereEmbeddings", () => {
|
|
|
40
40
|
});
|
|
41
41
|
|
|
42
42
|
it("should use custom model", () => {
|
|
43
|
-
const customEmbeddings = new CohereEmbeddings(
|
|
44
|
-
"test-api-key",
|
|
45
|
-
"embed-multilingual-v3.0",
|
|
46
|
-
);
|
|
43
|
+
const customEmbeddings = new CohereEmbeddings("test-api-key", "embed-multilingual-v3.0");
|
|
47
44
|
expect(customEmbeddings.getModel()).toBe("embed-multilingual-v3.0");
|
|
48
45
|
expect(customEmbeddings.getDimensions()).toBe(1024);
|
|
49
46
|
});
|
|
50
47
|
|
|
51
48
|
it("should use custom dimensions", () => {
|
|
52
|
-
const customEmbeddings = new CohereEmbeddings(
|
|
53
|
-
"test-api-key",
|
|
54
|
-
"embed-english-v3.0",
|
|
55
|
-
512,
|
|
56
|
-
);
|
|
49
|
+
const customEmbeddings = new CohereEmbeddings("test-api-key", "embed-english-v3.0", 512);
|
|
57
50
|
expect(customEmbeddings.getDimensions()).toBe(512);
|
|
58
51
|
});
|
|
59
52
|
|
|
60
53
|
it("should use default dimensions for light models", () => {
|
|
61
|
-
const lightEmbeddings = new CohereEmbeddings(
|
|
62
|
-
"test-api-key",
|
|
63
|
-
"embed-english-light-v3.0",
|
|
64
|
-
);
|
|
54
|
+
const lightEmbeddings = new CohereEmbeddings("test-api-key", "embed-english-light-v3.0");
|
|
65
55
|
expect(lightEmbeddings.getDimensions()).toBe(384);
|
|
66
56
|
});
|
|
67
57
|
|
|
68
58
|
it("should default to 1024 for unknown models", () => {
|
|
69
|
-
const unknownEmbeddings = new CohereEmbeddings(
|
|
70
|
-
"test-api-key",
|
|
71
|
-
"custom-model",
|
|
72
|
-
);
|
|
59
|
+
const unknownEmbeddings = new CohereEmbeddings("test-api-key", "custom-model");
|
|
73
60
|
expect(unknownEmbeddings.getDimensions()).toBe(1024);
|
|
74
61
|
});
|
|
75
62
|
|
|
@@ -79,7 +66,7 @@ describe("CohereEmbeddings", () => {
|
|
|
79
66
|
"embed-english-v3.0",
|
|
80
67
|
undefined,
|
|
81
68
|
undefined,
|
|
82
|
-
"search_query"
|
|
69
|
+
"search_query"
|
|
83
70
|
);
|
|
84
71
|
expect(searchQueryEmbeddings).toBeInstanceOf(CohereEmbeddings);
|
|
85
72
|
});
|
|
@@ -130,7 +117,7 @@ describe("CohereEmbeddings", () => {
|
|
|
130
117
|
const customEmbeddings = new CohereEmbeddings(
|
|
131
118
|
"test-api-key",
|
|
132
119
|
"embed-multilingual-v3.0",
|
|
133
|
-
1024
|
|
120
|
+
1024
|
|
134
121
|
);
|
|
135
122
|
const mockEmbedding = Array(1024).fill(0.1);
|
|
136
123
|
mockClient.embed.mockResolvedValue({
|
|
@@ -153,7 +140,7 @@ describe("CohereEmbeddings", () => {
|
|
|
153
140
|
});
|
|
154
141
|
|
|
155
142
|
await expect(embeddings.embed("test")).rejects.toThrow(
|
|
156
|
-
"No embedding returned from Cohere API"
|
|
143
|
+
"No embedding returned from Cohere API"
|
|
157
144
|
);
|
|
158
145
|
});
|
|
159
146
|
|
|
@@ -166,11 +153,7 @@ describe("CohereEmbeddings", () => {
|
|
|
166
153
|
|
|
167
154
|
describe("embedBatch", () => {
|
|
168
155
|
it("should generate embeddings for multiple texts", async () => {
|
|
169
|
-
const mockEmbeddings = [
|
|
170
|
-
Array(1024).fill(0.1),
|
|
171
|
-
Array(1024).fill(0.2),
|
|
172
|
-
Array(1024).fill(0.3),
|
|
173
|
-
];
|
|
156
|
+
const mockEmbeddings = [Array(1024).fill(0.1), Array(1024).fill(0.2), Array(1024).fill(0.3)];
|
|
174
157
|
mockClient.embed.mockResolvedValue({
|
|
175
158
|
embeddings: mockEmbeddings,
|
|
176
159
|
});
|
|
@@ -235,16 +218,14 @@ describe("CohereEmbeddings", () => {
|
|
|
235
218
|
mockClient.embed.mockResolvedValue({});
|
|
236
219
|
|
|
237
220
|
await expect(embeddings.embedBatch(["text1"])).rejects.toThrow(
|
|
238
|
-
"No embeddings returned from Cohere API"
|
|
221
|
+
"No embeddings returned from Cohere API"
|
|
239
222
|
);
|
|
240
223
|
});
|
|
241
224
|
|
|
242
225
|
it("should propagate errors in batch", async () => {
|
|
243
226
|
mockClient.embed.mockRejectedValue(new Error("Batch API Error"));
|
|
244
227
|
|
|
245
|
-
await expect(embeddings.embedBatch(["text1", "text2"])).rejects.toThrow(
|
|
246
|
-
"Batch API Error",
|
|
247
|
-
);
|
|
228
|
+
await expect(embeddings.embedBatch(["text1", "text2"])).rejects.toThrow("Batch API Error");
|
|
248
229
|
});
|
|
249
230
|
});
|
|
250
231
|
|
|
@@ -254,11 +235,7 @@ describe("CohereEmbeddings", () => {
|
|
|
254
235
|
});
|
|
255
236
|
|
|
256
237
|
it("should return custom dimensions", () => {
|
|
257
|
-
const customEmbeddings = new CohereEmbeddings(
|
|
258
|
-
"test-api-key",
|
|
259
|
-
"embed-english-v3.0",
|
|
260
|
-
512,
|
|
261
|
-
);
|
|
238
|
+
const customEmbeddings = new CohereEmbeddings("test-api-key", "embed-english-v3.0", 512);
|
|
262
239
|
expect(customEmbeddings.getDimensions()).toBe(512);
|
|
263
240
|
});
|
|
264
241
|
});
|
|
@@ -269,10 +246,7 @@ describe("CohereEmbeddings", () => {
|
|
|
269
246
|
});
|
|
270
247
|
|
|
271
248
|
it("should return custom model", () => {
|
|
272
|
-
const customEmbeddings = new CohereEmbeddings(
|
|
273
|
-
"test-api-key",
|
|
274
|
-
"embed-multilingual-v3.0",
|
|
275
|
-
);
|
|
249
|
+
const customEmbeddings = new CohereEmbeddings("test-api-key", "embed-multilingual-v3.0");
|
|
276
250
|
expect(customEmbeddings.getModel()).toBe("embed-multilingual-v3.0");
|
|
277
251
|
});
|
|
278
252
|
});
|
|
@@ -331,7 +305,7 @@ describe("CohereEmbeddings", () => {
|
|
|
331
305
|
{
|
|
332
306
|
retryAttempts: 3,
|
|
333
307
|
retryDelayMs: 100,
|
|
334
|
-
}
|
|
308
|
+
}
|
|
335
309
|
);
|
|
336
310
|
|
|
337
311
|
const mockEmbedding = Array(1024).fill(0.5);
|
|
@@ -361,7 +335,7 @@ describe("CohereEmbeddings", () => {
|
|
|
361
335
|
{
|
|
362
336
|
retryAttempts: 2,
|
|
363
337
|
retryDelayMs: 100,
|
|
364
|
-
}
|
|
338
|
+
}
|
|
365
339
|
);
|
|
366
340
|
|
|
367
341
|
const rateLimitError = {
|
|
@@ -372,7 +346,7 @@ describe("CohereEmbeddings", () => {
|
|
|
372
346
|
mockClient.embed.mockRejectedValue(rateLimitError);
|
|
373
347
|
|
|
374
348
|
await expect(rateLimitEmbeddings.embed("test text")).rejects.toThrow(
|
|
375
|
-
"Cohere API rate limit exceeded after 2 retry attempts"
|
|
349
|
+
"Cohere API rate limit exceeded after 2 retry attempts"
|
|
376
350
|
);
|
|
377
351
|
|
|
378
352
|
expect(mockClient.embed).toHaveBeenCalledTimes(3);
|
|
@@ -397,9 +371,7 @@ describe("CohereEmbeddings", () => {
|
|
|
397
371
|
const apiError = new Error("Invalid API key");
|
|
398
372
|
mockClient.embed.mockRejectedValue(apiError);
|
|
399
373
|
|
|
400
|
-
await expect(embeddings.embed("test text")).rejects.toThrow(
|
|
401
|
-
"Invalid API key",
|
|
402
|
-
);
|
|
374
|
+
await expect(embeddings.embed("test text")).rejects.toThrow("Invalid API key");
|
|
403
375
|
expect(mockClient.embed).toHaveBeenCalledTimes(1);
|
|
404
376
|
});
|
|
405
377
|
|
|
@@ -412,7 +384,7 @@ describe("CohereEmbeddings", () => {
|
|
|
412
384
|
maxRequestsPerMinute: 200,
|
|
413
385
|
retryAttempts: 5,
|
|
414
386
|
retryDelayMs: 2000,
|
|
415
|
-
}
|
|
387
|
+
}
|
|
416
388
|
);
|
|
417
389
|
|
|
418
390
|
expect(customEmbeddings).toBeDefined();
|
package/src/embeddings/cohere.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { CohereClient } from "cohere-ai";
|
|
2
1
|
import Bottleneck from "bottleneck";
|
|
2
|
+
import { CohereClient } from "cohere-ai";
|
|
3
3
|
import logger from "../logger.js";
|
|
4
|
-
import { EmbeddingProvider, EmbeddingResult, RateLimitConfig } from "./base.js";
|
|
4
|
+
import type { EmbeddingProvider, EmbeddingResult, RateLimitConfig } from "./base.js";
|
|
5
5
|
|
|
6
6
|
interface CohereError {
|
|
7
7
|
status?: number;
|
|
@@ -17,11 +17,7 @@ export class CohereEmbeddings implements EmbeddingProvider {
|
|
|
17
17
|
private limiter: Bottleneck;
|
|
18
18
|
private retryAttempts: number;
|
|
19
19
|
private retryDelayMs: number;
|
|
20
|
-
private inputType:
|
|
21
|
-
| "search_document"
|
|
22
|
-
| "search_query"
|
|
23
|
-
| "classification"
|
|
24
|
-
| "clustering";
|
|
20
|
+
private inputType: "search_document" | "search_query" | "classification" | "clustering";
|
|
25
21
|
|
|
26
22
|
constructor(
|
|
27
23
|
apiKey: string,
|
|
@@ -32,7 +28,7 @@ export class CohereEmbeddings implements EmbeddingProvider {
|
|
|
32
28
|
| "search_document"
|
|
33
29
|
| "search_query"
|
|
34
30
|
| "classification"
|
|
35
|
-
| "clustering" = "search_document"
|
|
31
|
+
| "clustering" = "search_document"
|
|
36
32
|
) {
|
|
37
33
|
this.client = new CohereClient({ token: apiKey });
|
|
38
34
|
this.model = model;
|
|
@@ -62,10 +58,7 @@ export class CohereEmbeddings implements EmbeddingProvider {
|
|
|
62
58
|
});
|
|
63
59
|
}
|
|
64
60
|
|
|
65
|
-
private async retryWithBackoff<T>(
|
|
66
|
-
fn: () => Promise<T>,
|
|
67
|
-
attempt: number = 0,
|
|
68
|
-
): Promise<T> {
|
|
61
|
+
private async retryWithBackoff<T>(fn: () => Promise<T>, attempt: number = 0): Promise<T> {
|
|
69
62
|
try {
|
|
70
63
|
return await fn();
|
|
71
64
|
} catch (error: unknown) {
|
|
@@ -76,7 +69,7 @@ export class CohereEmbeddings implements EmbeddingProvider {
|
|
|
76
69
|
apiError?.message?.toLowerCase().includes("rate limit");
|
|
77
70
|
|
|
78
71
|
if (isRateLimitError && attempt < this.retryAttempts) {
|
|
79
|
-
const delayMs = this.retryDelayMs * Math.pow(2, attempt);
|
|
72
|
+
const delayMs = this.retryDelayMs * 2 ** attempt;
|
|
80
73
|
const waitTimeSeconds = (delayMs / 1000).toFixed(1);
|
|
81
74
|
this.log.warn(
|
|
82
75
|
{
|
|
@@ -84,7 +77,7 @@ export class CohereEmbeddings implements EmbeddingProvider {
|
|
|
84
77
|
attempt: attempt + 1,
|
|
85
78
|
maxAttempts: this.retryAttempts,
|
|
86
79
|
},
|
|
87
|
-
"Rate limit reached, retrying"
|
|
80
|
+
"Rate limit reached, retrying"
|
|
88
81
|
);
|
|
89
82
|
|
|
90
83
|
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
@@ -93,7 +86,7 @@ export class CohereEmbeddings implements EmbeddingProvider {
|
|
|
93
86
|
|
|
94
87
|
if (isRateLimitError) {
|
|
95
88
|
throw new Error(
|
|
96
|
-
`Cohere API rate limit exceeded after ${this.retryAttempts} retry attempts. Please try again later or reduce request frequency.`,
|
|
89
|
+
`Cohere API rate limit exceeded after ${this.retryAttempts} retry attempts. Please try again later or reduce request frequency.`
|
|
97
90
|
);
|
|
98
91
|
}
|
|
99
92
|
|
|
@@ -121,7 +114,7 @@ export class CohereEmbeddings implements EmbeddingProvider {
|
|
|
121
114
|
embedding: embeddings[0],
|
|
122
115
|
dimensions: this.dimensions,
|
|
123
116
|
};
|
|
124
|
-
})
|
|
117
|
+
})
|
|
125
118
|
);
|
|
126
119
|
}
|
|
127
120
|
|
|
@@ -146,7 +139,7 @@ export class CohereEmbeddings implements EmbeddingProvider {
|
|
|
146
139
|
embedding,
|
|
147
140
|
dimensions: this.dimensions,
|
|
148
141
|
}));
|
|
149
|
-
})
|
|
142
|
+
})
|
|
150
143
|
);
|
|
151
144
|
}
|
|
152
145
|
|