@mhalder/qdrant-mcp-server 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dagger/.gitattributes +1 -0
- package/.dagger/package.json +6 -0
- package/.dagger/src/index.ts +83 -0
- package/.dagger/tsconfig.json +13 -0
- package/.dagger/yarn.lock +8 -0
- package/.github/workflows/ci.yml +17 -27
- package/.github/workflows/release.yml +16 -19
- package/CHANGELOG.md +13 -0
- package/README.md +11 -9
- package/build/code/chunker/tree-sitter-chunker.d.ts.map +1 -1
- package/build/code/chunker/tree-sitter-chunker.js +15 -3
- package/build/code/chunker/tree-sitter-chunker.js.map +1 -1
- package/build/code/indexer.d.ts +1 -0
- package/build/code/indexer.d.ts.map +1 -1
- package/build/code/indexer.js +24 -4
- package/build/code/indexer.js.map +1 -1
- package/build/embeddings/cohere.d.ts +1 -0
- package/build/embeddings/cohere.d.ts.map +1 -1
- package/build/embeddings/cohere.js +8 -1
- package/build/embeddings/cohere.js.map +1 -1
- package/build/embeddings/cohere.test.js +11 -0
- package/build/embeddings/cohere.test.js.map +1 -1
- package/build/embeddings/factory.d.ts.map +1 -1
- package/build/embeddings/factory.js +2 -0
- package/build/embeddings/factory.js.map +1 -1
- package/build/embeddings/factory.test.js +12 -1
- package/build/embeddings/factory.test.js.map +1 -1
- package/build/embeddings/ollama.d.ts +1 -0
- package/build/embeddings/ollama.d.ts.map +1 -1
- package/build/embeddings/ollama.js +8 -1
- package/build/embeddings/ollama.js.map +1 -1
- package/build/embeddings/ollama.test.js +11 -0
- package/build/embeddings/ollama.test.js.map +1 -1
- package/build/embeddings/openai.d.ts +1 -0
- package/build/embeddings/openai.d.ts.map +1 -1
- package/build/embeddings/openai.js +8 -1
- package/build/embeddings/openai.js.map +1 -1
- package/build/embeddings/openai.test.js +11 -0
- package/build/embeddings/openai.test.js.map +1 -1
- package/build/embeddings/voyage.d.ts +1 -0
- package/build/embeddings/voyage.d.ts.map +1 -1
- package/build/embeddings/voyage.js +8 -1
- package/build/embeddings/voyage.js.map +1 -1
- package/build/embeddings/voyage.test.js +11 -0
- package/build/embeddings/voyage.test.js.map +1 -1
- package/build/git/indexer.d.ts +1 -0
- package/build/git/indexer.d.ts.map +1 -1
- package/build/git/indexer.js +16 -3
- package/build/git/indexer.js.map +1 -1
- package/build/git/indexer.test.js +15 -9
- package/build/git/indexer.test.js.map +1 -1
- package/build/index.js +35 -26
- package/build/index.js.map +1 -1
- package/build/index.test.js +105 -91
- package/build/index.test.js.map +1 -1
- package/build/logger.d.ts +4 -0
- package/build/logger.d.ts.map +1 -0
- package/build/logger.js +24 -0
- package/build/logger.js.map +1 -0
- package/build/qdrant/client.d.ts +1 -0
- package/build/qdrant/client.d.ts.map +1 -1
- package/build/qdrant/client.js +10 -0
- package/build/qdrant/client.js.map +1 -1
- package/build/qdrant/client.test.js +11 -0
- package/build/qdrant/client.test.js.map +1 -1
- package/build/tools/code.d.ts.map +1 -1
- package/build/tools/code.js +44 -13
- package/build/tools/code.js.map +1 -1
- package/build/tools/collection.d.ts.map +1 -1
- package/build/tools/collection.js +15 -8
- package/build/tools/collection.js.map +1 -1
- package/build/tools/document.d.ts.map +1 -1
- package/build/tools/document.js +9 -4
- package/build/tools/document.js.map +1 -1
- package/build/tools/federated.d.ts.map +1 -1
- package/build/tools/federated.js +9 -4
- package/build/tools/federated.js.map +1 -1
- package/build/tools/federated.test.js +11 -0
- package/build/tools/federated.test.js.map +1 -1
- package/build/tools/git-history.d.ts.map +1 -1
- package/build/tools/git-history.js +44 -12
- package/build/tools/git-history.js.map +1 -1
- package/build/tools/logging.d.ts +16 -0
- package/build/tools/logging.d.ts.map +1 -0
- package/build/tools/logging.js +68 -0
- package/build/tools/logging.js.map +1 -0
- package/build/tools/logging.test.d.ts +2 -0
- package/build/tools/logging.test.d.ts.map +1 -0
- package/build/tools/logging.test.js +139 -0
- package/build/tools/logging.test.js.map +1 -0
- package/build/tools/schemas.d.ts +32 -19
- package/build/tools/schemas.d.ts.map +1 -1
- package/build/tools/schemas.js +9 -3
- package/build/tools/schemas.js.map +1 -1
- package/build/tools/search.d.ts.map +1 -1
- package/build/tools/search.js +13 -4
- package/build/tools/search.js.map +1 -1
- package/dagger.json +8 -0
- package/mise.toml +2 -0
- package/package.json +14 -13
- package/src/code/chunker/tree-sitter-chunker.ts +41 -9
- package/src/code/indexer.ts +41 -6
- package/src/embeddings/cohere.test.ts +12 -0
- package/src/embeddings/cohere.ts +10 -2
- package/src/embeddings/factory.test.ts +13 -1
- package/src/embeddings/factory.ts +3 -0
- package/src/embeddings/ollama.test.ts +12 -0
- package/src/embeddings/ollama.ts +10 -2
- package/src/embeddings/openai.test.ts +12 -0
- package/src/embeddings/openai.ts +10 -2
- package/src/embeddings/voyage.test.ts +12 -0
- package/src/embeddings/voyage.ts +10 -2
- package/src/git/indexer.test.ts +22 -16
- package/src/git/indexer.ts +30 -4
- package/src/index.test.ts +128 -106
- package/src/index.ts +59 -38
- package/src/logger.ts +33 -0
- package/src/qdrant/client.test.ts +12 -0
- package/src/qdrant/client.ts +22 -0
- package/src/tools/code.ts +107 -62
- package/src/tools/collection.ts +39 -22
- package/src/tools/document.ts +52 -22
- package/src/tools/federated.test.ts +12 -0
- package/src/tools/federated.ts +143 -125
- package/src/tools/git-history.ts +117 -60
- package/src/tools/logging.test.ts +206 -0
- package/src/tools/logging.ts +85 -0
- package/src/tools/schemas.ts +9 -3
- package/src/tools/search.ts +93 -71
- package/tests/code/chunker/tree-sitter-chunker.test.ts +13 -1
- package/tests/code/indexer.test.ts +12 -0
- package/tests/code/integration.test.ts +14 -1
|
@@ -13,10 +13,13 @@ import Python from "tree-sitter-python";
|
|
|
13
13
|
import Rust from "tree-sitter-rust";
|
|
14
14
|
import TypeScript from "tree-sitter-typescript";
|
|
15
15
|
|
|
16
|
+
import logger from "../../logger.js";
|
|
16
17
|
import type { ChunkerConfig, CodeChunk } from "../types.js";
|
|
17
18
|
import type { CodeChunker } from "./base.js";
|
|
18
19
|
import { CharacterChunker } from "./character-chunker.js";
|
|
19
20
|
|
|
21
|
+
const log = logger.child({ component: "tree-sitter-chunker" });
|
|
22
|
+
|
|
20
23
|
interface LanguageConfig {
|
|
21
24
|
parser: Parser;
|
|
22
25
|
chunkableTypes: string[];
|
|
@@ -65,7 +68,11 @@ export class TreeSitterChunker implements CodeChunker {
|
|
|
65
68
|
pyParser.setLanguage(Python as any);
|
|
66
69
|
this.languages.set("python", {
|
|
67
70
|
parser: pyParser,
|
|
68
|
-
chunkableTypes: [
|
|
71
|
+
chunkableTypes: [
|
|
72
|
+
"function_definition",
|
|
73
|
+
"class_definition",
|
|
74
|
+
"decorated_definition",
|
|
75
|
+
],
|
|
69
76
|
});
|
|
70
77
|
|
|
71
78
|
// Go
|
|
@@ -86,7 +93,13 @@ export class TreeSitterChunker implements CodeChunker {
|
|
|
86
93
|
rustParser.setLanguage(Rust as any);
|
|
87
94
|
this.languages.set("rust", {
|
|
88
95
|
parser: rustParser,
|
|
89
|
-
chunkableTypes: [
|
|
96
|
+
chunkableTypes: [
|
|
97
|
+
"function_item",
|
|
98
|
+
"impl_item",
|
|
99
|
+
"trait_item",
|
|
100
|
+
"struct_item",
|
|
101
|
+
"enum_item",
|
|
102
|
+
],
|
|
90
103
|
});
|
|
91
104
|
|
|
92
105
|
// Java
|
|
@@ -111,7 +124,11 @@ export class TreeSitterChunker implements CodeChunker {
|
|
|
111
124
|
});
|
|
112
125
|
}
|
|
113
126
|
|
|
114
|
-
async chunk(
|
|
127
|
+
async chunk(
|
|
128
|
+
code: string,
|
|
129
|
+
filePath: string,
|
|
130
|
+
language: string,
|
|
131
|
+
): Promise<CodeChunk[]> {
|
|
115
132
|
const langConfig = this.languages.get(language);
|
|
116
133
|
|
|
117
134
|
if (!langConfig) {
|
|
@@ -124,7 +141,10 @@ export class TreeSitterChunker implements CodeChunker {
|
|
|
124
141
|
const chunks: CodeChunk[] = [];
|
|
125
142
|
|
|
126
143
|
// Find all chunkable nodes
|
|
127
|
-
const nodes = this.findChunkableNodes(
|
|
144
|
+
const nodes = this.findChunkableNodes(
|
|
145
|
+
tree.rootNode,
|
|
146
|
+
langConfig.chunkableTypes,
|
|
147
|
+
);
|
|
128
148
|
|
|
129
149
|
for (const [index, node] of nodes.entries()) {
|
|
130
150
|
const content = code.substring(node.startIndex, node.endIndex);
|
|
@@ -136,7 +156,11 @@ export class TreeSitterChunker implements CodeChunker {
|
|
|
136
156
|
|
|
137
157
|
// If chunk is too large, fall back to character chunking for this node
|
|
138
158
|
if (content.length > this.config.maxChunkSize * 2) {
|
|
139
|
-
const subChunks = await this.fallbackChunker.chunk(
|
|
159
|
+
const subChunks = await this.fallbackChunker.chunk(
|
|
160
|
+
content,
|
|
161
|
+
filePath,
|
|
162
|
+
language,
|
|
163
|
+
);
|
|
140
164
|
// Adjust line numbers for sub-chunks
|
|
141
165
|
for (const subChunk of subChunks) {
|
|
142
166
|
chunks.push({
|
|
@@ -174,7 +198,10 @@ export class TreeSitterChunker implements CodeChunker {
|
|
|
174
198
|
return chunks;
|
|
175
199
|
} catch (error) {
|
|
176
200
|
// On parsing error, fallback to character-based chunking
|
|
177
|
-
|
|
201
|
+
log.warn(
|
|
202
|
+
{ filePath, err: error },
|
|
203
|
+
"Tree-sitter parsing failed, falling back to character chunker",
|
|
204
|
+
);
|
|
178
205
|
return this.fallbackChunker.chunk(code, filePath, language);
|
|
179
206
|
}
|
|
180
207
|
}
|
|
@@ -192,7 +219,7 @@ export class TreeSitterChunker implements CodeChunker {
|
|
|
192
219
|
*/
|
|
193
220
|
private findChunkableNodes(
|
|
194
221
|
node: Parser.SyntaxNode,
|
|
195
|
-
chunkableTypes: string[]
|
|
222
|
+
chunkableTypes: string[],
|
|
196
223
|
): Parser.SyntaxNode[] {
|
|
197
224
|
const nodes: Parser.SyntaxNode[] = [];
|
|
198
225
|
|
|
@@ -215,7 +242,10 @@ export class TreeSitterChunker implements CodeChunker {
|
|
|
215
242
|
/**
|
|
216
243
|
* Extract function/class name from AST node
|
|
217
244
|
*/
|
|
218
|
-
private extractName(
|
|
245
|
+
private extractName(
|
|
246
|
+
node: Parser.SyntaxNode,
|
|
247
|
+
code: string,
|
|
248
|
+
): string | undefined {
|
|
219
249
|
// Try to find name node
|
|
220
250
|
const nameNode = node.childForFieldName("name");
|
|
221
251
|
if (nameNode) {
|
|
@@ -235,7 +265,9 @@ export class TreeSitterChunker implements CodeChunker {
|
|
|
235
265
|
/**
|
|
236
266
|
* Map AST node type to chunk type
|
|
237
267
|
*/
|
|
238
|
-
private getChunkType(
|
|
268
|
+
private getChunkType(
|
|
269
|
+
nodeType: string,
|
|
270
|
+
): "function" | "class" | "interface" | "block" {
|
|
239
271
|
if (nodeType.includes("function") || nodeType.includes("method")) {
|
|
240
272
|
return "function";
|
|
241
273
|
}
|
package/src/code/indexer.ts
CHANGED
|
@@ -8,6 +8,7 @@ import { promises as fs } from "node:fs";
|
|
|
8
8
|
import { extname, join, relative, resolve } from "node:path";
|
|
9
9
|
import { promisify } from "node:util";
|
|
10
10
|
import picomatch from "picomatch";
|
|
11
|
+
import logger from "../logger.js";
|
|
11
12
|
import type { EmbeddingProvider } from "../embeddings/base.js";
|
|
12
13
|
import { BM25SparseVectorGenerator } from "../embeddings/sparse.js";
|
|
13
14
|
import { normalizeRemoteUrl } from "../git/extractor.js";
|
|
@@ -34,6 +35,8 @@ const execFileAsync = promisify(execFile);
|
|
|
34
35
|
const INDEXING_METADATA_ID = "__indexing_metadata__";
|
|
35
36
|
|
|
36
37
|
export class CodeIndexer {
|
|
38
|
+
private log = logger.child({ component: "code-indexer" });
|
|
39
|
+
|
|
37
40
|
constructor(
|
|
38
41
|
private qdrant: QdrantManager,
|
|
39
42
|
private embeddings: EmbeddingProvider,
|
|
@@ -83,6 +86,8 @@ export class CodeIndexer {
|
|
|
83
86
|
const absolutePath = await this.validatePath(path);
|
|
84
87
|
const collectionName = await this.getCollectionName(absolutePath);
|
|
85
88
|
|
|
89
|
+
this.log.info({ path: absolutePath, collectionName }, "Indexing started");
|
|
90
|
+
|
|
86
91
|
try {
|
|
87
92
|
// 1. Scan files
|
|
88
93
|
progressCallback?.({
|
|
@@ -105,6 +110,7 @@ export class CodeIndexer {
|
|
|
105
110
|
const files = await scanner.scanDirectory(absolutePath);
|
|
106
111
|
|
|
107
112
|
stats.filesScanned = files.length;
|
|
113
|
+
this.log.info({ filesFound: files.length }, "File scan complete");
|
|
108
114
|
|
|
109
115
|
if (files.length === 0) {
|
|
110
116
|
stats.status = "completed";
|
|
@@ -128,6 +134,7 @@ export class CodeIndexer {
|
|
|
128
134
|
"Cosine",
|
|
129
135
|
this.config.enableHybridSearch,
|
|
130
136
|
);
|
|
137
|
+
this.log.debug({ collectionName, vectorSize }, "Collection created");
|
|
131
138
|
}
|
|
132
139
|
|
|
133
140
|
// Store "indexing in progress" marker immediately after collection is ready
|
|
@@ -209,7 +216,7 @@ export class CodeIndexer {
|
|
|
209
216
|
// Snapshot failure shouldn't fail the entire indexing
|
|
210
217
|
const errorMessage =
|
|
211
218
|
error instanceof Error ? error.message : String(error);
|
|
212
|
-
|
|
219
|
+
this.log.error({ err: error }, "Failed to save snapshot");
|
|
213
220
|
stats.errors?.push(`Snapshot save failed: ${errorMessage}`);
|
|
214
221
|
}
|
|
215
222
|
|
|
@@ -223,6 +230,10 @@ export class CodeIndexer {
|
|
|
223
230
|
|
|
224
231
|
// 4. Generate embeddings and store in batches
|
|
225
232
|
const batchSize = this.config.batchSize;
|
|
233
|
+
this.log.debug(
|
|
234
|
+
{ totalChunks: allChunks.length, batchSize },
|
|
235
|
+
"Starting embedding generation",
|
|
236
|
+
);
|
|
226
237
|
for (let i = 0; i < allChunks.length; i += batchSize) {
|
|
227
238
|
const batch = allChunks.slice(i, i + batchSize);
|
|
228
239
|
|
|
@@ -309,6 +320,14 @@ export class CodeIndexer {
|
|
|
309
320
|
await this.storeIndexingMarker(collectionName, true);
|
|
310
321
|
|
|
311
322
|
stats.durationMs = Date.now() - startTime;
|
|
323
|
+
this.log.info(
|
|
324
|
+
{
|
|
325
|
+
filesIndexed: stats.filesIndexed,
|
|
326
|
+
chunksCreated: stats.chunksCreated,
|
|
327
|
+
durationMs: stats.durationMs,
|
|
328
|
+
},
|
|
329
|
+
"Indexing complete",
|
|
330
|
+
);
|
|
312
331
|
return stats;
|
|
313
332
|
} catch (error) {
|
|
314
333
|
const errorMessage =
|
|
@@ -365,7 +384,7 @@ export class CodeIndexer {
|
|
|
365
384
|
}
|
|
366
385
|
} catch (error) {
|
|
367
386
|
// Non-fatal: log but don't fail the indexing
|
|
368
|
-
|
|
387
|
+
this.log.error({ err: error }, "Failed to store indexing marker");
|
|
369
388
|
}
|
|
370
389
|
}
|
|
371
390
|
|
|
@@ -564,6 +583,8 @@ export class CodeIndexer {
|
|
|
564
583
|
const absolutePath = await this.validatePath(path);
|
|
565
584
|
const collectionName = await this.getCollectionName(absolutePath);
|
|
566
585
|
|
|
586
|
+
this.log.info({ path: absolutePath }, "Reindex started");
|
|
587
|
+
|
|
567
588
|
// Check if collection exists
|
|
568
589
|
const exists = await this.qdrant.collectionExists(collectionName);
|
|
569
590
|
if (!exists) {
|
|
@@ -640,9 +661,9 @@ export class CodeIndexer {
|
|
|
640
661
|
await this.qdrant.deletePointsByFilter(collectionName, filter);
|
|
641
662
|
} catch (error) {
|
|
642
663
|
// Log but don't fail - file might not have any chunks
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
664
|
+
this.log.error(
|
|
665
|
+
{ relativePath, err: error },
|
|
666
|
+
"Failed to delete chunks during reindex",
|
|
646
667
|
);
|
|
647
668
|
}
|
|
648
669
|
}
|
|
@@ -677,7 +698,10 @@ export class CodeIndexer {
|
|
|
677
698
|
allChunks.push({ chunk, id });
|
|
678
699
|
}
|
|
679
700
|
} catch (error) {
|
|
680
|
-
|
|
701
|
+
this.log.error(
|
|
702
|
+
{ filePath, err: error },
|
|
703
|
+
"Failed to process file during reindex",
|
|
704
|
+
);
|
|
681
705
|
}
|
|
682
706
|
}
|
|
683
707
|
|
|
@@ -746,6 +770,16 @@ export class CodeIndexer {
|
|
|
746
770
|
await synchronizer.updateSnapshot(currentFiles);
|
|
747
771
|
|
|
748
772
|
stats.durationMs = Date.now() - startTime;
|
|
773
|
+
this.log.info(
|
|
774
|
+
{
|
|
775
|
+
filesAdded: stats.filesAdded,
|
|
776
|
+
filesModified: stats.filesModified,
|
|
777
|
+
filesDeleted: stats.filesDeleted,
|
|
778
|
+
chunksAdded: stats.chunksAdded,
|
|
779
|
+
durationMs: stats.durationMs,
|
|
780
|
+
},
|
|
781
|
+
"Reindex complete",
|
|
782
|
+
);
|
|
749
783
|
return stats;
|
|
750
784
|
} catch (error) {
|
|
751
785
|
const errorMessage =
|
|
@@ -758,6 +792,7 @@ export class CodeIndexer {
|
|
|
758
792
|
* Clear all indexed data for a codebase
|
|
759
793
|
*/
|
|
760
794
|
async clearIndex(path: string): Promise<void> {
|
|
795
|
+
this.log.info({ path }, "Clearing index");
|
|
761
796
|
const absolutePath = await this.validatePath(path);
|
|
762
797
|
const collectionName = await this.getCollectionName(absolutePath);
|
|
763
798
|
const exists = await this.qdrant.collectionExists(collectionName);
|
|
@@ -12,6 +12,18 @@ vi.mock("cohere-ai", () => ({
|
|
|
12
12
|
}),
|
|
13
13
|
}));
|
|
14
14
|
|
|
15
|
+
vi.mock("../logger.js", () => ({
|
|
16
|
+
default: {
|
|
17
|
+
info: vi.fn(),
|
|
18
|
+
warn: vi.fn(),
|
|
19
|
+
error: vi.fn(),
|
|
20
|
+
debug: vi.fn(),
|
|
21
|
+
fatal: vi.fn(),
|
|
22
|
+
trace: vi.fn(),
|
|
23
|
+
child: vi.fn().mockReturnThis(),
|
|
24
|
+
},
|
|
25
|
+
}));
|
|
26
|
+
|
|
15
27
|
describe("CohereEmbeddings", () => {
|
|
16
28
|
let embeddings: CohereEmbeddings;
|
|
17
29
|
|
package/src/embeddings/cohere.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { CohereClient } from "cohere-ai";
|
|
2
2
|
import Bottleneck from "bottleneck";
|
|
3
|
+
import logger from "../logger.js";
|
|
3
4
|
import { EmbeddingProvider, EmbeddingResult, RateLimitConfig } from "./base.js";
|
|
4
5
|
|
|
5
6
|
interface CohereError {
|
|
@@ -9,6 +10,7 @@ interface CohereError {
|
|
|
9
10
|
}
|
|
10
11
|
|
|
11
12
|
export class CohereEmbeddings implements EmbeddingProvider {
|
|
13
|
+
private log = logger.child({ component: "embeddings", provider: "cohere" });
|
|
12
14
|
private client: CohereClient;
|
|
13
15
|
private model: string;
|
|
14
16
|
private dimensions: number;
|
|
@@ -76,8 +78,13 @@ export class CohereEmbeddings implements EmbeddingProvider {
|
|
|
76
78
|
if (isRateLimitError && attempt < this.retryAttempts) {
|
|
77
79
|
const delayMs = this.retryDelayMs * Math.pow(2, attempt);
|
|
78
80
|
const waitTimeSeconds = (delayMs / 1000).toFixed(1);
|
|
79
|
-
|
|
80
|
-
|
|
81
|
+
this.log.warn(
|
|
82
|
+
{
|
|
83
|
+
waitTimeSeconds,
|
|
84
|
+
attempt: attempt + 1,
|
|
85
|
+
maxAttempts: this.retryAttempts,
|
|
86
|
+
},
|
|
87
|
+
"Rate limit reached, retrying",
|
|
81
88
|
);
|
|
82
89
|
|
|
83
90
|
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
@@ -119,6 +126,7 @@ export class CohereEmbeddings implements EmbeddingProvider {
|
|
|
119
126
|
}
|
|
120
127
|
|
|
121
128
|
async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
|
|
129
|
+
this.log.debug({ batchSize: texts.length }, "embedBatch");
|
|
122
130
|
return this.limiter.schedule(() =>
|
|
123
131
|
this.retryWithBackoff(async () => {
|
|
124
132
|
const response = await this.client.embed({
|
|
@@ -1,10 +1,22 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
|
2
2
|
import { EmbeddingProviderFactory, type FactoryConfig } from "./factory.js";
|
|
3
3
|
import { OpenAIEmbeddings } from "./openai.js";
|
|
4
4
|
import { CohereEmbeddings } from "./cohere.js";
|
|
5
5
|
import { VoyageEmbeddings } from "./voyage.js";
|
|
6
6
|
import { OllamaEmbeddings } from "./ollama.js";
|
|
7
7
|
|
|
8
|
+
vi.mock("../logger.js", () => ({
|
|
9
|
+
default: {
|
|
10
|
+
info: vi.fn(),
|
|
11
|
+
warn: vi.fn(),
|
|
12
|
+
error: vi.fn(),
|
|
13
|
+
debug: vi.fn(),
|
|
14
|
+
fatal: vi.fn(),
|
|
15
|
+
trace: vi.fn(),
|
|
16
|
+
child: vi.fn().mockReturnThis(),
|
|
17
|
+
},
|
|
18
|
+
}));
|
|
19
|
+
|
|
8
20
|
describe("EmbeddingProviderFactory", () => {
|
|
9
21
|
let originalEnv: NodeJS.ProcessEnv;
|
|
10
22
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logger from "../logger.js";
|
|
1
2
|
import { EmbeddingProvider, ProviderConfig } from "./base.js";
|
|
2
3
|
import { OpenAIEmbeddings } from "./openai.js";
|
|
3
4
|
import { CohereEmbeddings } from "./cohere.js";
|
|
@@ -15,6 +16,8 @@ export class EmbeddingProviderFactory {
|
|
|
15
16
|
const { provider, model, dimensions, rateLimitConfig, apiKey, baseUrl } =
|
|
16
17
|
config;
|
|
17
18
|
|
|
19
|
+
logger.info({ provider, model }, "Creating embedding provider");
|
|
20
|
+
|
|
18
21
|
switch (provider) {
|
|
19
22
|
case "openai":
|
|
20
23
|
if (!apiKey) {
|
|
@@ -4,6 +4,18 @@ import { OllamaEmbeddings } from "./ollama.js";
|
|
|
4
4
|
// Mock fetch globally
|
|
5
5
|
global.fetch = vi.fn();
|
|
6
6
|
|
|
7
|
+
vi.mock("../logger.js", () => ({
|
|
8
|
+
default: {
|
|
9
|
+
info: vi.fn(),
|
|
10
|
+
warn: vi.fn(),
|
|
11
|
+
error: vi.fn(),
|
|
12
|
+
debug: vi.fn(),
|
|
13
|
+
fatal: vi.fn(),
|
|
14
|
+
trace: vi.fn(),
|
|
15
|
+
child: vi.fn().mockReturnThis(),
|
|
16
|
+
},
|
|
17
|
+
}));
|
|
18
|
+
|
|
7
19
|
describe("OllamaEmbeddings", () => {
|
|
8
20
|
let embeddings: OllamaEmbeddings;
|
|
9
21
|
let mockFetch: any;
|
package/src/embeddings/ollama.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import Bottleneck from "bottleneck";
|
|
2
|
+
import logger from "../logger.js";
|
|
2
3
|
import { EmbeddingProvider, EmbeddingResult, RateLimitConfig } from "./base.js";
|
|
3
4
|
|
|
4
5
|
interface OllamaError {
|
|
@@ -11,6 +12,7 @@ interface OllamaEmbedResponse {
|
|
|
11
12
|
}
|
|
12
13
|
|
|
13
14
|
export class OllamaEmbeddings implements EmbeddingProvider {
|
|
15
|
+
private log = logger.child({ component: "embeddings", provider: "ollama" });
|
|
14
16
|
private model: string;
|
|
15
17
|
private dimensions: number;
|
|
16
18
|
private limiter: Bottleneck;
|
|
@@ -76,8 +78,13 @@ export class OllamaEmbeddings implements EmbeddingProvider {
|
|
|
76
78
|
if (isRateLimitError && attempt < this.retryAttempts) {
|
|
77
79
|
const delayMs = this.retryDelayMs * Math.pow(2, attempt);
|
|
78
80
|
const waitTimeSeconds = (delayMs / 1000).toFixed(1);
|
|
79
|
-
|
|
80
|
-
|
|
81
|
+
this.log.warn(
|
|
82
|
+
{
|
|
83
|
+
waitTimeSeconds,
|
|
84
|
+
attempt: attempt + 1,
|
|
85
|
+
maxAttempts: this.retryAttempts,
|
|
86
|
+
},
|
|
87
|
+
"Rate limit reached, retrying",
|
|
81
88
|
);
|
|
82
89
|
|
|
83
90
|
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
@@ -169,6 +176,7 @@ export class OllamaEmbeddings implements EmbeddingProvider {
|
|
|
169
176
|
}
|
|
170
177
|
|
|
171
178
|
async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
|
|
179
|
+
this.log.debug({ batchSize: texts.length }, "embedBatch");
|
|
172
180
|
// Ollama doesn't support batch embeddings natively, so we process in parallel
|
|
173
181
|
// Process in chunks to avoid overwhelming Ollama and prevent memory issues
|
|
174
182
|
const CHUNK_SIZE = 50;
|
|
@@ -14,6 +14,18 @@ vi.mock("openai", () => ({
|
|
|
14
14
|
}),
|
|
15
15
|
}));
|
|
16
16
|
|
|
17
|
+
vi.mock("../logger.js", () => ({
|
|
18
|
+
default: {
|
|
19
|
+
info: vi.fn(),
|
|
20
|
+
warn: vi.fn(),
|
|
21
|
+
error: vi.fn(),
|
|
22
|
+
debug: vi.fn(),
|
|
23
|
+
fatal: vi.fn(),
|
|
24
|
+
trace: vi.fn(),
|
|
25
|
+
child: vi.fn().mockReturnThis(),
|
|
26
|
+
},
|
|
27
|
+
}));
|
|
28
|
+
|
|
17
29
|
describe("OpenAIEmbeddings", () => {
|
|
18
30
|
let embeddings: OpenAIEmbeddings;
|
|
19
31
|
|
package/src/embeddings/openai.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
2
|
import Bottleneck from "bottleneck";
|
|
3
|
+
import logger from "../logger.js";
|
|
3
4
|
import {
|
|
4
5
|
EmbeddingProvider,
|
|
5
6
|
EmbeddingResult,
|
|
@@ -18,6 +19,7 @@ interface OpenAIError {
|
|
|
18
19
|
}
|
|
19
20
|
|
|
20
21
|
export class OpenAIEmbeddings implements EmbeddingProvider {
|
|
22
|
+
private log = logger.child({ component: "embeddings", provider: "openai" });
|
|
21
23
|
private client: OpenAI;
|
|
22
24
|
private model: string;
|
|
23
25
|
private dimensions: number;
|
|
@@ -95,8 +97,13 @@ export class OpenAIEmbeddings implements EmbeddingProvider {
|
|
|
95
97
|
}
|
|
96
98
|
|
|
97
99
|
const waitTimeSeconds = (delayMs / 1000).toFixed(1);
|
|
98
|
-
|
|
99
|
-
|
|
100
|
+
this.log.warn(
|
|
101
|
+
{
|
|
102
|
+
waitTimeSeconds,
|
|
103
|
+
attempt: attempt + 1,
|
|
104
|
+
maxAttempts: this.retryAttempts,
|
|
105
|
+
},
|
|
106
|
+
"Rate limit reached, retrying",
|
|
100
107
|
);
|
|
101
108
|
|
|
102
109
|
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
@@ -132,6 +139,7 @@ export class OpenAIEmbeddings implements EmbeddingProvider {
|
|
|
132
139
|
}
|
|
133
140
|
|
|
134
141
|
async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
|
|
142
|
+
this.log.debug({ batchSize: texts.length }, "embedBatch");
|
|
135
143
|
return this.limiter.schedule(() =>
|
|
136
144
|
this.retryWithBackoff(async () => {
|
|
137
145
|
const response = await this.client.embeddings.create({
|
|
@@ -4,6 +4,18 @@ import { VoyageEmbeddings } from "./voyage.js";
|
|
|
4
4
|
// Mock fetch globally
|
|
5
5
|
global.fetch = vi.fn();
|
|
6
6
|
|
|
7
|
+
vi.mock("../logger.js", () => ({
|
|
8
|
+
default: {
|
|
9
|
+
info: vi.fn(),
|
|
10
|
+
warn: vi.fn(),
|
|
11
|
+
error: vi.fn(),
|
|
12
|
+
debug: vi.fn(),
|
|
13
|
+
fatal: vi.fn(),
|
|
14
|
+
trace: vi.fn(),
|
|
15
|
+
child: vi.fn().mockReturnThis(),
|
|
16
|
+
},
|
|
17
|
+
}));
|
|
18
|
+
|
|
7
19
|
describe("VoyageEmbeddings", () => {
|
|
8
20
|
let embeddings: VoyageEmbeddings;
|
|
9
21
|
let mockFetch: any;
|
package/src/embeddings/voyage.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import Bottleneck from "bottleneck";
|
|
2
|
+
import logger from "../logger.js";
|
|
2
3
|
import { EmbeddingProvider, EmbeddingResult, RateLimitConfig } from "./base.js";
|
|
3
4
|
|
|
4
5
|
interface VoyageError {
|
|
@@ -15,6 +16,7 @@ interface VoyageEmbedResponse {
|
|
|
15
16
|
}
|
|
16
17
|
|
|
17
18
|
export class VoyageEmbeddings implements EmbeddingProvider {
|
|
19
|
+
private log = logger.child({ component: "embeddings", provider: "voyage" });
|
|
18
20
|
private apiKey: string;
|
|
19
21
|
private model: string;
|
|
20
22
|
private dimensions: number;
|
|
@@ -76,8 +78,13 @@ export class VoyageEmbeddings implements EmbeddingProvider {
|
|
|
76
78
|
if (isRateLimitError && attempt < this.retryAttempts) {
|
|
77
79
|
const delayMs = this.retryDelayMs * Math.pow(2, attempt);
|
|
78
80
|
const waitTimeSeconds = (delayMs / 1000).toFixed(1);
|
|
79
|
-
|
|
80
|
-
|
|
81
|
+
this.log.warn(
|
|
82
|
+
{
|
|
83
|
+
waitTimeSeconds,
|
|
84
|
+
attempt: attempt + 1,
|
|
85
|
+
maxAttempts: this.retryAttempts,
|
|
86
|
+
},
|
|
87
|
+
"Rate limit reached, retrying",
|
|
81
88
|
);
|
|
82
89
|
|
|
83
90
|
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
@@ -142,6 +149,7 @@ export class VoyageEmbeddings implements EmbeddingProvider {
|
|
|
142
149
|
}
|
|
143
150
|
|
|
144
151
|
async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
|
|
152
|
+
this.log.debug({ batchSize: texts.length }, "embedBatch");
|
|
145
153
|
return this.limiter.schedule(() =>
|
|
146
154
|
this.retryWithBackoff(async () => {
|
|
147
155
|
const response = await this.callApi(texts);
|
package/src/git/indexer.test.ts
CHANGED
|
@@ -76,6 +76,18 @@ vi.mock("node:fs", () => ({
|
|
|
76
76
|
},
|
|
77
77
|
}));
|
|
78
78
|
|
|
79
|
+
vi.mock("../logger.js", () => ({
|
|
80
|
+
default: {
|
|
81
|
+
info: vi.fn(),
|
|
82
|
+
warn: vi.fn(),
|
|
83
|
+
error: vi.fn(),
|
|
84
|
+
debug: vi.fn(),
|
|
85
|
+
fatal: vi.fn(),
|
|
86
|
+
trace: vi.fn(),
|
|
87
|
+
child: vi.fn().mockReturnThis(),
|
|
88
|
+
},
|
|
89
|
+
}));
|
|
90
|
+
|
|
79
91
|
describe("GitHistoryIndexer", () => {
|
|
80
92
|
let indexer: GitHistoryIndexer;
|
|
81
93
|
let mockQdrant: any;
|
|
@@ -622,10 +634,6 @@ describe("GitHistoryIndexer", () => {
|
|
|
622
634
|
});
|
|
623
635
|
|
|
624
636
|
it("should handle snapshot save failure gracefully", async () => {
|
|
625
|
-
const consoleSpy = vi
|
|
626
|
-
.spyOn(console, "error")
|
|
627
|
-
.mockImplementation(() => {});
|
|
628
|
-
|
|
629
637
|
const mockCommits = [
|
|
630
638
|
{
|
|
631
639
|
hash: "abc123",
|
|
@@ -653,14 +661,12 @@ describe("GitHistoryIndexer", () => {
|
|
|
653
661
|
|
|
654
662
|
expect(stats.status).toBe("completed");
|
|
655
663
|
expect(stats.errors?.some((e) => e.includes("Snapshot"))).toBe(true);
|
|
656
|
-
|
|
657
|
-
consoleSpy.mockRestore();
|
|
658
664
|
});
|
|
659
665
|
|
|
660
666
|
it("should handle storeIndexingMarker errors silently", async () => {
|
|
661
|
-
const
|
|
662
|
-
|
|
663
|
-
|
|
667
|
+
const loggerMod = await import("../logger.js");
|
|
668
|
+
const logError = loggerMod.default.error as ReturnType<typeof vi.fn>;
|
|
669
|
+
logError.mockClear();
|
|
664
670
|
|
|
665
671
|
mockExtractorInstance.validateRepository.mockResolvedValue(true);
|
|
666
672
|
mockExtractorInstance.getLatestCommitHash.mockResolvedValue("abc123");
|
|
@@ -670,9 +676,7 @@ describe("GitHistoryIndexer", () => {
|
|
|
670
676
|
const stats = await indexer.indexHistory("/test/repo");
|
|
671
677
|
|
|
672
678
|
expect(stats.status).toBe("completed");
|
|
673
|
-
expect(
|
|
674
|
-
|
|
675
|
-
consoleSpy.mockRestore();
|
|
679
|
+
expect(logError).toHaveBeenCalled();
|
|
676
680
|
});
|
|
677
681
|
|
|
678
682
|
it("should use hybrid search for indexing when enabled", async () => {
|
|
@@ -1072,7 +1076,9 @@ describe("GitHistoryIndexer", () => {
|
|
|
1072
1076
|
mockChunkerInstance.generateChunkId.mockReturnValue("chunk-1");
|
|
1073
1077
|
|
|
1074
1078
|
// All retries fail
|
|
1075
|
-
mockEmbeddings.embedBatch.mockRejectedValue(
|
|
1079
|
+
mockEmbeddings.embedBatch.mockRejectedValue(
|
|
1080
|
+
new Error("Persistent error"),
|
|
1081
|
+
);
|
|
1076
1082
|
|
|
1077
1083
|
mockQdrant.collectionExists.mockResolvedValue(false);
|
|
1078
1084
|
mockQdrant.getCollectionInfo.mockResolvedValue({ hybridEnabled: false });
|
|
@@ -1081,9 +1087,9 @@ describe("GitHistoryIndexer", () => {
|
|
|
1081
1087
|
|
|
1082
1088
|
expect(stats.status).toBe("partial");
|
|
1083
1089
|
expect(stats.errors).toBeDefined();
|
|
1084
|
-
expect(
|
|
1085
|
-
|
|
1086
|
-
)
|
|
1090
|
+
expect(stats.errors?.some((e) => e.includes("after 3 attempts"))).toBe(
|
|
1091
|
+
true,
|
|
1092
|
+
);
|
|
1087
1093
|
});
|
|
1088
1094
|
});
|
|
1089
1095
|
});
|