@mhalder/qdrant-mcp-server 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100):
  1. package/.env.example +92 -0
  2. package/.github/workflows/ci.yml +61 -0
  3. package/.github/workflows/claude-code-review.yml +57 -0
  4. package/.github/workflows/claude.yml +50 -0
  5. package/.github/workflows/release.yml +52 -0
  6. package/.husky/commit-msg +1 -0
  7. package/.husky/pre-commit +1 -0
  8. package/.releaserc.json +59 -0
  9. package/.yamlfmt +4 -0
  10. package/CHANGELOG.md +73 -0
  11. package/CONTRIBUTING.md +176 -0
  12. package/LICENSE +21 -0
  13. package/README.md +714 -0
  14. package/build/embeddings/base.d.ts +23 -0
  15. package/build/embeddings/base.d.ts.map +1 -0
  16. package/build/embeddings/base.js +2 -0
  17. package/build/embeddings/base.js.map +1 -0
  18. package/build/embeddings/cohere.d.ts +17 -0
  19. package/build/embeddings/cohere.d.ts.map +1 -0
  20. package/build/embeddings/cohere.js +102 -0
  21. package/build/embeddings/cohere.js.map +1 -0
  22. package/build/embeddings/cohere.test.d.ts +2 -0
  23. package/build/embeddings/cohere.test.d.ts.map +1 -0
  24. package/build/embeddings/cohere.test.js +279 -0
  25. package/build/embeddings/cohere.test.js.map +1 -0
  26. package/build/embeddings/factory.d.ts +10 -0
  27. package/build/embeddings/factory.d.ts.map +1 -0
  28. package/build/embeddings/factory.js +98 -0
  29. package/build/embeddings/factory.js.map +1 -0
  30. package/build/embeddings/factory.test.d.ts +2 -0
  31. package/build/embeddings/factory.test.d.ts.map +1 -0
  32. package/build/embeddings/factory.test.js +329 -0
  33. package/build/embeddings/factory.test.js.map +1 -0
  34. package/build/embeddings/ollama.d.ts +18 -0
  35. package/build/embeddings/ollama.d.ts.map +1 -0
  36. package/build/embeddings/ollama.js +135 -0
  37. package/build/embeddings/ollama.js.map +1 -0
  38. package/build/embeddings/ollama.test.d.ts +2 -0
  39. package/build/embeddings/ollama.test.d.ts.map +1 -0
  40. package/build/embeddings/ollama.test.js +399 -0
  41. package/build/embeddings/ollama.test.js.map +1 -0
  42. package/build/embeddings/openai.d.ts +16 -0
  43. package/build/embeddings/openai.d.ts.map +1 -0
  44. package/build/embeddings/openai.js +108 -0
  45. package/build/embeddings/openai.js.map +1 -0
  46. package/build/embeddings/openai.test.d.ts +2 -0
  47. package/build/embeddings/openai.test.d.ts.map +1 -0
  48. package/build/embeddings/openai.test.js +283 -0
  49. package/build/embeddings/openai.test.js.map +1 -0
  50. package/build/embeddings/voyage.d.ts +19 -0
  51. package/build/embeddings/voyage.d.ts.map +1 -0
  52. package/build/embeddings/voyage.js +113 -0
  53. package/build/embeddings/voyage.js.map +1 -0
  54. package/build/embeddings/voyage.test.d.ts +2 -0
  55. package/build/embeddings/voyage.test.d.ts.map +1 -0
  56. package/build/embeddings/voyage.test.js +371 -0
  57. package/build/embeddings/voyage.test.js.map +1 -0
  58. package/build/index.d.ts +3 -0
  59. package/build/index.d.ts.map +1 -0
  60. package/build/index.js +534 -0
  61. package/build/index.js.map +1 -0
  62. package/build/index.test.d.ts +2 -0
  63. package/build/index.test.d.ts.map +1 -0
  64. package/build/index.test.js +241 -0
  65. package/build/index.test.js.map +1 -0
  66. package/build/qdrant/client.d.ts +37 -0
  67. package/build/qdrant/client.d.ts.map +1 -0
  68. package/build/qdrant/client.js +142 -0
  69. package/build/qdrant/client.js.map +1 -0
  70. package/build/qdrant/client.test.d.ts +2 -0
  71. package/build/qdrant/client.test.d.ts.map +1 -0
  72. package/build/qdrant/client.test.js +340 -0
  73. package/build/qdrant/client.test.js.map +1 -0
  74. package/commitlint.config.js +25 -0
  75. package/docker-compose.yml +22 -0
  76. package/docs/test_report.md +259 -0
  77. package/examples/README.md +315 -0
  78. package/examples/basic/README.md +111 -0
  79. package/examples/filters/README.md +262 -0
  80. package/examples/knowledge-base/README.md +207 -0
  81. package/examples/rate-limiting/README.md +376 -0
  82. package/package.json +59 -0
  83. package/scripts/verify-providers.js +238 -0
  84. package/src/embeddings/base.ts +25 -0
  85. package/src/embeddings/cohere.test.ts +408 -0
  86. package/src/embeddings/cohere.ts +152 -0
  87. package/src/embeddings/factory.test.ts +453 -0
  88. package/src/embeddings/factory.ts +163 -0
  89. package/src/embeddings/ollama.test.ts +543 -0
  90. package/src/embeddings/ollama.ts +196 -0
  91. package/src/embeddings/openai.test.ts +402 -0
  92. package/src/embeddings/openai.ts +158 -0
  93. package/src/embeddings/voyage.test.ts +520 -0
  94. package/src/embeddings/voyage.ts +168 -0
  95. package/src/index.test.ts +304 -0
  96. package/src/index.ts +614 -0
  97. package/src/qdrant/client.test.ts +456 -0
  98. package/src/qdrant/client.ts +195 -0
  99. package/tsconfig.json +19 -0
  100. package/vitest.config.ts +37 -0
@@ -0,0 +1,196 @@
1
+ import Bottleneck from "bottleneck";
2
+ import { EmbeddingProvider, EmbeddingResult, RateLimitConfig } from "./base.js";
3
+
4
+ interface OllamaError {
5
+ status?: number;
6
+ message?: string;
7
+ }
8
+
9
+ interface OllamaEmbedResponse {
10
+ embedding: number[];
11
+ }
12
+
13
+ export class OllamaEmbeddings implements EmbeddingProvider {
14
+ private model: string;
15
+ private dimensions: number;
16
+ private limiter: Bottleneck;
17
+ private retryAttempts: number;
18
+ private retryDelayMs: number;
19
+ private baseUrl: string;
20
+
21
+ constructor(
22
+ model: string = "nomic-embed-text",
23
+ dimensions?: number,
24
+ rateLimitConfig?: RateLimitConfig,
25
+ baseUrl: string = "http://localhost:11434",
26
+ ) {
27
+ this.model = model;
28
+ this.baseUrl = baseUrl;
29
+
30
+ // Default dimensions for different models
31
+ const defaultDimensions: Record<string, number> = {
32
+ "nomic-embed-text": 768,
33
+ "mxbai-embed-large": 1024,
34
+ "all-minilm": 384,
35
+ };
36
+
37
+ this.dimensions = dimensions || defaultDimensions[model] || 768;
38
+
39
+ // Rate limiting configuration (more lenient for local models)
40
+ const maxRequestsPerMinute = rateLimitConfig?.maxRequestsPerMinute || 1000;
41
+ this.retryAttempts = rateLimitConfig?.retryAttempts || 3;
42
+ this.retryDelayMs = rateLimitConfig?.retryDelayMs || 500;
43
+
44
+ this.limiter = new Bottleneck({
45
+ reservoir: maxRequestsPerMinute,
46
+ reservoirRefreshAmount: maxRequestsPerMinute,
47
+ reservoirRefreshInterval: 60 * 1000,
48
+ maxConcurrent: 10,
49
+ minTime: Math.floor((60 * 1000) / maxRequestsPerMinute),
50
+ });
51
+ }
52
+
53
+ private isOllamaError(e: unknown): e is OllamaError {
54
+ return (
55
+ typeof e === "object" && e !== null && ("status" in e || "message" in e)
56
+ );
57
+ }
58
+
59
+ private async retryWithBackoff<T>(
60
+ fn: () => Promise<T>,
61
+ attempt: number = 0,
62
+ ): Promise<T> {
63
+ try {
64
+ return await fn();
65
+ } catch (error: unknown) {
66
+ // Type guard for OllamaError
67
+ const apiError = this.isOllamaError(error)
68
+ ? error
69
+ : { status: 0, message: String(error) };
70
+
71
+ const isRateLimitError =
72
+ apiError.status === 429 ||
73
+ (typeof apiError.message === "string" &&
74
+ apiError.message.toLowerCase().includes("rate limit"));
75
+
76
+ if (isRateLimitError && attempt < this.retryAttempts) {
77
+ const delayMs = this.retryDelayMs * Math.pow(2, attempt);
78
+ const waitTimeSeconds = (delayMs / 1000).toFixed(1);
79
+ console.error(
80
+ `Rate limit reached. Retrying in ${waitTimeSeconds}s (attempt ${attempt + 1}/${this.retryAttempts})...`,
81
+ );
82
+
83
+ await new Promise((resolve) => setTimeout(resolve, delayMs));
84
+ return this.retryWithBackoff(fn, attempt + 1);
85
+ }
86
+
87
+ if (isRateLimitError) {
88
+ throw new Error(
89
+ `Ollama API rate limit exceeded after ${this.retryAttempts} retry attempts. Please try again later or reduce request frequency.`,
90
+ );
91
+ }
92
+
93
+ throw error;
94
+ }
95
+ }
96
+
97
+ private async callApi(text: string): Promise<OllamaEmbedResponse> {
98
+ try {
99
+ const response = await fetch(`${this.baseUrl}/api/embeddings`, {
100
+ method: "POST",
101
+ headers: {
102
+ "Content-Type": "application/json",
103
+ },
104
+ body: JSON.stringify({
105
+ model: this.model,
106
+ prompt: text,
107
+ }),
108
+ });
109
+
110
+ if (!response.ok) {
111
+ const errorBody = await response.text();
112
+ const textPreview =
113
+ text.length > 100 ? text.substring(0, 100) + "..." : text;
114
+ const error: OllamaError = {
115
+ status: response.status,
116
+ message: `Ollama API error (${response.status}) for model "${this.model}": ${errorBody}. Text preview: "${textPreview}"`,
117
+ };
118
+ throw error;
119
+ }
120
+
121
+ return response.json();
122
+ } catch (error) {
123
+ // Re-throw if it's already an OllamaError from the !response.ok block
124
+ if (error && typeof error === "object" && "status" in error) {
125
+ throw error;
126
+ }
127
+
128
+ // For Error instances (like network errors), enhance the message
129
+ if (error instanceof Error) {
130
+ const textPreview =
131
+ text.length > 100 ? text.substring(0, 100) + "..." : text;
132
+ throw new Error(
133
+ `Failed to call Ollama API at ${this.baseUrl} with model ${this.model}: ${error.message}. Text preview: "${textPreview}"`,
134
+ );
135
+ }
136
+
137
+ // Handle objects with 'message' property - preserve the original error structure
138
+ // This ensures objects with 'message' property work correctly in tests
139
+ if (this.isOllamaError(error)) {
140
+ throw error;
141
+ }
142
+
143
+ // For other types, create a descriptive error message
144
+ const textPreview =
145
+ text.length > 100 ? text.substring(0, 100) + "..." : text;
146
+ const errorMessage = JSON.stringify(error);
147
+
148
+ throw new Error(
149
+ `Failed to call Ollama API at ${this.baseUrl} with model ${this.model}: ${errorMessage}. Text preview: "${textPreview}"`,
150
+ );
151
+ }
152
+ }
153
+
154
+ async embed(text: string): Promise<EmbeddingResult> {
155
+ return this.limiter.schedule(() =>
156
+ this.retryWithBackoff(async () => {
157
+ const response = await this.callApi(text);
158
+
159
+ if (!response.embedding) {
160
+ throw new Error("No embedding returned from Ollama API");
161
+ }
162
+
163
+ return {
164
+ embedding: response.embedding,
165
+ dimensions: this.dimensions,
166
+ };
167
+ }),
168
+ );
169
+ }
170
+
171
+ async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
172
+ // Ollama doesn't support batch embeddings natively, so we process in parallel
173
+ // Process in chunks to avoid overwhelming Ollama and prevent memory issues
174
+ const CHUNK_SIZE = 50;
175
+ const results: EmbeddingResult[] = [];
176
+
177
+ for (let i = 0; i < texts.length; i += CHUNK_SIZE) {
178
+ const chunk = texts.slice(i, i + CHUNK_SIZE);
179
+ // The Bottleneck limiter will handle rate limiting and concurrency (maxConcurrent: 10)
180
+ const chunkResults = await Promise.all(
181
+ chunk.map((text) => this.embed(text)),
182
+ );
183
+ results.push(...chunkResults);
184
+ }
185
+
186
+ return results;
187
+ }
188
+
189
+ getDimensions(): number {
190
+ return this.dimensions;
191
+ }
192
+
193
+ getModel(): string {
194
+ return this.model;
195
+ }
196
+ }
@@ -0,0 +1,402 @@
// Unit tests for OpenAIEmbeddings. The "openai" module is fully mocked, so
// no network access occurs; tests exercise construction defaults, single and
// batch embedding, accessor methods, and the retry/rate-limit behavior.
import { describe, it, expect, vi, beforeEach } from "vitest";
import { OpenAIEmbeddings } from "./openai.js";
import OpenAI from "openai";

// Replace the OpenAI SDK default export with a mock constructor.
vi.mock("openai", () => ({
  default: vi.fn(),
}));

describe("OpenAIEmbeddings", () => {
  let embeddings: OpenAIEmbeddings;
  let mockOpenAI: any;

  beforeEach(() => {
    // Fresh mock client per test so call counts don't leak between tests.
    mockOpenAI = {
      embeddings: {
        create: vi.fn(),
      },
    };

    vi.mocked(OpenAI).mockImplementation(() => mockOpenAI as any);

    embeddings = new OpenAIEmbeddings("test-api-key");
  });

  describe("constructor", () => {
    it("should use default model and dimensions", () => {
      expect(embeddings.getModel()).toBe("text-embedding-3-small");
      expect(embeddings.getDimensions()).toBe(1536);
    });

    it("should use custom model", () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-large",
      );
      expect(customEmbeddings.getModel()).toBe("text-embedding-3-large");
      expect(customEmbeddings.getDimensions()).toBe(3072);
    });

    it("should use custom dimensions", () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        512,
      );
      expect(customEmbeddings.getDimensions()).toBe(512);
    });

    it("should use default dimensions for text-embedding-ada-002", () => {
      const adaEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-ada-002",
      );
      expect(adaEmbeddings.getDimensions()).toBe(1536);
    });
  });

  describe("embed", () => {
    it("should generate embedding for single text", async () => {
      const mockEmbedding = Array(1536)
        .fill(0)
        .map((_, i) => i * 0.001);
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: mockEmbedding }],
      });

      const result = await embeddings.embed("test text");

      expect(result).toEqual({
        embedding: mockEmbedding,
        dimensions: 1536,
      });
      // Verify the exact request payload sent to the SDK.
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: "text-embedding-3-small",
        input: "test text",
        dimensions: 1536,
      });
    });

    it("should handle long text", async () => {
      const longText = "word ".repeat(1000);
      const mockEmbedding = Array(1536).fill(0.5);
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: mockEmbedding }],
      });

      const result = await embeddings.embed(longText);

      expect(result.embedding).toEqual(mockEmbedding);
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: "text-embedding-3-small",
        input: longText,
        dimensions: 1536,
      });
    });

    it("should use custom model configuration", async () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-large",
        3072,
      );
      const mockEmbedding = Array(3072).fill(0.1);
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: mockEmbedding }],
      });

      await customEmbeddings.embed("test");

      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: "text-embedding-3-large",
        input: "test",
        dimensions: 3072,
      });
    });

    it("should propagate errors", async () => {
      mockOpenAI.embeddings.create.mockRejectedValue(new Error("API Error"));

      await expect(embeddings.embed("test")).rejects.toThrow("API Error");
    });
  });

  describe("embedBatch", () => {
    it("should generate embeddings for multiple texts", async () => {
      const mockEmbeddings = [
        Array(1536).fill(0.1),
        Array(1536).fill(0.2),
        Array(1536).fill(0.3),
      ];
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [
          { embedding: mockEmbeddings[0] },
          { embedding: mockEmbeddings[1] },
          { embedding: mockEmbeddings[2] },
        ],
      });

      const texts = ["text1", "text2", "text3"];
      const results = await embeddings.embedBatch(texts);

      expect(results).toEqual([
        { embedding: mockEmbeddings[0], dimensions: 1536 },
        { embedding: mockEmbeddings[1], dimensions: 1536 },
        { embedding: mockEmbeddings[2], dimensions: 1536 },
      ]);
      // The whole batch goes to the API as a single request.
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: "text-embedding-3-small",
        input: texts,
        dimensions: 1536,
      });
    });

    it("should handle empty batch", async () => {
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [],
      });

      const results = await embeddings.embedBatch([]);

      expect(results).toEqual([]);
    });

    it("should handle single item in batch", async () => {
      const mockEmbedding = Array(1536).fill(0.5);
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: mockEmbedding }],
      });

      const results = await embeddings.embedBatch(["single text"]);

      expect(results).toHaveLength(1);
      expect(results[0].embedding).toEqual(mockEmbedding);
    });

    it("should handle large batches", async () => {
      const batchSize = 100;
      const mockEmbeddings = Array(batchSize)
        .fill(null)
        .map(() => Array(1536).fill(Math.random()));

      mockOpenAI.embeddings.create.mockResolvedValue({
        data: mockEmbeddings.map((embedding) => ({ embedding })),
      });

      const texts = Array(batchSize)
        .fill(null)
        .map((_, i) => `text ${i}`);
      const results = await embeddings.embedBatch(texts);

      expect(results).toHaveLength(batchSize);
    });

    it("should propagate errors in batch", async () => {
      mockOpenAI.embeddings.create.mockRejectedValue(
        new Error("Batch API Error"),
      );

      await expect(embeddings.embedBatch(["text1", "text2"])).rejects.toThrow(
        "Batch API Error",
      );
    });
  });

  describe("getDimensions", () => {
    it("should return configured dimensions", () => {
      expect(embeddings.getDimensions()).toBe(1536);
    });

    it("should return custom dimensions", () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        512,
      );
      expect(customEmbeddings.getDimensions()).toBe(512);
    });
  });

  describe("getModel", () => {
    it("should return configured model", () => {
      expect(embeddings.getModel()).toBe("text-embedding-3-small");
    });

    it("should return custom model", () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-large",
      );
      expect(customEmbeddings.getModel()).toBe("text-embedding-3-large");
    });
  });

  // NOTE(review): these tests measure real wall-clock delays (Date.now),
  // so they are timing-sensitive; margins below exist to absorb scheduler
  // jitter. Consider vi.useFakeTimers() if they prove flaky in CI.
  describe("rate limiting", () => {
    it("should retry on rate limit error (429 status)", async () => {
      const mockEmbedding = Array(1536).fill(0.5);

      // Fail first two times with rate limit, succeed on third
      mockOpenAI.embeddings.create
        .mockRejectedValueOnce({ status: 429, message: "Rate limit exceeded" })
        .mockRejectedValueOnce({ status: 429, message: "Rate limit exceeded" })
        .mockResolvedValue({ data: [{ embedding: mockEmbedding }] });

      const result = await embeddings.embed("test text");

      expect(result.embedding).toEqual(mockEmbedding);
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledTimes(3);
    });

    it("should respect Retry-After header when present", async () => {
      const mockEmbedding = Array(1536).fill(0.5);
      const rateLimitError = {
        status: 429,
        message: "Rate limit exceeded",
        headers: { "retry-after": "2" },
      };

      mockOpenAI.embeddings.create
        .mockRejectedValueOnce(rateLimitError)
        .mockResolvedValue({ data: [{ embedding: mockEmbedding }] });

      const startTime = Date.now();
      await embeddings.embed("test text");
      const duration = Date.now() - startTime;

      // Should wait at least 2 seconds (2000ms)
      expect(duration).toBeGreaterThanOrEqual(1900); // Allow small margin
    });

    it("should fallback to exponential backoff with invalid Retry-After header", async () => {
      const rateLimitEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        undefined,
        {
          retryAttempts: 2,
          retryDelayMs: 100, // 100ms for faster tests
        },
      );

      const mockEmbedding = Array(1536).fill(0.5);

      // Test various invalid Retry-After values
      const invalidRetryAfterError = {
        status: 429,
        message: "Rate limit exceeded",
        headers: { "retry-after": "invalid" }, // Non-numeric value
      };

      mockOpenAI.embeddings.create
        .mockRejectedValueOnce(invalidRetryAfterError)
        .mockResolvedValue({ data: [{ embedding: mockEmbedding }] });

      const startTime = Date.now();
      await rateLimitEmbeddings.embed("test text");
      const duration = Date.now() - startTime;

      // Should fallback to exponential backoff (100ms) instead of using invalid header
      expect(duration).toBeGreaterThanOrEqual(90); // Allow small margin
      expect(duration).toBeLessThan(500); // Should not wait too long
    });

    it("should use exponential backoff when no Retry-After header", async () => {
      const rateLimitEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        undefined,
        {
          retryAttempts: 3,
          retryDelayMs: 100, // 100ms for faster tests
        },
      );

      const mockEmbedding = Array(1536).fill(0.5);
      const rateLimitError = {
        status: 429,
        message: "Rate limit exceeded",
      };

      mockOpenAI.embeddings.create
        .mockRejectedValueOnce(rateLimitError)
        .mockRejectedValueOnce(rateLimitError)
        .mockResolvedValue({ data: [{ embedding: mockEmbedding }] });

      const startTime = Date.now();
      await rateLimitEmbeddings.embed("test text");
      const duration = Date.now() - startTime;

      // Should wait: 100ms (first retry) + 200ms (second retry) = 300ms
      expect(duration).toBeGreaterThanOrEqual(250); // Allow margin for test execution
    });

    it("should throw error after max retries exceeded", async () => {
      const rateLimitEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        undefined,
        {
          retryAttempts: 2,
          retryDelayMs: 100,
        },
      );

      const rateLimitError = {
        status: 429,
        message: "Rate limit exceeded",
      };

      mockOpenAI.embeddings.create.mockRejectedValue(rateLimitError);

      await expect(rateLimitEmbeddings.embed("test text")).rejects.toThrow(
        "OpenAI API rate limit exceeded after 2 retry attempts",
      );

      // Should try initial + 2 retries = 3 total attempts
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledTimes(3);
    });

    it("should handle rate limit errors in batch operations", async () => {
      const mockEmbeddings = [Array(1536).fill(0.1), Array(1536).fill(0.2)];

      mockOpenAI.embeddings.create
        .mockRejectedValueOnce({ status: 429, message: "Rate limit exceeded" })
        .mockResolvedValue({
          data: [
            { embedding: mockEmbeddings[0] },
            { embedding: mockEmbeddings[1] },
          ],
        });

      const results = await embeddings.embedBatch(["text1", "text2"]);

      expect(results).toHaveLength(2);
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledTimes(2);
    });

    it("should not retry on non-rate-limit errors", async () => {
      const apiError = new Error("Invalid API key");
      mockOpenAI.embeddings.create.mockRejectedValue(apiError);

      await expect(embeddings.embed("test text")).rejects.toThrow(
        "Invalid API key",
      );
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledTimes(1);
    });

    it("should accept custom rate limit configuration", () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        undefined,
        {
          maxRequestsPerMinute: 1000,
          retryAttempts: 5,
          retryDelayMs: 2000,
        },
      );

      expect(customEmbeddings).toBeDefined();
    });
  });
});