@mhalder/qdrant-mcp-server 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +92 -0
- package/.github/workflows/ci.yml +61 -0
- package/.github/workflows/claude-code-review.yml +57 -0
- package/.github/workflows/claude.yml +50 -0
- package/.github/workflows/release.yml +52 -0
- package/.husky/commit-msg +1 -0
- package/.husky/pre-commit +1 -0
- package/.releaserc.json +59 -0
- package/.yamlfmt +4 -0
- package/CHANGELOG.md +73 -0
- package/CONTRIBUTING.md +176 -0
- package/LICENSE +21 -0
- package/README.md +714 -0
- package/build/embeddings/base.d.ts +23 -0
- package/build/embeddings/base.d.ts.map +1 -0
- package/build/embeddings/base.js +2 -0
- package/build/embeddings/base.js.map +1 -0
- package/build/embeddings/cohere.d.ts +17 -0
- package/build/embeddings/cohere.d.ts.map +1 -0
- package/build/embeddings/cohere.js +102 -0
- package/build/embeddings/cohere.js.map +1 -0
- package/build/embeddings/cohere.test.d.ts +2 -0
- package/build/embeddings/cohere.test.d.ts.map +1 -0
- package/build/embeddings/cohere.test.js +279 -0
- package/build/embeddings/cohere.test.js.map +1 -0
- package/build/embeddings/factory.d.ts +10 -0
- package/build/embeddings/factory.d.ts.map +1 -0
- package/build/embeddings/factory.js +98 -0
- package/build/embeddings/factory.js.map +1 -0
- package/build/embeddings/factory.test.d.ts +2 -0
- package/build/embeddings/factory.test.d.ts.map +1 -0
- package/build/embeddings/factory.test.js +329 -0
- package/build/embeddings/factory.test.js.map +1 -0
- package/build/embeddings/ollama.d.ts +18 -0
- package/build/embeddings/ollama.d.ts.map +1 -0
- package/build/embeddings/ollama.js +135 -0
- package/build/embeddings/ollama.js.map +1 -0
- package/build/embeddings/ollama.test.d.ts +2 -0
- package/build/embeddings/ollama.test.d.ts.map +1 -0
- package/build/embeddings/ollama.test.js +399 -0
- package/build/embeddings/ollama.test.js.map +1 -0
- package/build/embeddings/openai.d.ts +16 -0
- package/build/embeddings/openai.d.ts.map +1 -0
- package/build/embeddings/openai.js +108 -0
- package/build/embeddings/openai.js.map +1 -0
- package/build/embeddings/openai.test.d.ts +2 -0
- package/build/embeddings/openai.test.d.ts.map +1 -0
- package/build/embeddings/openai.test.js +283 -0
- package/build/embeddings/openai.test.js.map +1 -0
- package/build/embeddings/voyage.d.ts +19 -0
- package/build/embeddings/voyage.d.ts.map +1 -0
- package/build/embeddings/voyage.js +113 -0
- package/build/embeddings/voyage.js.map +1 -0
- package/build/embeddings/voyage.test.d.ts +2 -0
- package/build/embeddings/voyage.test.d.ts.map +1 -0
- package/build/embeddings/voyage.test.js +371 -0
- package/build/embeddings/voyage.test.js.map +1 -0
- package/build/index.d.ts +3 -0
- package/build/index.d.ts.map +1 -0
- package/build/index.js +534 -0
- package/build/index.js.map +1 -0
- package/build/index.test.d.ts +2 -0
- package/build/index.test.d.ts.map +1 -0
- package/build/index.test.js +241 -0
- package/build/index.test.js.map +1 -0
- package/build/qdrant/client.d.ts +37 -0
- package/build/qdrant/client.d.ts.map +1 -0
- package/build/qdrant/client.js +142 -0
- package/build/qdrant/client.js.map +1 -0
- package/build/qdrant/client.test.d.ts +2 -0
- package/build/qdrant/client.test.d.ts.map +1 -0
- package/build/qdrant/client.test.js +340 -0
- package/build/qdrant/client.test.js.map +1 -0
- package/commitlint.config.js +25 -0
- package/docker-compose.yml +22 -0
- package/docs/test_report.md +259 -0
- package/examples/README.md +315 -0
- package/examples/basic/README.md +111 -0
- package/examples/filters/README.md +262 -0
- package/examples/knowledge-base/README.md +207 -0
- package/examples/rate-limiting/README.md +376 -0
- package/package.json +59 -0
- package/scripts/verify-providers.js +238 -0
- package/src/embeddings/base.ts +25 -0
- package/src/embeddings/cohere.test.ts +408 -0
- package/src/embeddings/cohere.ts +152 -0
- package/src/embeddings/factory.test.ts +453 -0
- package/src/embeddings/factory.ts +163 -0
- package/src/embeddings/ollama.test.ts +543 -0
- package/src/embeddings/ollama.ts +196 -0
- package/src/embeddings/openai.test.ts +402 -0
- package/src/embeddings/openai.ts +158 -0
- package/src/embeddings/voyage.test.ts +520 -0
- package/src/embeddings/voyage.ts +168 -0
- package/src/index.test.ts +304 -0
- package/src/index.ts +614 -0
- package/src/qdrant/client.test.ts +456 -0
- package/src/qdrant/client.ts +195 -0
- package/tsconfig.json +19 -0
- package/vitest.config.ts +37 -0
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import Bottleneck from "bottleneck";
|
|
2
|
+
import { EmbeddingProvider, EmbeddingResult, RateLimitConfig } from "./base.js";
|
|
3
|
+
|
|
4
|
+
// Shape of errors surfaced by the Ollama HTTP layer. Both fields are
// optional: HTTP failures carry a status (and usually a message), while
// network-level failures may carry only a message.
interface OllamaError {
  // HTTP status code of the failed response (e.g. 429), if known.
  status?: number;
  // Human-readable description of the failure.
  message?: string;
}
|
|
8
|
+
|
|
9
|
+
// Successful response body from Ollama's `/api/embeddings` endpoint.
interface OllamaEmbedResponse {
  // Embedding vector for the submitted prompt.
  embedding: number[];
}
|
|
12
|
+
|
|
13
|
+
export class OllamaEmbeddings implements EmbeddingProvider {
|
|
14
|
+
private model: string;
|
|
15
|
+
private dimensions: number;
|
|
16
|
+
private limiter: Bottleneck;
|
|
17
|
+
private retryAttempts: number;
|
|
18
|
+
private retryDelayMs: number;
|
|
19
|
+
private baseUrl: string;
|
|
20
|
+
|
|
21
|
+
constructor(
|
|
22
|
+
model: string = "nomic-embed-text",
|
|
23
|
+
dimensions?: number,
|
|
24
|
+
rateLimitConfig?: RateLimitConfig,
|
|
25
|
+
baseUrl: string = "http://localhost:11434",
|
|
26
|
+
) {
|
|
27
|
+
this.model = model;
|
|
28
|
+
this.baseUrl = baseUrl;
|
|
29
|
+
|
|
30
|
+
// Default dimensions for different models
|
|
31
|
+
const defaultDimensions: Record<string, number> = {
|
|
32
|
+
"nomic-embed-text": 768,
|
|
33
|
+
"mxbai-embed-large": 1024,
|
|
34
|
+
"all-minilm": 384,
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
this.dimensions = dimensions || defaultDimensions[model] || 768;
|
|
38
|
+
|
|
39
|
+
// Rate limiting configuration (more lenient for local models)
|
|
40
|
+
const maxRequestsPerMinute = rateLimitConfig?.maxRequestsPerMinute || 1000;
|
|
41
|
+
this.retryAttempts = rateLimitConfig?.retryAttempts || 3;
|
|
42
|
+
this.retryDelayMs = rateLimitConfig?.retryDelayMs || 500;
|
|
43
|
+
|
|
44
|
+
this.limiter = new Bottleneck({
|
|
45
|
+
reservoir: maxRequestsPerMinute,
|
|
46
|
+
reservoirRefreshAmount: maxRequestsPerMinute,
|
|
47
|
+
reservoirRefreshInterval: 60 * 1000,
|
|
48
|
+
maxConcurrent: 10,
|
|
49
|
+
minTime: Math.floor((60 * 1000) / maxRequestsPerMinute),
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
private isOllamaError(e: unknown): e is OllamaError {
|
|
54
|
+
return (
|
|
55
|
+
typeof e === "object" && e !== null && ("status" in e || "message" in e)
|
|
56
|
+
);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
private async retryWithBackoff<T>(
|
|
60
|
+
fn: () => Promise<T>,
|
|
61
|
+
attempt: number = 0,
|
|
62
|
+
): Promise<T> {
|
|
63
|
+
try {
|
|
64
|
+
return await fn();
|
|
65
|
+
} catch (error: unknown) {
|
|
66
|
+
// Type guard for OllamaError
|
|
67
|
+
const apiError = this.isOllamaError(error)
|
|
68
|
+
? error
|
|
69
|
+
: { status: 0, message: String(error) };
|
|
70
|
+
|
|
71
|
+
const isRateLimitError =
|
|
72
|
+
apiError.status === 429 ||
|
|
73
|
+
(typeof apiError.message === "string" &&
|
|
74
|
+
apiError.message.toLowerCase().includes("rate limit"));
|
|
75
|
+
|
|
76
|
+
if (isRateLimitError && attempt < this.retryAttempts) {
|
|
77
|
+
const delayMs = this.retryDelayMs * Math.pow(2, attempt);
|
|
78
|
+
const waitTimeSeconds = (delayMs / 1000).toFixed(1);
|
|
79
|
+
console.error(
|
|
80
|
+
`Rate limit reached. Retrying in ${waitTimeSeconds}s (attempt ${attempt + 1}/${this.retryAttempts})...`,
|
|
81
|
+
);
|
|
82
|
+
|
|
83
|
+
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
84
|
+
return this.retryWithBackoff(fn, attempt + 1);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
if (isRateLimitError) {
|
|
88
|
+
throw new Error(
|
|
89
|
+
`Ollama API rate limit exceeded after ${this.retryAttempts} retry attempts. Please try again later or reduce request frequency.`,
|
|
90
|
+
);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
throw error;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
private async callApi(text: string): Promise<OllamaEmbedResponse> {
|
|
98
|
+
try {
|
|
99
|
+
const response = await fetch(`${this.baseUrl}/api/embeddings`, {
|
|
100
|
+
method: "POST",
|
|
101
|
+
headers: {
|
|
102
|
+
"Content-Type": "application/json",
|
|
103
|
+
},
|
|
104
|
+
body: JSON.stringify({
|
|
105
|
+
model: this.model,
|
|
106
|
+
prompt: text,
|
|
107
|
+
}),
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
if (!response.ok) {
|
|
111
|
+
const errorBody = await response.text();
|
|
112
|
+
const textPreview =
|
|
113
|
+
text.length > 100 ? text.substring(0, 100) + "..." : text;
|
|
114
|
+
const error: OllamaError = {
|
|
115
|
+
status: response.status,
|
|
116
|
+
message: `Ollama API error (${response.status}) for model "${this.model}": ${errorBody}. Text preview: "${textPreview}"`,
|
|
117
|
+
};
|
|
118
|
+
throw error;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
return response.json();
|
|
122
|
+
} catch (error) {
|
|
123
|
+
// Re-throw if it's already an OllamaError from the !response.ok block
|
|
124
|
+
if (error && typeof error === "object" && "status" in error) {
|
|
125
|
+
throw error;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// For Error instances (like network errors), enhance the message
|
|
129
|
+
if (error instanceof Error) {
|
|
130
|
+
const textPreview =
|
|
131
|
+
text.length > 100 ? text.substring(0, 100) + "..." : text;
|
|
132
|
+
throw new Error(
|
|
133
|
+
`Failed to call Ollama API at ${this.baseUrl} with model ${this.model}: ${error.message}. Text preview: "${textPreview}"`,
|
|
134
|
+
);
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// Handle objects with 'message' property - preserve the original error structure
|
|
138
|
+
// This ensures objects with 'message' property work correctly in tests
|
|
139
|
+
if (this.isOllamaError(error)) {
|
|
140
|
+
throw error;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// For other types, create a descriptive error message
|
|
144
|
+
const textPreview =
|
|
145
|
+
text.length > 100 ? text.substring(0, 100) + "..." : text;
|
|
146
|
+
const errorMessage = JSON.stringify(error);
|
|
147
|
+
|
|
148
|
+
throw new Error(
|
|
149
|
+
`Failed to call Ollama API at ${this.baseUrl} with model ${this.model}: ${errorMessage}. Text preview: "${textPreview}"`,
|
|
150
|
+
);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
async embed(text: string): Promise<EmbeddingResult> {
|
|
155
|
+
return this.limiter.schedule(() =>
|
|
156
|
+
this.retryWithBackoff(async () => {
|
|
157
|
+
const response = await this.callApi(text);
|
|
158
|
+
|
|
159
|
+
if (!response.embedding) {
|
|
160
|
+
throw new Error("No embedding returned from Ollama API");
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
return {
|
|
164
|
+
embedding: response.embedding,
|
|
165
|
+
dimensions: this.dimensions,
|
|
166
|
+
};
|
|
167
|
+
}),
|
|
168
|
+
);
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
|
|
172
|
+
// Ollama doesn't support batch embeddings natively, so we process in parallel
|
|
173
|
+
// Process in chunks to avoid overwhelming Ollama and prevent memory issues
|
|
174
|
+
const CHUNK_SIZE = 50;
|
|
175
|
+
const results: EmbeddingResult[] = [];
|
|
176
|
+
|
|
177
|
+
for (let i = 0; i < texts.length; i += CHUNK_SIZE) {
|
|
178
|
+
const chunk = texts.slice(i, i + CHUNK_SIZE);
|
|
179
|
+
// The Bottleneck limiter will handle rate limiting and concurrency (maxConcurrent: 10)
|
|
180
|
+
const chunkResults = await Promise.all(
|
|
181
|
+
chunk.map((text) => this.embed(text)),
|
|
182
|
+
);
|
|
183
|
+
results.push(...chunkResults);
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return results;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
getDimensions(): number {
|
|
190
|
+
return this.dimensions;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
getModel(): string {
|
|
194
|
+
return this.model;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
+
import { OpenAIEmbeddings } from "./openai.js";
|
|
3
|
+
import OpenAI from "openai";
|
|
4
|
+
|
|
5
|
+
// Replace the OpenAI SDK with a mocked constructor so no real network calls
// are made; each test wires its own `embeddings.create` stub.
vi.mock("openai", () => ({
  default: vi.fn(),
}));

// Unit tests for OpenAIEmbeddings: constructor defaults, single/batch
// embedding calls, accessors, and rate-limit retry behavior.
describe("OpenAIEmbeddings", () => {
  let embeddings: OpenAIEmbeddings;
  let mockOpenAI: any;

  beforeEach(() => {
    // Fresh mock client per test so call counts don't leak between tests.
    mockOpenAI = {
      embeddings: {
        create: vi.fn(),
      },
    };

    vi.mocked(OpenAI).mockImplementation(() => mockOpenAI as any);

    embeddings = new OpenAIEmbeddings("test-api-key");
  });

  // Model/dimension resolution at construction time.
  describe("constructor", () => {
    it("should use default model and dimensions", () => {
      expect(embeddings.getModel()).toBe("text-embedding-3-small");
      expect(embeddings.getDimensions()).toBe(1536);
    });

    it("should use custom model", () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-large",
      );
      expect(customEmbeddings.getModel()).toBe("text-embedding-3-large");
      expect(customEmbeddings.getDimensions()).toBe(3072);
    });

    it("should use custom dimensions", () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        512,
      );
      expect(customEmbeddings.getDimensions()).toBe(512);
    });

    it("should use default dimensions for text-embedding-ada-002", () => {
      const adaEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-ada-002",
      );
      expect(adaEmbeddings.getDimensions()).toBe(1536);
    });
  });

  // Single-text embedding path: payload shape and error propagation.
  describe("embed", () => {
    it("should generate embedding for single text", async () => {
      const mockEmbedding = Array(1536)
        .fill(0)
        .map((_, i) => i * 0.001);
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: mockEmbedding }],
      });

      const result = await embeddings.embed("test text");

      expect(result).toEqual({
        embedding: mockEmbedding,
        dimensions: 1536,
      });
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: "text-embedding-3-small",
        input: "test text",
        dimensions: 1536,
      });
    });

    it("should handle long text", async () => {
      const longText = "word ".repeat(1000);
      const mockEmbedding = Array(1536).fill(0.5);
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: mockEmbedding }],
      });

      const result = await embeddings.embed(longText);

      expect(result.embedding).toEqual(mockEmbedding);
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: "text-embedding-3-small",
        input: longText,
        dimensions: 1536,
      });
    });

    it("should use custom model configuration", async () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-large",
        3072,
      );
      const mockEmbedding = Array(3072).fill(0.1);
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: mockEmbedding }],
      });

      await customEmbeddings.embed("test");

      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: "text-embedding-3-large",
        input: "test",
        dimensions: 3072,
      });
    });

    it("should propagate errors", async () => {
      mockOpenAI.embeddings.create.mockRejectedValue(new Error("API Error"));

      await expect(embeddings.embed("test")).rejects.toThrow("API Error");
    });
  });

  // Batch path: a single API call receives the whole array as `input`.
  describe("embedBatch", () => {
    it("should generate embeddings for multiple texts", async () => {
      const mockEmbeddings = [
        Array(1536).fill(0.1),
        Array(1536).fill(0.2),
        Array(1536).fill(0.3),
      ];
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [
          { embedding: mockEmbeddings[0] },
          { embedding: mockEmbeddings[1] },
          { embedding: mockEmbeddings[2] },
        ],
      });

      const texts = ["text1", "text2", "text3"];
      const results = await embeddings.embedBatch(texts);

      expect(results).toEqual([
        { embedding: mockEmbeddings[0], dimensions: 1536 },
        { embedding: mockEmbeddings[1], dimensions: 1536 },
        { embedding: mockEmbeddings[2], dimensions: 1536 },
      ]);
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: "text-embedding-3-small",
        input: texts,
        dimensions: 1536,
      });
    });

    it("should handle empty batch", async () => {
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [],
      });

      const results = await embeddings.embedBatch([]);

      expect(results).toEqual([]);
    });

    it("should handle single item in batch", async () => {
      const mockEmbedding = Array(1536).fill(0.5);
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: mockEmbedding }],
      });

      const results = await embeddings.embedBatch(["single text"]);

      expect(results).toHaveLength(1);
      expect(results[0].embedding).toEqual(mockEmbedding);
    });

    it("should handle large batches", async () => {
      const batchSize = 100;
      const mockEmbeddings = Array(batchSize)
        .fill(null)
        .map(() => Array(1536).fill(Math.random()));

      mockOpenAI.embeddings.create.mockResolvedValue({
        data: mockEmbeddings.map((embedding) => ({ embedding })),
      });

      const texts = Array(batchSize)
        .fill(null)
        .map((_, i) => `text ${i}`);
      const results = await embeddings.embedBatch(texts);

      expect(results).toHaveLength(batchSize);
    });

    it("should propagate errors in batch", async () => {
      mockOpenAI.embeddings.create.mockRejectedValue(
        new Error("Batch API Error"),
      );

      await expect(embeddings.embedBatch(["text1", "text2"])).rejects.toThrow(
        "Batch API Error",
      );
    });
  });

  describe("getDimensions", () => {
    it("should return configured dimensions", () => {
      expect(embeddings.getDimensions()).toBe(1536);
    });

    it("should return custom dimensions", () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        512,
      );
      expect(customEmbeddings.getDimensions()).toBe(512);
    });
  });

  describe("getModel", () => {
    it("should return configured model", () => {
      expect(embeddings.getModel()).toBe("text-embedding-3-small");
    });

    it("should return custom model", () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-large",
      );
      expect(customEmbeddings.getModel()).toBe("text-embedding-3-large");
    });
  });

  // Retry/backoff behavior on 429 responses. These tests run on real timers,
  // so retryDelayMs is kept small where the delay is measured.
  describe("rate limiting", () => {
    it("should retry on rate limit error (429 status)", async () => {
      const mockEmbedding = Array(1536).fill(0.5);

      // Fail first two times with rate limit, succeed on third
      mockOpenAI.embeddings.create
        .mockRejectedValueOnce({ status: 429, message: "Rate limit exceeded" })
        .mockRejectedValueOnce({ status: 429, message: "Rate limit exceeded" })
        .mockResolvedValue({ data: [{ embedding: mockEmbedding }] });

      const result = await embeddings.embed("test text");

      expect(result.embedding).toEqual(mockEmbedding);
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledTimes(3);
    });

    it("should respect Retry-After header when present", async () => {
      const mockEmbedding = Array(1536).fill(0.5);
      const rateLimitError = {
        status: 429,
        message: "Rate limit exceeded",
        headers: { "retry-after": "2" },
      };

      mockOpenAI.embeddings.create
        .mockRejectedValueOnce(rateLimitError)
        .mockResolvedValue({ data: [{ embedding: mockEmbedding }] });

      const startTime = Date.now();
      await embeddings.embed("test text");
      const duration = Date.now() - startTime;

      // Should wait at least 2 seconds (2000ms)
      expect(duration).toBeGreaterThanOrEqual(1900); // Allow small margin
    });

    it("should fallback to exponential backoff with invalid Retry-After header", async () => {
      const rateLimitEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        undefined,
        {
          retryAttempts: 2,
          retryDelayMs: 100, // 100ms for faster tests
        },
      );

      const mockEmbedding = Array(1536).fill(0.5);

      // Test various invalid Retry-After values
      const invalidRetryAfterError = {
        status: 429,
        message: "Rate limit exceeded",
        headers: { "retry-after": "invalid" }, // Non-numeric value
      };

      mockOpenAI.embeddings.create
        .mockRejectedValueOnce(invalidRetryAfterError)
        .mockResolvedValue({ data: [{ embedding: mockEmbedding }] });

      const startTime = Date.now();
      await rateLimitEmbeddings.embed("test text");
      const duration = Date.now() - startTime;

      // Should fallback to exponential backoff (100ms) instead of using invalid header
      expect(duration).toBeGreaterThanOrEqual(90); // Allow small margin
      expect(duration).toBeLessThan(500); // Should not wait too long
    });

    it("should use exponential backoff when no Retry-After header", async () => {
      const rateLimitEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        undefined,
        {
          retryAttempts: 3,
          retryDelayMs: 100, // 100ms for faster tests
        },
      );

      const mockEmbedding = Array(1536).fill(0.5);
      const rateLimitError = {
        status: 429,
        message: "Rate limit exceeded",
      };

      mockOpenAI.embeddings.create
        .mockRejectedValueOnce(rateLimitError)
        .mockRejectedValueOnce(rateLimitError)
        .mockResolvedValue({ data: [{ embedding: mockEmbedding }] });

      const startTime = Date.now();
      await rateLimitEmbeddings.embed("test text");
      const duration = Date.now() - startTime;

      // Should wait: 100ms (first retry) + 200ms (second retry) = 300ms
      expect(duration).toBeGreaterThanOrEqual(250); // Allow margin for test execution
    });

    it("should throw error after max retries exceeded", async () => {
      const rateLimitEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        undefined,
        {
          retryAttempts: 2,
          retryDelayMs: 100,
        },
      );

      const rateLimitError = {
        status: 429,
        message: "Rate limit exceeded",
      };

      mockOpenAI.embeddings.create.mockRejectedValue(rateLimitError);

      await expect(rateLimitEmbeddings.embed("test text")).rejects.toThrow(
        "OpenAI API rate limit exceeded after 2 retry attempts",
      );

      // Should try initial + 2 retries = 3 total attempts
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledTimes(3);
    });

    it("should handle rate limit errors in batch operations", async () => {
      const mockEmbeddings = [Array(1536).fill(0.1), Array(1536).fill(0.2)];

      mockOpenAI.embeddings.create
        .mockRejectedValueOnce({ status: 429, message: "Rate limit exceeded" })
        .mockResolvedValue({
          data: [
            { embedding: mockEmbeddings[0] },
            { embedding: mockEmbeddings[1] },
          ],
        });

      const results = await embeddings.embedBatch(["text1", "text2"]);

      expect(results).toHaveLength(2);
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledTimes(2);
    });

    it("should not retry on non-rate-limit errors", async () => {
      const apiError = new Error("Invalid API key");
      mockOpenAI.embeddings.create.mockRejectedValue(apiError);

      await expect(embeddings.embed("test text")).rejects.toThrow(
        "Invalid API key",
      );
      expect(mockOpenAI.embeddings.create).toHaveBeenCalledTimes(1);
    });

    it("should accept custom rate limit configuration", () => {
      const customEmbeddings = new OpenAIEmbeddings(
        "test-api-key",
        "text-embedding-3-small",
        undefined,
        {
          maxRequestsPerMinute: 1000,
          retryAttempts: 5,
          retryDelayMs: 2000,
        },
      );

      expect(customEmbeddings).toBeDefined();
    });
  });
});
|