@lov3kaizen/agentsea-gateway 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +298 -0
- package/dist/index.d.mts +759 -0
- package/dist/index.d.ts +759 -0
- package/dist/index.js +3592 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +3541 -0
- package/dist/index.mjs.map +1 -0
- package/dist/integrations/nestjs/index.d.mts +4 -0
- package/dist/integrations/nestjs/index.d.ts +4 -0
- package/dist/integrations/nestjs/index.js +10 -0
- package/dist/integrations/nestjs/index.js.map +1 -0
- package/dist/integrations/nestjs/index.mjs +7 -0
- package/dist/integrations/nestjs/index.mjs.map +1 -0
- package/package.json +89 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,3541 @@
|
|
|
1
|
+
import { EventEmitter } from 'events';
|
|
2
|
+
import { get_encoding } from 'tiktoken';
|
|
3
|
+
import murmurhash from 'murmurhash';
|
|
4
|
+
import { LRUCache } from 'lru-cache';
|
|
5
|
+
import pino from 'pino';
|
|
6
|
+
import { Hono } from 'hono';
|
|
7
|
+
import { cors } from 'hono/cors';
|
|
8
|
+
import { logger } from 'hono/logger';
|
|
9
|
+
import { streamSSE } from 'hono/streaming';
|
|
10
|
+
import { serve } from '@hono/node-server';
|
|
11
|
+
|
|
12
|
+
// src/core/Gateway.ts
|
|
13
|
+
|
|
14
|
+
// src/core/types.ts
|
|
15
|
+
/**
 * Base class for errors raised by the gateway.
 *
 * Besides the usual `message`, every instance carries a `code`, a
 * `statusCode` (500 unless given), an optional `provider` and a `retryable`
 * flag (false unless given).
 */
var GatewayError = class extends Error {
  /**
   * @param message - Error message forwarded to `Error`.
   * @param code - Error code stored on the instance.
   * @param statusCode - Status code stored on the instance; defaults to 500.
   * @param provider - Provider associated with the failure, if any.
   * @param retryable - Whether the failure is flagged as retryable; defaults to false.
   */
  constructor(message, code, statusCode = 500, provider, retryable = false) {
    super(message);
    // Assigned in this exact order so own-property enumeration is stable.
    Object.assign(this, {
      code,
      statusCode,
      provider,
      retryable,
      name: "GatewayError",
    });
  }
};
|
|
25
|
+
/**
 * Error reported on behalf of an upstream provider.
 *
 * Always uses code "PROVIDER_ERROR" and status 502; retryable unless the
 * caller says otherwise. The triggering error is kept on `originalError`.
 */
var ProviderError = class extends GatewayError {
  /**
   * @param message - Error message.
   * @param provider - Provider the failure is attributed to.
   * @param originalError - Underlying error, stored as-is.
   * @param retryable - Retry flag; defaults to true.
   */
  constructor(message, provider, originalError, retryable = true) {
    super(message, "PROVIDER_ERROR", 502, provider, retryable);
    Object.assign(this, { originalError, name: "ProviderError" });
  }
};
|
|
32
|
+
/**
 * Error raised when a rate limit is hit.
 *
 * Always uses code "RATE_LIMIT_EXCEEDED", status 429 and is flagged
 * retryable. `retryAfter` is stored unchanged on the instance.
 */
var RateLimitError = class extends GatewayError {
  /**
   * @param message - Error message.
   * @param retryAfter - Retry hint supplied by the caller (unit not visible here — confirm with callers).
   * @param provider - Provider associated with the limit, if any.
   */
  constructor(message, retryAfter, provider) {
    super(message, "RATE_LIMIT_EXCEEDED", 429, provider, true);
    Object.assign(this, { retryAfter, name: "RateLimitError" });
  }
};
|
|
39
|
+
/**
 * Error raised when authentication fails.
 *
 * Always uses code "AUTHENTICATION_FAILED", status 401, no provider, and is
 * not retryable.
 */
var AuthenticationError = class extends GatewayError {
  /** @param message - Error message. */
  constructor(message) {
    const noProvider = void 0;
    super(message, "AUTHENTICATION_FAILED", 401, noProvider, false);
    this.name = "AuthenticationError";
  }
};
|
|
45
|
+
/**
 * Error raised when input fails validation.
 *
 * Always uses code "VALIDATION_ERROR", status 400, no provider, and is not
 * retryable.
 */
var ValidationError = class extends GatewayError {
  /** @param message - Error message. */
  constructor(message) {
    const noProvider = void 0;
    super(message, "VALIDATION_ERROR", 400, noProvider, false);
    this.name = "ValidationError";
  }
};
|
|
51
|
+
|
|
52
|
+
// src/providers/ProviderRegistry.ts
|
|
53
|
+
/**
 * Registry of providers plus a reverse index from model name to the names of
 * the providers that serve it.
 *
 * Providers are expected to expose `name`, `getModels()`, `getModelInfo()`,
 * `isAvailable()`, `isHealthy()`, `getHealth()` and `healthCheck()`; those are
 * the members this class calls.
 */
var ProviderRegistry = class {
  /** Provider name -> provider instance. */
  providers = /* @__PURE__ */ new Map();
  /** Model name -> provider names, in registration order. */
  modelToProvider = /* @__PURE__ */ new Map();
  /** Handle of the periodic health-check timer, or null when stopped. */
  healthCheckInterval = null;
  /**
   * @param providers - Providers to register immediately; defaults to none.
   */
  constructor(providers = []) {
    for (const provider of providers) {
      this.register(provider);
    }
  }
  /**
   * Register a provider and index every model it reports.
   *
   * Registering under a name that is already present replaces the stored
   * instance. Index entries for models the new instance no longer reports are
   * removed so `hasModel`/`getAllModels` never advertise models nobody serves.
   * Entries for models it still reports keep their position in the list.
   */
  register(provider) {
    const { name } = provider;
    const models = provider.getModels();
    if (this.providers.has(name)) {
      this.removeFromModelIndex(name, new Set(models));
    }
    this.providers.set(name, provider);
    for (const model of models) {
      const names = this.modelToProvider.get(model) ?? [];
      if (!names.includes(name)) {
        names.push(name);
        this.modelToProvider.set(model, names);
      }
    }
  }
  /**
   * Unregister a provider.
   *
   * The whole index is scanned for the name, so entries are cleaned up even if
   * the provider's model list changed after it was registered.
   *
   * @returns true when a provider was removed, false when the name was unknown.
   */
  unregister(name) {
    if (!this.providers.has(name)) {
      return false;
    }
    this.removeFromModelIndex(name);
    this.providers.delete(name);
    return true;
  }
  /**
   * Internal: drop `name` from the model index, except for models listed in
   * `keepModels`. Models left without any provider are removed from the index.
   *
   * @param name - Provider name to remove.
   * @param keepModels - Set of model names whose entries are left untouched.
   */
  removeFromModelIndex(name, keepModels = /* @__PURE__ */ new Set()) {
    // Updating or deleting existing keys while iterating a Map is well-defined.
    for (const [model, names] of this.modelToProvider) {
      if (keepModels.has(model) || !names.includes(name)) continue;
      const remaining = names.filter((other) => other !== name);
      if (remaining.length > 0) {
        this.modelToProvider.set(model, remaining);
      } else {
        this.modelToProvider.delete(model);
      }
    }
  }
  /**
   * Get a provider by name (undefined when not registered).
   */
  get(name) {
    return this.providers.get(name);
  }
  /**
   * Get all registered providers, in registration order.
   */
  getAll() {
    return Array.from(this.providers.values());
  }
  /**
   * Get all provider names, in registration order.
   */
  getNames() {
    return Array.from(this.providers.keys());
  }
  /**
   * Get the registered providers that report support for a model.
   */
  getProvidersForModel(model) {
    const names = this.modelToProvider.get(model) ?? [];
    return names.map((name) => this.providers.get(name)).filter((p) => p !== void 0);
  }
  /**
   * Get the first provider for a model whose `isAvailable()` is true
   * (undefined when there is none).
   */
  getProviderForModel(model) {
    return this.getProvidersForModel(model).find((p) => p.isAvailable());
  }
  /**
   * Check if any provider is indexed for a model.
   */
  hasModel(model) {
    return this.modelToProvider.has(model);
  }
  /**
   * Get every indexed model name across all providers.
   */
  getAllModels() {
    return Array.from(this.modelToProvider.keys());
  }
  /**
   * Get model info from the first available provider for the model, or null
   * when no such provider exists or it returns nothing.
   */
  getModelInfo(model) {
    return this.getProviderForModel(model)?.getModelInfo(model) ?? null;
  }
  /**
   * Get the last known health (`getHealth()`) of every provider, keyed by name.
   */
  getHealthStatus() {
    const status = {};
    for (const [name, provider] of this.providers) {
      status[name] = provider.getHealth();
    }
    return status;
  }
  /**
   * Get providers whose `isHealthy()` is true.
   */
  getHealthyProviders() {
    return this.getAll().filter((p) => p.isHealthy());
  }
  /**
   * Get providers whose `isAvailable()` is true.
   */
  getAvailableProviders() {
    return this.getAll().filter((p) => p.isAvailable());
  }
  /**
   * Run `healthCheck()` on all providers concurrently.
   *
   * @returns results keyed by provider name. Rejects as soon as any single
   * health check rejects (Promise.all semantics).
   */
  async checkHealth() {
    const results = {};
    await Promise.all(
      this.getAll().map(async (provider) => {
        results[provider.name] = await provider.healthCheck();
      })
    );
    return results;
  }
  /**
   * Start periodic health checks. A no-op when checks are already running.
   *
   * @param intervalMs - Delay between checks; defaults to 60000.
   */
  startHealthChecks(intervalMs = 6e4) {
    if (this.healthCheckInterval) {
      return;
    }
    this.healthCheckInterval = setInterval(() => {
      // Failures are logged rather than thrown so the timer keeps running.
      this.checkHealth().catch(console.error);
    }, intervalMs);
  }
  /**
   * Stop periodic health checks (no-op when not running).
   */
  stopHealthChecks() {
    if (this.healthCheckInterval) {
      clearInterval(this.healthCheckInterval);
      this.healthCheckInterval = null;
    }
  }
  /**
   * Number of registered providers.
   */
  get size() {
    return this.providers.size;
  }
};
|
|
209
|
+
/**
 * Three-state circuit breaker ("closed", "open", "half-open") for one provider.
 *
 * `config` supplies `failureThreshold`, `successThreshold` and `timeout`
 * (added to `Date.now()` to compute the next allowed attempt).
 */
var CircuitBreaker = class {
  state = "closed";
  failures = 0;
  successes = 0;
  lastFailure = null;
  nextAttempt = null;
  /**
   * @param providerName - Name reported back by `getStatus()`.
   * @param config - Thresholds and timeout used by this breaker.
   */
  constructor(providerName, config) {
    this.providerName = providerName;
    this.config = config;
  }
  /**
   * Report whether a request may go through.
   *
   * Closed and half-open circuits always allow. An open circuit allows only
   * once `nextAttempt` has been reached, and in doing so moves to "half-open".
   */
  isAllowed() {
    if (this.state !== "open") {
      return true;
    }
    const cooldownElapsed = Boolean(this.nextAttempt) && /* @__PURE__ */ new Date() >= this.nextAttempt;
    if (cooldownElapsed) {
      this.state = "half-open";
    }
    return cooldownElapsed;
  }
  /**
   * Record a successful request.
   *
   * Half-open: counts the success and resets the breaker once
   * `successThreshold` is reached. Closed: decrements the failure count, never
   * below zero. Open: no effect.
   */
  recordSuccess() {
    switch (this.state) {
      case "half-open":
        this.successes += 1;
        if (this.successes >= this.config.successThreshold) {
          this.reset();
        }
        break;
      case "closed":
        this.failures = Math.max(0, this.failures - 1);
        break;
      default:
        break;
    }
  }
  /**
   * Record a failed request.
   *
   * Any failure while half-open re-opens the circuit; while closed the circuit
   * opens once `failureThreshold` failures have accumulated.
   */
  recordFailure() {
    this.failures += 1;
    this.lastFailure = /* @__PURE__ */ new Date();
    const probeFailed = this.state === "half-open";
    const thresholdReached = this.state === "closed" && this.failures >= this.config.failureThreshold;
    if (probeFailed || thresholdReached) {
      this.trip();
    }
  }
  /**
   * Open the circuit and schedule the next attempt `config.timeout` from now.
   */
  trip() {
    this.state = "open";
    const reopenAt = Date.now() + this.config.timeout;
    this.nextAttempt = new Date(reopenAt);
    this.successes = 0;
  }
  /**
   * Close the circuit and clear the counters and the scheduled attempt.
   * `lastFailure` is left untouched.
   */
  reset() {
    this.state = "closed";
    this.failures = 0;
    this.successes = 0;
    this.nextAttempt = null;
  }
  /**
   * Get the current state string.
   */
  getState() {
    return this.state;
  }
  /**
   * Get a snapshot of the breaker: provider name, state, failure count and the
   * `nextAttempt` / `lastFailure` dates.
   */
  getStatus() {
    const { providerName, state, failures, nextAttempt, lastFailure } = this;
    return { providerName, state, failures, nextAttempt, lastFailure };
  }
};
|
|
296
|
+
/**
 * Tracks per-provider health history and circuit breakers.
 *
 * Emits: "unhealthy" and "degraded" (provider name, health record) from
 * `recordHealth`; "circuit-open" (provider name) when a recorded failure opens
 * a circuit; "circuit-reset" (provider name) from `resetCircuit`.
 */
var HealthMonitor = class extends EventEmitter {
  /**
   * @param config - Monitor configuration. Only `config.circuitBreaker` is read
   *   here; it is passed to each `CircuitBreaker` that gets created.
   */
  constructor(config) {
    super();
    this.config = config;
  }
  /** Provider name -> array of recorded health entries (oldest first). */
  healthHistory = /* @__PURE__ */ new Map();
  /** Provider name -> CircuitBreaker. */
  circuitBreakers = /* @__PURE__ */ new Map();
  /** Maximum number of health entries kept per provider. */
  maxHistorySize = 100;
  /**
   * Record a health check result.
   *
   * Appends to the provider's history (dropping the oldest entry beyond
   * `maxHistorySize`) and emits "unhealthy" or "degraded" when the status says so.
   */
  recordHealth(providerName, health) {
    const history = this.healthHistory.get(providerName) ?? [];
    history.push(health);
    if (history.length > this.maxHistorySize) {
      history.shift();
    }
    this.healthHistory.set(providerName, history);
    if (health.status === "unhealthy") {
      this.emit("unhealthy", providerName, health);
    } else if (health.status === "degraded") {
      this.emit("degraded", providerName, health);
    }
  }
  /**
   * Record a request outcome on the provider's circuit breaker.
   *
   * "circuit-open" is emitted only when this failure moves the breaker into
   * the open state, not for every later failure reported while it is already
   * open (e.g. from requests that were in flight when it tripped).
   *
   * @param providerName - Provider the request went to.
   * @param success - Whether the request succeeded.
   * @param _latencyMs - Accepted for interface compatibility; not used.
   */
  recordRequest(providerName, success, _latencyMs) {
    const breaker = this.getOrCreateCircuitBreaker(providerName);
    if (success) {
      breaker.recordSuccess();
      return;
    }
    const wasOpen = breaker.getState() === "open";
    breaker.recordFailure();
    if (!wasOpen && breaker.getState() === "open") {
      this.emit("circuit-open", providerName);
    }
  }
  /**
   * Check if requests are allowed for a provider. Providers without a stored
   * breaker are always allowed.
   */
  isRequestAllowed(providerName) {
    const breaker = this.circuitBreakers.get(providerName);
    return breaker ? breaker.isAllowed() : true;
  }
  /**
   * Get or create a circuit breaker for a provider.
   *
   * A breaker is created and stored only when `config.circuitBreaker` is set.
   * Otherwise a fresh, unstored breaker with default thresholds is returned on
   * every call, so failures recorded on it never accumulate and the circuit
   * never opens.
   */
  getOrCreateCircuitBreaker(providerName) {
    const existing = this.circuitBreakers.get(providerName);
    if (existing) {
      return existing;
    }
    // Optional chaining: a monitor built without any config must not throw here.
    const breakerConfig = this.config?.circuitBreaker;
    if (breakerConfig) {
      const created = new CircuitBreaker(providerName, breakerConfig);
      this.circuitBreakers.set(providerName, created);
      return created;
    }
    return new CircuitBreaker(providerName, {
      failureThreshold: 5,
      successThreshold: 3,
      timeout: 3e4
    });
  }
  /**
   * Get health history for a provider (empty array when none recorded).
   */
  getHistory(providerName) {
    return this.healthHistory.get(providerName) ?? [];
  }
  /**
   * Get the mean `latencyMs` over the provider's recorded history, or 0 when
   * there is no history.
   */
  getAverageLatency(providerName) {
    const history = this.getHistory(providerName);
    if (history.length === 0) return 0;
    const sum = history.reduce((acc, h) => acc + h.latencyMs, 0);
    return sum / history.length;
  }
  /**
   * Get the `errorRate` of the most recent health entry, or 0 when there is
   * no history.
   */
  getErrorRate(providerName) {
    const history = this.getHistory(providerName);
    if (history.length === 0) return 0;
    return history[history.length - 1].errorRate;
  }
  /**
   * Get circuit breaker status for a provider, or null when it has no stored breaker.
   */
  getCircuitStatus(providerName) {
    const breaker = this.circuitBreakers.get(providerName);
    return breaker ? breaker.getStatus() : null;
  }
  /**
   * Get the status of every stored circuit breaker, keyed by provider name.
   */
  getAllCircuitStatuses() {
    const statuses = {};
    for (const [name, breaker] of this.circuitBreakers) {
      statuses[name] = breaker.getStatus();
    }
    return statuses;
  }
  /**
   * Reset the provider's circuit breaker and emit "circuit-reset".
   * No-op when the provider has no stored breaker.
   */
  resetCircuit(providerName) {
    const breaker = this.circuitBreakers.get(providerName);
    if (breaker) {
      breaker.reset();
      this.emit("circuit-reset", providerName);
    }
  }
  /**
   * Clear all health history and all stored circuit breakers.
   */
  clear() {
    this.healthHistory.clear();
    this.circuitBreakers.clear();
  }
};
|
|
415
|
+
|
|
416
|
+
// src/routing/Router.ts
|
|
417
|
+
/**
 * Default cross-provider model equivalences.
 *
 * Each key is a model name; its value lists `{ provider, model }` targets on
 * the other providers in the same tier. The table is generated from the tier
 * lists below: every model maps to the remaining members of its tier, in tier
 * order.
 */
var DEFAULT_MODEL_MAPPINGS = (() => {
  const tiers = [
    // GPT-4 class
    [
      ["openai", "gpt-4o"],
      ["anthropic", "claude-3-5-sonnet-20241022"],
      ["google", "gemini-1.5-pro"],
    ],
    // GPT-4 mini class
    [
      ["openai", "gpt-4o-mini"],
      ["anthropic", "claude-3-5-haiku-20241022"],
      ["google", "gemini-1.5-flash"],
    ],
  ];
  const mappings = {};
  for (const tier of tiers) {
    for (const [, sourceModel] of tier) {
      // Fresh objects per entry so no two lists share a target object.
      mappings[sourceModel] = tier
        .filter(([, model]) => model !== sourceModel)
        .map(([provider, model]) => ({ provider, model }));
    }
  }
  return mappings;
})();
/** Model names that `Router` treats as virtual rather than concrete. */
var VIRTUAL_MODELS = ["best", "cheapest", "fastest"];
|
|
446
|
+
/**
 * Routes requests either through the configured strategy or, for the virtual
 * models listed in VIRTUAL_MODELS, through the built-in best / cheapest /
 * fastest selection.
 *
 * Every routing decision has the shape
 * `{ provider, model, reason, alternatives, timestamp }`, where `alternatives`
 * is a list of `{ provider, model, score }`.
 */
var Router = class {
  strategy;
  modelMappings;
  fallbackChain;
  /**
   * @param strategy - Strategy object exposing `route(request, registry, context)` and `name`.
   * @param config - Optional `modelMappings` (merged over the defaults, per key)
   *   and `fallbackChain` (defaults to openai, anthropic, google).
   */
  constructor(strategy, config) {
    this.strategy = strategy;
    this.modelMappings = {
      ...DEFAULT_MODEL_MAPPINGS,
      ...config?.modelMappings
    };
    this.fallbackChain = config?.fallbackChain || [
      "openai",
      "anthropic",
      "google"
    ];
  }
  /**
   * Route a request to a provider. Virtual models are handled here; everything
   * else is delegated to the strategy.
   */
  route(request, registry, context) {
    if (this.isVirtualModel(request.model)) {
      return this.routeVirtualModel(request.model, request, registry, context);
    }
    return this.strategy.route(request, registry, context);
  }
  /**
   * Check if a model is a virtual model.
   */
  isVirtualModel(model) {
    return VIRTUAL_MODELS.includes(model);
  }
  /**
   * Route a virtual model to an actual provider/model.
   *
   * @throws Error when no available provider remains after applying
   *   `context.excludeProviders`, when those providers expose no models, or when
   *   the virtual model name is unknown.
   */
  routeVirtualModel(virtualModel, _request, registry, context) {
    const availableProviders = registry.getAvailableProviders().filter((p) => !context?.excludeProviders?.includes(p.name));
    if (availableProviders.length === 0) {
      throw new Error("No available providers");
    }
    switch (virtualModel) {
      case "best":
        return this.routeBest(availableProviders, context);
      case "cheapest":
        return this.routeCheapest(availableProviders, context);
      case "fastest":
        return this.routeFastest(availableProviders, context);
      default:
        throw new Error(`Unknown virtual model: ${String(virtualModel)}`);
    }
  }
  /**
   * Route to the highest-ranked model.
   *
   * Models missing from the ranking table score 50. When
   * `context.preferredProvider` has at least one model, its highest-ranked
   * model wins even if another provider ranks higher.
   */
  routeBest(providers, context) {
    const qualityRanking = {
      "claude-3-5-sonnet-20241022": 95,
      "claude-sonnet-4-20250514": 96,
      "gpt-4o": 94,
      "gemini-1.5-pro": 92,
      "claude-3-opus-20240229": 93,
      "gpt-4-turbo": 91,
      o1: 97,
      "o1-preview": 96
    };
    const candidates = [];
    for (const provider of providers) {
      for (const model of provider.getModels()) {
        candidates.push({
          provider: provider.name,
          model,
          score: qualityRanking[model] || 50
        });
      }
    }
    if (candidates.length === 0) {
      // Previously surfaced as an opaque TypeError on `undefined.provider`.
      throw new Error("No models available from the available providers");
    }
    candidates.sort((a, b) => b.score - a.score);
    const preferred = context?.preferredProvider ? candidates.find((c) => c.provider === context.preferredProvider) : void 0;
    if (preferred) {
      return {
        provider: preferred.provider,
        model: preferred.model,
        reason: `Best quality model from preferred provider`,
        // The chosen model must not be offered as its own alternative.
        alternatives: candidates.filter((c) => c !== preferred).slice(0, 3),
        timestamp: /* @__PURE__ */ new Date()
      };
    }
    const [best, ...rest] = candidates;
    return {
      provider: best.provider,
      model: best.model,
      reason: `Highest quality model available`,
      alternatives: rest.slice(0, 3),
      timestamp: /* @__PURE__ */ new Date()
    };
  }
  /**
   * Route to the cheapest model.
   *
   * Cost is the mean of the input and output price per million tokens from
   * `getModelInfo`. Free models rank first; models without info (or with
   * non-numeric prices) rank last. When `context.maxCost` is set, the cheapest
   * model whose estimated request cost fits is preferred; models without info
   * count as fitting. If nothing fits, the overall cheapest model is returned.
   */
  routeCheapest(providers, context) {
    // Token counts assumed for the budget estimate of a single request.
    const ESTIMATE_INPUT_TOKENS = 1e3;
    const ESTIMATE_OUTPUT_TOKENS = 500;
    const priced = [];
    for (const provider of providers) {
      for (const model of provider.getModels()) {
        const info = provider.getModelInfo(model);
        const avgCost = info ? (info.inputPricePerMillion + info.outputPricePerMillion) / 2 : Infinity;
        let score;
        if (avgCost === 0) {
          // Free is the cheapest possible price. A finite value keeps the
          // comparator well-defined and the score JSON-serializable.
          score = Number.MAX_VALUE;
        } else if (Number.isNaN(avgCost)) {
          score = 0;
        } else {
          score = 1 / avgCost;
        }
        priced.push({ info, candidate: { provider: provider.name, model, score } });
      }
    }
    if (priced.length === 0) {
      throw new Error("No models available from the available providers");
    }
    // Higher score = cheaper.
    priced.sort((a, b) => b.candidate.score - a.candidate.score);
    if (context?.maxCost !== void 0) {
      const withinBudget = priced.filter(({ info }) => {
        if (!info) return true;
        const estimatedCost = ESTIMATE_INPUT_TOKENS / 1e6 * info.inputPricePerMillion + ESTIMATE_OUTPUT_TOKENS / 1e6 * info.outputPricePerMillion;
        return estimatedCost <= context.maxCost;
      }).map(({ candidate }) => candidate);
      if (withinBudget.length > 0) {
        const [cheapestInBudget, ...others] = withinBudget;
        return {
          provider: cheapestInBudget.provider,
          model: cheapestInBudget.model,
          reason: `Cheapest model within budget`,
          alternatives: others.slice(0, 3),
          timestamp: /* @__PURE__ */ new Date()
        };
      }
    }
    const [cheapest, ...rest] = priced.map(({ candidate }) => candidate);
    return {
      provider: cheapest.provider,
      model: cheapest.model,
      reason: `Cheapest available model`,
      alternatives: rest.slice(0, 3),
      timestamp: /* @__PURE__ */ new Date()
    };
  }
  /**
   * Route to the fastest provider, judged by `getHealth().latencyMs`.
   *
   * A missing or zero latency is treated as 1000. When `context.maxLatency` is
   * set, the fastest model within that limit is preferred; if none qualifies,
   * the overall fastest is returned.
   */
  routeFastest(providers, context) {
    const DEFAULT_LATENCY_MS = 1e3;
    const timed = [];
    for (const provider of providers) {
      const latency = provider.getHealth().latencyMs || DEFAULT_LATENCY_MS;
      for (const model of provider.getModels()) {
        // Higher score = lower latency.
        timed.push({ latency, candidate: { provider: provider.name, model, score: 1 / latency } });
      }
    }
    if (timed.length === 0) {
      throw new Error("No models available from the available providers");
    }
    timed.sort((a, b) => b.candidate.score - a.candidate.score);
    if (context?.maxLatency !== void 0) {
      const withinLimit = timed.filter(({ latency }) => latency <= context.maxLatency).map(({ candidate }) => candidate);
      if (withinLimit.length > 0) {
        const [fastestInLimit, ...others] = withinLimit;
        return {
          provider: fastestInLimit.provider,
          model: fastestInLimit.model,
          reason: `Fastest model within latency limit`,
          alternatives: others.slice(0, 3),
          timestamp: /* @__PURE__ */ new Date()
        };
      }
    }
    const [fastest, ...rest] = timed.map(({ candidate }) => candidate);
    return {
      provider: fastest.provider,
      model: fastest.model,
      reason: `Fastest available provider`,
      alternatives: rest.slice(0, 3),
      timestamp: /* @__PURE__ */ new Date()
    };
  }
  /**
   * Get equivalent models across providers (empty array when unmapped).
   */
  getEquivalentModels(model) {
    return this.modelMappings[model] || [];
  }
  /**
   * Set the routing strategy.
   */
  setStrategy(strategy) {
    this.strategy = strategy;
  }
  /**
   * Get the current strategy name.
   */
  getStrategyName() {
    return this.strategy.name;
  }
  /**
   * Get a copy of the fallback chain.
   */
  getFallbackChain() {
    return [...this.fallbackChain];
  }
};
|
|
660
|
+
/**
 * Build a router configuration from user options.
 *
 * @param options - Optional `strategy`, `fallbackChain`, `weights` and `rules`.
 *   May be omitted or null, in which case only the default strategy is set.
 * @returns an object with exactly those four keys; `strategy` falls back to
 *   "round-robin" when missing or otherwise falsy.
 */
function createRouterConfig(options) {
  // Tolerate a missing/null options bag instead of throwing a TypeError.
  const { strategy, fallbackChain, weights, rules } = options ?? {};
  return {
    strategy: strategy || "round-robin",
    fallbackChain,
    weights,
    rules
  };
}
|
|
668
|
+
|
|
669
|
+
// src/routing/strategies/RoundRobin.ts
|
|
670
|
+
/**
 * Weighted round-robin routing strategy.
 *
 * Each provider occupies `weight` consecutive slots in the rotation (1 when no
 * weight is configured); successive calls to `route` walk the slots in order.
 */
var RoundRobinStrategy = class {
  name = "round-robin";
  /** Slot index to use on the next call; wrapped to the slot count on each call. */
  currentIndex = 0;
  /** Provider name -> weight. */
  weights;
  /**
   * @param config - Optional `weights` map (provider name -> weight).
   */
  constructor(config = {}) {
    this.weights = config.weights || {};
  }
  /**
   * Pick the next provider for a request.
   *
   * Candidates are the providers registered for `request.model`, or every
   * available provider when none is registered for it. Providers listed in
   * `context.excludeProviders` and unavailable providers are dropped. When
   * `context.preferredProvider` is among the candidates it is returned without
   * advancing the rotation. A selected provider that does not support the
   * requested model is paired with the first model it reports.
   *
   * @throws Error when no candidate provider remains.
   */
  route(request, registry, context) {
    let providers = registry.getProvidersForModel(request.model);
    if (providers.length === 0) {
      providers = registry.getAvailableProviders();
    }
    const excluded = context?.excludeProviders;
    providers = providers.filter((p) => !excluded?.includes(p.name) && p.isAvailable());
    if (providers.length === 0) {
      throw new Error(`No available providers for model: ${request.model}`);
    }
    const modelFor = (p) => p.supportsModel(request.model) ? request.model : p.getModels()[0];
    const alternativesTo = (chosen) => providers.filter((p) => p.name !== chosen.name).slice(0, 3).map((p) => ({
      provider: p.name,
      model: modelFor(p),
      score: 1
    }));
    if (context?.preferredProvider) {
      const preferred = providers.find((p) => p.name === context.preferredProvider);
      if (preferred) {
        return {
          provider: preferred.name,
          model: modelFor(preferred),
          reason: "Preferred provider selected",
          alternatives: alternativesTo(preferred),
          timestamp: /* @__PURE__ */ new Date()
        };
      }
    }
    // Walk cumulative slot counts instead of materializing one array entry per
    // slot: same rotation order, no per-request allocation proportional to the
    // weights, and no runaway loop for an infinite weight.
    const slotCounts = providers.map((p) => this.slotsFor(p.name));
    const totalSlots = slotCounts.reduce((sum, count) => sum + count, 0);
    const index = this.currentIndex % totalSlots;
    this.currentIndex = index + 1;
    let upperBound = 0;
    const position = slotCounts.findIndex((count) => {
      upperBound += count;
      return index < upperBound;
    });
    const selected = providers[position];
    return {
      provider: selected.name,
      model: modelFor(selected),
      reason: `Round-robin selection (index: ${index})`,
      alternatives: alternativesTo(selected),
      timestamp: /* @__PURE__ */ new Date()
    };
  }
  /**
   * Internal: number of rotation slots for a provider.
   *
   * Positive finite weights are rounded up to whole slots. Missing, zero,
   * negative, NaN or infinite weights fall back to a single slot so that every
   * candidate provider is always reachable.
   */
  slotsFor(providerName) {
    const weight = Number(this.weights[providerName]);
    return Number.isFinite(weight) && weight > 0 ? Math.ceil(weight) : 1;
  }
  /**
   * Reset the rotation index.
   */
  reset() {
    this.currentIndex = 0;
  }
};
|
|
740
|
+
|
|
741
|
+
// src/routing/strategies/Failover.ts
|
|
742
|
+
/**
 * Failover routing strategy: always picks the first usable provider in an
 * ordered chain.
 */
var FailoverStrategy = class {
  name = "failover";
  /** Ordered provider names to try. */
  chain;
  /** Requested model -> { provider name -> substitute model }. */
  modelMappings;
  /**
   * @param config - `chain` (ordered provider names) and optional
   *   `modelMappings`. The chain is copied so later changes to the caller's
   *   array do not alter this strategy, matching `setChain`/`getChain`.
   */
  constructor(config) {
    this.chain = [...(config.chain ?? [])];
    this.modelMappings = config.modelMappings || {};
  }
  /**
   * Select the first provider in the chain that is registered, available and
   * neither listed in `context.previousAttempts` nor in
   * `context.excludeProviders`. Up to three further usable providers are
   * returned as alternatives, scored 1, 0.9, 0.8 in chain order.
   *
   * For a provider that does not support the requested model, the configured
   * mapping is used when that provider supports it; otherwise the first model
   * the provider reports. The same rule applies to alternatives.
   *
   * @throws Error when no provider in the chain is usable.
   */
  route(request, registry, context) {
    const skipped = new Set(context?.previousAttempts?.map((a) => a.provider) || []);
    for (const excludedName of context?.excludeProviders || []) {
      skipped.add(excludedName);
    }
    const modelFor = (provider, providerName) => {
      if (provider.supportsModel(request.model)) return request.model;
      const mapped = this.modelMappings[request.model]?.[providerName];
      return mapped && provider.supportsModel(mapped) ? mapped : provider.getModels()[0];
    };
    const usable = [];
    for (const providerName of this.chain) {
      if (skipped.has(providerName)) continue;
      const provider = registry.get(providerName);
      if (provider?.isAvailable()) {
        usable.push({ providerName, provider });
      }
    }
    if (usable.length === 0) {
      throw new Error(
        `All providers in failover chain exhausted: ${this.chain.join(", ")}`
      );
    }
    const [primary] = usable;
    const alternatives = usable.filter((entry) => entry.providerName !== primary.providerName).slice(0, 3).map((entry, position) => ({
      provider: entry.providerName,
      model: modelFor(entry.provider, entry.providerName),
      score: 1 - position * 0.1
    }));
    return {
      provider: primary.providerName,
      model: modelFor(primary.provider, primary.providerName),
      // The count covers excluded providers as well as previous attempts.
      reason: skipped.size > 0 ? `Failover to ${primary.providerName} after ${skipped.size} failures` : `Primary provider in failover chain`,
      alternatives,
      timestamp: /* @__PURE__ */ new Date()
    };
  }
  /**
   * Get the provider that follows `currentProvider` in the chain, or null when
   * it is last or not in the chain.
   */
  getNextProvider(currentProvider) {
    const index = this.chain.indexOf(currentProvider);
    if (index === -1 || index >= this.chain.length - 1) {
      return null;
    }
    return this.chain[index + 1];
  }
  /**
   * Get a copy of the current failover chain.
   */
  getChain() {
    return [...this.chain];
  }
  /**
   * Replace the failover chain with a copy of `chain`.
   */
  setChain(chain) {
    this.chain = [...chain];
  }
};
|
|
825
|
+
// Module-wide encoder instance; null until first use and after freeEncoder().
var encoder = null;
/**
 * Return the shared "cl100k_base" encoder, creating it on first use.
 */
function getEncoder() {
  if (encoder) {
    return encoder;
  }
  encoder = get_encoding("cl100k_base");
  return encoder;
}
|
|
832
|
+
/**
 * Count the tokens in a piece of text using the shared encoder.
 *
 * If obtaining the encoder or encoding the text throws, the count is
 * approximated as one token per four characters, rounded up.
 */
function countTokens(text) {
  let tokenCount;
  try {
    tokenCount = getEncoder().encode(text).length;
  } catch {
    // Best-effort fallback: estimate rather than fail the caller.
    tokenCount = Math.ceil(text.length / 4);
  }
  return tokenCount;
}
|
|
840
|
+
/**
 * Estimate the token count of a list of messages.
 *
 * Adds a fixed 4 tokens per message plus the token count of its content
 * (non-string content is JSON-stringified first), and a fixed 2 tokens for the
 * list as a whole. Messages with empty or missing content contribute only the
 * per-message overhead.
 * NOTE(review): the 4 and 2 look like chat-format framing overheads — confirm
 * against the tokenizer guidance they were taken from.
 */
function countMessageTokens(messages) {
  const perMessageOverhead = 4;
  const trailingOverhead = 2;
  let total = trailingOverhead;
  for (const { content } of messages) {
    total += perMessageOverhead;
    if (!content) continue;
    const text = typeof content === "string" ? content : JSON.stringify(content);
    total += countTokens(text);
  }
  return total;
}
|
|
853
|
+
/**
 * Estimate the token count of a request: its messages plus any tool
 * definitions.
 *
 * Each tool adds the token counts of its function name, its description (when
 * present) and its JSON-stringified parameters (when present), plus a fixed
 * 10 tokens.
 */
function estimateRequestTokens(messages, tools) {
  const perToolOverhead = 10;
  let total = countMessageTokens(messages);
  if (!tools || !(tools.length > 0)) {
    return total;
  }
  for (const tool of tools) {
    const fn = tool.function;
    const texts = [fn.name];
    if (fn.description) {
      texts.push(fn.description);
    }
    if (fn.parameters) {
      texts.push(JSON.stringify(fn.parameters));
    }
    total += texts.reduce((sum, text) => sum + countTokens(text), perToolOverhead);
  }
  return total;
}
|
|
869
|
+
/**
 * Truncate `text` so that it encodes to at most `maxTokens` tokens.
 *
 * Text that already fits is returned unchanged. Otherwise the first
 * `maxTokens` tokens are decoded back to a string.
 *
 * @param {string} text - Text to truncate.
 * @param {number} maxTokens - Maximum number of tokens to keep.
 * @returns {string} The original text if it fits, otherwise the truncated text.
 */
function truncateToTokenLimit(text, maxTokens) {
  // A negative budget means nothing fits. Without this guard the value would
  // reach `slice(0, negative)`, which drops tokens from the END and returns
  // almost the whole text.
  if (maxTokens < 0) {
    return "";
  }
  const enc = getEncoder();
  const tokens = enc.encode(text);
  if (tokens.length <= maxTokens) {
    return text;
  }
  // The decoded bytes are turned into a string with TextDecoder.
  // NOTE(review): cutting at a token boundary can split a multi-byte
  // character, which TextDecoder renders as U+FFFD.
  const truncatedBytes = enc.decode(tokens.slice(0, maxTokens));
  return new TextDecoder().decode(truncatedBytes);
}
|
|
879
|
+
/**
 * Release the cached encoder, if one exists, by calling its `free()` method
 * and clearing the module-level reference. Does nothing when no encoder is
 * cached.
 */
function freeEncoder() {
  if (!encoder) {
    return;
  }
  encoder.free();
  encoder = null;
}
|
|
885
|
+
|
|
886
|
+
// src/routing/strategies/CostOptimized.ts
|
|
887
|
+
/**
 * Relative quality score per model id.
 *
 * CostOptimizedStrategy divides these values by 100 before comparing them with
 * its quality threshold, and uses 50 for models that are not listed here.
 */
var MODEL_QUALITY_SCORES = {
  // Top tier
  "o1": 98,
  "o1-preview": 96,
  "claude-3-opus-20240229": 95,
  "claude-3-5-sonnet-20241022": 94,
  "claude-sonnet-4-20250514": 95,
  "gpt-4o": 93,
  "gemini-1.5-pro": 91,

  // Mid tier
  "gpt-4-turbo": 88,
  "gpt-4": 87,
  "claude-3-sonnet-20240229": 85,
  "o1-mini": 84,

  // Fast/cheap tier
  "gpt-4o-mini": 80,
  "claude-3-5-haiku-20241022": 79,
  "claude-3-haiku-20240307": 75,
  "gemini-1.5-flash": 78,
  "gpt-3.5-turbo": 70,

  // Local models
  "llama3": 65,
  "llama3.1": 67,
  "llama3.2": 68,
  "mistral": 62
};
|
|
913
|
+
/**
 * Routing strategy that selects the cheapest model whose quality score meets
 * a configurable threshold.
 *
 * Config keys read by this class:
 * - `preferLocal` (default false): rank "ollama"/"lmstudio" providers first.
 * - `qualityThreshold` (default 0.6): minimum quality, compared against
 *   MODEL_QUALITY_SCORES / 100.
 * - `fallbackOnBudget` (default "cheapest"): when set to "error", routing
 *   throws if nothing fits the budget; any other value keeps the over-budget
 *   candidates and picks the cheapest of them.
 * - `maxCostPerRequest`: optional budget, overridden by `context.maxCost`.
 */
var CostOptimizedStrategy = class {
  name = "cost-optimized";
  config;
  constructor(config = {}) {
    this.config = {
      preferLocal: false,
      qualityThreshold: 0.6,
      fallbackOnBudget: "cheapest",
      ...config
    };
  }
  /**
   * Choose a provider/model pair for `request`.
   *
   * @param {object} request - Reads `messages`, `tools` and `max_tokens`
   *   (output tokens are assumed to be 1000 when `max_tokens` is falsy).
   * @param {object} registry - Must expose `getAvailableProviders()`.
   * @param {object} [context] - Optional `excludeProviders` (provider names)
   *   and `maxCost`.
   * @returns {{provider: string, model: string, reason: string, alternatives: Array<object>, timestamp: Date}}
   * @throws {Error} If no provider is available, no provider yields a usable
   *   model, or nothing fits the budget while `fallbackOnBudget` is "error".
   */
  route(request, registry, context) {
    let providers = registry.getAvailableProviders();
    if (context?.excludeProviders) {
      providers = providers.filter(
        (p) => !context.excludeProviders.includes(p.name)
      );
    }
    if (providers.length === 0) {
      throw new Error("No available providers");
    }
    const estimatedInputTokens = estimateRequestTokens(
      request.messages,
      request.tools
    );
    const estimatedOutputTokens = request.max_tokens || 1e3;
    const candidates = [];
    for (const provider of providers) {
      const isLocal = provider.name === "ollama" || provider.name === "lmstudio";
      for (const model of provider.getModels()) {
        const modelInfo = provider.getModelInfo(model);
        if (!modelInfo) continue;
        const inputCost = estimatedInputTokens / 1e6 * modelInfo.inputPricePerMillion;
        const outputCost = estimatedOutputTokens / 1e6 * modelInfo.outputPricePerMillion;
        const quality = (MODEL_QUALITY_SCORES[model] || 50) / 100;
        candidates.push({
          provider: provider.name,
          model,
          cost: inputCost + outputCost,
          quality,
          isLocal
        });
      }
    }
    // Providers may expose no models, or none with model info. Fail with a
    // clear message instead of crashing later on `filtered[0]` being undefined.
    if (candidates.length === 0) {
      throw new Error("No candidate models available from the available providers");
    }
    const minQuality = this.config.qualityThreshold || 0;
    let filtered = candidates.filter((c) => c.quality >= minQuality);
    if (filtered.length === 0) {
      // Nothing meets the threshold: fall back to every candidate.
      filtered = candidates;
    }
    const maxCost = context?.maxCost ?? this.config.maxCostPerRequest;
    if (maxCost !== void 0) {
      const withinBudget = filtered.filter((c) => c.cost <= maxCost);
      if (withinBudget.length > 0) {
        filtered = withinBudget;
      } else if (this.config.fallbackOnBudget === "error") {
        throw new Error(`No models within budget of $${maxCost.toFixed(4)}`);
      }
    }
    filtered.sort((a, b) => {
      if (this.config.preferLocal) {
        if (a.isLocal && !b.isLocal) return -1;
        if (!a.isLocal && b.isLocal) return 1;
      }
      return a.cost - b.cost;
    });
    const selected = filtered[0];
    return {
      provider: selected.provider,
      model: selected.model,
      reason: `Cheapest model meeting quality threshold (${(selected.quality * 100).toFixed(0)}% quality, $${selected.cost.toFixed(6)}/req)`,
      alternatives: filtered.slice(1, 4).map((c) => ({
        provider: c.provider,
        model: c.model,
        // Higher score = cheaper; the small constant avoids division by zero.
        score: 1 / (c.cost + 1e-4)
      })),
      timestamp: /* @__PURE__ */ new Date()
    };
  }
  /**
   * Update configuration by merging `config` over the current values.
   */
  setConfig(config) {
    this.config = { ...this.config, ...config };
  }
  /**
   * Get a shallow copy of the current configuration.
   */
  getConfig() {
    return { ...this.config };
  }
};
|
|
1006
|
+
|
|
1007
|
+
// src/routing/strategies/LatencyOptimized.ts
|
|
1008
|
+
/**
 * Routing strategy that selects the provider with the lowest observed latency.
 *
 * Latencies come from `recordLatency()` once a provider has at least 5
 * samples and `adaptiveRouting` is enabled; otherwise the provider's health
 * latency is used (1000 ms when that is falsy).
 *
 * Config keys read by this class: `warmupRequests` (default 10),
 * `adaptiveRouting` (default true), `maxLatencyMs`.
 */
var LatencyOptimizedStrategy = class {
  name = "latency-optimized";
  config;
  latencyStats = /* @__PURE__ */ new Map();
  maxSamples = 100;
  constructor(config = {}) {
    this.config = {
      warmupRequests: 10,
      adaptiveRouting: true,
      ...config
    };
  }
  /**
   * Choose a provider/model pair for `request`.
   *
   * When `request.model` is "fastest", each provider contributes its first
   * model; otherwise only providers supporting `request.model` are considered.
   * While fewer than `warmupRequests` latencies have been recorded in total,
   * there is a 30% chance of picking randomly among the three fastest
   * candidates instead of the fastest one.
   *
   * @param {object} request - Reads `model`.
   * @param {object} registry - Must expose `getAvailableProviders()`.
   * @param {object} [context] - Optional `excludeProviders` and `maxLatency`.
   * @returns {{provider: string, model: string, reason: string, alternatives: Array<object>, timestamp: Date}}
   * @throws {Error} If no provider is available or none can serve the model.
   */
  route(request, registry, context) {
    let providers = registry.getAvailableProviders();
    if (context?.excludeProviders) {
      providers = providers.filter(
        (p) => !context.excludeProviders.includes(p.name)
      );
    }
    if (providers.length === 0) {
      throw new Error("No available providers");
    }
    const totalRequests = Array.from(this.latencyStats.values()).reduce(
      (sum, stats) => sum + stats.count,
      0
    );
    const warmupThreshold = this.config.warmupRequests ?? 10;
    const isWarmup = warmupThreshold > 0 && totalRequests < warmupThreshold;
    const candidates = [];
    for (const provider of providers) {
      const stats = this.latencyStats.get(provider.name);
      const health = provider.getHealth();
      let latency;
      let confidence;
      if (stats && stats.count >= 5 && this.config.adaptiveRouting) {
        latency = stats.avg;
        confidence = Math.min(stats.count / 50, 1);
      } else {
        latency = health.latencyMs || 1e3;
        confidence = 0.3;
      }
      // At most one candidate per provider: the loop stops at the first
      // model that passes the filter.
      for (const model of provider.getModels()) {
        if (request.model !== "fastest" && !provider.supportsModel(request.model) && model !== request.model) {
          continue;
        }
        candidates.push({
          provider: provider.name,
          model: request.model !== "fastest" && provider.supportsModel(request.model) ? request.model : model,
          latency,
          confidence
        });
        break;
      }
    }
    // No provider can serve the requested model (or none exposes any model).
    // Fail with a clear message instead of crashing on `filtered[0]` below.
    if (candidates.length === 0) {
      throw new Error(`No candidate providers for model "${request.model}"`);
    }
    const maxLatency = context?.maxLatency ?? this.config.maxLatencyMs;
    let filtered = candidates;
    if (maxLatency !== void 0) {
      const withinLimit = candidates.filter((c) => c.latency <= maxLatency);
      if (withinLimit.length > 0) {
        filtered = withinLimit;
      }
    }
    filtered.sort((a, b) => a.latency - b.latency);
    const shouldExplore = isWarmup && Math.random() < 0.3 && filtered.length > 1;
    if (shouldExplore) {
      const randomIndex = Math.floor(
        Math.random() * Math.min(3, filtered.length)
      );
      const selected2 = filtered[randomIndex];
      return {
        provider: selected2.provider,
        model: selected2.model,
        reason: `Warmup exploration (${totalRequests}/${warmupThreshold} requests)`,
        alternatives: filtered.filter((c) => c.provider !== selected2.provider).slice(0, 3).map((c) => ({
          provider: c.provider,
          model: c.model,
          score: 1 / c.latency
        })),
        timestamp: /* @__PURE__ */ new Date()
      };
    }
    const selected = filtered[0];
    return {
      provider: selected.provider,
      model: selected.model,
      reason: `Fastest provider (${selected.latency.toFixed(0)}ms avg, ${(selected.confidence * 100).toFixed(0)}% confidence)`,
      alternatives: filtered.slice(1, 4).map((c) => ({
        provider: c.provider,
        model: c.model,
        score: 1 / c.latency
      })),
      timestamp: /* @__PURE__ */ new Date()
    };
  }
  /**
   * Record a latency observation for `provider`.
   *
   * Updates count/total/min/max, keeps the most recent `maxSamples` samples,
   * maintains `avg` as an exponential moving average (weight 0.2 on the new
   * sample) and recomputes `p95` once at least 20 samples are held.
   */
  recordLatency(provider, latencyMs) {
    let stats = this.latencyStats.get(provider);
    if (!stats) {
      stats = {
        count: 0,
        total: 0,
        min: Infinity,
        max: 0,
        avg: 0,
        p95: 0,
        samples: []
      };
      this.latencyStats.set(provider, stats);
    }
    stats.count++;
    stats.total += latencyMs;
    stats.min = Math.min(stats.min, latencyMs);
    stats.max = Math.max(stats.max, latencyMs);
    stats.samples.push(latencyMs);
    if (stats.samples.length > this.maxSamples) {
      stats.samples.shift();
    }
    const alpha = 0.2;
    if (stats.count === 1) {
      stats.avg = latencyMs;
    } else {
      stats.avg = alpha * latencyMs + (1 - alpha) * stats.avg;
    }
    if (stats.samples.length >= 20) {
      const sorted = [...stats.samples].sort((a, b) => a - b);
      const p95Index = Math.floor(sorted.length * 0.95);
      stats.p95 = sorted[p95Index];
    }
  }
  /**
   * Get latency statistics for a provider (undefined if none recorded).
   */
  getStats(provider) {
    return this.latencyStats.get(provider);
  }
  /**
   * Get a copy of all latency statistics, keyed by provider name.
   */
  getAllStats() {
    const result = {};
    for (const [provider, stats] of this.latencyStats) {
      result[provider] = { ...stats, samples: [...stats.samples] };
    }
    return result;
  }
  /**
   * Clear latency statistics
   */
  clearStats() {
    this.latencyStats.clear();
  }
  /**
   * Update configuration by merging `config` over the current values.
   */
  setConfig(config) {
    this.config = { ...this.config, ...config };
  }
};
|
|
1168
|
+
|
|
1169
|
+
// src/utils/pricing.ts
|
|
1170
|
+
/**
 * Price table keyed by model id. `input` and `output` are prices per one
 * million tokens (presumably USD; confirm against the providers' price
 * lists). Functions in this file divide token counts by 1e6 before
 * multiplying by these values.
 */
var MODEL_PRICING = {
  // OpenAI Models
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-2024-11-20": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4o-mini-2024-07-18": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },
  "gpt-4-turbo-preview": { input: 10, output: 30 },
  "gpt-4": { input: 30, output: 60 },
  "gpt-4-32k": { input: 60, output: 120 },
  "gpt-3.5-turbo": { input: 0.5, output: 1.5 },
  "gpt-3.5-turbo-0125": { input: 0.5, output: 1.5 },
  "o1": { input: 15, output: 60 },
  "o1-preview": { input: 15, output: 60 },
  "o1-mini": { input: 3, output: 12 },

  // Anthropic Models
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-3-5-sonnet-latest": { input: 3, output: 15 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-3-5-haiku-20241022": { input: 0.8, output: 4 },
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  "claude-3-sonnet-20240229": { input: 3, output: 15 },

  // Google Gemini Models
  "gemini-1.5-pro": { input: 1.25, output: 5 },
  "gemini-1.5-pro-latest": { input: 1.25, output: 5 },
  "gemini-1.5-flash": { input: 0.075, output: 0.3 },
  "gemini-1.5-flash-latest": { input: 0.075, output: 0.3 },
  "gemini-2.0-flash-exp": { input: 0.1, output: 0.4 },
  "gemini-pro": { input: 0.5, output: 1.5 },

  // Mistral Models
  "mistral-large-latest": { input: 2, output: 6 },
  "mistral-medium-latest": { input: 2.7, output: 8.1 },
  "mistral-small-latest": { input: 0.2, output: 0.6 },
  "open-mistral-7b": { input: 0.25, output: 0.25 },
  "open-mixtral-8x7b": { input: 0.7, output: 0.7 },
  "open-mixtral-8x22b": { input: 2, output: 6 },

  // Cohere Models
  "command-r-plus": { input: 2.5, output: 10 },
  "command-r": { input: 0.5, output: 1.5 },
  "command": { input: 1, output: 2 },

  // Groq Models (significantly cheaper)
  "llama-3.3-70b-versatile": { input: 0.59, output: 0.79 },
  "llama-3.1-70b-versatile": { input: 0.59, output: 0.79 },
  "llama-3.1-8b-instant": { input: 0.05, output: 0.08 },
  "mixtral-8x7b-32768": { input: 0.24, output: 0.24 },

  // Together AI Models
  "meta-llama/Llama-3.3-70B-Instruct-Turbo": { input: 0.88, output: 0.88 },
  "meta-llama/Llama-3.1-70B-Instruct-Turbo": { input: 0.88, output: 0.88 },
  "meta-llama/Llama-3.1-8B-Instruct-Turbo": { input: 0.18, output: 0.18 },
  "mistralai/Mixtral-8x7B-Instruct-v0.1": { input: 0.6, output: 0.6 },

  // Local models (free)
  "llama3": { input: 0, output: 0 },
  "llama3.1": { input: 0, output: 0 },
  "llama3.2": { input: 0, output: 0 },
  "mistral": { input: 0, output: 0 },
  "codellama": { input: 0, output: 0 },
  "phi3": { input: 0, output: 0 },
  "qwen2.5": { input: 0, output: 0 }
};
|
|
1230
|
+
/**
 * Context window size, in tokens, per model id. getModelInfo() uses 4096 for
 * models that are not listed here.
 */
var MODEL_CONTEXT_WINDOWS = {
  // OpenAI
  "gpt-4o": 128000,
  "gpt-4o-mini": 128000,
  "gpt-4-turbo": 128000,
  "gpt-4": 8192,
  "gpt-4-32k": 32768,
  "gpt-3.5-turbo": 16385,
  "o1": 200000,
  "o1-preview": 128000,
  "o1-mini": 128000,

  // Anthropic
  "claude-3-5-sonnet-20241022": 200000,
  "claude-sonnet-4-20250514": 200000,
  "claude-3-5-haiku-20241022": 200000,
  "claude-3-opus-20240229": 200000,
  "claude-3-sonnet-20240229": 200000,
  "claude-3-haiku-20240307": 200000,

  // Google
  "gemini-1.5-pro": 2000000,
  "gemini-1.5-flash": 1000000,
  "gemini-2.0-flash-exp": 1000000,
  "gemini-pro": 32000,

  // Mistral
  "mistral-large-latest": 128000,
  "mistral-medium-latest": 32000,
  "mistral-small-latest": 32000,

  // Groq
  "llama-3.3-70b-versatile": 128000,
  "llama-3.1-70b-versatile": 131072,
  "llama-3.1-8b-instant": 131072,
  "mixtral-8x7b-32768": 32768
};
|
|
1263
|
+
/**
 * Maximum output tokens per model id. getModelInfo() uses 4096 for models
 * that are not listed here.
 */
var MODEL_MAX_OUTPUT = {
  // OpenAI
  "gpt-4o": 16384,
  "gpt-4o-mini": 16384,
  "gpt-4-turbo": 4096,
  "gpt-4": 8192,
  "gpt-3.5-turbo": 4096,
  "o1": 100000,
  "o1-preview": 32768,
  "o1-mini": 65536,

  // Anthropic
  "claude-3-5-sonnet-20241022": 8192,
  "claude-sonnet-4-20250514": 16384,
  "claude-3-opus-20240229": 4096,

  // Google
  "gemini-1.5-pro": 8192,
  "gemini-1.5-flash": 8192
};
|
|
1281
|
+
/**
 * Compute the cost of a completed request from its token usage.
 *
 * @param {string} model - Model id to look up in MODEL_PRICING.
 * @param {{prompt_tokens?: number, completion_tokens?: number}} [usage]
 *   Token usage; a missing object or missing count is treated as 0 tokens.
 * @returns {number} Cost in the price table's currency, or 0 for models
 *   without a pricing entry.
 */
function calculateCost(model, usage) {
  // Only own entries count as pricing. A plain `MODEL_PRICING[model]` lookup
  // would also match inherited members such as "constructor" or "toString",
  // and the cost would come out as NaN.
  if (!Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)) {
    return 0;
  }
  const pricing = MODEL_PRICING[model];
  if (!pricing) {
    return 0;
  }
  // Treat missing counts as 0 so that an absent usage report does not throw
  // or turn the result into NaN.
  const promptTokens = usage?.prompt_tokens ?? 0;
  const completionTokens = usage?.completion_tokens ?? 0;
  const inputCost = promptTokens / 1e6 * pricing.input;
  const outputCost = completionTokens / 1e6 * pricing.output;
  return inputCost + outputCost;
}
|
|
1290
|
+
/**
 * Estimate the cost of a request from expected token counts.
 *
 * @param {string} model - Model id to look up in MODEL_PRICING.
 * @param {number} estimatedInputTokens - Expected prompt tokens.
 * @param {number} estimatedOutputTokens - Expected completion tokens.
 * @returns {number} Estimated cost, or 0 for models without a pricing entry.
 */
function estimateCost(model, estimatedInputTokens, estimatedOutputTokens) {
  // Only own entries count as pricing. Inherited members such as
  // "constructor" would otherwise pass the lookup and yield NaN.
  if (!Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)) {
    return 0;
  }
  const pricing = MODEL_PRICING[model];
  if (!pricing) {
    return 0;
  }
  const inputCost = estimatedInputTokens / 1e6 * pricing.input;
  const outputCost = estimatedOutputTokens / 1e6 * pricing.output;
  return inputCost + outputCost;
}
|
|
1299
|
+
/**
 * Look up the pricing entry for a model.
 *
 * @param {string} model - Model id.
 * @returns {{input: number, output: number} | null} The pricing entry, or
 *   null when the model has none.
 */
function getModelPricing(model) {
  // Restrict the lookup to own entries so that names like "toString" do not
  // return Object.prototype members instead of null.
  if (!Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)) {
    return null;
  }
  return MODEL_PRICING[model] || null;
}
|
|
1302
|
+
/**
 * Assemble the descriptive record for a model from the static tables.
 *
 * Models missing from a table get defaults: zero prices, a 4096-token context
 * window and 4096 maximum output tokens.
 *
 * @param {string} model - Model id.
 * @param {string} provider - Provider name, also passed to getModelCapabilities.
 * @returns {object} Record with `id`, `provider`, `contextWindow`,
 *   `maxOutputTokens`, `inputPricePerMillion`, `outputPricePerMillion` and
 *   `capabilities`.
 */
function getModelInfo(model, provider) {
  const { input, output } = MODEL_PRICING[model] || { input: 0, output: 0 };
  return {
    id: model,
    provider,
    contextWindow: MODEL_CONTEXT_WINDOWS[model] || 4096,
    maxOutputTokens: MODEL_MAX_OUTPUT[model] || 4096,
    inputPricePerMillion: input,
    outputPricePerMillion: output,
    capabilities: getModelCapabilities(model, provider)
  };
}
|
|
1316
|
+
/**
 * Derive a model's capability flags from its id and provider.
 *
 * Rules are checked in order and the first match wins: GPT-4o / GPT-4 Turbo,
 * then ids containing "o1", then Claude 3 and Claude 4 ids, then Gemini, then
 * the "ollama" provider. Anything else gets the defaults.
 *
 * @param {string} model - Model id.
 * @param {string} provider - Provider name.
 * @returns {{streaming: boolean, tools: boolean, vision: boolean, json_mode: boolean, system_prompts: boolean}}
 */
function getModelCapabilities(model, provider) {
  const defaults = {
    streaming: true,
    tools: true,
    vision: false,
    json_mode: true,
    system_prompts: true
  };
  if (model.includes("gpt-4o") || model.includes("gpt-4-turbo")) {
    return { ...defaults, vision: true };
  }
  if (model.includes("o1")) {
    return {
      streaming: false,
      // o1 doesn't support streaming
      tools: false,
      vision: false,
      json_mode: false,
      system_prompts: false
      // o1 uses developer messages
    };
  }
  // Claude 4 ids put the family name first ("claude-sonnet-4-20250514"), so
  // they do not contain "claude-3" and need their own pattern to be reported
  // as vision-capable.
  if (model.includes("claude-3") || /claude-(?:opus|sonnet|haiku)-4/.test(model)) {
    return { ...defaults, vision: true };
  }
  if (model.includes("gemini")) {
    return { ...defaults, vision: true };
  }
  if (provider === "ollama") {
    return {
      streaming: true,
      tools: false,
      // Most Ollama models don't support tools natively
      vision: model.includes("llava") || model.includes("bakllava"),
      json_mode: true,
      system_prompts: true
    };
  }
  return defaults;
}
|
|
1356
|
+
/**
 * Pick the model with the lowest mean of input and output price.
 *
 * Models without a MODEL_PRICING entry are skipped; on ties the earliest
 * model wins. The second parameter is accepted but not used.
 *
 * @param {Iterable<string>} models - Candidate model ids.
 * @param {*} [_requiredCapabilities] - Unused.
 * @returns {string | null} The cheapest priced model, or null if none is priced.
 */
function findCheapestModel(models, _requiredCapabilities) {
  let winner = null;
  for (const candidate of models) {
    const price = MODEL_PRICING[candidate];
    if (price) {
      const meanPrice = (price.input + price.output) / 2;
      const isBetter = winner === null || meanPrice < winner.cost;
      if (isBetter) {
        winner = { model: candidate, cost: meanPrice };
      }
    }
  }
  return winner === null ? null : winner.model || null;
}
|
|
1368
|
+
/**
 * Return a copy of `models` sorted by the mean of input and output price.
 *
 * Models without a MODEL_PRICING entry are treated as costing 0. The input
 * array is not modified.
 *
 * @param {Iterable<string>} models - Model ids to sort.
 * @param {string} [direction="asc"] - "asc" for cheapest first; any other
 *   value sorts most expensive first.
 * @returns {string[]} New sorted array.
 */
function sortModelsByCost(models, direction = "asc") {
  const meanPrice = (id) => {
    const price = MODEL_PRICING[id] || { input: 0, output: 0 };
    return (price.input + price.output) / 2;
  };
  const sign = direction === "asc" ? 1 : -1;
  return [...models].sort((a, b) => sign * (meanPrice(a) - meanPrice(b)));
}
|
|
1377
|
+
/**
 * Compute a hex hash that identifies a chat request by the fields that
 * determine its response.
 *
 * The hash covers the model, every message (role, normalized content, and the
 * optional `name`, `tool_calls`, `tool_call_id`), the sampling parameters
 * (`temperature` defaulting to 1, `top_p`, penalties, `seed`), `max_tokens`,
 * `stop`, `response_format`, `tools` and `tool_choice`.
 *
 * Fields that are undefined are omitted by JSON.stringify, so requests that
 * use none of the optional fields hash exactly as they did before those
 * fields were included.
 *
 * @param {object} request - Chat completion request.
 * @returns {string} Hexadecimal murmurhash v3 digest.
 */
function hashRequest(request) {
  const normalized = {
    model: request.model,
    messages: request.messages.map((m) => ({
      role: m.role,
      content: normalizeContent(m.content),
      // Without these, assistant messages that differ only in their tool
      // calls (content is null) and tool results for different call ids
      // would hash identically.
      name: m.name,
      tool_calls: m.tool_calls,
      tool_call_id: m.tool_call_id
    })),
    temperature: request.temperature ?? 1,
    max_tokens: request.max_tokens,
    tools: request.tools ? JSON.stringify(request.tools) : void 0,
    tool_choice: request.tool_choice ? JSON.stringify(request.tool_choice) : void 0,
    // Parameters that change the completion must be part of the key as well.
    top_p: request.top_p,
    frequency_penalty: request.frequency_penalty,
    presence_penalty: request.presence_penalty,
    stop: request.stop,
    response_format: request.response_format,
    seed: request.seed
  };
  const str = JSON.stringify(normalized);
  return murmurhash.v3(str).toString(16);
}
|
|
1392
|
+
/**
 * Convert message content to a string for hashing.
 *
 * @param {*} content - Message content.
 * @returns {string} The content itself when it is a string, "" for
 *   null/undefined, and the JSON serialization for anything else.
 */
function normalizeContent(content) {
  if (content === null || content === void 0) {
    return "";
  }
  return typeof content === "string" ? content : JSON.stringify(content);
}
|
|
1401
|
+
/**
 * Build an identifier of the form `<prefix>-<time><random>`.
 *
 * `<time>` is the current millisecond timestamp in base 36 and `<random>` is
 * up to six base-36 characters taken from Math.random().
 *
 * @param {string} [prefix="gw"] - Text placed before the hyphen.
 * @returns {string} The generated identifier.
 */
function generateId(prefix = "gw") {
  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 8);
  return `${prefix}-${timePart + randomPart}`;
}
|
|
1406
|
+
/**
 * Generate a chat-completion id of the form `chatcmpl-<unique suffix>`.
 *
 * @returns {string} The generated id.
 */
function generateRequestId() {
  // Pass the prefix to generateId instead of prepending it to
  // generateId(""): an empty prefix still gets a hyphen, which produced ids
  // with a doubled hyphen ("chatcmpl--...").
  return generateId("chatcmpl");
}
|
|
1409
|
+
/**
 * Build the cache key for a request: `gw:cache:<provider>:<model>:<hash>`.
 *
 * @param {string} provider - Provider name.
 * @param {string} model - Model id.
 * @param {string} requestHash - Hash of the request.
 * @returns {string} The cache key.
 */
function generateCacheKey(provider, model, requestHash) {
  const scope = `${provider}:${model}`;
  return `gw:cache:${scope}:${requestHash}`;
}
|
|
1412
|
+
/**
 * Hash a string with murmurhash v3.
 *
 * @param {string} str - Text to hash.
 * @returns {string} The digest in hexadecimal.
 */
function hash(str) {
  const digest = murmurhash.v3(str);
  return digest.toString(16);
}
|
|
1415
|
+
/**
 * Derive a fingerprint string from a configuration object.
 *
 * The configuration is serialized with JSON.stringify and hashed with
 * murmurhash v3; the hex digest is prefixed with "fp_".
 *
 * @param {*} config - Configuration to fingerprint.
 * @returns {string} Fingerprint of the form `fp_<hex>`.
 */
function createSystemFingerprint(config) {
  const serialized = JSON.stringify(config);
  const digest = murmurhash.v3(serialized).toString(16);
  return `fp_${digest}`;
}
|
|
1419
|
+
|
|
1420
|
+
// src/providers/Provider.ts
|
|
1421
|
+
/**
 * Base class for model providers. It stores the provider configuration and
 * tracks a health record (`status`, `latencyMs`, `lastCheck`, `errorRate`,
 * `consecutiveFailures`).
 *
 * `healthCheck()` calls `this.chat(...)`, which this class does not define;
 * subclasses are expected to supply it.
 */
var Provider = class {
  name;
  config;
  health;
  constructor(config) {
    this.config = config;
    this.name = config.name;
    // Every provider starts out healthy with no recorded latency or errors.
    this.health = {
      status: "healthy",
      latencyMs: 0,
      lastCheck: /* @__PURE__ */ new Date(),
      errorRate: 0,
      consecutiveFailures: 0
    };
  }
  /**
   * Whether `model` is listed in this provider's configured models.
   */
  supportsModel(model) {
    const { models } = this.config;
    return models.includes(model);
  }
  /**
   * A copy of the configured model list.
   */
  getModels() {
    const { models } = this.config;
    return [...models];
  }
  /**
   * A shallow copy of the current health record.
   */
  getHealth() {
    return Object.assign({}, this.health);
  }
  /**
   * Probe the provider with a one-token chat request against its first
   * configured model and replace the health record with the outcome.
   *
   * Success marks the provider healthy, lowers the error rate by 0.1 and
   * resets the failure streak. Failure raises the error rate by 0.2, extends
   * the streak, and marks the provider "unhealthy" if the streak was already
   * at 2 or more ("degraded" otherwise).
   *
   * @returns {Promise<object>} The updated health record.
   */
  async healthCheck() {
    const startedAt = Date.now();
    let reachable = true;
    try {
      await this.chat({
        model: this.config.models[0],
        messages: [{ role: "user", content: "hi" }],
        max_tokens: 1
      });
    } catch {
      reachable = false;
    }
    const latencyMs = Date.now() - startedAt;
    // Read the record only after the probe: chat() may have updated it.
    const { errorRate, consecutiveFailures } = this.health;
    if (reachable) {
      this.health = {
        status: "healthy",
        latencyMs,
        lastCheck: /* @__PURE__ */ new Date(),
        errorRate: Math.max(0, errorRate - 0.1),
        consecutiveFailures: 0
      };
    } else {
      this.health = {
        status: consecutiveFailures >= 2 ? "unhealthy" : "degraded",
        latencyMs,
        lastCheck: /* @__PURE__ */ new Date(),
        errorRate: Math.min(1, errorRate + 0.2),
        consecutiveFailures: consecutiveFailures + 1
      };
    }
    return this.health;
  }
  /**
   * Fold the outcome of a regular request into the health record.
   *
   * On success the latency becomes the mean of the previous value and the new
   * sample (or the sample itself if nothing was recorded), the error rate
   * drops by 0.05 and the failure streak resets. On failure the latency is
   * kept, the error rate rises by 0.1 and the streak grows; the status
   * becomes "unhealthy" if the streak was already at 2 or more.
   */
  updateHealth(success, latencyMs) {
    const prior = this.health;
    if (!success) {
      this.health = {
        ...prior,
        status: prior.consecutiveFailures >= 2 ? "unhealthy" : "degraded",
        lastCheck: /* @__PURE__ */ new Date(),
        errorRate: Math.min(1, prior.errorRate + 0.1),
        consecutiveFailures: prior.consecutiveFailures + 1
      };
      return;
    }
    const smoothedLatency = prior.latencyMs > 0 ? (prior.latencyMs + latencyMs) / 2 : latencyMs;
    this.health = {
      status: "healthy",
      latencyMs: smoothedLatency,
      lastCheck: /* @__PURE__ */ new Date(),
      errorRate: Math.max(0, prior.errorRate - 0.05),
      consecutiveFailures: 0
    };
  }
  /**
   * True only while the status is "healthy".
   */
  isHealthy() {
    const { status } = this.health;
    return status === "healthy";
  }
  /**
   * True unless the status is "unhealthy" (healthy or degraded).
   */
  isAvailable() {
    const { status } = this.health;
    return status !== "unhealthy";
  }
};
|
|
1520
|
+
|
|
1521
|
+
// src/providers/registry/OpenAIProvider.ts
|
|
1522
|
+
/**
 * Model ids the OpenAI provider uses when its options supply no `models` list.
 */
var DEFAULT_OPENAI_MODELS = [
  // GPT family
  "gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo",
  // o1 family
  "o1", "o1-mini", "o1-preview"
];
|
|
1532
|
+
var OpenAIProvider = class extends Provider {
|
|
1533
|
+
apiKey;
|
|
1534
|
+
baseUrl;
|
|
1535
|
+
organization;
|
|
1536
|
+
project;
|
|
1537
|
+
timeout;
|
|
1538
|
+
constructor(options = {}) {
|
|
1539
|
+
const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
|
|
1540
|
+
if (!apiKey) {
|
|
1541
|
+
throw new Error("OpenAI API key is required");
|
|
1542
|
+
}
|
|
1543
|
+
const config = {
|
|
1544
|
+
name: "openai",
|
|
1545
|
+
apiKey,
|
|
1546
|
+
baseUrl: options.baseUrl || "https://api.openai.com/v1",
|
|
1547
|
+
models: options.models || DEFAULT_OPENAI_MODELS,
|
|
1548
|
+
timeout: options.timeout || 6e4,
|
|
1549
|
+
maxRetries: options.maxRetries || 3,
|
|
1550
|
+
headers: options.headers
|
|
1551
|
+
};
|
|
1552
|
+
super(config);
|
|
1553
|
+
this.apiKey = apiKey;
|
|
1554
|
+
this.baseUrl = config.baseUrl;
|
|
1555
|
+
this.organization = options.organization;
|
|
1556
|
+
this.project = options.project;
|
|
1557
|
+
this.timeout = config.timeout;
|
|
1558
|
+
}
|
|
1559
|
+
/**
|
|
1560
|
+
* Execute a chat completion request
|
|
1561
|
+
*/
|
|
1562
|
+
async chat(request) {
|
|
1563
|
+
const start = Date.now();
|
|
1564
|
+
try {
|
|
1565
|
+
const response = await this.makeRequest("/chat/completions", {
|
|
1566
|
+
method: "POST",
|
|
1567
|
+
body: JSON.stringify(this.transformRequest(request))
|
|
1568
|
+
});
|
|
1569
|
+
if (!response.ok) {
|
|
1570
|
+
const error = await this.parseError(response);
|
|
1571
|
+
throw error;
|
|
1572
|
+
}
|
|
1573
|
+
const data = await response.json();
|
|
1574
|
+
const result = this.transformResponse(data, request.model);
|
|
1575
|
+
this.updateHealth(true, Date.now() - start);
|
|
1576
|
+
return result;
|
|
1577
|
+
} catch (error) {
|
|
1578
|
+
this.updateHealth(false, Date.now() - start);
|
|
1579
|
+
throw this.wrapError(error);
|
|
1580
|
+
}
|
|
1581
|
+
}
|
|
1582
|
+
/**
|
|
1583
|
+
* Execute a streaming chat completion request
|
|
1584
|
+
*/
|
|
1585
|
+
async *chatStream(request) {
|
|
1586
|
+
const start = Date.now();
|
|
1587
|
+
try {
|
|
1588
|
+
const response = await this.makeRequest("/chat/completions", {
|
|
1589
|
+
method: "POST",
|
|
1590
|
+
body: JSON.stringify({
|
|
1591
|
+
...this.transformRequest(request),
|
|
1592
|
+
stream: true,
|
|
1593
|
+
stream_options: { include_usage: true }
|
|
1594
|
+
})
|
|
1595
|
+
});
|
|
1596
|
+
if (!response.ok) {
|
|
1597
|
+
const error = await this.parseError(response);
|
|
1598
|
+
throw error;
|
|
1599
|
+
}
|
|
1600
|
+
const reader = response.body?.getReader();
|
|
1601
|
+
if (!reader) {
|
|
1602
|
+
throw new ProviderError("No response body", "openai");
|
|
1603
|
+
}
|
|
1604
|
+
const decoder = new TextDecoder();
|
|
1605
|
+
let buffer = "";
|
|
1606
|
+
while (true) {
|
|
1607
|
+
const { done, value } = await reader.read();
|
|
1608
|
+
if (done) break;
|
|
1609
|
+
buffer += decoder.decode(value, { stream: true });
|
|
1610
|
+
const lines = buffer.split("\n");
|
|
1611
|
+
buffer = lines.pop() || "";
|
|
1612
|
+
for (const line of lines) {
|
|
1613
|
+
const trimmed = line.trim();
|
|
1614
|
+
if (!trimmed || trimmed === "data: [DONE]") continue;
|
|
1615
|
+
if (!trimmed.startsWith("data: ")) continue;
|
|
1616
|
+
try {
|
|
1617
|
+
const json = JSON.parse(trimmed.slice(6));
|
|
1618
|
+
yield this.transformChunk(json, request.model);
|
|
1619
|
+
} catch {
|
|
1620
|
+
}
|
|
1621
|
+
}
|
|
1622
|
+
}
|
|
1623
|
+
this.updateHealth(true, Date.now() - start);
|
|
1624
|
+
} catch (error) {
|
|
1625
|
+
this.updateHealth(false, Date.now() - start);
|
|
1626
|
+
throw this.wrapError(error);
|
|
1627
|
+
}
|
|
1628
|
+
}
|
|
1629
|
+
/**
|
|
1630
|
+
* Get model information
|
|
1631
|
+
*/
|
|
1632
|
+
getModelInfo(model) {
|
|
1633
|
+
if (!this.supportsModel(model)) {
|
|
1634
|
+
return null;
|
|
1635
|
+
}
|
|
1636
|
+
return getModelInfo(model, "openai");
|
|
1637
|
+
}
|
|
1638
|
+
/**
|
|
1639
|
+
* Make an HTTP request to the OpenAI API
|
|
1640
|
+
*/
|
|
1641
|
+
async makeRequest(path, options) {
|
|
1642
|
+
const headers = {
|
|
1643
|
+
"Content-Type": "application/json",
|
|
1644
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
1645
|
+
...this.config.headers
|
|
1646
|
+
};
|
|
1647
|
+
if (this.organization) {
|
|
1648
|
+
headers["OpenAI-Organization"] = this.organization;
|
|
1649
|
+
}
|
|
1650
|
+
if (this.project) {
|
|
1651
|
+
headers["OpenAI-Project"] = this.project;
|
|
1652
|
+
}
|
|
1653
|
+
const controller = new AbortController();
|
|
1654
|
+
const timeoutId = setTimeout(() => controller.abort(), this.timeout);
|
|
1655
|
+
try {
|
|
1656
|
+
return await fetch(`${this.baseUrl}${path}`, {
|
|
1657
|
+
...options,
|
|
1658
|
+
headers,
|
|
1659
|
+
signal: controller.signal
|
|
1660
|
+
});
|
|
1661
|
+
} finally {
|
|
1662
|
+
clearTimeout(timeoutId);
|
|
1663
|
+
}
|
|
1664
|
+
}
|
|
1665
|
+
/**
|
|
1666
|
+
* Transform gateway request to OpenAI format
|
|
1667
|
+
*/
|
|
1668
|
+
transformRequest(request) {
|
|
1669
|
+
const transformed = {
|
|
1670
|
+
model: request.model,
|
|
1671
|
+
messages: request.messages
|
|
1672
|
+
};
|
|
1673
|
+
if (request.temperature !== void 0) {
|
|
1674
|
+
transformed.temperature = request.temperature;
|
|
1675
|
+
}
|
|
1676
|
+
if (request.max_tokens !== void 0) {
|
|
1677
|
+
transformed.max_tokens = request.max_tokens;
|
|
1678
|
+
}
|
|
1679
|
+
if (request.top_p !== void 0) {
|
|
1680
|
+
transformed.top_p = request.top_p;
|
|
1681
|
+
}
|
|
1682
|
+
if (request.frequency_penalty !== void 0) {
|
|
1683
|
+
transformed.frequency_penalty = request.frequency_penalty;
|
|
1684
|
+
}
|
|
1685
|
+
if (request.presence_penalty !== void 0) {
|
|
1686
|
+
transformed.presence_penalty = request.presence_penalty;
|
|
1687
|
+
}
|
|
1688
|
+
if (request.stop !== void 0) {
|
|
1689
|
+
transformed.stop = request.stop;
|
|
1690
|
+
}
|
|
1691
|
+
if (request.tools !== void 0) {
|
|
1692
|
+
transformed.tools = request.tools;
|
|
1693
|
+
}
|
|
1694
|
+
if (request.tool_choice !== void 0) {
|
|
1695
|
+
transformed.tool_choice = request.tool_choice;
|
|
1696
|
+
}
|
|
1697
|
+
if (request.response_format !== void 0) {
|
|
1698
|
+
transformed.response_format = request.response_format;
|
|
1699
|
+
}
|
|
1700
|
+
if (request.seed !== void 0) {
|
|
1701
|
+
transformed.seed = request.seed;
|
|
1702
|
+
}
|
|
1703
|
+
if (request.user !== void 0) {
|
|
1704
|
+
transformed.user = request.user;
|
|
1705
|
+
}
|
|
1706
|
+
return transformed;
|
|
1707
|
+
}
|
|
1708
|
+
/**
|
|
1709
|
+
* Transform OpenAI response to gateway format
|
|
1710
|
+
*/
|
|
1711
|
+
transformResponse(data, model) {
|
|
1712
|
+
return {
|
|
1713
|
+
id: data.id || generateRequestId(),
|
|
1714
|
+
object: "chat.completion",
|
|
1715
|
+
created: data.created || Math.floor(Date.now() / 1e3),
|
|
1716
|
+
model: data.model || model,
|
|
1717
|
+
choices: data.choices,
|
|
1718
|
+
usage: data.usage,
|
|
1719
|
+
system_fingerprint: data.system_fingerprint
|
|
1720
|
+
};
|
|
1721
|
+
}
|
|
1722
|
+
/**
|
|
1723
|
+
* Transform OpenAI stream chunk to gateway format
|
|
1724
|
+
*/
|
|
1725
|
+
transformChunk(data, model) {
|
|
1726
|
+
return {
|
|
1727
|
+
id: data.id || generateRequestId(),
|
|
1728
|
+
object: "chat.completion.chunk",
|
|
1729
|
+
created: data.created || Math.floor(Date.now() / 1e3),
|
|
1730
|
+
model: data.model || model,
|
|
1731
|
+
choices: data.choices,
|
|
1732
|
+
system_fingerprint: data.system_fingerprint,
|
|
1733
|
+
usage: data.usage
|
|
1734
|
+
};
|
|
1735
|
+
}
|
|
1736
|
+
/**
|
|
1737
|
+
* Parse error response from OpenAI
|
|
1738
|
+
*/
|
|
1739
|
+
async parseError(response) {
|
|
1740
|
+
let message = `OpenAI API error: ${response.status}`;
|
|
1741
|
+
const retryable = response.status >= 500 || response.status === 429;
|
|
1742
|
+
try {
|
|
1743
|
+
const data = await response.json();
|
|
1744
|
+
if (data.error?.message) {
|
|
1745
|
+
message = data.error.message;
|
|
1746
|
+
}
|
|
1747
|
+
} catch {
|
|
1748
|
+
}
|
|
1749
|
+
return new ProviderError(message, "openai", void 0, retryable);
|
|
1750
|
+
}
|
|
1751
|
+
/**
|
|
1752
|
+
* Wrap unknown errors
|
|
1753
|
+
*/
|
|
1754
|
+
wrapError(error) {
|
|
1755
|
+
if (error instanceof ProviderError) {
|
|
1756
|
+
return error;
|
|
1757
|
+
}
|
|
1758
|
+
if (error instanceof Error) {
|
|
1759
|
+
const isTimeout = error.name === "AbortError" || error.message.includes("timeout");
|
|
1760
|
+
return new ProviderError(
|
|
1761
|
+
error.message,
|
|
1762
|
+
"openai",
|
|
1763
|
+
error,
|
|
1764
|
+
isTimeout
|
|
1765
|
+
// Timeouts are retryable
|
|
1766
|
+
);
|
|
1767
|
+
}
|
|
1768
|
+
return new ProviderError("Unknown error", "openai", void 0, true);
|
|
1769
|
+
}
|
|
1770
|
+
};
|
|
1771
|
+
|
|
1772
|
+
// src/providers/registry/AnthropicProvider.ts
|
|
1773
|
+
// Model identifiers the Anthropic provider accepts when the caller does not
// pass its own `models` list.
var DEFAULT_ANTHROPIC_MODELS = [
  "claude-3-5-sonnet-20241022",
  "claude-3-5-sonnet-latest",
  "claude-sonnet-4-20250514",
  "claude-3-5-haiku-20241022",
  "claude-3-opus-20240229",
  "claude-3-sonnet-20240229",
  "claude-3-haiku-20240307"
];

// Value sent in the `anthropic-version` request header.
var ANTHROPIC_API_VERSION = "2023-06-01";
|
|
1783
|
+
/**
 * Gateway provider for Anthropic's Messages API (`POST /v1/messages`).
 *
 * Accepts OpenAI-style chat-completion requests, converts them to the
 * Anthropic request format, and converts responses and stream events back
 * into OpenAI-style completions and chunks.
 *
 * Uses names defined elsewhere in this file: the `Provider` base class
 * (expected to supply `config`, `supportsModel` and `updateHealth`),
 * `ProviderError`, `generateRequestId`, `getModelInfo`,
 * `DEFAULT_ANTHROPIC_MODELS` and `ANTHROPIC_API_VERSION`.
 */
var AnthropicProvider = class extends Provider {
  apiKey;
  baseUrl;
  timeout;
  anthropicBeta;

  /**
   * @param options - Optional settings: `apiKey` (falls back to the
   *   `ANTHROPIC_API_KEY` environment variable), `baseUrl`, `models`,
   *   `timeout` in ms (default 60000), `maxRetries` (default 3), `headers`
   *   and `anthropicBeta` (array of beta flags).
   * @throws {Error} When no API key is given and none is in the environment.
   */
  constructor(options = {}) {
    const apiKey = options.apiKey || process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
      throw new Error("Anthropic API key is required");
    }
    const config = {
      name: "anthropic",
      apiKey,
      baseUrl: options.baseUrl || "https://api.anthropic.com",
      models: options.models || DEFAULT_ANTHROPIC_MODELS,
      timeout: options.timeout || 6e4,
      // `??` rather than `||` so an explicit 0 (no retries) is respected.
      maxRetries: options.maxRetries ?? 3,
      headers: options.headers
    };
    super(config);
    this.apiKey = apiKey;
    this.baseUrl = config.baseUrl;
    this.timeout = config.timeout;
    this.anthropicBeta = options.anthropicBeta;
  }

  /**
   * Execute a chat completion request.
   *
   * @param request - OpenAI-style chat-completion request.
   * @returns The completion in OpenAI-compatible shape.
   * @throws {ProviderError} On HTTP errors, timeouts and any other failure;
   *   health is updated with the outcome either way.
   */
  async chat(request) {
    const start = Date.now();
    try {
      const anthropicRequest = this.transformToAnthropic(request);
      const response = await this.makeRequest("/v1/messages", {
        method: "POST",
        body: JSON.stringify(anthropicRequest)
      });
      if (!response.ok) {
        throw await this.parseError(response);
      }
      const data = await response.json();
      const result = this.transformFromAnthropic(data, request.model);
      this.updateHealth(true, Date.now() - start);
      return result;
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    }
  }

  /**
   * Execute a streaming chat completion request.
   *
   * Yields OpenAI-style chunks for text deltas, tool-call starts and
   * tool-call argument deltas. The last chunk (on `message_stop`) carries the
   * mapped finish reason and the accumulated token usage.
   *
   * @param request - OpenAI-style chat-completion request.
   * @throws {ProviderError} On HTTP errors, a missing body, an `error` event
   *   inside the stream, timeouts and any other failure.
   */
  async *chatStream(request) {
    const start = Date.now();
    let reader;
    try {
      const anthropicRequest = this.transformToAnthropic(request);
      const response = await this.makeRequest("/v1/messages", {
        method: "POST",
        body: JSON.stringify({
          ...anthropicRequest,
          stream: true
        })
      });
      if (!response.ok) {
        throw await this.parseError(response);
      }
      reader = response.body?.getReader();
      if (!reader) {
        throw new ProviderError("No response body", "anthropic");
      }
      const requestId = generateRequestId();
      // Mutable bookkeeping shared by every line of this one stream.
      const state = {
        inputTokens: 0,
        outputTokens: 0,
        stopReason: null,
        // Anthropic content-block index -> OpenAI tool-call index.
        toolCallIndexes: new Map()
      };
      for await (const line of this.readLines(reader)) {
        const chunk = this.processStreamLine(line, request.model, requestId, state);
        if (chunk) {
          yield chunk;
        }
      }
      this.updateHealth(true, Date.now() - start);
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    } finally {
      // Runs on normal completion, on errors, and when the consumer stops
      // iterating early: cancel so the underlying stream is not left open.
      await reader?.cancel().catch(() => {});
    }
  }

  /**
   * Decode a byte stream into lines split on "\n".
   *
   * @param reader - Reader obtained from the response body.
   * @yields Each complete line, then whatever unterminated text remains once
   *   the stream ends.
   */
  async *readLines(reader) {
    const decoder = new TextDecoder();
    let buffer = "";
    while (true) {
      const { done, value } = await reader.read();
      // A final argument-less decode() flushes any buffered partial character.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // While streaming, the last element may be a partial line: hold it back.
      buffer = done ? "" : lines.pop() ?? "";
      yield* lines;
      if (done) {
        return;
      }
    }
  }

  /**
   * Handle one line of the event stream.
   *
   * @param line - Raw line as read from the stream.
   * @param model - Requested model name, copied onto the chunk.
   * @param requestId - Id shared by all chunks of this stream.
   * @param state - Per-stream bookkeeping (token counts, stop reason,
   *   tool-call index map); updated in place.
   * @returns The chunk to emit, or `null` when the line produces none.
   * @throws {ProviderError} When the line carries an `error` event.
   */
  processStreamLine(line, model, requestId, state) {
    const trimmed = line.trim();
    if (!trimmed.startsWith("data:")) {
      return null;
    }
    let event;
    try {
      event = JSON.parse(trimmed.slice(5));
    } catch {
      // Malformed payload: skip the line instead of failing the whole stream.
      return null;
    }
    if (event === null || typeof event !== "object") {
      return null;
    }
    if (event.type === "error") {
      const errorType = event.error?.type;
      // Same retry policy as parseError: server-side and rate-limit failures.
      const retryable = errorType === "overloaded_error" || errorType === "api_error" || errorType === "rate_limit_error";
      throw new ProviderError(
        event.error?.message || "Anthropic stream error",
        "anthropic",
        void 0,
        retryable
      );
    }
    if (event.type === "message_start" && event.message?.usage) {
      state.inputTokens = event.message.usage.input_tokens || 0;
    }
    if (event.type === "message_delta") {
      if (event.usage) {
        state.outputTokens = event.usage.output_tokens || 0;
      }
      if (event.delta?.stop_reason) {
        state.stopReason = event.delta.stop_reason;
      }
    }
    const chunk = this.transformStreamEvent(event, model, requestId, state.toolCallIndexes);
    if (chunk && event.type === "message_stop") {
      // Report why generation ended; fall back to "stop" when unknown.
      chunk.choices[0].finish_reason = this.mapStopReason(state.stopReason) ?? "stop";
      chunk.usage = {
        prompt_tokens: state.inputTokens,
        completion_tokens: state.outputTokens,
        total_tokens: state.inputTokens + state.outputTokens
      };
    }
    return chunk;
  }

  /**
   * Get model information.
   *
   * @param model - Model identifier to look up.
   * @returns The result of the module-level `getModelInfo(model, "anthropic")`
   *   helper, or `null` when `supportsModel` rejects the identifier.
   */
  getModelInfo(model) {
    if (!this.supportsModel(model)) {
      return null;
    }
    return getModelInfo(model, "anthropic");
  }

  /**
   * Make an HTTP request to the Anthropic API.
   *
   * @param path - Path appended verbatim to `this.baseUrl`.
   * @param options - Extra `fetch` options (method, body, ...). Any `headers`
   *   or `signal` entries are replaced by the ones built here.
   * @returns The `fetch` response. The abort timer is cleared as soon as
   *   `fetch` settles, so it does not cover reading the response body.
   */
  async makeRequest(path, options) {
    const headers = {
      "Content-Type": "application/json",
      "x-api-key": this.apiKey,
      "anthropic-version": ANTHROPIC_API_VERSION,
      // Spread last: caller-configured headers win on a name clash.
      ...this.config.headers
    };
    if (this.anthropicBeta && this.anthropicBeta.length > 0) {
      headers["anthropic-beta"] = this.anthropicBeta.join(",");
    }
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);
    try {
      return await fetch(`${this.baseUrl}${path}`, {
        ...options,
        headers,
        signal: controller.signal
      });
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Transform an OpenAI-compatible request to Anthropic format.
   *
   * System messages are lifted into the top-level `system` field,
   * `max_tokens` defaults to 4096, `stop` becomes `stop_sequences`, and
   * tools / `tool_choice` are translated. `tool_choice: "none"` is expressed
   * by not sending any tools.
   *
   * @param request - OpenAI-style chat-completion request.
   * @returns The request body for `/v1/messages` (without `stream`).
   */
  transformToAnthropic(request) {
    const { messages, systemPrompt } = this.extractSystemPrompt(
      request.messages
    );
    const transformed = {
      model: request.model,
      messages: messages.map((m) => this.transformMessage(m)),
      max_tokens: request.max_tokens || 4096
    };
    if (systemPrompt) {
      transformed.system = systemPrompt;
    }
    if (request.temperature !== void 0) {
      transformed.temperature = request.temperature;
    }
    if (request.top_p !== void 0) {
      transformed.top_p = request.top_p;
    }
    if (request.stop !== void 0) {
      transformed.stop_sequences = Array.isArray(request.stop) ? request.stop : [request.stop];
    }
    if (request.tools && request.tools.length > 0) {
      transformed.tools = request.tools.map((tool) => ({
        name: tool.function.name,
        description: tool.function.description,
        input_schema: tool.function.parameters || { type: "object" }
      }));
      if (request.tool_choice) {
        if (request.tool_choice === "auto") {
          transformed.tool_choice = { type: "auto" };
        } else if (request.tool_choice === "required") {
          transformed.tool_choice = { type: "any" };
        } else if (request.tool_choice === "none") {
          delete transformed.tools;
        } else if (typeof request.tool_choice === "object" && request.tool_choice.function) {
          transformed.tool_choice = {
            type: "tool",
            name: request.tool_choice.function.name
          };
        }
      }
    }
    return transformed;
  }

  /**
   * Extract the system prompt from a message list.
   *
   * @param messages - OpenAI-style messages.
   * @returns `messages` without the system entries, and `systemPrompt`: the
   *   system contents joined with newlines, or `null` when there are none.
   */
  extractSystemPrompt(messages) {
    const systemMessages = messages.filter((m) => m.role === "system");
    const otherMessages = messages.filter((m) => m.role !== "system");
    const systemPrompt = systemMessages.length > 0 ? systemMessages.map((m) => this.stringifyContent(m.content)).join("\n") : null;
    return { messages: otherMessages, systemPrompt };
  }

  /**
   * Render message content as a string.
   *
   * @param content - Message content; non-strings are JSON-encoded.
   */
  stringifyContent(content) {
    return typeof content === "string" ? content : JSON.stringify(content);
  }

  /**
   * Parse the JSON-encoded arguments of an OpenAI tool call.
   *
   * @param rawArguments - The `function.arguments` string.
   * @returns The parsed value; `{}` when the arguments are missing or blank,
   *   which is how a call without parameters may be encoded.
   * @throws {SyntaxError} When a non-blank string is not valid JSON.
   */
  parseToolArguments(rawArguments) {
    if (rawArguments == null) {
      return {};
    }
    if (typeof rawArguments === "string" && rawArguments.trim() === "") {
      return {};
    }
    return JSON.parse(rawArguments);
  }

  /**
   * Transform a single message to Anthropic format.
   *
   * Tool results become a user message holding a `tool_result` block;
   * assistant messages with tool calls become text plus `tool_use` blocks;
   * everything else keeps its role with stringified content.
   *
   * @param message - One non-system OpenAI-style message.
   */
  transformMessage(message) {
    if (message.role === "tool") {
      return {
        role: "user",
        content: [
          {
            type: "tool_result",
            tool_use_id: message.tool_call_id,
            content: this.stringifyContent(message.content)
          }
        ]
      };
    }
    // An empty tool_calls array is treated like no tool calls at all, so the
    // message is never sent with an empty content array.
    if (message.role === "assistant" && message.tool_calls?.length > 0) {
      const content = [];
      if (message.content) {
        content.push({
          type: "text",
          text: this.stringifyContent(message.content)
        });
      }
      for (const toolCall of message.tool_calls) {
        content.push({
          type: "tool_use",
          id: toolCall.id,
          name: toolCall.function.name,
          input: this.parseToolArguments(toolCall.function.arguments)
        });
      }
      return { role: "assistant", content };
    }
    return {
      role: message.role,
      content: this.stringifyContent(message.content)
    };
  }

  /**
   * Map an Anthropic `stop_reason` to an OpenAI `finish_reason`.
   *
   * @param stopReason - Value reported by Anthropic.
   * @returns "stop", "length", "tool_calls", or `null` for anything else.
   */
  mapStopReason(stopReason) {
    switch (stopReason) {
      case "end_turn":
      case "stop_sequence":
        return "stop";
      case "max_tokens":
        return "length";
      case "tool_use":
        return "tool_calls";
      default:
        return null;
    }
  }

  /**
   * Transform an Anthropic response to OpenAI-compatible format.
   *
   * @param data - Parsed JSON body of a `/v1/messages` response.
   * @param model - Requested model, used when the body carries no `model`.
   * @returns A `chat.completion` object with a single choice. Missing
   *   `content` is treated as empty and missing usage counts as 0.
   */
  transformFromAnthropic(data, model) {
    const blocks = Array.isArray(data.content) ? data.content : [];
    let textContent = "";
    const toolCalls = [];
    for (const block of blocks) {
      if (block.type === "text") {
        textContent += block.text;
      } else if (block.type === "tool_use") {
        toolCalls.push({
          id: block.id,
          type: "function",
          function: {
            name: block.name,
            arguments: JSON.stringify(block.input)
          }
        });
      }
    }
    const message = {
      role: "assistant",
      content: textContent || null
    };
    if (toolCalls.length > 0) {
      message.tool_calls = toolCalls;
    }
    const inputTokens = data.usage?.input_tokens ?? 0;
    const outputTokens = data.usage?.output_tokens ?? 0;
    return {
      id: data.id || generateRequestId(),
      object: "chat.completion",
      created: Math.floor(Date.now() / 1e3),
      model: data.model || model,
      choices: [
        {
          index: 0,
          message,
          finish_reason: this.mapStopReason(data.stop_reason)
        }
      ],
      usage: {
        prompt_tokens: inputTokens,
        completion_tokens: outputTokens,
        total_tokens: inputTokens + outputTokens
      }
    };
  }

  /**
   * Build an OpenAI-style stream chunk with a single choice.
   *
   * @param requestId - Id shared by all chunks of the stream.
   * @param model - Model name to report.
   * @param delta - The choice's `delta` object.
   * @param finishReason - The choice's `finish_reason`.
   */
  buildChunk(requestId, model, delta, finishReason) {
    return {
      id: requestId,
      object: "chat.completion.chunk",
      created: Math.floor(Date.now() / 1e3),
      model,
      choices: [
        {
          index: 0,
          delta,
          finish_reason: finishReason
        }
      ]
    };
  }

  /**
   * Transform an Anthropic stream event to an OpenAI-compatible chunk.
   *
   * @param event - Parsed stream event.
   * @param model - Model name to report on the chunk.
   * @param requestId - Id shared by all chunks of the stream.
   * @param toolCallIndexes - Map from Anthropic content-block index to the
   *   tool call's position among the tool calls of this message. Pass the
   *   same map for every event of a stream; it is updated when a `tool_use`
   *   block starts. Defaults to a fresh map, which places every tool call at
   *   index 0.
   * @returns A chunk for text deltas, tool-call starts, tool-call argument
   *   deltas and `message_stop`; `null` for every other event.
   */
  transformStreamEvent(event, model, requestId, toolCallIndexes = new Map()) {
    switch (event.type) {
      case "content_block_delta": {
        const delta = event.delta;
        if (delta?.type === "text_delta") {
          return this.buildChunk(requestId, model, { content: delta.text }, null);
        }
        if (delta?.type === "input_json_delta") {
          // Forward the argument fragment, addressed to the tool call that
          // was opened for this content block.
          return this.buildChunk(
            requestId,
            model,
            {
              tool_calls: [
                {
                  index: toolCallIndexes.get(event.index) ?? 0,
                  function: {
                    arguments: delta.partial_json ?? ""
                  }
                }
              ]
            },
            null
          );
        }
        return null;
      }
      case "content_block_start": {
        const contentBlock = event.content_block;
        if (contentBlock?.type !== "tool_use") {
          return null;
        }
        // Tool calls are numbered in the order they start, independently of
        // any text blocks that precede them.
        const toolCallIndex = toolCallIndexes.size;
        toolCallIndexes.set(event.index, toolCallIndex);
        return this.buildChunk(
          requestId,
          model,
          {
            tool_calls: [
              {
                index: toolCallIndex,
                id: contentBlock.id,
                type: "function",
                function: {
                  name: contentBlock.name,
                  arguments: ""
                }
              }
            ]
          },
          null
        );
      }
      case "message_stop":
        return this.buildChunk(requestId, model, {}, "stop");
      default:
        return null;
    }
  }

  /**
   * Parse an error response from Anthropic.
   *
   * @param response - The failed `fetch` response.
   * @returns A `ProviderError` whose message is the body's `error.message`
   *   when available, otherwise a generic message with the status code. It
   *   is flagged retryable for HTTP 429 and any 5xx status.
   */
  async parseError(response) {
    let message = `Anthropic API error: ${response.status}`;
    const retryable = response.status >= 500 || response.status === 429;
    try {
      const data = await response.json();
      if (data.error?.message) {
        message = data.error.message;
      }
    } catch {
      // Unreadable or non-JSON body: keep the generic status message.
    }
    return new ProviderError(message, "anthropic", void 0, retryable);
  }

  /**
   * Wrap unknown errors.
   *
   * @param error - Whatever was caught.
   * @returns The value itself when it already is a `ProviderError`. An
   *   `Error` is wrapped and marked retryable only when it looks like a
   *   timeout. Any other value becomes a retryable "Unknown error".
   */
  wrapError(error) {
    if (error instanceof ProviderError) {
      return error;
    }
    if (error instanceof Error) {
      const isTimeout = error.name === "AbortError" || error.message.includes("timeout");
      return new ProviderError(error.message, "anthropic", error, isTimeout);
    }
    return new ProviderError("Unknown error", "anthropic", void 0, true);
  }
};
|
|
2204
|
+
|
|
2205
|
+
// src/providers/registry/GoogleProvider.ts
|
|
2206
|
+
// Model identifiers the Google provider accepts when the caller does not
// pass its own `models` list.
var DEFAULT_GEMINI_MODELS = [
  "gemini-1.5-pro",
  "gemini-1.5-pro-latest",
  "gemini-1.5-flash",
  "gemini-1.5-flash-latest",
  "gemini-2.0-flash-exp",
  "gemini-pro"
];
|
|
2214
|
+
/**
 * Gateway provider for Google's Gemini `generateContent` /
 * `streamGenerateContent` endpoints.
 *
 * Accepts OpenAI-style chat-completion requests, converts them to the Gemini
 * request format, and converts responses and stream chunks back into
 * OpenAI-style completions and chunks.
 *
 * Uses names defined elsewhere in this file: the `Provider` base class
 * (expected to supply `config`, `supportsModel` and `updateHealth`),
 * `ProviderError`, `generateRequestId`, `getModelInfo` and
 * `DEFAULT_GEMINI_MODELS`.
 */
var GoogleProvider = class extends Provider {
  apiKey;
  baseUrl;
  timeout;

  /**
   * @param options - Optional settings: `apiKey` (falls back to the
   *   `GOOGLE_API_KEY` environment variable), `baseUrl`, `models`, `timeout`
   *   in ms (default 60000), `maxRetries` (default 3) and `headers`.
   * @throws {Error} When no API key is given and none is in the environment.
   */
  constructor(options = {}) {
    const apiKey = options.apiKey || process.env.GOOGLE_API_KEY;
    if (!apiKey) {
      throw new Error("Google API key is required");
    }
    const config = {
      name: "google",
      apiKey,
      baseUrl: options.baseUrl || "https://generativelanguage.googleapis.com/v1beta",
      models: options.models || DEFAULT_GEMINI_MODELS,
      timeout: options.timeout || 6e4,
      // `??` rather than `||` so an explicit 0 (no retries) is respected.
      maxRetries: options.maxRetries ?? 3,
      headers: options.headers
    };
    super(config);
    this.apiKey = apiKey;
    this.baseUrl = config.baseUrl;
    this.timeout = config.timeout;
  }

  /**
   * Execute a chat completion request.
   *
   * @param request - OpenAI-style chat-completion request.
   * @returns The completion in OpenAI-compatible shape.
   * @throws {ProviderError} On HTTP errors, an empty candidate list, timeouts
   *   and any other failure; health is updated with the outcome either way.
   */
  async chat(request) {
    const start = Date.now();
    try {
      const geminiRequest = this.transformToGemini(request);
      const endpoint = `/models/${request.model}:generateContent`;
      const response = await this.makeRequest(endpoint, {
        method: "POST",
        body: JSON.stringify(geminiRequest)
      });
      if (!response.ok) {
        throw await this.parseError(response);
      }
      const data = await response.json();
      const result = this.transformFromGemini(data, request.model);
      this.updateHealth(true, Date.now() - start);
      return result;
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    }
  }

  /**
   * Execute a streaming chat completion request.
   *
   * Yields OpenAI-style chunks for text and function calls, then one closing
   * chunk carrying the finish reason and the last reported token usage.
   *
   * @param request - OpenAI-style chat-completion request.
   * @throws {ProviderError} On HTTP errors, a missing body, timeouts and any
   *   other failure.
   */
  async *chatStream(request) {
    const start = Date.now();
    let reader;
    try {
      const geminiRequest = this.transformToGemini(request);
      // `alt=sse` asks for server-sent events, i.e. one complete JSON object
      // per `data:` line, which is what the line-based parser below needs.
      const endpoint = `/models/${request.model}:streamGenerateContent?alt=sse`;
      const response = await this.makeRequest(endpoint, {
        method: "POST",
        body: JSON.stringify(geminiRequest)
      });
      if (!response.ok) {
        throw await this.parseError(response);
      }
      reader = response.body?.getReader();
      if (!reader) {
        throw new ProviderError("No response body", "google");
      }
      const requestId = generateRequestId();
      // Mutable bookkeeping shared by every line of this one stream.
      const state = {
        inputTokens: 0,
        outputTokens: 0,
        toolCallCount: 0,
        finishReason: null
      };
      for await (const line of this.readLines(reader)) {
        const chunk = this.processStreamLine(line, request.model, requestId, state);
        if (chunk) {
          yield chunk;
        }
      }
      yield {
        id: requestId,
        object: "chat.completion.chunk",
        created: Math.floor(Date.now() / 1e3),
        model: request.model,
        choices: [
          {
            index: 0,
            delta: {},
            // Fall back to "stop" when no finish reason could be determined.
            finish_reason: this.mapFinishReason(state.finishReason, state.toolCallCount > 0) ?? "stop"
          }
        ],
        usage: {
          prompt_tokens: state.inputTokens,
          completion_tokens: state.outputTokens,
          total_tokens: state.inputTokens + state.outputTokens
        }
      };
      this.updateHealth(true, Date.now() - start);
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    } finally {
      // Runs on normal completion, on errors, and when the consumer stops
      // iterating early: cancel so the underlying stream is not left open.
      await reader?.cancel().catch(() => {});
    }
  }

  /**
   * Decode a byte stream into lines split on "\n".
   *
   * @param reader - Reader obtained from the response body.
   * @yields Each complete line, then whatever unterminated text remains once
   *   the stream ends.
   */
  async *readLines(reader) {
    const decoder = new TextDecoder();
    let buffer = "";
    while (true) {
      const { done, value } = await reader.read();
      // A final argument-less decode() flushes any buffered partial character.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // While streaming, the last element may be a partial line: hold it back.
      buffer = done ? "" : lines.pop() ?? "";
      yield* lines;
      if (done) {
        return;
      }
    }
  }

  /**
   * Handle one line of the response stream.
   *
   * Accepts both `data: {json}` lines and a bare JSON object on a line,
   * optionally preceded or followed by a comma.
   *
   * @param line - Raw line as read from the stream.
   * @param model - Requested model name, copied onto the chunk.
   * @param requestId - Id shared by all chunks of this stream.
   * @param state - Per-stream bookkeeping (token counts, tool-call count,
   *   last finish reason); updated in place.
   * @returns The chunk to emit, or `null` when the line produces none.
   */
  processStreamLine(line, model, requestId, state) {
    let payload = line.trim();
    if (payload.startsWith("data:")) {
      payload = payload.slice(5).trim();
    }
    if (payload.startsWith(",")) {
      payload = payload.slice(1).trim();
    }
    if (payload.endsWith(",")) {
      payload = payload.slice(0, -1).trim();
    }
    if (!payload.startsWith("{")) {
      return null;
    }
    let data;
    try {
      data = JSON.parse(payload);
    } catch {
      // Incomplete or malformed object: skip the line.
      return null;
    }
    if (data.usageMetadata) {
      state.inputTokens = data.usageMetadata.promptTokenCount || 0;
      state.outputTokens = data.usageMetadata.candidatesTokenCount || 0;
    }
    const finishReason = data.candidates?.[0]?.finishReason;
    if (finishReason) {
      state.finishReason = finishReason;
    }
    const chunk = this.transformStreamChunk(data, model, requestId, state.toolCallCount);
    // Keep tool-call indexes increasing across chunks.
    state.toolCallCount += chunk?.choices[0].delta.tool_calls?.length ?? 0;
    return chunk;
  }

  /**
   * Get model information.
   *
   * @param model - Model identifier to look up.
   * @returns The result of the module-level `getModelInfo(model, "google")`
   *   helper, or `null` when `supportsModel` rejects the identifier.
   */
  getModelInfo(model) {
    if (!this.supportsModel(model)) {
      return null;
    }
    return getModelInfo(model, "google");
  }

  /**
   * Make an HTTP request to the Google API.
   *
   * @param path - Path appended verbatim to `this.baseUrl`; it may already
   *   contain a query string.
   * @param options - Extra `fetch` options (method, body, ...). Any `headers`
   *   or `signal` entries are replaced by the ones built here.
   * @returns The `fetch` response. The abort timer is cleared as soon as
   *   `fetch` settles, so it does not cover reading the response body.
   */
  async makeRequest(path, options) {
    // Append the key as a properly encoded query parameter, extending an
    // existing query string instead of starting a second one.
    const separator = path.includes("?") ? "&" : "?";
    const url = `${this.baseUrl}${path}${separator}key=${encodeURIComponent(this.apiKey)}`;
    const headers = {
      "Content-Type": "application/json",
      // Spread last: caller-configured headers win on a name clash.
      ...this.config.headers
    };
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);
    try {
      return await fetch(url, {
        ...options,
        headers,
        signal: controller.signal
      });
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Transform an OpenAI-compatible request to Gemini format.
   *
   * Sampling options go into `generationConfig`, system messages into
   * `systemInstruction`, and tools into a single `functionDeclarations` list.
   *
   * @param request - OpenAI-style chat-completion request.
   * @returns The request body for the Gemini content endpoints.
   */
  transformToGemini(request) {
    const contents = this.transformMessages(request.messages);
    const transformed = {
      contents
    };
    const generationConfig = {};
    if (request.temperature !== void 0) {
      generationConfig.temperature = request.temperature;
    }
    if (request.max_tokens !== void 0) {
      generationConfig.maxOutputTokens = request.max_tokens;
    }
    if (request.top_p !== void 0) {
      generationConfig.topP = request.top_p;
    }
    if (request.stop !== void 0) {
      generationConfig.stopSequences = Array.isArray(request.stop) ? request.stop : [request.stop];
    }
    if (Object.keys(generationConfig).length > 0) {
      transformed.generationConfig = generationConfig;
    }
    const systemMessages = request.messages.filter((m) => m.role === "system");
    if (systemMessages.length > 0) {
      const systemText = systemMessages.map((m) => this.stringifyContent(m.content)).join("\n");
      transformed.systemInstruction = { parts: [{ text: systemText }] };
    }
    if (request.tools && request.tools.length > 0) {
      transformed.tools = [
        {
          functionDeclarations: request.tools.map((tool) => ({
            name: tool.function.name,
            description: tool.function.description,
            parameters: tool.function.parameters || { type: "object" }
          }))
        }
      ];
    }
    return transformed;
  }

  /**
   * Render message content as a string.
   *
   * @param content - Message content; non-strings are JSON-encoded.
   */
  stringifyContent(content) {
    return typeof content === "string" ? content : JSON.stringify(content);
  }

  /**
   * Parse the JSON-encoded arguments of an OpenAI tool call.
   *
   * @param rawArguments - The `function.arguments` string.
   * @returns The parsed value; `{}` when the arguments are missing or blank,
   *   which is how a call without parameters may be encoded.
   * @throws {SyntaxError} When a non-blank string is not valid JSON.
   */
  parseToolArguments(rawArguments) {
    if (rawArguments == null) {
      return {};
    }
    if (typeof rawArguments === "string" && rawArguments.trim() === "") {
      return {};
    }
    return JSON.parse(rawArguments);
  }

  /**
   * Transform messages to Gemini format.
   *
   * System messages are skipped (they are handled by `transformToGemini`).
   * Assistant messages map to role "model", everything else to "user".
   * A tool result that has a `tool_call_id` is sent as a `functionResponse`
   * part only, named after the function that was called.
   *
   * @param messages - OpenAI-style messages.
   * @returns Gemini `contents` entries; messages yielding no parts are
   *   omitted.
   */
  transformMessages(messages) {
    // tool_call_id -> function name, learned from earlier assistant turns so
    // a tool result can be attributed to the function it answers.
    const toolNamesById = new Map();
    const contents = [];
    for (const message of messages) {
      if (message.role === "system") continue;
      const role = message.role === "assistant" ? "model" : "user";
      const parts = [];
      const isToolResult = message.role === "tool" && Boolean(message.tool_call_id);
      if (isToolResult) {
        parts.push({
          functionResponse: {
            name: message.name || toolNamesById.get(message.tool_call_id) || "tool_result",
            response: {
              result: this.stringifyContent(message.content)
            }
          }
        });
      } else if (message.content) {
        parts.push({
          text: this.stringifyContent(message.content)
        });
      }
      if (message.tool_calls) {
        for (const toolCall of message.tool_calls) {
          toolNamesById.set(toolCall.id, toolCall.function.name);
          parts.push({
            functionCall: {
              name: toolCall.function.name,
              args: this.parseToolArguments(toolCall.function.arguments)
            }
          });
        }
      }
      if (parts.length > 0) {
        contents.push({ role, parts });
      }
    }
    return contents;
  }

  /**
   * Convert a Gemini `functionCall` part into an OpenAI tool call.
   *
   * @param functionCall - The part's `functionCall` object.
   * @returns A tool call with a locally generated `call_...` id and the
   *   arguments JSON-encoded (`"{}"` when Gemini sent none).
   */
  toToolCall(functionCall) {
    return {
      // Gemini supplies no call id here, so one is made up locally.
      id: `call_${Math.random().toString(36).substring(2, 11)}`,
      type: "function",
      function: {
        name: functionCall.name,
        arguments: JSON.stringify(functionCall.args ?? {})
      }
    };
  }

  /**
   * Map a Gemini `finishReason` to an OpenAI `finish_reason`.
   *
   * @param finishReason - Value reported on the candidate.
   * @param hasToolCalls - Whether the response contained function calls.
   * @returns "length" for MAX_TOKENS; otherwise "tool_calls" when function
   *   calls were produced; otherwise "stop" for STOP; `null` for the rest.
   */
  mapFinishReason(finishReason, hasToolCalls) {
    if (finishReason === "MAX_TOKENS") {
      return "length";
    }
    // Checked before STOP so a response that ends with function calls is
    // reported as such.
    if (hasToolCalls) {
      return "tool_calls";
    }
    if (finishReason === "STOP") {
      return "stop";
    }
    return null;
  }

  /**
   * Transform a Gemini response to OpenAI-compatible format.
   *
   * @param data - Parsed JSON body of a `generateContent` response.
   * @param model - Model name to report on the result.
   * @returns A `chat.completion` object built from the first candidate.
   * @throws {ProviderError} When the response has no candidates.
   */
  transformFromGemini(data, model) {
    const candidate = data.candidates?.[0];
    if (!candidate) {
      throw new ProviderError("No response candidates", "google");
    }
    const parts = candidate.content?.parts || [];
    let textContent = "";
    const toolCalls = [];
    for (const part of parts) {
      if (part.text) {
        textContent += part.text;
      }
      if (part.functionCall) {
        toolCalls.push(this.toToolCall(part.functionCall));
      }
    }
    const message = {
      role: "assistant",
      content: textContent || null
    };
    if (toolCalls.length > 0) {
      message.tool_calls = toolCalls;
    }
    const usageMetadata = data.usageMetadata;
    const usage = {
      prompt_tokens: usageMetadata?.promptTokenCount || 0,
      completion_tokens: usageMetadata?.candidatesTokenCount || 0,
      total_tokens: usageMetadata?.totalTokenCount || 0
    };
    return {
      id: generateRequestId(),
      object: "chat.completion",
      created: Math.floor(Date.now() / 1e3),
      model,
      choices: [
        {
          index: 0,
          message,
          finish_reason: this.mapFinishReason(candidate.finishReason, toolCalls.length > 0)
        }
      ],
      usage
    };
  }

  /**
   * Transform a Gemini stream chunk to OpenAI-compatible format.
   *
   * @param data - Parsed JSON object of one stream chunk.
   * @param model - Model name to report on the chunk.
   * @param requestId - Id shared by all chunks of the stream.
   * @param toolCallOffset - Number of tool calls already emitted on this
   *   stream; the tool calls of this chunk are indexed from there.
   * @returns A chunk whose delta holds the chunk's text and/or function
   *   calls, or `null` when the first candidate has neither.
   */
  transformStreamChunk(data, model, requestId, toolCallOffset = 0) {
    const candidate = data.candidates?.[0];
    if (!candidate) {
      return null;
    }
    const parts = candidate.content?.parts || [];
    let textContent = "";
    const toolCalls = [];
    for (const part of parts) {
      if (part.text) {
        textContent += part.text;
      }
      if (part.functionCall) {
        toolCalls.push({
          index: toolCallOffset + toolCalls.length,
          ...this.toToolCall(part.functionCall)
        });
      }
    }
    if (!textContent && toolCalls.length === 0) {
      return null;
    }
    const delta = {};
    if (textContent) {
      delta.content = textContent;
    }
    if (toolCalls.length > 0) {
      delta.tool_calls = toolCalls;
    }
    return {
      id: requestId,
      object: "chat.completion.chunk",
      created: Math.floor(Date.now() / 1e3),
      model,
      choices: [
        {
          index: 0,
          delta,
          finish_reason: null
        }
      ]
    };
  }

  /**
   * Parse an error response from Google.
   *
   * @param response - The failed `fetch` response.
   * @returns A `ProviderError` whose message is the body's `error.message`
   *   when available, otherwise a generic message with the status code. It
   *   is flagged retryable for HTTP 429 and any 5xx status.
   */
  async parseError(response) {
    let message = `Google API error: ${response.status}`;
    const retryable = response.status >= 500 || response.status === 429;
    try {
      const data = await response.json();
      if (data.error?.message) {
        message = data.error.message;
      }
    } catch {
      // Unreadable or non-JSON body: keep the generic status message.
    }
    return new ProviderError(message, "google", void 0, retryable);
  }

  /**
   * Wrap unknown errors.
   *
   * @param error - Whatever was caught.
   * @returns The value itself when it already is a `ProviderError`. An
   *   `Error` is wrapped and marked retryable only when it looks like a
   *   timeout. Any other value becomes a retryable "Unknown error".
   */
  wrapError(error) {
    if (error instanceof ProviderError) {
      return error;
    }
    if (error instanceof Error) {
      const isTimeout = error.name === "AbortError" || error.message.includes("timeout");
      return new ProviderError(error.message, "google", error, isTimeout);
    }
    return new ProviderError("Unknown error", "google", void 0, true);
  }
};
|
|
2588
|
+
/**
 * OpenAI-compatible gateway that routes chat completion requests across the
 * configured providers, with optional response caching, retry on failure and
 * in-memory metrics.
 *
 * Events emitted: "request:start", "request:complete", "request:error" and
 * "provider:unhealthy".
 *
 * Metrics convention used by this class: `requests.total` counts every
 * finished non-streaming request, split into `successful` (served by a
 * provider), `cached` (served from the cache) and `failed`.
 */
var Gateway = class extends EventEmitter {
  _config;
  registry;
  router;
  healthMonitor;
  cache;
  logger;
  metrics;
  /**
   * Chat completions API interface (OpenAI-compatible)
   */
  chat = {
    completions: {
      create: this.createCompletion.bind(this)
    }
  };
  /**
   * @param {object} config - Gateway configuration. Reads `providers`,
   *   `routing`, `cache` and `telemetry.logging.level`.
   */
  constructor(config) {
    super();
    this._config = config;
    this.logger = pino({
      level: config.telemetry?.logging?.level || "info",
      transport: process.env.NODE_ENV !== "production" ? { target: "pino-pretty" } : void 0
    });
    this.registry = new ProviderRegistry();
    for (const providerConfig of config.providers) {
      const provider = this.createProvider(providerConfig);
      // createProvider() returns null for unknown provider names.
      if (provider) {
        this.registry.register(provider);
      }
    }
    this.router = this.createRouter(config);
    this.healthMonitor = new HealthMonitor({
      checkInterval: 6e4,
      unhealthyThreshold: 3,
      degradedThreshold: 1,
      circuitBreaker: {
        failureThreshold: 5,
        successThreshold: 3,
        timeout: 3e4
      }
    });
    if (config.cache?.enabled) {
      this.cache = new LRUCache({
        max: config.cache.maxEntries || 1e3,
        // config.cache.ttl is multiplied by 1000 before being handed to the cache.
        ttl: (config.cache.ttl || 3600) * 1e3
      });
    } else {
      this.cache = null;
    }
    this.metrics = this.createInitialMetrics();
    this.healthMonitor.on("unhealthy", (provider) => {
      this.logger.warn({ provider }, "Provider marked unhealthy");
      this.emit("provider:unhealthy", provider);
    });
    this.healthMonitor.on("circuit-open", (provider) => {
      this.logger.warn({ provider }, "Circuit breaker opened");
    });
  }
  /**
   * Create a chat completion (main API).
   *
   * @param {object} request - OpenAI-style chat completion request.
   * @returns {Promise<object|AsyncGenerator>} The completion response, or an
   *   async generator of chunks when `request.stream` is truthy.
   * @throws {ValidationError} When the request fails validateRequest().
   */
  async createCompletion(request) {
    this.validateRequest(request);
    if (request.stream) {
      return this.createStreamingCompletion(request);
    }
    return this.createNonStreamingCompletion(request);
  }
  /**
   * Non-streaming completion: serve from cache when possible, otherwise try
   * up to three routed providers.
   *
   * A failed request increments `requests.failed` and emits "request:error"
   * exactly once, from the outer catch.
   *
   * @param {object} request - Validated chat completion request.
   * @returns {Promise<object>} Provider response extended with `_gateway`.
   * @throws The last error seen, or a non-retryable ProviderError immediately.
   */
  async createNonStreamingCompletion(request) {
    const requestId = request._gateway?.requestId || generateRequestId();
    const start = Date.now();
    this.emit("request:start", {
      requestId,
      model: request.model
    });
    try {
      const cacheable = Boolean(this.cache) && request._gateway?.cachePolicy !== "no-cache";
      // The key is computed once and reused for both lookup and store.
      const cacheKey = cacheable ? hashRequest(request) : void 0;
      if (cacheable) {
        const cached = this.cache.get(cacheKey);
        if (cached) {
          this.metrics.requests.total++;
          this.metrics.requests.cached++;
          this.metrics.cache.hits++;
          this.updateCacheHitRate();
          this.logger.debug({ requestId, cacheKey }, "Cache hit");
          const response = {
            ...cached,
            _gateway: {
              ...cached._gateway,
              cached: true,
              cacheKey,
              latencyMs: Date.now() - start
            }
          };
          this.emit("request:complete", {
            requestId,
            provider: cached._gateway?.provider || "cache",
            model: cached.model,
            latencyMs: Date.now() - start,
            cost: 0,
            cached: true,
            tokens: { input: 0, output: 0 }
          });
          return response;
        }
        this.metrics.cache.misses++;
        // Keep hitRate current even if the request later fails.
        this.updateCacheHitRate();
      }
      const routingContext = {
        excludeProviders: request._gateway?.excludeProviders,
        preferredProvider: request._gateway?.preferredProvider,
        maxCost: request._gateway?.maxCost,
        maxLatency: request._gateway?.maxLatency
      };
      const maxAttempts = 3;
      let lastError = null;
      for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        const decision = this.router.route(request, this.registry, {
          ...routingContext,
          // NOTE(review): provider/model are sent blank here; confirm whether
          // the router expects the real values of the previous attempt.
          previousAttempts: attempt > 1 ? [{ provider: "", model: "", error: lastError?.message }] : void 0
        });
        const provider = this.registry.get(decision.provider);
        if (!provider) {
          lastError = new GatewayError(
            `Provider not found: ${decision.provider}`,
            "PROVIDER_NOT_FOUND",
            500
          );
          continue;
        }
        if (!this.healthMonitor.isRequestAllowed(decision.provider)) {
          this.logger.debug(
            { provider: decision.provider },
            "Circuit breaker open, skipping"
          );
          routingContext.excludeProviders = [
            ...routingContext.excludeProviders || [],
            decision.provider
          ];
          continue;
        }
        // Taken outside the try so failures report this attempt's own
        // latency rather than the time since the request began.
        const attemptStart = Date.now();
        try {
          const providerRequest = {
            ...request,
            model: decision.model
          };
          const response = await provider.chat(providerRequest);
          const latencyMs = Date.now() - attemptStart;
          this.healthMonitor.recordRequest(decision.provider, true, latencyMs);
          const cost = calculateCost(decision.model, response.usage);
          this.updateMetrics(decision, response, latencyMs, cost);
          const gatewayResponse = {
            ...response,
            _gateway: {
              provider: decision.provider,
              originalModel: request.model,
              latencyMs,
              cost,
              cached: false,
              retries: attempt - 1,
              routingDecision: decision
            }
          };
          if (cacheable) {
            this.cache.set(cacheKey, gatewayResponse);
          }
          this.emit("request:complete", {
            requestId,
            provider: decision.provider,
            model: decision.model,
            latencyMs: Date.now() - start,
            cost,
            cached: false,
            tokens: {
              input: response.usage.prompt_tokens,
              output: response.usage.completion_tokens
            }
          });
          return gatewayResponse;
        } catch (error) {
          lastError = error instanceof Error ? error : new Error(String(error));
          this.healthMonitor.recordRequest(
            decision.provider,
            false,
            Date.now() - attemptStart
          );
          if (error instanceof ProviderError && !error.retryable) {
            throw error;
          }
          this.logger.warn(
            {
              provider: decision.provider,
              error: lastError.message,
              attempt
            },
            "Request failed, retrying"
          );
          routingContext.excludeProviders = [
            ...routingContext.excludeProviders || [],
            decision.provider
          ];
        }
      }
      // Failure accounting happens once, in the catch below.
      throw lastError || new GatewayError("All attempts failed", "ALL_ATTEMPTS_FAILED", 502);
    } catch (error) {
      this.metrics.requests.total++;
      this.metrics.requests.failed++;
      this.emit("request:error", {
        requestId,
        error: error instanceof Error ? error : new Error(String(error))
      });
      throw error;
    }
  }
  /**
   * Streaming completion: route once and relay the provider's chunks.
   *
   * Token counts are taken from the last chunk that carries `usage`.
   *
   * @param {object} request - Validated chat completion request.
   * @yields {object} Chunks exactly as produced by the provider.
   * @throws {GatewayError} When the routed provider is not registered; any
   *   provider error is re-thrown after "request:error" is emitted.
   */
  async *createStreamingCompletion(request) {
    const requestId = request._gateway?.requestId || generateRequestId();
    const start = Date.now();
    this.emit("request:start", { requestId, model: request.model });
    const decision = this.router.route(request, this.registry, {
      excludeProviders: request._gateway?.excludeProviders,
      preferredProvider: request._gateway?.preferredProvider
    });
    const provider = this.registry.get(decision.provider);
    if (!provider) {
      throw new GatewayError(
        `Provider not found: ${decision.provider}`,
        "PROVIDER_NOT_FOUND",
        500
      );
    }
    const providerRequest = { ...request, model: decision.model };
    try {
      let totalInputTokens = 0;
      let totalOutputTokens = 0;
      for await (const chunk of provider.chatStream(providerRequest)) {
        if (chunk.usage) {
          totalInputTokens = chunk.usage.prompt_tokens;
          totalOutputTokens = chunk.usage.completion_tokens;
        }
        yield chunk;
      }
      const latencyMs = Date.now() - start;
      const cost = calculateCost(decision.model, {
        prompt_tokens: totalInputTokens,
        completion_tokens: totalOutputTokens,
        total_tokens: totalInputTokens + totalOutputTokens
      });
      this.healthMonitor.recordRequest(decision.provider, true, latencyMs);
      this.emit("request:complete", {
        requestId,
        provider: decision.provider,
        model: decision.model,
        latencyMs,
        cost,
        cached: false,
        tokens: { input: totalInputTokens, output: totalOutputTokens }
      });
    } catch (error) {
      this.healthMonitor.recordRequest(
        decision.provider,
        false,
        Date.now() - start
      );
      this.emit("request:error", {
        requestId,
        provider: decision.provider,
        error: error instanceof Error ? error : new Error(String(error))
      });
      throw error;
    }
  }
  /**
   * Validate a request.
   *
   * @param {object} request - Candidate chat completion request.
   * @throws {ValidationError} When the model is missing, `messages` is not a
   *   non-empty array, or a message has a missing or unsupported role.
   */
  validateRequest(request) {
    if (!request?.model) {
      throw new ValidationError("Model is required");
    }
    if (!Array.isArray(request.messages) || request.messages.length === 0) {
      throw new ValidationError(
        "Messages array is required and cannot be empty"
      );
    }
    for (const message of request.messages) {
      // Optional chaining turns a null/undefined entry into a validation
      // error instead of a TypeError.
      if (!message?.role) {
        throw new ValidationError("Message role is required");
      }
      if (!["system", "user", "assistant", "tool"].includes(message.role)) {
        throw new ValidationError(`Invalid message role: ${message.role}`);
      }
    }
  }
  /**
   * Create a provider instance from config.
   *
   * @param {object} config - Provider entry with `name`, `apiKey`, `baseUrl`,
   *   `models` and `timeout`.
   * @returns {object|null} The provider, or null (after a warning) when
   *   `config.name` is not "openai", "anthropic" or "google".
   */
  createProvider(config) {
    switch (config.name) {
      case "openai":
        return new OpenAIProvider({
          apiKey: config.apiKey,
          baseUrl: config.baseUrl,
          models: config.models,
          timeout: config.timeout
        });
      case "anthropic":
        return new AnthropicProvider({
          apiKey: config.apiKey,
          baseUrl: config.baseUrl,
          models: config.models,
          timeout: config.timeout
        });
      case "google":
        return new GoogleProvider({
          apiKey: config.apiKey,
          baseUrl: config.baseUrl,
          models: config.models,
          timeout: config.timeout
        });
      default:
        this.logger.warn({ provider: config.name }, "Unknown provider type");
        return null;
    }
  }
  /**
   * Create router from config.
   *
   * @param {object} config - Gateway configuration; reads `routing.strategy`,
   *   `routing.fallbackChain` and `routing.weights`.
   * @returns {Router} Router using the selected strategy; unknown or missing
   *   strategy names fall back to round-robin.
   */
  createRouter(config) {
    const strategyName = config.routing?.strategy || "round-robin";
    let strategy;
    switch (strategyName) {
      case "failover":
        strategy = new FailoverStrategy({
          chain: config.routing?.fallbackChain || [
            "openai",
            "anthropic",
            "google"
          ]
        });
        break;
      case "cost-optimized":
        strategy = new CostOptimizedStrategy();
        break;
      case "latency-optimized":
        strategy = new LatencyOptimizedStrategy();
        break;
      case "round-robin":
      default:
        strategy = new RoundRobinStrategy({
          weights: config.routing?.weights
        });
        break;
    }
    return new Router(strategy, {
      fallbackChain: config.routing?.fallbackChain
    });
  }
  /**
   * Create initial metrics object.
   *
   * @returns {object} Zeroed metrics structure.
   */
  createInitialMetrics() {
    return {
      requests: { total: 0, successful: 0, failed: 0, cached: 0 },
      latency: { avg: 0, p50: 0, p95: 0, p99: 0 },
      tokens: { input: 0, output: 0, total: 0 },
      cost: { total: 0, byProvider: {}, byModel: {} },
      cache: { hits: 0, misses: 0, hitRate: 0 },
      providers: {}
    };
  }
  /**
   * Update metrics after a successful provider call.
   *
   * @param {{provider: string, model: string}} decision - Routing decision.
   * @param {{usage: object}} response - Provider response with token usage.
   * @param {number} latencyMs - Provider call latency in milliseconds.
   * @param {number} cost - Cost attributed to the call.
   */
  updateMetrics(decision, response, latencyMs, cost) {
    this.metrics.requests.total++;
    this.metrics.requests.successful++;
    // Running mean over successful requests only.
    this.metrics.latency.avg = (this.metrics.latency.avg * (this.metrics.requests.successful - 1) + latencyMs) / this.metrics.requests.successful;
    this.metrics.tokens.input += response.usage.prompt_tokens;
    this.metrics.tokens.output += response.usage.completion_tokens;
    this.metrics.tokens.total += response.usage.total_tokens;
    this.metrics.cost.total += cost;
    this.metrics.cost.byProvider[decision.provider] = (this.metrics.cost.byProvider[decision.provider] || 0) + cost;
    this.metrics.cost.byModel[decision.model] = (this.metrics.cost.byModel[decision.model] || 0) + cost;
    this.updateCacheHitRate();
    this.metrics.providers = this.registry.getHealthStatus();
  }
  /**
   * Update cache hit rate metric (hits / (hits + misses), 0 when unused).
   */
  updateCacheHitRate() {
    const totalCacheOps = this.metrics.cache.hits + this.metrics.cache.misses;
    this.metrics.cache.hitRate = totalCacheOps > 0 ? this.metrics.cache.hits / totalCacheOps : 0;
  }
  /**
   * Get current metrics.
   *
   * @returns {object} Shallow copy; nested objects are shared with the
   *   gateway's live metrics.
   */
  getMetrics() {
    return { ...this.metrics };
  }
  /**
   * Get gateway configuration.
   *
   * @returns {object} Shallow copy of the configuration passed in.
   */
  getConfig() {
    return { ...this._config };
  }
  /**
   * Get provider registry
   */
  getRegistry() {
    return this.registry;
  }
  /**
   * Get router
   */
  getRouter() {
    return this.router;
  }
  /**
   * Check health of all providers.
   *
   * @returns {Promise<Object<string, boolean>>} Provider name mapped to
   *   whether its reported status is exactly "healthy".
   */
  async checkHealth() {
    const healthStatus = await this.registry.checkHealth();
    const result = {};
    for (const [name, health] of Object.entries(healthStatus)) {
      result[name] = health.status === "healthy";
    }
    return result;
  }
  /**
   * Shut down the gateway: stop registry health checks and clear the cache.
   */
  shutdown() {
    this.registry.stopHealthChecks();
    this.cache?.clear();
    this.logger.info("Gateway shut down");
  }
};
|
|
3032
|
+
/**
 * Build the Hono application that exposes a gateway over HTTP.
 *
 * Routes (all prefixed with `basePath`): GET /health, GET /metrics,
 * GET /v1/models, POST /v1/chat/completions, POST /v1/completions (always
 * rejected as deprecated) and a catch-all 404.
 *
 * @param {object} options
 * @param {object} options.gateway - Gateway instance to serve.
 * @param {string} [options.basePath=""] - Prefix for every route.
 * @param {object} [options.cors] - When set, enables CORS using its
 *   `origin`, `methods` and `headers`.
 * @returns {Hono} The configured application.
 */
function createHTTPServer(options) {
  const { gateway, basePath = "" } = options;
  const app = new Hono();
  app.use("*", logger());
  if (options.cors) {
    app.use(
      "*",
      cors({
        origin: options.cors.origin || "*",
        allowMethods: options.cors.methods || ["GET", "POST", "OPTIONS"],
        allowHeaders: options.cors.headers || [
          "Content-Type",
          "Authorization",
          "X-Request-Id"
        ]
      })
    );
  }
  // Shared shape for client-side request errors raised by this layer.
  const badRequest = (c, message, code) => c.json(
    {
      error: {
        message,
        type: "invalid_request_error",
        code
      }
    },
    400
  );
  app.get(`${basePath}/health`, async (c) => {
    const health = await gateway.checkHealth();
    const allHealthy = Object.values(health).every((h) => h);
    return c.json(
      {
        status: allHealthy ? "healthy" : "degraded",
        providers: health,
        timestamp: (/* @__PURE__ */ new Date()).toISOString()
      },
      allHealthy ? 200 : 503
    );
  });
  app.get(`${basePath}/metrics`, (c) => {
    const metrics = gateway.getMetrics();
    return c.json(metrics);
  });
  app.get(`${basePath}/v1/models`, (c) => {
    const registry = gateway.getRegistry();
    const models = registry.getAllModels();
    const data = models.map((model) => {
      const modelInfo = registry.getModelInfo(model);
      return {
        id: model,
        object: "model",
        created: Math.floor(Date.now() / 1e3),
        owned_by: modelInfo?.provider || "unknown"
      };
    });
    return c.json({
      object: "list",
      data
    });
  });
  app.post(`${basePath}/v1/chat/completions`, async (c) => {
    // A body that cannot be parsed is the caller's fault: answer 400
    // rather than letting it surface as an internal error.
    let body;
    try {
      body = await c.req.json();
    } catch {
      return badRequest(c, "Request body must be valid JSON", "invalid_json");
    }
    if (body === null || typeof body !== "object" || Array.isArray(body)) {
      return badRequest(c, "Request body must be a JSON object", "invalid_request_body");
    }
    try {
      const requestId = c.req.header("X-Request-Id");
      if (requestId) {
        body._gateway = { ...body._gateway, requestId };
      }
      if (body.stream) {
        return streamSSE(c, async (stream) => {
          try {
            const generator = await gateway.chat.completions.create(
              body
            );
            for await (const chunk of generator) {
              await stream.writeSSE({
                data: JSON.stringify(chunk)
              });
            }
            await stream.writeSSE({ data: "[DONE]" });
          } catch (error) {
            // formatError() already returns the payload as { error: {...} };
            // send that body as-is so stream errors match the JSON route.
            await stream.writeSSE({
              data: JSON.stringify(formatError(error).body)
            });
          }
        });
      }
      const response = await gateway.chat.completions.create(
        body
      );
      return c.json(response);
    } catch (error) {
      const { status, body: errorBody } = formatError(error);
      return c.json(errorBody, status);
    }
  });
  app.post(`${basePath}/v1/completions`, (c) => {
    return c.json(
      {
        error: {
          message: "The completions endpoint is deprecated. Please use /v1/chat/completions instead.",
          type: "invalid_request_error",
          code: "deprecated_endpoint"
        }
      },
      400
    );
  });
  app.all("*", (c) => {
    return c.json(
      {
        error: {
          message: `Unknown endpoint: ${c.req.method} ${c.req.path}`,
          type: "invalid_request_error",
          code: "unknown_endpoint"
        }
      },
      404
    );
  });
  return app;
}
|
|
3145
|
+
/**
 * Start serving a Hono app over HTTP.
 *
 * @param {{fetch: Function}} app - Application whose `fetch` handles requests.
 * @param {object} [options]
 * @param {number} [options.port=3000] - Port to bind. An explicit 0 is passed
 *   through unchanged instead of being replaced by the default.
 * @param {string} [options.host="0.0.0.0"] - Hostname to bind.
 * @returns {object} The server returned by `serve`.
 */
function startServer(app, options = {}) {
  // `??` keeps an explicit port 0; `||` would have replaced it with 3000.
  const port = options.port ?? 3e3;
  const host = options.host || "0.0.0.0";
  const server = serve(
    {
      fetch: app.fetch,
      port,
      hostname: host
    },
    (info) => {
      // Log from the listening callback so the reported port is the one
      // actually bound.
      console.log(`Gateway server running on http://${host}:${info?.port ?? port}`);
    }
  );
  return server;
}
|
|
3156
|
+
/**
 * Map a thrown value to an HTTP status plus an OpenAI-style error body.
 *
 * Resolution order:
 *   1. ValidationError instance          -> 400 invalid_request_error
 *   2. GatewayError instance             -> its statusCode, gateway_error
 *   3. Error named "ValidationError" that has a `code`
 *                                        -> 400 invalid_request_error
 *   4. Error named "GatewayError" that has `statusCode` and `code`
 *                                        -> its statusCode, gateway_error
 *   5. any other Error                   -> 500 internal_error
 *   6. anything else                     -> 500 unknown_error
 *
 * Steps 3 and 4 recognise errors by name and shape rather than by class.
 *
 * @param {unknown} error - The thrown value.
 * @returns {{status: number, body: {error: {message: string, type: string, code: *}}}}
 */
function formatError(error) {
  const reply = (status, message, type, code) => ({
    status,
    body: {
      error: { message, type, code }
    }
  });
  if (error instanceof ValidationError) {
    return reply(400, error.message, "invalid_request_error", error.code);
  }
  if (error instanceof GatewayError) {
    return reply(error.statusCode, error.message, "gateway_error", error.code);
  }
  if (!(error instanceof Error)) {
    return reply(500, "An unknown error occurred", "internal_error", "unknown_error");
  }
  const named = (expected) => "name" in error && error.name === expected;
  if (named("ValidationError") && "code" in error) {
    return reply(400, error.message, "invalid_request_error", error.code);
  }
  if (named("GatewayError") && "statusCode" in error && "code" in error) {
    return reply(error.statusCode, error.message, "gateway_error", error.code);
  }
  return reply(500, error.message, "internal_error", "internal_error");
}
|
|
3228
|
+
|
|
3229
|
+
// src/telemetry/Metrics.ts
|
|
3230
|
+
var MetricsCollector = class {
|
|
3231
|
+
prefix;
|
|
3232
|
+
counters = /* @__PURE__ */ new Map();
|
|
3233
|
+
gauges = /* @__PURE__ */ new Map();
|
|
3234
|
+
histograms = /* @__PURE__ */ new Map();
|
|
3235
|
+
latencyBuckets;
|
|
3236
|
+
tokenBuckets;
|
|
3237
|
+
constructor(config = {}) {
|
|
3238
|
+
this.prefix = config.prefix || "agentsea_gateway";
|
|
3239
|
+
this.latencyBuckets = config.histogramBuckets?.latency || [
|
|
3240
|
+
50,
|
|
3241
|
+
100,
|
|
3242
|
+
250,
|
|
3243
|
+
500,
|
|
3244
|
+
1e3,
|
|
3245
|
+
2500,
|
|
3246
|
+
5e3,
|
|
3247
|
+
1e4
|
|
3248
|
+
];
|
|
3249
|
+
this.tokenBuckets = config.histogramBuckets?.tokens || [
|
|
3250
|
+
100,
|
|
3251
|
+
500,
|
|
3252
|
+
1e3,
|
|
3253
|
+
2e3,
|
|
3254
|
+
5e3,
|
|
3255
|
+
1e4,
|
|
3256
|
+
5e4
|
|
3257
|
+
];
|
|
3258
|
+
}
|
|
3259
|
+
/**
|
|
3260
|
+
* Get token histogram buckets
|
|
3261
|
+
*/
|
|
3262
|
+
getTokenBuckets() {
|
|
3263
|
+
return [...this.tokenBuckets];
|
|
3264
|
+
}
|
|
3265
|
+
/**
|
|
3266
|
+
* Increment a counter
|
|
3267
|
+
*/
|
|
3268
|
+
incrementCounter(name, value = 1, labels) {
|
|
3269
|
+
const key = this.formatKey(name, labels);
|
|
3270
|
+
const current = this.counters.get(key) || 0;
|
|
3271
|
+
this.counters.set(key, current + value);
|
|
3272
|
+
}
|
|
3273
|
+
/**
|
|
3274
|
+
* Set a gauge value
|
|
3275
|
+
*/
|
|
3276
|
+
setGauge(name, value, labels) {
|
|
3277
|
+
const key = this.formatKey(name, labels);
|
|
3278
|
+
this.gauges.set(key, value);
|
|
3279
|
+
}
|
|
3280
|
+
/**
|
|
3281
|
+
* Record a histogram observation
|
|
3282
|
+
*/
|
|
3283
|
+
recordHistogram(name, value, labels, buckets) {
|
|
3284
|
+
const key = this.formatKey(name, labels);
|
|
3285
|
+
let histogram = this.histograms.get(key);
|
|
3286
|
+
if (!histogram) {
|
|
3287
|
+
histogram = {
|
|
3288
|
+
count: 0,
|
|
3289
|
+
sum: 0,
|
|
3290
|
+
buckets: /* @__PURE__ */ new Map()
|
|
3291
|
+
};
|
|
3292
|
+
const bucketsToUse = buckets || this.latencyBuckets;
|
|
3293
|
+
for (const bucket of bucketsToUse) {
|
|
3294
|
+
histogram.buckets.set(bucket, 0);
|
|
3295
|
+
}
|
|
3296
|
+
histogram.buckets.set(Infinity, 0);
|
|
3297
|
+
this.histograms.set(key, histogram);
|
|
3298
|
+
}
|
|
3299
|
+
histogram.count++;
|
|
3300
|
+
histogram.sum += value;
|
|
3301
|
+
for (const [bucket, count] of histogram.buckets) {
|
|
3302
|
+
if (value <= bucket) {
|
|
3303
|
+
histogram.buckets.set(bucket, count + 1);
|
|
3304
|
+
}
|
|
3305
|
+
}
|
|
3306
|
+
}
|
|
3307
|
+
/**
|
|
3308
|
+
* Record request metrics
|
|
3309
|
+
*/
|
|
3310
|
+
recordRequest(data) {
|
|
3311
|
+
const labels = { provider: data.provider, model: data.model };
|
|
3312
|
+
this.incrementCounter("requests_total", 1, {
|
|
3313
|
+
...labels,
|
|
3314
|
+
status: data.status,
|
|
3315
|
+
cached: String(data.cached)
|
|
3316
|
+
});
|
|
3317
|
+
this.recordHistogram("request_latency_ms", data.latencyMs, labels);
|
|
3318
|
+
this.incrementCounter("tokens_input_total", data.inputTokens, labels);
|
|
3319
|
+
this.incrementCounter("tokens_output_total", data.outputTokens, labels);
|
|
3320
|
+
this.incrementCounter(
|
|
3321
|
+
"cost_microdollars_total",
|
|
3322
|
+
Math.round(data.cost * 1e6),
|
|
3323
|
+
labels
|
|
3324
|
+
);
|
|
3325
|
+
if (data.cached) {
|
|
3326
|
+
this.incrementCounter("cache_hits_total", 1);
|
|
3327
|
+
}
|
|
3328
|
+
}
|
|
3329
|
+
/**
|
|
3330
|
+
* Get counter value
|
|
3331
|
+
*/
|
|
3332
|
+
getCounter(name, labels) {
|
|
3333
|
+
const key = this.formatKey(name, labels);
|
|
3334
|
+
return this.counters.get(key) || 0;
|
|
3335
|
+
}
|
|
3336
|
+
/**
|
|
3337
|
+
* Get gauge value
|
|
3338
|
+
*/
|
|
3339
|
+
getGauge(name, labels) {
|
|
3340
|
+
const key = this.formatKey(name, labels);
|
|
3341
|
+
return this.gauges.get(key) || 0;
|
|
3342
|
+
}
|
|
3343
|
+
/**
|
|
3344
|
+
* Get histogram data
|
|
3345
|
+
*/
|
|
3346
|
+
getHistogram(name, labels) {
|
|
3347
|
+
const key = this.formatKey(name, labels);
|
|
3348
|
+
return this.histograms.get(key);
|
|
3349
|
+
}
|
|
3350
|
+
/**
|
|
3351
|
+
* Get all metrics as a summary object
|
|
3352
|
+
*/
|
|
3353
|
+
getSummary() {
|
|
3354
|
+
const requestsTotal = this.sumAllCounters("requests_total");
|
|
3355
|
+
const requestsSuccess = this.sumCountersByLabel(
|
|
3356
|
+
"requests_total",
|
|
3357
|
+
"status",
|
|
3358
|
+
"success"
|
|
3359
|
+
);
|
|
3360
|
+
const requestsError = this.sumCountersByLabel(
|
|
3361
|
+
"requests_total",
|
|
3362
|
+
"status",
|
|
3363
|
+
"error"
|
|
3364
|
+
);
|
|
3365
|
+
const requestsCached = this.sumCountersByLabel(
|
|
3366
|
+
"requests_total",
|
|
3367
|
+
"cached",
|
|
3368
|
+
"true"
|
|
3369
|
+
);
|
|
3370
|
+
const latencyHistogram = this.aggregateHistograms("request_latency_ms");
|
|
3371
|
+
const avgLatency = latencyHistogram.count > 0 ? latencyHistogram.sum / latencyHistogram.count : 0;
|
|
3372
|
+
const inputTokens = this.sumAllCounters("tokens_input_total");
|
|
3373
|
+
const outputTokens = this.sumAllCounters("tokens_output_total");
|
|
3374
|
+
const totalCostMicro = this.sumAllCounters("cost_microdollars_total");
|
|
3375
|
+
const cacheHits = this.getCounter("cache_hits_total");
|
|
3376
|
+
const cacheMisses = requestsTotal - cacheHits;
|
|
3377
|
+
return {
|
|
3378
|
+
requests: {
|
|
3379
|
+
total: requestsTotal,
|
|
3380
|
+
successful: requestsSuccess,
|
|
3381
|
+
failed: requestsError,
|
|
3382
|
+
cached: requestsCached
|
|
3383
|
+
},
|
|
3384
|
+
latency: {
|
|
3385
|
+
avg: avgLatency,
|
|
3386
|
+
p50: this.calculatePercentile("request_latency_ms", 0.5),
|
|
3387
|
+
p95: this.calculatePercentile("request_latency_ms", 0.95),
|
|
3388
|
+
p99: this.calculatePercentile("request_latency_ms", 0.99)
|
|
3389
|
+
},
|
|
3390
|
+
tokens: {
|
|
3391
|
+
input: inputTokens,
|
|
3392
|
+
output: outputTokens,
|
|
3393
|
+
total: inputTokens + outputTokens
|
|
3394
|
+
},
|
|
3395
|
+
cost: {
|
|
3396
|
+
total: totalCostMicro / 1e6,
|
|
3397
|
+
byProvider: this.getCostByLabel("provider"),
|
|
3398
|
+
byModel: this.getCostByLabel("model")
|
|
3399
|
+
},
|
|
3400
|
+
cache: {
|
|
3401
|
+
hits: cacheHits,
|
|
3402
|
+
misses: cacheMisses,
|
|
3403
|
+
hitRate: requestsTotal > 0 ? cacheHits / requestsTotal : 0
|
|
3404
|
+
},
|
|
3405
|
+
providers: {}
|
|
3406
|
+
};
|
|
3407
|
+
}
|
|
3408
|
+
/**
|
|
3409
|
+
* Export metrics in Prometheus format
|
|
3410
|
+
*/
|
|
3411
|
+
toPrometheusFormat() {
|
|
3412
|
+
const lines = [];
|
|
3413
|
+
for (const [key, value] of this.counters) {
|
|
3414
|
+
lines.push(`${this.prefix}_${key} ${value}`);
|
|
3415
|
+
}
|
|
3416
|
+
for (const [key, value] of this.gauges) {
|
|
3417
|
+
lines.push(`${this.prefix}_${key} ${value}`);
|
|
3418
|
+
}
|
|
3419
|
+
for (const [key, histogram] of this.histograms) {
|
|
3420
|
+
for (const [bucket, count] of histogram.buckets) {
|
|
3421
|
+
const le = bucket === Infinity ? "+Inf" : bucket;
|
|
3422
|
+
lines.push(`${this.prefix}_${key}_bucket{le="${le}"} ${count}`);
|
|
3423
|
+
}
|
|
3424
|
+
lines.push(`${this.prefix}_${key}_sum ${histogram.sum}`);
|
|
3425
|
+
lines.push(`${this.prefix}_${key}_count ${histogram.count}`);
|
|
3426
|
+
}
|
|
3427
|
+
return lines.join("\n");
|
|
3428
|
+
}
|
|
3429
|
+
/**
|
|
3430
|
+
* Reset all metrics
|
|
3431
|
+
*/
|
|
3432
|
+
reset() {
|
|
3433
|
+
this.counters.clear();
|
|
3434
|
+
this.gauges.clear();
|
|
3435
|
+
this.histograms.clear();
|
|
3436
|
+
}
|
|
3437
|
+
/**
|
|
3438
|
+
* Format metric key with labels
|
|
3439
|
+
*/
|
|
3440
|
+
formatKey(name, labels) {
|
|
3441
|
+
if (!labels || Object.keys(labels).length === 0) {
|
|
3442
|
+
return name;
|
|
3443
|
+
}
|
|
3444
|
+
const labelStr = Object.entries(labels).sort(([a], [b]) => a.localeCompare(b)).map(([k, v]) => `${k}="${v}"`).join(",");
|
|
3445
|
+
return `${name}{${labelStr}}`;
|
|
3446
|
+
}
|
|
3447
|
+
/**
|
|
3448
|
+
* Sum counters by a specific label value
|
|
3449
|
+
*/
|
|
3450
|
+
sumCountersByLabel(name, labelKey, labelValue) {
|
|
3451
|
+
let sum = 0;
|
|
3452
|
+
for (const [key, value] of this.counters) {
|
|
3453
|
+
if (key.startsWith(name) && key.includes(`${labelKey}="${labelValue}"`)) {
|
|
3454
|
+
sum += value;
|
|
3455
|
+
}
|
|
3456
|
+
}
|
|
3457
|
+
return sum;
|
|
3458
|
+
}
|
|
3459
|
+
/**
|
|
3460
|
+
* Sum all counters with a given name prefix
|
|
3461
|
+
*/
|
|
3462
|
+
sumAllCounters(namePrefix) {
|
|
3463
|
+
let sum = 0;
|
|
3464
|
+
for (const [key, value] of this.counters) {
|
|
3465
|
+
if (key.startsWith(namePrefix)) {
|
|
3466
|
+
sum += value;
|
|
3467
|
+
}
|
|
3468
|
+
}
|
|
3469
|
+
return sum;
|
|
3470
|
+
}
|
|
3471
|
+
/**
|
|
3472
|
+
* Get cost breakdown by label
|
|
3473
|
+
*/
|
|
3474
|
+
getCostByLabel(labelKey) {
|
|
3475
|
+
const result = {};
|
|
3476
|
+
const prefix = "cost_microdollars_total";
|
|
3477
|
+
for (const [key, value] of this.counters) {
|
|
3478
|
+
if (key.startsWith(prefix)) {
|
|
3479
|
+
const match = key.match(new RegExp(`${labelKey}="([^"]+)"`));
|
|
3480
|
+
if (match) {
|
|
3481
|
+
const labelValue = match[1];
|
|
3482
|
+
result[labelValue] = (result[labelValue] || 0) + value / 1e6;
|
|
3483
|
+
}
|
|
3484
|
+
}
|
|
3485
|
+
}
|
|
3486
|
+
return result;
|
|
3487
|
+
}
|
|
3488
|
+
/**
|
|
3489
|
+
* Aggregate histograms for a metric name
|
|
3490
|
+
*/
|
|
3491
|
+
aggregateHistograms(name) {
|
|
3492
|
+
const result = {
|
|
3493
|
+
count: 0,
|
|
3494
|
+
sum: 0,
|
|
3495
|
+
buckets: /* @__PURE__ */ new Map()
|
|
3496
|
+
};
|
|
3497
|
+
for (const [key, histogram] of this.histograms) {
|
|
3498
|
+
if (key.startsWith(name)) {
|
|
3499
|
+
result.count += histogram.count;
|
|
3500
|
+
result.sum += histogram.sum;
|
|
3501
|
+
for (const [bucket, count] of histogram.buckets) {
|
|
3502
|
+
const existing = result.buckets.get(bucket) || 0;
|
|
3503
|
+
result.buckets.set(bucket, existing + count);
|
|
3504
|
+
}
|
|
3505
|
+
}
|
|
3506
|
+
}
|
|
3507
|
+
return result;
|
|
3508
|
+
}
|
|
3509
|
+
/**
|
|
3510
|
+
* Calculate percentile from histogram (approximate)
|
|
3511
|
+
*/
|
|
3512
|
+
calculatePercentile(name, percentile) {
|
|
3513
|
+
const histogram = this.aggregateHistograms(name);
|
|
3514
|
+
if (histogram.count === 0) return 0;
|
|
3515
|
+
const sortedBuckets = Array.from(histogram.buckets.entries()).sort(
|
|
3516
|
+
([a], [b]) => a - b
|
|
3517
|
+
);
|
|
3518
|
+
const targetCount = histogram.count * percentile;
|
|
3519
|
+
let prevBucket = 0;
|
|
3520
|
+
let prevCount = 0;
|
|
3521
|
+
for (const [bucket, count] of sortedBuckets) {
|
|
3522
|
+
if (count >= targetCount) {
|
|
3523
|
+
const bucketRange = bucket - prevBucket;
|
|
3524
|
+
const bucketCount = count - prevCount;
|
|
3525
|
+
if (bucketCount === 0) {
|
|
3526
|
+
return prevBucket;
|
|
3527
|
+
}
|
|
3528
|
+
const positionInBucket = targetCount - prevCount;
|
|
3529
|
+
const fraction = positionInBucket / bucketCount;
|
|
3530
|
+
return prevBucket + bucketRange * Math.max(0, Math.min(1, fraction));
|
|
3531
|
+
}
|
|
3532
|
+
prevBucket = bucket;
|
|
3533
|
+
prevCount = count;
|
|
3534
|
+
}
|
|
3535
|
+
return prevBucket;
|
|
3536
|
+
}
|
|
3537
|
+
};
|
|
3538
|
+
|
|
3539
|
+
export { AnthropicProvider, AuthenticationError, CircuitBreaker, CostOptimizedStrategy, DEFAULT_MODEL_MAPPINGS, FailoverStrategy, Gateway, GatewayError, GoogleProvider, HealthMonitor, LatencyOptimizedStrategy, MODEL_CONTEXT_WINDOWS, MODEL_MAX_OUTPUT, MODEL_PRICING, MetricsCollector, OpenAIProvider, Provider, ProviderError, ProviderRegistry, RateLimitError, RoundRobinStrategy, Router, VIRTUAL_MODELS, ValidationError, calculateCost, countMessageTokens, countTokens, createHTTPServer, createRouterConfig, createSystemFingerprint, estimateCost, estimateRequestTokens, findCheapestModel, freeEncoder, generateCacheKey, generateId, generateRequestId, getModelCapabilities, getModelInfo, getModelPricing, hash, hashRequest, sortModelsByCost, startServer, truncateToTokenLimit };
|
|
3540
|
+
//# sourceMappingURL=index.mjs.map
|
|
3541
|
+
//# sourceMappingURL=index.mjs.map
|