@lov3kaizen/agentsea-gateway 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,3541 @@
1
+ import { EventEmitter } from 'events';
2
+ import { get_encoding } from 'tiktoken';
3
+ import murmurhash from 'murmurhash';
4
+ import { LRUCache } from 'lru-cache';
5
+ import pino from 'pino';
6
+ import { Hono } from 'hono';
7
+ import { cors } from 'hono/cors';
8
+ import { logger } from 'hono/logger';
9
+ import { streamSSE } from 'hono/streaming';
10
+ import { serve } from '@hono/node-server';
11
+
12
+ // src/core/Gateway.ts
13
+
14
+ // src/core/types.ts
15
/**
 * Base class for the gateway's error types.
 *
 * @param {string} message - Error message passed to `Error`.
 * @param {string} code - Error code stored on `this.code`.
 * @param {number} [statusCode=500] - Stored on `this.statusCode`.
 * @param {string} [provider] - Stored on `this.provider` (may be undefined).
 * @param {boolean} [retryable=false] - Stored on `this.retryable`.
 */
var GatewayError = class extends Error {
  constructor(message, code, statusCode = 500, provider, retryable = false) {
    super(message);
    // Copy the metadata in one step; key order matches the parameter order.
    Object.assign(this, { code, statusCode, provider, retryable });
    this.name = "GatewayError";
  }
};
25
/**
 * Error raised for a provider failure.
 *
 * Always uses code "PROVIDER_ERROR" and statusCode 502.
 *
 * @param {string} message - Error message.
 * @param {string} provider - Provider name forwarded to GatewayError.
 * @param {*} originalError - Underlying error, kept on `this.originalError`.
 * @param {boolean} [retryable=true] - Retry flag forwarded to GatewayError.
 */
var ProviderError = class extends GatewayError {
  constructor(message, provider, originalError, retryable = true) {
    const code = "PROVIDER_ERROR";
    const statusCode = 502;
    super(message, code, statusCode, provider, retryable);
    Object.assign(this, { originalError, name: "ProviderError" });
  }
};
32
/**
 * Error raised when a rate limit is exceeded.
 *
 * Always uses code "RATE_LIMIT_EXCEEDED", statusCode 429 and retryable = true.
 *
 * @param {string} message - Error message.
 * @param {*} retryAfter - Stored on `this.retryAfter` (units are not visible here).
 * @param {string} [provider] - Provider name forwarded to GatewayError.
 */
var RateLimitError = class extends GatewayError {
  constructor(message, retryAfter, provider) {
    const code = "RATE_LIMIT_EXCEEDED";
    const statusCode = 429;
    super(message, code, statusCode, provider, true);
    Object.assign(this, { retryAfter, name: "RateLimitError" });
  }
};
39
/**
 * Error raised when authentication fails.
 *
 * Always uses code "AUTHENTICATION_FAILED", statusCode 401, no provider and
 * retryable = false.
 *
 * @param {string} message - Error message.
 */
var AuthenticationError = class extends GatewayError {
  constructor(message) {
    const code = "AUTHENTICATION_FAILED";
    const statusCode = 401;
    super(message, code, statusCode, undefined, false);
    this.name = "AuthenticationError";
  }
};
45
/**
 * Error raised when validation fails.
 *
 * Always uses code "VALIDATION_ERROR", statusCode 400, no provider and
 * retryable = false.
 *
 * @param {string} message - Error message.
 */
var ValidationError = class extends GatewayError {
  constructor(message) {
    const code = "VALIDATION_ERROR";
    const statusCode = 400;
    super(message, code, statusCode, undefined, false);
    this.name = "ValidationError";
  }
};
51
+
52
+ // src/providers/ProviderRegistry.ts
53
/**
 * Registry of providers, indexed by name and by the models they report.
 *
 * `providers` maps `provider.name` to the provider object. `modelToProvider`
 * maps a model id to the ordered list of provider names that reported that
 * model through `getModels()`.
 */
var ProviderRegistry = class {
  providers = /* @__PURE__ */ new Map();
  modelToProvider = /* @__PURE__ */ new Map();
  healthCheckInterval = null;

  /**
   * @param {Array<object>} [providers=[]] - Providers to register immediately.
   */
  constructor(providers = []) {
    for (const provider of providers) {
      this.register(provider);
    }
  }

  /**
   * Register a provider, or replace the provider registered under the same name.
   *
   * When replacing, model mappings for models the new provider no longer
   * reports are removed so the model index never points at models the
   * provider cannot serve. Mappings for models it still reports keep their
   * existing position in the per-model provider list.
   *
   * @param {object} provider - Must expose `name` and `getModels()`.
   */
  register(provider) {
    const { name } = provider;
    const isReplacement = this.providers.has(name);
    this.providers.set(name, provider);
    const models = Array.from(provider.getModels());
    if (isReplacement) {
      this.#removeFromModelIndex(name, new Set(models));
    }
    for (const model of models) {
      const names = this.modelToProvider.get(model) || [];
      if (!names.includes(name)) {
        names.push(name);
        this.modelToProvider.set(model, names);
      }
    }
  }

  /**
   * Unregister a provider and remove it from every model mapping.
   *
   * The whole model index is scanned instead of asking the provider for its
   * current models, so the cleanup is complete even if the provider's model
   * list changed after registration.
   *
   * @param {string} name - Provider name.
   * @returns {boolean} `false` when no provider with that name is registered.
   */
  unregister(name) {
    if (!this.providers.has(name)) {
      return false;
    }
    this.#removeFromModelIndex(name);
    this.providers.delete(name);
    return true;
  }

  /**
   * Remove `name` from every model mapping except the models listed in `keep`.
   * Models left without any provider are deleted from the index.
   *
   * @param {string} name - Provider name to remove.
   * @param {Set<string>} [keep] - Models whose mapping must be left untouched.
   */
  #removeFromModelIndex(name, keep = new Set()) {
    for (const [model, names] of this.modelToProvider) {
      if (keep.has(model) || !names.includes(name)) {
        continue;
      }
      const remaining = names.filter((candidate) => candidate !== name);
      if (remaining.length > 0) {
        this.modelToProvider.set(model, remaining);
      } else {
        this.modelToProvider.delete(model);
      }
    }
  }

  /**
   * Get a provider by name.
   * @param {string} name
   * @returns {object|undefined}
   */
  get(name) {
    return this.providers.get(name);
  }

  /**
   * Get all registered providers in registration order.
   * @returns {Array<object>}
   */
  getAll() {
    return Array.from(this.providers.values());
  }

  /**
   * Get all provider names.
   * @returns {Array<string>}
   */
  getNames() {
    return Array.from(this.providers.keys());
  }

  /**
   * Get the registered providers mapped to a model.
   * @param {string} model
   * @returns {Array<object>} Empty when the model is unknown.
   */
  getProvidersForModel(model) {
    const names = this.modelToProvider.get(model) || [];
    return names.map((name) => this.providers.get(name)).filter((p) => p !== void 0);
  }

  /**
   * Get the first provider mapped to a model whose `isAvailable()` is truthy.
   * @param {string} model
   * @returns {object|undefined}
   */
  getProviderForModel(model) {
    return this.getProvidersForModel(model).find((p) => p.isAvailable());
  }

  /**
   * Check if any provider is mapped to a model.
   * @param {string} model
   * @returns {boolean}
   */
  hasModel(model) {
    return this.modelToProvider.has(model);
  }

  /**
   * Get every model id present in the model index.
   * @returns {Array<string>}
   */
  getAllModels() {
    return Array.from(this.modelToProvider.keys());
  }

  /**
   * Get model info from the first available provider mapped to the model.
   * @param {string} model
   * @returns {*} The provider's `getModelInfo(model)` result, or `null`.
   */
  getModelInfo(model) {
    const provider = this.getProviderForModel(model);
    return provider?.getModelInfo(model) ?? null;
  }

  /**
   * Collect `getHealth()` of every provider, keyed by provider name.
   * @returns {Object<string, *>}
   */
  getHealthStatus() {
    const status = {};
    for (const [name, provider] of this.providers) {
      status[name] = provider.getHealth();
    }
    return status;
  }

  /**
   * Get providers whose `isHealthy()` is truthy.
   * @returns {Array<object>}
   */
  getHealthyProviders() {
    return this.getAll().filter((p) => p.isHealthy());
  }

  /**
   * Get providers whose `isAvailable()` is truthy.
   * @returns {Array<object>}
   */
  getAvailableProviders() {
    return this.getAll().filter((p) => p.isAvailable());
  }

  /**
   * Run `healthCheck()` on all providers concurrently.
   *
   * Rejects if any single health check rejects (Promise.all semantics).
   *
   * @returns {Promise<Object<string, *>>} Results keyed by provider name.
   */
  async checkHealth() {
    const results = {};
    await Promise.all(
      this.getAll().map(async (provider) => {
        results[provider.name] = await provider.healthCheck();
      })
    );
    return results;
  }

  /**
   * Start periodic health checks. Does nothing if they are already running.
   * Failures of a periodic run are reported through `console.error`.
   *
   * @param {number} [intervalMs=60000] - Delay between runs in milliseconds.
   */
  startHealthChecks(intervalMs = 6e4) {
    if (this.healthCheckInterval) {
      return;
    }
    this.healthCheckInterval = setInterval(() => {
      this.checkHealth().catch(console.error);
    }, intervalMs);
  }

  /**
   * Stop periodic health checks if they are running.
   */
  stopHealthChecks() {
    if (this.healthCheckInterval) {
      clearInterval(this.healthCheckInterval);
      this.healthCheckInterval = null;
    }
  }

  /**
   * Number of registered providers.
   * @returns {number}
   */
  get size() {
    return this.providers.size;
  }
};
209
/**
 * Three-state circuit breaker ("closed", "open", "half-open") for a provider.
 *
 * `config` supplies `failureThreshold`, `successThreshold` and `timeout`
 * (milliseconds added to `Date.now()` when the breaker opens).
 */
var CircuitBreaker = class {
  state = "closed";
  failures = 0;
  successes = 0;
  lastFailure = null;
  nextAttempt = null;

  /**
   * @param {string} providerName - Name reported back by `getStatus()`.
   * @param {object} config - Thresholds and open-state timeout.
   */
  constructor(providerName, config) {
    this.providerName = providerName;
    this.config = config;
  }

  /**
   * Tell whether a request may go through.
   *
   * An open breaker whose `nextAttempt` time has been reached switches to
   * "half-open" and lets the request pass.
   *
   * @returns {boolean}
   */
  isAllowed() {
    if (this.state !== "open") {
      return true;
    }
    const retryDue = this.nextAttempt && /* @__PURE__ */ new Date() >= this.nextAttempt;
    if (!retryDue) {
      return false;
    }
    this.state = "half-open";
    return true;
  }

  /**
   * Record a successful request.
   *
   * Half-open: counts towards `successThreshold` and closes the breaker once
   * reached. Closed: decrements the failure counter, never below zero.
   */
  recordSuccess() {
    switch (this.state) {
      case "half-open":
        this.successes += 1;
        if (this.successes >= this.config.successThreshold) {
          this.reset();
        }
        break;
      case "closed":
        this.failures = Math.max(0, this.failures - 1);
        break;
      default:
        break;
    }
  }

  /**
   * Record a failed request.
   *
   * Opens the breaker on any failure while half-open, or once the failure
   * counter reaches `failureThreshold` while closed.
   */
  recordFailure() {
    this.failures += 1;
    this.lastFailure = /* @__PURE__ */ new Date();
    const probeFailed = this.state === "half-open";
    const thresholdReached =
      !probeFailed && this.state === "closed" && this.failures >= this.config.failureThreshold;
    if (probeFailed || thresholdReached) {
      this.trip();
    }
  }

  /**
   * Open the breaker and schedule the next attempt `config.timeout` ms from now.
   */
  trip() {
    this.state = "open";
    this.nextAttempt = new Date(Date.now() + this.config.timeout);
    this.successes = 0;
  }

  /**
   * Close the breaker and clear the counters and the scheduled attempt.
   */
  reset() {
    this.state = "closed";
    this.failures = 0;
    this.successes = 0;
    this.nextAttempt = null;
  }

  /**
   * @returns {string} Current state.
   */
  getState() {
    return this.state;
  }

  /**
   * @returns {object} Snapshot of provider name, state, failure count,
   *   next attempt time and last failure time.
   */
  getStatus() {
    const { providerName, state, failures, nextAttempt, lastFailure } = this;
    return { providerName, state, failures, nextAttempt, lastFailure };
  }
};
296
/**
 * Tracks per-provider health samples and circuit breakers.
 *
 * Emits "unhealthy" and "degraded" from `recordHealth`, "circuit-open" from
 * `recordRequest`, and "circuit-reset" from `resetCircuit`.
 */
var HealthMonitor = class extends EventEmitter {
  healthHistory = /* @__PURE__ */ new Map();
  circuitBreakers = /* @__PURE__ */ new Map();
  maxHistorySize = 100;

  /**
   * @param {object} config - Monitor configuration; `config.circuitBreaker`
   *   is used as the settings for breakers created by this monitor.
   */
  constructor(config) {
    super();
    this.config = config;
  }

  /**
   * Append a health sample for a provider, keeping at most `maxHistorySize`
   * samples, and emit an event named after an "unhealthy"/"degraded" status.
   *
   * @param {string} providerName
   * @param {object} health - Sample with at least a `status` field.
   */
  recordHealth(providerName, health) {
    const samples = this.healthHistory.get(providerName) || [];
    samples.push(health);
    if (samples.length > this.maxHistorySize) {
      // Drop the oldest sample.
      samples.shift();
    }
    this.healthHistory.set(providerName, samples);

    const { status } = health;
    if (status === "unhealthy" || status === "degraded") {
      this.emit(status, providerName, health);
    }
  }

  /**
   * Feed a request outcome to the provider's circuit breaker. After a
   * failure, emits "circuit-open" if the breaker reports the "open" state.
   *
   * @param {string} providerName
   * @param {boolean} success
   * @param {number} _latencyMs - Accepted but not used.
   */
  recordRequest(providerName, success, _latencyMs) {
    const breaker = this.getOrCreateCircuitBreaker(providerName);
    if (success) {
      breaker.recordSuccess();
      return;
    }
    breaker.recordFailure();
    if (breaker.getState() === "open") {
      this.emit("circuit-open", providerName);
    }
  }

  /**
   * @param {string} providerName
   * @returns {boolean} The stored breaker's `isAllowed()`, or `true` when the
   *   provider has no stored breaker.
   */
  isRequestAllowed(providerName) {
    const breaker = this.circuitBreakers.get(providerName);
    if (!breaker) {
      return true;
    }
    return breaker.isAllowed();
  }

  /**
   * Return the stored breaker for a provider, creating and storing one when
   * `config.circuitBreaker` is set. Without that setting, a fresh breaker
   * with built-in defaults is returned and NOT stored.
   *
   * @param {string} providerName
   * @returns {object} A CircuitBreaker instance.
   */
  getOrCreateCircuitBreaker(providerName) {
    const existing = this.circuitBreakers.get(providerName);
    if (existing) {
      return existing;
    }
    const settings = this.config.circuitBreaker;
    if (settings) {
      const created = new CircuitBreaker(providerName, settings);
      this.circuitBreakers.set(providerName, created);
      return created;
    }
    return new CircuitBreaker(providerName, {
      failureThreshold: 5,
      successThreshold: 3,
      timeout: 3e4
    });
  }

  /**
   * @param {string} providerName
   * @returns {Array<object>} Recorded samples, or an empty array.
   */
  getHistory(providerName) {
    return this.healthHistory.get(providerName) || [];
  }

  /**
   * @param {string} providerName
   * @returns {number} Mean `latencyMs` over the recorded samples, 0 if none.
   */
  getAverageLatency(providerName) {
    const samples = this.healthHistory.get(providerName) || [];
    if (samples.length === 0) return 0;
    let total = 0;
    for (const sample of samples) {
      total += sample.latencyMs;
    }
    return total / samples.length;
  }

  /**
   * @param {string} providerName
   * @returns {number} `errorRate` of the most recent sample, 0 if none.
   */
  getErrorRate(providerName) {
    const samples = this.healthHistory.get(providerName) || [];
    if (samples.length === 0) return 0;
    return samples[samples.length - 1].errorRate;
  }

  /**
   * @param {string} providerName
   * @returns {object|null} The stored breaker's status, or `null`.
   */
  getCircuitStatus(providerName) {
    const breaker = this.circuitBreakers.get(providerName);
    if (!breaker) {
      return null;
    }
    return breaker.getStatus();
  }

  /**
   * @returns {Object<string, object>} Status of every stored breaker by name.
   */
  getAllCircuitStatuses() {
    const statuses = {};
    this.circuitBreakers.forEach((breaker, name) => {
      statuses[name] = breaker.getStatus();
    });
    return statuses;
  }

  /**
   * Reset the stored breaker of a provider and emit "circuit-reset".
   * Does nothing when the provider has no stored breaker.
   *
   * @param {string} providerName
   */
  resetCircuit(providerName) {
    const breaker = this.circuitBreakers.get(providerName);
    if (!breaker) {
      return;
    }
    breaker.reset();
    this.emit("circuit-reset", providerName);
  }

  /**
   * Forget all health samples and all stored breakers.
   */
  clear() {
    this.healthHistory.clear();
    this.circuitBreakers.clear();
  }
};
415
+
416
+ // src/routing/Router.ts
417
/**
 * Default cross-provider model equivalents.
 *
 * Models are grouped into tiers; every model in a tier maps to the other
 * members of the same tier, listed in tier order.
 */
var DEFAULT_MODEL_MAPPINGS = (() => {
  const tiers = [
    // GPT-4 class
    [
      { provider: "openai", model: "gpt-4o" },
      { provider: "anthropic", model: "claude-3-5-sonnet-20241022" },
      { provider: "google", model: "gemini-1.5-pro" }
    ],
    // GPT-4 mini class
    [
      { provider: "openai", model: "gpt-4o-mini" },
      { provider: "anthropic", model: "claude-3-5-haiku-20241022" },
      { provider: "google", model: "gemini-1.5-flash" }
    ]
  ];
  const mappings = {};
  for (const tier of tiers) {
    for (const member of tier) {
      // Fresh copies so no entry object is shared between mapping lists.
      mappings[member.model] = tier
        .filter((other) => other !== member)
        .map((other) => ({ ...other }));
    }
  }
  return mappings;
})();
/** Model names that the router resolves itself instead of passing to a strategy. */
var VIRTUAL_MODELS = ["best", "cheapest", "fastest"];
446
/**
 * Routes a request either through the configured strategy or, for the
 * virtual models listed in VIRTUAL_MODELS ("best", "cheapest", "fastest"),
 * through the built-in selection logic below.
 *
 * Every routing decision has the shape
 * `{ provider, model, reason, alternatives, timestamp }` where `alternatives`
 * is a list of `{ provider, model, score }`.
 */
var Router = class {
  strategy;
  modelMappings;
  fallbackChain;

  /**
   * @param {object} strategy - Object exposing `name` and `route(request, registry, context)`.
   * @param {object} [config] - Optional `modelMappings` (merged over the
   *   defaults) and `fallbackChain` (defaults to openai, anthropic, google).
   */
  constructor(strategy, config) {
    this.strategy = strategy;
    this.modelMappings = {
      ...DEFAULT_MODEL_MAPPINGS,
      ...config?.modelMappings
    };
    this.fallbackChain = config?.fallbackChain || [
      "openai",
      "anthropic",
      "google"
    ];
  }

  /**
   * Route a request to a provider.
   *
   * @param {object} request - Must carry `model`.
   * @param {object} registry - Provider registry.
   * @param {object} [context] - Optional routing hints.
   * @returns {object} Routing decision.
   */
  route(request, registry, context) {
    if (this.isVirtualModel(request.model)) {
      return this.routeVirtualModel(request.model, request, registry, context);
    }
    return this.strategy.route(request, registry, context);
  }

  /**
   * @param {string} model
   * @returns {boolean} Whether the model is one of the virtual models.
   */
  isVirtualModel(model) {
    return VIRTUAL_MODELS.includes(model);
  }

  /**
   * Resolve a virtual model to a concrete provider/model.
   *
   * @param {string} virtualModel - "best", "cheapest" or "fastest".
   * @param {object} _request - Accepted but not used.
   * @param {object} registry - Must expose `getAvailableProviders()`.
   * @param {object} [context] - `excludeProviders` filters providers by name.
   * @returns {object} Routing decision.
   * @throws {Error} When no provider is left or the virtual model is unknown.
   */
  routeVirtualModel(virtualModel, _request, registry, context) {
    const availableProviders = registry
      .getAvailableProviders()
      .filter((p) => !context?.excludeProviders?.includes(p.name));
    if (availableProviders.length === 0) {
      throw new Error("No available providers");
    }
    switch (virtualModel) {
      case "best":
        return this.routeBest(availableProviders, context);
      case "cheapest":
        return this.routeCheapest(availableProviders, context);
      case "fastest":
        return this.routeFastest(availableProviders, context);
      default:
        throw new Error(`Unknown virtual model: ${String(virtualModel)}`);
    }
  }

  /**
   * Fail with a descriptive error instead of dereferencing `candidates[0]`
   * when the providers expose no models at all.
   *
   * @param {Array<object>} candidates
   * @throws {Error} When the list is empty.
   */
  #requireCandidates(candidates) {
    if (candidates.length === 0) {
      throw new Error("No models available from the available providers");
    }
  }

  /**
   * Route to the model with the highest entry in the built-in quality table
   * (models missing from the table score 50).
   *
   * With `context.preferredProvider`, the highest-scoring model of that
   * provider wins if it has any; the alternatives then list the best other
   * candidates and never repeat the selected one.
   *
   * @param {Array<object>} providers
   * @param {object} [context]
   * @returns {object} Routing decision.
   * @throws {Error} When the providers expose no models.
   */
  routeBest(providers, context) {
    const qualityRanking = {
      "claude-3-5-sonnet-20241022": 95,
      "claude-sonnet-4-20250514": 96,
      "gpt-4o": 94,
      "gemini-1.5-pro": 92,
      "claude-3-opus-20240229": 93,
      "gpt-4-turbo": 91,
      o1: 97,
      "o1-preview": 96
    };
    const candidates = [];
    for (const provider of providers) {
      for (const model of provider.getModels()) {
        candidates.push({
          provider: provider.name,
          model,
          score: qualityRanking[model] || 50
        });
      }
    }
    this.#requireCandidates(candidates);
    candidates.sort((a, b) => b.score - a.score);
    if (context?.preferredProvider) {
      const preferred = candidates.find(
        (c) => c.provider === context.preferredProvider
      );
      if (preferred) {
        return {
          provider: preferred.provider,
          model: preferred.model,
          reason: `Best quality model from preferred provider`,
          alternatives: candidates.filter((c) => c !== preferred).slice(0, 3),
          timestamp: /* @__PURE__ */ new Date()
        };
      }
    }
    const best = candidates[0];
    return {
      provider: best.provider,
      model: best.model,
      reason: `Highest quality model available`,
      alternatives: candidates.slice(1, 4),
      timestamp: /* @__PURE__ */ new Date()
    };
  }

  /**
   * Route to the cheapest model, ranking by the average of the input and
   * output price per million tokens reported by `getModelInfo`.
   *
   * Free models (average price 0) rank first; models without model info rank
   * last. With `context.maxCost`, candidates whose estimated cost for 1000
   * input and 500 output tokens exceeds the budget are dropped, unless that
   * would leave nothing, in which case the budget is ignored. Models without
   * model info always pass the budget filter.
   *
   * @param {Array<object>} providers
   * @param {object} [context]
   * @returns {object} Routing decision.
   * @throws {Error} When the providers expose no models.
   */
  routeCheapest(providers, context) {
    const priced = [];
    for (const provider of providers) {
      for (const model of provider.getModels()) {
        const info = provider.getModelInfo(model);
        const avgCost = info
          ? (info.inputPricePerMillion + info.outputPricePerMillion) / 2
          : Infinity;
        priced.push({
          info,
          candidate: {
            provider: provider.name,
            model,
            // Higher score = cheaper. A free model gets the largest finite
            // score so it sorts first and stays JSON-serializable.
            score: avgCost === 0 ? Number.MAX_VALUE : 1 / avgCost
          }
        });
      }
    }
    this.#requireCandidates(priced);
    priced.sort((a, b) => b.candidate.score - a.candidate.score);
    const candidates = priced.map((entry) => entry.candidate);
    if (context?.maxCost !== void 0) {
      const withinBudget = priced
        .filter(({ info }) => {
          if (!info) return true;
          const estimatedCost =
            1e3 / 1e6 * info.inputPricePerMillion +
            500 / 1e6 * info.outputPricePerMillion;
          return estimatedCost <= context.maxCost;
        })
        .map((entry) => entry.candidate);
      if (withinBudget.length > 0) {
        const [cheapestInBudget] = withinBudget;
        return {
          provider: cheapestInBudget.provider,
          model: cheapestInBudget.model,
          reason: `Cheapest model within budget`,
          alternatives: withinBudget.slice(1, 4),
          timestamp: /* @__PURE__ */ new Date()
        };
      }
    }
    const cheapest = candidates[0];
    return {
      provider: cheapest.provider,
      model: cheapest.model,
      reason: `Cheapest available model`,
      alternatives: candidates.slice(1, 4),
      timestamp: /* @__PURE__ */ new Date()
    };
  }

  /**
   * Route to the provider with the lowest `getHealth().latencyMs`
   * (a missing or zero latency counts as 1000).
   *
   * With `context.maxLatency`, candidates above the limit are dropped, unless
   * that would leave nothing, in which case the limit is ignored.
   *
   * @param {Array<object>} providers
   * @param {object} [context]
   * @returns {object} Routing decision.
   * @throws {Error} When the providers expose no models.
   */
  routeFastest(providers, context) {
    const timed = [];
    for (const provider of providers) {
      const latency = provider.getHealth().latencyMs || 1e3;
      for (const model of provider.getModels()) {
        timed.push({
          latency,
          // Higher score = lower latency.
          candidate: { provider: provider.name, model, score: 1 / latency }
        });
      }
    }
    this.#requireCandidates(timed);
    timed.sort((a, b) => b.candidate.score - a.candidate.score);
    const candidates = timed.map((entry) => entry.candidate);
    if (context?.maxLatency !== void 0) {
      const withinLimit = timed
        .filter((entry) => entry.latency <= context.maxLatency)
        .map((entry) => entry.candidate);
      if (withinLimit.length > 0) {
        const [fastestInLimit] = withinLimit;
        return {
          provider: fastestInLimit.provider,
          model: fastestInLimit.model,
          reason: `Fastest model within latency limit`,
          alternatives: withinLimit.slice(1, 4),
          timestamp: /* @__PURE__ */ new Date()
        };
      }
    }
    const fastest = candidates[0];
    return {
      provider: fastest.provider,
      model: fastest.model,
      reason: `Fastest available provider`,
      alternatives: candidates.slice(1, 4),
      timestamp: /* @__PURE__ */ new Date()
    };
  }

  /**
   * @param {string} model
   * @returns {Array<object>} Mapped equivalents, or an empty array.
   */
  getEquivalentModels(model) {
    return this.modelMappings[model] || [];
  }

  /**
   * Replace the routing strategy.
   * @param {object} strategy
   */
  setStrategy(strategy) {
    this.strategy = strategy;
  }

  /**
   * @returns {string} Name of the current strategy.
   */
  getStrategyName() {
    return this.strategy.name;
  }

  /**
   * @returns {Array<string>} A copy of the fallback chain.
   */
  getFallbackChain() {
    return [...this.fallbackChain];
  }
};
660
/**
 * Build a router configuration from user options.
 *
 * @param {object} options - May carry `strategy`, `fallbackChain`, `weights`, `rules`.
 * @returns {object} Config with the same four keys; a falsy `strategy`
 *   becomes "round-robin", the other values are passed through unchanged.
 */
function createRouterConfig(options) {
  const { strategy, fallbackChain, weights, rules } = options;
  return {
    strategy: strategy || "round-robin",
    fallbackChain,
    weights,
    rules
  };
}
668
+
669
+ // src/routing/strategies/RoundRobin.ts
670
/**
 * Weighted round-robin routing strategy.
 *
 * Each eligible provider owns `ceil(weight)` consecutive slots (weight
 * defaults to 1); successive calls walk through the slots in order.
 */
var RoundRobinStrategy = class {
  name = "round-robin";
  currentIndex = 0;
  weights;

  /**
   * @param {object} [config] - Optional `weights`: provider name -> weight.
   */
  constructor(config = {}) {
    this.weights = config.weights || {};
  }

  /**
   * Number of rotation slots a provider owns.
   *
   * A missing or zero weight counts as 1. Negative and non-numeric weights
   * yield no slot. An infinite weight counts as 1 rather than never
   * terminating.
   *
   * @param {string} providerName
   * @returns {number} Non-negative integer slot count.
   */
  #slotCount(providerName) {
    const weight = Number(this.weights[providerName] || 1);
    if (!Number.isFinite(weight)) {
      return weight === Infinity ? 1 : 0;
    }
    return Math.max(0, Math.ceil(weight));
  }

  /**
   * Pick the next provider in the rotation.
   *
   * Candidates are the providers registered for `request.model`, or all
   * available providers when none is registered for it; excluded and
   * unavailable providers are removed. A provider that does not support the
   * requested model is paired with the first model it reports.
   * `context.preferredProvider`, when eligible, is returned without
   * advancing the rotation.
   *
   * @param {object} request - Must carry `model`.
   * @param {object} registry - Exposes `getProvidersForModel` and `getAvailableProviders`.
   * @param {object} [context] - Optional `excludeProviders`, `preferredProvider`.
   * @returns {object} Routing decision.
   * @throws {Error} When no provider is eligible or no eligible provider owns a slot.
   */
  route(request, registry, context) {
    let providers = registry.getProvidersForModel(request.model);
    if (providers.length === 0) {
      providers = registry.getAvailableProviders();
    }
    if (context?.excludeProviders) {
      providers = providers.filter(
        (p) => !context.excludeProviders.includes(p.name)
      );
    }
    providers = providers.filter((p) => p.isAvailable());
    if (providers.length === 0) {
      throw new Error(`No available providers for model: ${request.model}`);
    }

    const modelFor = (p) =>
      p.supportsModel(request.model) ? request.model : p.getModels()[0];
    const alternativesTo = (chosen) =>
      providers
        .filter((p) => p.name !== chosen.name)
        .slice(0, 3)
        .map((p) => ({ provider: p.name, model: modelFor(p), score: 1 }));

    if (context?.preferredProvider) {
      const preferred = providers.find(
        (p) => p.name === context.preferredProvider
      );
      if (preferred) {
        return {
          provider: preferred.name,
          model: modelFor(preferred),
          reason: "Preferred provider selected",
          alternatives: alternativesTo(preferred),
          timestamp: /* @__PURE__ */ new Date()
        };
      }
    }

    // Walk cumulative slot counts instead of materializing one array entry
    // per slot, so large weights cost nothing extra per request.
    const slots = providers.map((p) => this.#slotCount(p.name));
    const totalSlots = slots.reduce((sum, count) => sum + count, 0);
    if (totalSlots === 0) {
      // Fail before touching currentIndex so the rotation state stays valid.
      throw new Error(`No available providers for model: ${request.model}`);
    }
    const start =
      Number.isInteger(this.currentIndex) && this.currentIndex >= 0
        ? this.currentIndex
        : 0;
    const index = start % totalSlots;
    this.currentIndex = index + 1;

    let offset = index;
    let selected = providers[providers.length - 1];
    for (let i = 0; i < providers.length; i++) {
      if (offset < slots[i]) {
        selected = providers[i];
        break;
      }
      offset -= slots[i];
    }

    return {
      provider: selected.name,
      model: modelFor(selected),
      reason: `Round-robin selection (index: ${index})`,
      alternatives: alternativesTo(selected),
      timestamp: /* @__PURE__ */ new Date()
    };
  }

  /**
   * Reset the rotation index.
   */
  reset() {
    this.currentIndex = 0;
  }
};
740
+
741
+ // src/routing/strategies/Failover.ts
742
/**
 * Failover routing strategy: picks the first usable provider in an ordered
 * chain, skipping providers that were already attempted or excluded.
 *
 * `modelMappings` has the shape `{ [requestedModel]: { [providerName]: model } }`.
 */
var FailoverStrategy = class {
  name = "failover";
  chain;
  modelMappings;

  /**
   * @param {object} config - `chain` (ordered provider names) and optional `modelMappings`.
   */
  constructor(config) {
    this.chain = config.chain;
    this.modelMappings = config.modelMappings || {};
  }

  /**
   * Choose the model to use on a provider.
   *
   * Order of preference: the requested model if the provider supports it,
   * then the mapped model if the provider supports it, then the first model
   * the provider reports. Used for the primary choice and for alternatives
   * alike, so an alternative never advertises an unsupported mapped model.
   *
   * @param {object} provider
   * @param {string} providerName
   * @param {string} requestedModel
   * @returns {string} Model id.
   */
  #resolveModel(provider, providerName, requestedModel) {
    if (provider.supportsModel(requestedModel)) {
      return requestedModel;
    }
    const mapped = this.modelMappings[requestedModel]?.[providerName];
    if (mapped && provider.supportsModel(mapped)) {
      return mapped;
    }
    return provider.getModels()[0];
  }

  /**
   * Route to the first provider in the chain that is registered, available
   * and neither previously attempted nor excluded.
   *
   * @param {object} request - Must carry `model`.
   * @param {object} registry - Must expose `get(name)`.
   * @param {object} [context] - Optional `previousAttempts` (objects with
   *   `provider`) and `excludeProviders` (names).
   * @returns {object} Routing decision with up to three alternatives whose
   *   scores start at 1 and drop by 0.1 per position.
   * @throws {Error} When every provider in the chain is skipped.
   */
  route(request, registry, context) {
    const skipped = new Set(
      context?.previousAttempts?.map((a) => a.provider) || []
    );
    if (context?.excludeProviders) {
      for (const excluded of context.excludeProviders) {
        skipped.add(excluded);
      }
    }

    for (const providerName of this.chain) {
      const provider = registry.get(providerName);
      if (!provider) continue;
      if (!provider.isAvailable()) continue;
      if (skipped.has(providerName)) continue;

      const model = this.#resolveModel(provider, providerName, request.model);

      const alternatives = [];
      for (const altName of this.chain) {
        if (altName === providerName) continue;
        if (skipped.has(altName)) continue;
        const altProvider = registry.get(altName);
        if (!altProvider?.isAvailable()) continue;
        alternatives.push({
          provider: altName,
          model: this.#resolveModel(altProvider, altName, request.model),
          score: 1 - alternatives.length * 0.1
        });
        if (alternatives.length >= 3) break;
      }

      return {
        provider: providerName,
        model,
        reason: skipped.size > 0
          ? `Failover to ${providerName} after ${skipped.size} failures`
          : `Primary provider in failover chain`,
        alternatives,
        timestamp: /* @__PURE__ */ new Date()
      };
    }

    throw new Error(
      `All providers in failover chain exhausted: ${this.chain.join(", ")}`
    );
  }

  /**
   * Get the provider that follows the given one in the chain.
   *
   * @param {string} currentProvider
   * @returns {string|null} `null` when the provider is last or not in the chain.
   */
  getNextProvider(currentProvider) {
    const index = this.chain.indexOf(currentProvider);
    if (index === -1 || index >= this.chain.length - 1) {
      return null;
    }
    return this.chain[index + 1];
  }

  /**
   * @returns {Array<string>} A copy of the failover chain.
   */
  getChain() {
    return [...this.chain];
  }

  /**
   * Replace the failover chain with a copy of the given list.
   * @param {Array<string>} chain
   */
  setChain(chain) {
    this.chain = [...chain];
  }
};
825
// Module-wide encoder instance, created on first use by getEncoder().
var encoder = null;

/**
 * Return the shared "cl100k_base" encoder, creating it on the first call.
 *
 * @returns {object} The encoder returned by `get_encoding`.
 */
function getEncoder() {
  if (encoder) {
    return encoder;
  }
  encoder = get_encoding("cl100k_base");
  return encoder;
}
832
/**
 * Count the tokens in a text with the shared encoder.
 *
 * If obtaining the encoder or encoding throws, the count is approximated as
 * one token per four characters, rounded up.
 *
 * @param {string} text
 * @returns {number} Token count (exact or approximated).
 */
function countTokens(text) {
  let tokenCount;
  try {
    tokenCount = getEncoder().encode(text).length;
  } catch {
    // Best-effort fallback when the encoder is unusable.
    tokenCount = Math.ceil(text.length / 4);
  }
  return tokenCount;
}
840
/**
 * Estimate the token count of a list of chat messages.
 *
 * Adds a fixed 4 tokens per message plus the tokens of its content
 * (non-string content is JSON-serialized first; falsy content adds nothing),
 * and a fixed 2 tokens for the whole list.
 *
 * @param {Iterable<object>} messages - Objects with an optional `content`.
 * @returns {number}
 */
function countMessageTokens(messages) {
  let total = 0;
  for (const { content } of messages) {
    total += 4;
    if (!content) {
      continue;
    }
    const serialized = typeof content === "string" ? content : JSON.stringify(content);
    total += countTokens(serialized);
  }
  return total + 2;
}
853
/**
 * Estimate the token count of a request: its messages plus, for each tool,
 * the tokens of the function name, description (if any), JSON-serialized
 * parameters (if any) and a fixed 10 tokens.
 *
 * @param {Iterable<object>} messages
 * @param {Array<object>} [tools] - Objects with a `function` descriptor.
 * @returns {number}
 */
function estimateRequestTokens(messages, tools) {
  let total = countMessageTokens(messages);
  const hasTools = Boolean(tools) && tools.length > 0;
  if (!hasTools) {
    return total;
  }
  for (const tool of tools) {
    const { name, description, parameters } = tool.function;
    total += countTokens(name);
    if (description) {
      total += countTokens(description);
    }
    if (parameters) {
      total += countTokens(JSON.stringify(parameters));
    }
    total += 10;
  }
  return total;
}
869
/**
 * Cut a text down to at most `maxTokens` tokens.
 *
 * The text is returned unchanged when it already fits. Otherwise the first
 * `maxTokens` tokens are decoded by the encoder and the result is passed
 * through `TextDecoder` to obtain a string.
 *
 * @param {string} text
 * @param {number} maxTokens
 * @returns {string}
 */
function truncateToTokenLimit(text, maxTokens) {
  const enc = getEncoder();
  const allTokens = enc.encode(text);
  if (allTokens.length > maxTokens) {
    const keptBytes = enc.decode(allTokens.slice(0, maxTokens));
    return new TextDecoder().decode(keptBytes);
  }
  return text;
}
879
/**
 * Release the shared encoder, if one exists, by calling its `free()` method
 * and clearing the module-level reference.
 */
function freeEncoder() {
  if (!encoder) {
    return;
  }
  encoder.free();
  encoder = null;
}
885
+
886
+ // src/routing/strategies/CostOptimized.ts
887
/**
 * Quality score per model id, on a 0-100 scale as used by the cost-optimized
 * strategy (which divides by 100 and falls back to 50 for unknown models).
 */
var MODEL_QUALITY_SCORES = Object.fromEntries([
  // Top tier
  ["o1", 98],
  ["o1-preview", 96],
  ["claude-3-opus-20240229", 95],
  ["claude-3-5-sonnet-20241022", 94],
  ["claude-sonnet-4-20250514", 95],
  ["gpt-4o", 93],
  ["gemini-1.5-pro", 91],
  // Mid tier
  ["gpt-4-turbo", 88],
  ["gpt-4", 87],
  ["claude-3-sonnet-20240229", 85],
  ["o1-mini", 84],
  // Fast/cheap tier
  ["gpt-4o-mini", 80],
  ["claude-3-5-haiku-20241022", 79],
  ["claude-3-haiku-20240307", 75],
  ["gemini-1.5-flash", 78],
  ["gpt-3.5-turbo", 70],
  // Local models
  ["llama3", 65],
  ["llama3.1", 67],
  ["llama3.2", 68],
  ["mistral", 62]
]);
913
/**
 * Routing strategy that picks the cheapest model meeting a quality threshold.
 *
 * Config keys read by this class: `preferLocal` (default false),
 * `qualityThreshold` (default 0.6, compared against score / 100),
 * `fallbackOnBudget` (default "cheapest"; "error" makes an exceeded budget
 * throw) and `maxCostPerRequest` (optional budget).
 */
var CostOptimizedStrategy = class {
  name = "cost-optimized";
  config;

  /**
   * @param {object} [config] - Overrides for the defaults listed above.
   */
  constructor(config = {}) {
    this.config = {
      preferLocal: false,
      qualityThreshold: 0.6,
      fallbackOnBudget: "cheapest",
      ...config
    };
  }

  /**
   * Select the cheapest suitable model across all available providers.
   *
   * The request cost is estimated from the estimated input tokens and
   * `request.max_tokens` (1000 when absent) using each model's per-million
   * prices. Models without model info are ignored. If no model meets the
   * quality threshold, the threshold is dropped. If no model fits the budget
   * (`context.maxCost`, else `config.maxCostPerRequest`), the budget is
   * ignored unless `fallbackOnBudget` is "error".
   *
   * @param {object} request - Carries `messages`, optional `tools` and `max_tokens`.
   * @param {object} registry - Must expose `getAvailableProviders()`.
   * @param {object} [context] - Optional `excludeProviders`, `maxCost`.
   * @returns {object} Routing decision.
   * @throws {Error} When no provider is available, when no model has pricing
   *   information, or when the budget is exceeded in "error" mode.
   */
  route(request, registry, context) {
    let providers = registry.getAvailableProviders();
    if (context?.excludeProviders) {
      providers = providers.filter(
        (p) => !context.excludeProviders.includes(p.name)
      );
    }
    if (providers.length === 0) {
      throw new Error("No available providers");
    }

    const estimatedInputTokens = estimateRequestTokens(
      request.messages,
      request.tools
    );
    const estimatedOutputTokens = request.max_tokens || 1e3;

    const candidates = [];
    for (const provider of providers) {
      const isLocal = provider.name === "ollama" || provider.name === "lmstudio";
      for (const model of provider.getModels()) {
        const modelInfo = provider.getModelInfo(model);
        if (!modelInfo) continue;
        const inputCost = estimatedInputTokens / 1e6 * modelInfo.inputPricePerMillion;
        const outputCost = estimatedOutputTokens / 1e6 * modelInfo.outputPricePerMillion;
        candidates.push({
          provider: provider.name,
          model,
          cost: inputCost + outputCost,
          quality: (MODEL_QUALITY_SCORES[model] || 50) / 100,
          isLocal
        });
      }
    }
    if (candidates.length === 0) {
      // Without this guard the selection below dereferences `undefined`.
      throw new Error("No models with pricing information available");
    }

    const minQuality = this.config.qualityThreshold || 0;
    let filtered = candidates.filter((c) => c.quality >= minQuality);
    if (filtered.length === 0) {
      filtered = candidates;
    }

    const maxCost = context?.maxCost ?? this.config.maxCostPerRequest;
    if (maxCost !== void 0) {
      const withinBudget = filtered.filter((c) => c.cost <= maxCost);
      if (withinBudget.length > 0) {
        filtered = withinBudget;
      } else if (this.config.fallbackOnBudget === "error") {
        throw new Error(`No models within budget of $${maxCost.toFixed(4)}`);
      }
    }

    filtered.sort((a, b) => {
      if (this.config.preferLocal) {
        if (a.isLocal && !b.isLocal) return -1;
        if (!a.isLocal && b.isLocal) return 1;
      }
      return a.cost - b.cost;
    });

    const selected = filtered[0];
    return {
      provider: selected.provider,
      model: selected.model,
      reason: `Cheapest model meeting quality threshold (${(selected.quality * 100).toFixed(0)}% quality, $${selected.cost.toFixed(6)}/req)`,
      alternatives: filtered.slice(1, 4).map((c) => ({
        provider: c.provider,
        model: c.model,
        // Higher score = cheaper; the small offset avoids dividing by zero.
        score: 1 / (c.cost + 1e-4)
      })),
      timestamp: /* @__PURE__ */ new Date()
    };
  }

  /**
   * Merge the given values into the current configuration.
   * @param {object} config
   */
  setConfig(config) {
    this.config = { ...this.config, ...config };
  }

  /**
   * @returns {object} A shallow copy of the current configuration.
   */
  getConfig() {
    return { ...this.config };
  }
};
1006
+
1007
+ // src/routing/strategies/LatencyOptimized.ts
1008
/**
 * Routing strategy that prefers the provider with the lowest known latency.
 *
 * A provider's latency is the exponential moving average fed in through
 * `recordLatency` once it has at least five samples and `adaptiveRouting` is
 * enabled; otherwise the latency reported by the provider's health record is
 * used (1000 ms when that is unset).
 */
var LatencyOptimizedStrategy = class {
  name = "latency-optimized";
  config;
  latencyStats = /* @__PURE__ */ new Map();
  maxSamples = 100;
  /**
   * @param {object} [config] - Overrides for the defaults
   *   (`warmupRequests: 10`, `adaptiveRouting: true`).
   */
  constructor(config = {}) {
    this.config = {
      warmupRequests: 10,
      adaptiveRouting: true,
      ...config
    };
  }
  /**
   * Choose a provider/model pair for a request.
   *
   * @param {object} request - Request; `request.model` may be the alias "fastest".
   * @param {object} registry - Object exposing `getAvailableProviders()`.
   * @param {object} [context] - Optional `excludeProviders` and `maxLatency`.
   * @returns {object} Decision with `provider`, `model`, `reason`,
   *   `alternatives` and `timestamp`.
   * @throws {Error} When no provider is available, or none can serve the model.
   */
  route(request, registry, context) {
    let providers = registry.getAvailableProviders();
    if (context?.excludeProviders) {
      providers = providers.filter(
        (p) => !context.excludeProviders.includes(p.name)
      );
    }
    if (providers.length === 0) {
      throw new Error("No available providers");
    }
    let totalRequests = 0;
    for (const stats of this.latencyStats.values()) {
      totalRequests += stats.count;
    }
    const warmupThreshold = this.config.warmupRequests ?? 10;
    const isWarmup = warmupThreshold > 0 && totalRequests < warmupThreshold;
    const wantsSpecificModel = request.model !== "fastest";
    const candidates = [];
    for (const provider of providers) {
      const stats = this.latencyStats.get(provider.name);
      const health = provider.getHealth();
      let latency;
      let confidence;
      if (stats && stats.count >= 5 && this.config.adaptiveRouting) {
        latency = stats.avg;
        // Confidence grows linearly with sample count and saturates at 50 samples.
        confidence = Math.min(stats.count / 50, 1);
      } else {
        latency = health.latencyMs || 1e3;
        confidence = 0.3;
      }
      // One candidate per provider: the requested model when the provider can
      // serve it, or the provider's first model for the "fastest" alias.
      const models = provider.getModels();
      let model;
      if (!wantsSpecificModel) {
        model = models[0];
      } else if (provider.supportsModel(request.model)) {
        model = models.length > 0 ? request.model : void 0;
      } else {
        model = models.find((m) => m === request.model);
      }
      if (model === void 0) {
        continue;
      }
      candidates.push({ provider: provider.name, model, latency, confidence });
    }
    if (candidates.length === 0) {
      // Without this guard the code below dereferenced `undefined` and
      // surfaced an opaque TypeError to the caller.
      throw new Error(
        `No available provider supports model "${request.model}"`
      );
    }
    const maxLatency = context?.maxLatency ?? this.config.maxLatencyMs;
    let filtered = candidates;
    if (maxLatency !== void 0) {
      const withinLimit = candidates.filter((c) => c.latency <= maxLatency);
      // The limit is a preference: if nobody meets it, fall back to everyone.
      if (withinLimit.length > 0) {
        filtered = withinLimit;
      }
    }
    filtered.sort((a, b) => a.latency - b.latency);
    const toAlternative = (c) => ({
      provider: c.provider,
      model: c.model,
      score: 1 / c.latency
    });
    // During warmup, 30% of requests go to one of the three fastest
    // candidates at random so that every provider accumulates samples.
    const shouldExplore = isWarmup && Math.random() < 0.3 && filtered.length > 1;
    if (shouldExplore) {
      const randomIndex = Math.floor(
        Math.random() * Math.min(3, filtered.length)
      );
      const explored = filtered[randomIndex];
      return {
        provider: explored.provider,
        model: explored.model,
        reason: `Warmup exploration (${totalRequests}/${warmupThreshold} requests)`,
        alternatives: filtered.filter((c) => c.provider !== explored.provider).slice(0, 3).map(toAlternative),
        timestamp: /* @__PURE__ */ new Date()
      };
    }
    const selected = filtered[0];
    return {
      provider: selected.provider,
      model: selected.model,
      reason: `Fastest provider (${selected.latency.toFixed(0)}ms avg, ${(selected.confidence * 100).toFixed(0)}% confidence)`,
      alternatives: filtered.slice(1, 4).map(toAlternative),
      timestamp: /* @__PURE__ */ new Date()
    };
  }
  /**
   * Record a latency observation.
   *
   * @param {string} provider - Provider name the sample belongs to.
   * @param {number} latencyMs - Observed latency; non-finite or negative
   *   values are ignored because a single NaN would poison the average.
   */
  recordLatency(provider, latencyMs) {
    if (!Number.isFinite(latencyMs) || latencyMs < 0) {
      return;
    }
    let stats = this.latencyStats.get(provider);
    if (!stats) {
      stats = {
        count: 0,
        total: 0,
        min: Infinity,
        max: 0,
        avg: 0,
        p95: 0,
        samples: []
      };
      this.latencyStats.set(provider, stats);
    }
    stats.count++;
    stats.total += latencyMs;
    stats.min = Math.min(stats.min, latencyMs);
    stats.max = Math.max(stats.max, latencyMs);
    stats.samples.push(latencyMs);
    // Keep only the most recent `maxSamples` observations.
    if (stats.samples.length > this.maxSamples) {
      stats.samples.shift();
    }
    // Exponential moving average; the first sample seeds it.
    const alpha = 0.2;
    stats.avg = stats.count === 1 ? latencyMs : alpha * latencyMs + (1 - alpha) * stats.avg;
    // p95 is only computed once 20 samples are retained.
    if (stats.samples.length >= 20) {
      const sorted = [...stats.samples].sort((a, b) => a - b);
      stats.p95 = sorted[Math.floor(sorted.length * 0.95)];
    }
  }
  /**
   * Get latency statistics for a provider
   * @returns {object|undefined} The live stats record, or undefined if none.
   */
  getStats(provider) {
    return this.latencyStats.get(provider);
  }
  /**
   * Get all latency statistics
   * @returns {object} Map of provider name to a copy of its stats record.
   */
  getAllStats() {
    const result = {};
    for (const [provider, stats] of this.latencyStats) {
      result[provider] = { ...stats, samples: [...stats.samples] };
    }
    return result;
  }
  /**
   * Clear latency statistics
   */
  clearStats() {
    this.latencyStats.clear();
  }
  /**
   * Update configuration
   * @param {object} config - Partial settings merged over the current ones.
   */
  setConfig(config) {
    this.config = { ...this.config, ...config };
  }
};
1168
+
1169
+ // src/utils/pricing.ts
1170
/**
 * Price table keyed by model id. `input` and `output` are prices per one
 * million tokens (the cost helpers divide token counts by 1e6 before
 * multiplying by these values).
 */
var MODEL_PRICING = {
  // --- OpenAI ---
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-2024-11-20": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4o-mini-2024-07-18": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },
  "gpt-4-turbo-preview": { input: 10, output: 30 },
  "gpt-4": { input: 30, output: 60 },
  "gpt-4-32k": { input: 60, output: 120 },
  "gpt-3.5-turbo": { input: 0.5, output: 1.5 },
  "gpt-3.5-turbo-0125": { input: 0.5, output: 1.5 },
  "o1": { input: 15, output: 60 },
  "o1-preview": { input: 15, output: 60 },
  "o1-mini": { input: 3, output: 12 },

  // --- Anthropic ---
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-3-5-sonnet-latest": { input: 3, output: 15 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-3-5-haiku-20241022": { input: 0.8, output: 4 },
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  "claude-3-sonnet-20240229": { input: 3, output: 15 },

  // --- Google Gemini ---
  "gemini-1.5-pro": { input: 1.25, output: 5 },
  "gemini-1.5-pro-latest": { input: 1.25, output: 5 },
  "gemini-1.5-flash": { input: 0.075, output: 0.3 },
  "gemini-1.5-flash-latest": { input: 0.075, output: 0.3 },
  "gemini-2.0-flash-exp": { input: 0.1, output: 0.4 },
  "gemini-pro": { input: 0.5, output: 1.5 },

  // --- Mistral ---
  "mistral-large-latest": { input: 2, output: 6 },
  "mistral-medium-latest": { input: 2.7, output: 8.1 },
  "mistral-small-latest": { input: 0.2, output: 0.6 },
  "open-mistral-7b": { input: 0.25, output: 0.25 },
  "open-mixtral-8x7b": { input: 0.7, output: 0.7 },
  "open-mixtral-8x22b": { input: 2, output: 6 },

  // --- Cohere ---
  "command-r-plus": { input: 2.5, output: 10 },
  "command-r": { input: 0.5, output: 1.5 },
  "command": { input: 1, output: 2 },

  // --- Groq ---
  "llama-3.3-70b-versatile": { input: 0.59, output: 0.79 },
  "llama-3.1-70b-versatile": { input: 0.59, output: 0.79 },
  "llama-3.1-8b-instant": { input: 0.05, output: 0.08 },
  "mixtral-8x7b-32768": { input: 0.24, output: 0.24 },

  // --- Together AI ---
  "meta-llama/Llama-3.3-70B-Instruct-Turbo": { input: 0.88, output: 0.88 },
  "meta-llama/Llama-3.1-70B-Instruct-Turbo": { input: 0.88, output: 0.88 },
  "meta-llama/Llama-3.1-8B-Instruct-Turbo": { input: 0.18, output: 0.18 },
  "mistralai/Mixtral-8x7B-Instruct-v0.1": { input: 0.6, output: 0.6 },

  // --- Local models: no per-token charge ---
  "llama3": { input: 0, output: 0 },
  "llama3.1": { input: 0, output: 0 },
  "llama3.2": { input: 0, output: 0 },
  "mistral": { input: 0, output: 0 },
  "codellama": { input: 0, output: 0 },
  "phi3": { input: 0, output: 0 },
  "qwen2.5": { input: 0, output: 0 }
};
1230
/**
 * Context window per model id. Models missing from this table get the
 * 4096 fallback applied by `getModelInfo`.
 */
var MODEL_CONTEXT_WINDOWS = {
  // --- OpenAI ---
  "gpt-4o": 128_000,
  "gpt-4o-mini": 128_000,
  "gpt-4-turbo": 128_000,
  "gpt-4": 8_192,
  "gpt-4-32k": 32_768,
  "gpt-3.5-turbo": 16_385,
  "o1": 200_000,
  "o1-preview": 128_000,
  "o1-mini": 128_000,

  // --- Anthropic ---
  "claude-3-5-sonnet-20241022": 200_000,
  "claude-sonnet-4-20250514": 200_000,
  "claude-3-5-haiku-20241022": 200_000,
  "claude-3-opus-20240229": 200_000,
  "claude-3-sonnet-20240229": 200_000,
  "claude-3-haiku-20240307": 200_000,

  // --- Google ---
  "gemini-1.5-pro": 2_000_000,
  "gemini-1.5-flash": 1_000_000,
  "gemini-2.0-flash-exp": 1_000_000,
  "gemini-pro": 32_000,

  // --- Mistral ---
  "mistral-large-latest": 128_000,
  "mistral-medium-latest": 32_000,
  "mistral-small-latest": 32_000,

  // --- Groq ---
  "llama-3.3-70b-versatile": 128_000,
  "llama-3.1-70b-versatile": 131_072,
  "llama-3.1-8b-instant": 131_072,
  "mixtral-8x7b-32768": 32_768
};
1263
/**
 * Maximum output size per model id. Models missing from this table get the
 * 4096 fallback applied by `getModelInfo`.
 */
var MODEL_MAX_OUTPUT = {
  // --- OpenAI ---
  "gpt-4o": 16_384,
  "gpt-4o-mini": 16_384,
  "gpt-4-turbo": 4_096,
  "gpt-4": 8_192,
  "gpt-3.5-turbo": 4_096,
  "o1": 100_000,
  "o1-preview": 32_768,
  "o1-mini": 65_536,

  // --- Anthropic ---
  "claude-3-5-sonnet-20241022": 8_192,
  "claude-sonnet-4-20250514": 16_384,
  "claude-3-opus-20240229": 4_096,

  // --- Google ---
  "gemini-1.5-pro": 8_192,
  "gemini-1.5-flash": 8_192
};
1281
/**
 * Compute the cost of a completed request from its reported token usage.
 *
 * @param {string} model - Model id to look up in `MODEL_PRICING`.
 * @param {{prompt_tokens: number, completion_tokens: number}} usage - Token counts.
 * @returns {number} Cost in the table's currency; 0 when the model has no
 *   pricing entry.
 */
function calculateCost(model, usage) {
  // Own-property check: a plain `MODEL_PRICING[model]` also resolves inherited
  // keys such as "constructor" or "toString", which produced a NaN cost.
  if (!Object.hasOwn(MODEL_PRICING, model)) {
    return 0;
  }
  const pricing = MODEL_PRICING[model];
  // Prices are per million tokens.
  const inputCost = usage.prompt_tokens / 1e6 * pricing.input;
  const outputCost = usage.completion_tokens / 1e6 * pricing.output;
  return inputCost + outputCost;
}
1290
/**
 * Estimate the cost of a request before it is sent.
 *
 * @param {string} model - Model id to look up in `MODEL_PRICING`.
 * @param {number} estimatedInputTokens - Expected prompt size in tokens.
 * @param {number} estimatedOutputTokens - Expected completion size in tokens.
 * @returns {number} Estimated cost; 0 when the model has no pricing entry.
 */
function estimateCost(model, estimatedInputTokens, estimatedOutputTokens) {
  // Own-property check so inherited keys ("constructor", "toString", ...)
  // are treated as unknown models instead of yielding NaN.
  if (!Object.hasOwn(MODEL_PRICING, model)) {
    return 0;
  }
  const pricing = MODEL_PRICING[model];
  // Prices are per million tokens.
  const inputCost = estimatedInputTokens / 1e6 * pricing.input;
  const outputCost = estimatedOutputTokens / 1e6 * pricing.output;
  return inputCost + outputCost;
}
1299
/**
 * Look up the pricing entry for a model.
 *
 * @param {string} model - Model id.
 * @returns {{input: number, output: number}|null} The pricing entry, or null
 *   when the model is not in `MODEL_PRICING`.
 */
function getModelPricing(model) {
  // Own-property check: otherwise "toString"/"constructor" return a function
  // inherited from Object.prototype instead of null.
  return Object.hasOwn(MODEL_PRICING, model) ? MODEL_PRICING[model] || null : null;
}
1302
/**
 * Assemble the metadata record for a model from the static tables.
 *
 * @param {string} model - Model id.
 * @param {string} provider - Provider name, echoed in the result and passed
 *   to `getModelCapabilities`.
 * @returns {object} Record with `id`, `provider`, `contextWindow`,
 *   `maxOutputTokens`, per-million prices and `capabilities`. Unknown models
 *   get zero prices and 4096 for both limits.
 */
function getModelInfo(model, provider) {
  // Only own keys count as table entries; inherited Object.prototype members
  // (e.g. "constructor") must fall through to the defaults.
  const lookup = (table) => Object.hasOwn(table, model) ? table[model] : void 0;
  const pricing = lookup(MODEL_PRICING) || { input: 0, output: 0 };
  const contextWindow = lookup(MODEL_CONTEXT_WINDOWS) || 4096;
  const maxOutput = lookup(MODEL_MAX_OUTPUT) || 4096;
  return {
    id: model,
    provider,
    contextWindow,
    maxOutputTokens: maxOutput,
    inputPricePerMillion: pricing.input,
    outputPricePerMillion: pricing.output,
    capabilities: getModelCapabilities(model, provider)
  };
}
1316
/**
 * Derive a capability record from a model id and provider name.
 *
 * Checks run in order and the first match wins: GPT-4o / GPT-4 Turbo, the o1
 * family, Claude 3 and Claude 4, Gemini, then Ollama-hosted models.
 *
 * @param {string} model - Model id.
 * @param {string} provider - Provider name; only "ollama" is special-cased.
 * @returns {{streaming: boolean, tools: boolean, vision: boolean,
 *   json_mode: boolean, system_prompts: boolean}} Capability flags.
 */
function getModelCapabilities(model, provider) {
  const defaults = {
    streaming: true,
    tools: true,
    vision: false,
    json_mode: true,
    system_prompts: true
  };
  if (model.includes("gpt-4o") || model.includes("gpt-4-turbo")) {
    return { ...defaults, vision: true };
  }
  // Match "o1" only as a separate name segment ("o1", "o1-mini", "openai/o1");
  // a plain substring test also caught unrelated ids such as "solar-pro1".
  if (/(^|[^a-z0-9])o1($|[^a-z0-9])/i.test(model)) {
    return {
      streaming: false,
      // o1 doesn't support streaming
      tools: false,
      vision: false,
      json_mode: false,
      system_prompts: false
      // o1 uses developer messages
    };
  }
  // Claude 4 ids ("claude-sonnet-4-*", "claude-opus-4-*") do not contain
  // "claude-3" but accept image input just like the Claude 3 family.
  if (/claude-(?:3|sonnet-4|opus-4)/.test(model)) {
    return { ...defaults, vision: true };
  }
  if (model.includes("gemini")) {
    return { ...defaults, vision: true };
  }
  if (provider === "ollama") {
    return {
      streaming: true,
      tools: false,
      // Most Ollama models don't support tools natively
      vision: model.includes("llava") || model.includes("bakllava"),
      json_mode: true,
      system_prompts: true
    };
  }
  return defaults;
}
1356
/**
 * Return the model with the lowest average of input and output price.
 *
 * Models without a pricing entry are skipped; on a tie the earliest model in
 * the list wins. The second parameter is accepted but not consulted.
 *
 * @param {string[]} models - Candidate model ids.
 * @param {*} [_requiredCapabilities] - Unused.
 * @returns {string|null} Cheapest priced model, or null if none is priced.
 */
function findCheapestModel(models, _requiredCapabilities) {
  let best = null;
  for (const candidate of models) {
    const price = MODEL_PRICING[candidate];
    if (price) {
      const mean = (price.input + price.output) / 2;
      const isBetter = !best || mean < best.cost;
      if (isBetter) {
        best = { model: candidate, cost: mean };
      }
    }
  }
  return best?.model || null;
}
1368
/**
 * Return a sorted copy of `models`, ordered by the average of input and
 * output price. Models without a pricing entry are treated as costing 0.
 *
 * @param {string[]} models - Model ids; the input array is not modified.
 * @param {string} [direction="asc"] - "asc" for cheapest first; any other
 *   value sorts most expensive first.
 * @returns {string[]} New sorted array.
 */
function sortModelsByCost(models, direction = "asc") {
  const averagePrice = (id) => {
    const entry = MODEL_PRICING[id] || { input: 0, output: 0 };
    return (entry.input + entry.output) / 2;
  };
  const sorted = [...models];
  sorted.sort((left, right) => {
    const leftCost = averagePrice(left);
    const rightCost = averagePrice(right);
    if (direction === "asc") {
      return leftCost - rightCost;
    }
    return rightCost - leftCost;
  });
  return sorted;
}
1377
/**
 * Hash the output-affecting parts of a chat request.
 *
 * Besides model, messages, temperature, max_tokens and tools, the hash now
 * covers the remaining sampling/format parameters that the providers forward
 * (`top_p`, penalties, `stop`, `response_format`, `seed`). Previously two
 * requests that differed only in one of those produced the same hash.
 * Fields that are undefined are dropped by `JSON.stringify`, so requests that
 * do not set the new fields hash exactly as before.
 *
 * @param {object} request - Chat request with `model` and `messages`.
 * @returns {string} Hexadecimal murmurhash v3 of the normalized request.
 */
function hashRequest(request) {
  const normalized = {
    model: request.model,
    messages: request.messages.map((m) => ({
      role: m.role,
      content: normalizeContent(m.content)
    })),
    // An omitted temperature hashes the same as an explicit 1.
    temperature: request.temperature ?? 1,
    max_tokens: request.max_tokens,
    tools: request.tools ? JSON.stringify(request.tools) : void 0,
    tool_choice: request.tool_choice ? JSON.stringify(request.tool_choice) : void 0,
    top_p: request.top_p,
    frequency_penalty: request.frequency_penalty,
    presence_penalty: request.presence_penalty,
    stop: request.stop,
    response_format: request.response_format,
    seed: request.seed
  };
  const str = JSON.stringify(normalized);
  return murmurhash.v3(str).toString(16);
}
1392
/**
 * Reduce message content to a string for hashing.
 *
 * @param {*} content - String, null/undefined, or any JSON-serializable value.
 * @returns {string} The string itself, "" for null/undefined, otherwise the
 *   JSON serialization of the value.
 */
function normalizeContent(content) {
  // `== null` is true for exactly null and undefined.
  if (content == null) {
    return "";
  }
  return typeof content === "string" ? content : JSON.stringify(content);
}
1401
/**
 * Generate a short, non-cryptographic identifier.
 *
 * @param {string} [prefix="gw"] - Prefix placed before the id, joined with
 *   "-". An empty prefix yields the bare id with no leading dash.
 * @returns {string} `<prefix>-<base36 timestamp><6 base36 chars>`.
 */
function generateId(prefix = "gw") {
  const timestamp = Date.now().toString(36);
  // The base36 fraction can have fewer than six digits; pad so the id length
  // does not vary from call to call.
  const random = Math.random().toString(36).substring(2, 8).padEnd(6, "0");
  const id = `${timestamp}${random}`;
  // Skip the separator for an empty prefix; otherwise callers that add their
  // own prefix end up with a doubled dash ("chatcmpl--...").
  return prefix ? `${prefix}-${id}` : id;
}
1406
/**
 * Generate an id for a chat completion response.
 *
 * @returns {string} "chatcmpl-" followed by an id generated with an empty prefix.
 */
function generateRequestId() {
  const suffix = generateId("");
  return "chatcmpl-" + suffix;
}
1409
/**
 * Build the cache key for a request.
 *
 * @param {string} provider - Provider name.
 * @param {string} model - Model id.
 * @param {string} requestHash - Hash of the request.
 * @returns {string} Key of the form `gw:cache:<provider>:<model>:<hash>`.
 */
function generateCacheKey(provider, model, requestHash) {
  return "gw:cache:".concat(provider, ":", model, ":", requestHash);
}
1412
/**
 * Hash a string with murmurhash v3.
 *
 * @param {string} str - Input text.
 * @returns {string} The hash rendered in hexadecimal.
 */
function hash(str) {
  const digest = murmurhash.v3(str);
  return digest.toString(16);
}
1415
/**
 * Derive a fingerprint string from a configuration object.
 *
 * @param {*} config - Value that is JSON-serialized and hashed.
 * @returns {string} "fp_" followed by the hexadecimal murmurhash v3 of the
 *   serialized config.
 */
function createSystemFingerprint(config) {
  const serialized = JSON.stringify(config);
  const digest = murmurhash.v3(serialized).toString(16);
  return "fp_" + digest;
}
1419
+
1420
+ // src/providers/Provider.ts
1421
/**
 * Base class for providers: holds the provider config and a small health
 * record. `healthCheck` calls `this.chat(...)`, which this class does not
 * define; subclasses supply it.
 */
var Provider = class {
  name;
  config;
  health;
  /**
   * @param {object} config - Provider settings; `name` and `models` are read here.
   */
  constructor(config) {
    this.name = config.name;
    this.config = config;
    // Every provider starts out healthy with no recorded latency.
    this.health = {
      status: "healthy",
      latencyMs: 0,
      lastCheck: /* @__PURE__ */ new Date(),
      errorRate: 0,
      consecutiveFailures: 0
    };
  }
  /**
   * Whether `model` is in this provider's configured model list.
   */
  supportsModel(model) {
    const { models } = this.config;
    return models.includes(model);
  }
  /**
   * Copy of the configured model list.
   */
  getModels() {
    return Array.from(this.config.models);
  }
  /**
   * Shallow copy of the current health record.
   */
  getHealth() {
    return Object.assign({}, this.health);
  }
  /**
   * Probe the provider with a one-token chat request and replace the health
   * record with the outcome.
   *
   * @returns {Promise<object>} The new health record.
   */
  async healthCheck() {
    const startedAt = Date.now();
    let succeeded = true;
    try {
      await this.chat({
        model: this.config.models[0],
        messages: [{ role: "user", content: "hi" }],
        max_tokens: 1
      });
    } catch {
      succeeded = false;
    }
    const latencyMs = Date.now() - startedAt;
    const previous = this.health;
    if (succeeded) {
      this.health = {
        status: "healthy",
        latencyMs,
        lastCheck: /* @__PURE__ */ new Date(),
        errorRate: Math.max(0, previous.errorRate - 0.1),
        consecutiveFailures: 0
      };
    } else {
      // The status looks at the failure count *before* this failure is
      // added: with two earlier failures, this one marks it unhealthy.
      this.health = {
        status: previous.consecutiveFailures >= 2 ? "unhealthy" : "degraded",
        latencyMs,
        lastCheck: /* @__PURE__ */ new Date(),
        errorRate: Math.min(1, previous.errorRate + 0.2),
        consecutiveFailures: previous.consecutiveFailures + 1
      };
    }
    return this.health;
  }
  /**
   * Fold the outcome of a real request into the health record.
   *
   * @param {boolean} success - Whether the request succeeded.
   * @param {number} latencyMs - Request duration; only used on success.
   */
  updateHealth(success, latencyMs) {
    const previous = this.health;
    const checkedAt = /* @__PURE__ */ new Date();
    if (!success) {
      // Failures keep the previous latency and only move the error counters.
      this.health = {
        ...previous,
        status: previous.consecutiveFailures >= 2 ? "unhealthy" : "degraded",
        lastCheck: checkedAt,
        errorRate: Math.min(1, previous.errorRate + 0.1),
        consecutiveFailures: previous.consecutiveFailures + 1
      };
      return;
    }
    // Average the new latency with the previous value; the first sample is
    // taken as-is.
    const blendedLatency = previous.latencyMs > 0 ? (previous.latencyMs + latencyMs) / 2 : latencyMs;
    this.health = {
      status: "healthy",
      latencyMs: blendedLatency,
      lastCheck: checkedAt,
      errorRate: Math.max(0, previous.errorRate - 0.05),
      consecutiveFailures: 0
    };
  }
  /**
   * True only when the status is "healthy".
   */
  isHealthy() {
    const { status } = this.health;
    return status === "healthy";
  }
  /**
   * True unless the status is "unhealthy" (healthy and degraded both count).
   */
  isAvailable() {
    const { status } = this.health;
    return status !== "unhealthy";
  }
};
1520
+
1521
+ // src/providers/registry/OpenAIProvider.ts
1522
/** Model ids the OpenAI provider serves when the caller does not pass `models`. */
var DEFAULT_OPENAI_MODELS = [
  // GPT family
  "gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo",
  // o1 family
  "o1", "o1-mini", "o1-preview"
];
1532
/**
 * Provider that talks to the OpenAI chat completions HTTP API.
 */
var OpenAIProvider = class extends Provider {
  apiKey;
  baseUrl;
  organization;
  project;
  timeout;
  /**
   * @param {object} [options] - `apiKey` (falls back to the OPENAI_API_KEY
   *   environment variable), `baseUrl`, `models`, `timeout`, `maxRetries`,
   *   `headers`, `organization`, `project`.
   * @throws {Error} When no API key is available.
   */
  constructor(options = {}) {
    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
    if (!apiKey) {
      throw new Error("OpenAI API key is required");
    }
    const config = {
      name: "openai",
      apiKey,
      baseUrl: options.baseUrl || "https://api.openai.com/v1",
      models: options.models || DEFAULT_OPENAI_MODELS,
      timeout: options.timeout || 6e4,
      // `??` rather than `||`: an explicit 0 means "no retries" and must not
      // be replaced by the default.
      maxRetries: options.maxRetries ?? 3,
      headers: options.headers
    };
    super(config);
    this.apiKey = apiKey;
    this.baseUrl = config.baseUrl;
    this.organization = options.organization;
    this.project = options.project;
    this.timeout = config.timeout;
  }
  /**
   * Execute a chat completion request
   *
   * @param {object} request - Gateway chat request.
   * @returns {Promise<object>} Completion in gateway format.
   * @throws {ProviderError} On HTTP errors, timeouts and other failures.
   */
  async chat(request) {
    const start = Date.now();
    try {
      const response = await this.makeRequest("/chat/completions", {
        method: "POST",
        body: JSON.stringify(this.transformRequest(request))
      });
      if (!response.ok) {
        throw await this.parseError(response);
      }
      const data = await response.json();
      const result = this.transformResponse(data, request.model);
      this.updateHealth(true, Date.now() - start);
      return result;
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    }
  }
  /**
   * Execute a streaming chat completion request
   *
   * Yields one gateway chunk per `data:` line of the server-sent event
   * stream. Lines that are not valid JSON are skipped.
   *
   * @param {object} request - Gateway chat request.
   * @throws {ProviderError} On HTTP errors, timeouts and other failures.
   */
  async *chatStream(request) {
    const start = Date.now();
    let reader;
    try {
      const response = await this.makeRequest("/chat/completions", {
        method: "POST",
        body: JSON.stringify({
          ...this.transformRequest(request),
          stream: true,
          stream_options: { include_usage: true }
        })
      });
      if (!response.ok) {
        throw await this.parseError(response);
      }
      reader = response.body?.getReader();
      if (!reader) {
        throw new ProviderError("No response body", "openai");
      }
      const decoder = new TextDecoder();
      let buffer = "";
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        // The last piece may be an incomplete line; keep it for the next read.
        buffer = lines.pop() || "";
        for (const line of lines) {
          const chunk = this.parseStreamLine(line, request.model);
          // Yield outside the parser's try/catch so an error thrown into the
          // generator by the consumer is not silently swallowed.
          if (chunk) yield chunk;
        }
      }
      // Flush the decoder and handle a final line that arrived without a
      // trailing newline; it used to be dropped.
      buffer += decoder.decode();
      const lastChunk = this.parseStreamLine(buffer, request.model);
      if (lastChunk) yield lastChunk;
      this.updateHealth(true, Date.now() - start);
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    } finally {
      // Runs on completion, on failure, and when the consumer stops iterating
      // early; cancelling releases the underlying response body.
      if (reader) {
        await reader.cancel().catch(() => {
        });
      }
    }
  }
  /**
   * Convert one line of the event stream into a gateway chunk.
   *
   * @param {string} line - Raw line from the response body.
   * @param {string} model - Requested model, used as a fallback in the chunk.
   * @returns {object|null} The chunk, or null for blank lines, the
   *   "[DONE]" sentinel, non-data lines and unparseable payloads.
   */
  parseStreamLine(line, model) {
    const trimmed = line.trim();
    if (!trimmed || trimmed === "data: [DONE]") return null;
    if (!trimmed.startsWith("data: ")) return null;
    try {
      return this.transformChunk(JSON.parse(trimmed.slice(6)), model);
    } catch {
      // Best effort: a malformed event must not abort the whole stream.
      return null;
    }
  }
  /**
   * Get model information
   *
   * @returns {object|null} Model metadata, or null for unsupported models.
   */
  getModelInfo(model) {
    if (!this.supportsModel(model)) {
      return null;
    }
    return getModelInfo(model, "openai");
  }
  /**
   * Make an HTTP request to the OpenAI API
   *
   * The timeout covers the time until `fetch` resolves; the timer is cleared
   * as soon as it does.
   *
   * @param {string} path - Path appended to the base URL.
   * @param {object} options - Extra `fetch` options (method, body, ...).
   * @returns {Promise<Response>} The fetch response.
   */
  async makeRequest(path, options) {
    const headers = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`,
      // Caller-supplied headers come last and may override the two above.
      ...this.config.headers
    };
    if (this.organization) {
      headers["OpenAI-Organization"] = this.organization;
    }
    if (this.project) {
      headers["OpenAI-Project"] = this.project;
    }
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);
    try {
      return await fetch(`${this.baseUrl}${path}`, {
        ...options,
        headers,
        signal: controller.signal
      });
    } finally {
      clearTimeout(timeoutId);
    }
  }
  /**
   * Transform gateway request to OpenAI format
   *
   * Copies `model` and `messages`, plus each optional field that is defined.
   */
  transformRequest(request) {
    const transformed = {
      model: request.model,
      messages: request.messages
    };
    const optionalFields = [
      "temperature",
      "max_tokens",
      "top_p",
      "frequency_penalty",
      "presence_penalty",
      "stop",
      "tools",
      "tool_choice",
      "response_format",
      "seed",
      "user"
    ];
    for (const field of optionalFields) {
      if (request[field] !== void 0) {
        transformed[field] = request[field];
      }
    }
    return transformed;
  }
  /**
   * Transform OpenAI response to gateway format
   *
   * Missing `id`, `created` or `model` fields are filled with generated or
   * requested values.
   */
  transformResponse(data, model) {
    return {
      id: data.id || generateRequestId(),
      object: "chat.completion",
      created: data.created || Math.floor(Date.now() / 1e3),
      model: data.model || model,
      choices: data.choices,
      usage: data.usage,
      system_fingerprint: data.system_fingerprint
    };
  }
  /**
   * Transform OpenAI stream chunk to gateway format
   */
  transformChunk(data, model) {
    return {
      id: data.id || generateRequestId(),
      object: "chat.completion.chunk",
      created: data.created || Math.floor(Date.now() / 1e3),
      model: data.model || model,
      choices: data.choices,
      system_fingerprint: data.system_fingerprint,
      usage: data.usage
    };
  }
  /**
   * Parse error response from OpenAI
   *
   * @param {Response} response - Non-OK response.
   * @returns {Promise<ProviderError>} Error marked retryable for 429 and 5xx.
   */
  async parseError(response) {
    let message = `OpenAI API error: ${response.status}`;
    const retryable = response.status >= 500 || response.status === 429;
    try {
      const data = await response.json();
      if (data.error?.message) {
        message = data.error.message;
      }
    } catch {
      // The body is not JSON; keep the generic status message.
    }
    return new ProviderError(message, "openai", void 0, retryable);
  }
  /**
   * Wrap unknown errors
   *
   * @param {*} error - Anything thrown while talking to the API.
   * @returns {ProviderError} The error itself if already a ProviderError,
   *   otherwise a wrapper.
   */
  wrapError(error) {
    if (error instanceof ProviderError) {
      return error;
    }
    if (error instanceof Error) {
      const isTimeout = error.name === "AbortError" || error.message.includes("timeout");
      return new ProviderError(
        error.message,
        "openai",
        error,
        isTimeout
        // Timeouts are retryable
      );
    }
    return new ProviderError("Unknown error", "openai", void 0, true);
  }
};
1771
+
1772
+ // src/providers/registry/AnthropicProvider.ts
1773
/** Model ids the Anthropic provider serves when the caller does not pass `models`. */
var DEFAULT_ANTHROPIC_MODELS = [
  // Sonnet 3.5 / Sonnet 4
  "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-latest", "claude-sonnet-4-20250514",
  // Haiku 3.5
  "claude-3-5-haiku-20241022",
  // Claude 3
  "claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"
];
/** Value sent in the `anthropic-version` request header. */
var ANTHROPIC_API_VERSION = "2023-06-01";
1783
+ var AnthropicProvider = class extends Provider {
1784
+ apiKey;
1785
+ baseUrl;
1786
+ timeout;
1787
+ anthropicBeta;
1788
+ constructor(options = {}) {
1789
+ const apiKey = options.apiKey || process.env.ANTHROPIC_API_KEY;
1790
+ if (!apiKey) {
1791
+ throw new Error("Anthropic API key is required");
1792
+ }
1793
+ const config = {
1794
+ name: "anthropic",
1795
+ apiKey,
1796
+ baseUrl: options.baseUrl || "https://api.anthropic.com",
1797
+ models: options.models || DEFAULT_ANTHROPIC_MODELS,
1798
+ timeout: options.timeout || 6e4,
1799
+ maxRetries: options.maxRetries || 3,
1800
+ headers: options.headers
1801
+ };
1802
+ super(config);
1803
+ this.apiKey = apiKey;
1804
+ this.baseUrl = config.baseUrl;
1805
+ this.timeout = config.timeout;
1806
+ this.anthropicBeta = options.anthropicBeta;
1807
+ }
1808
+ /**
1809
+ * Execute a chat completion request
1810
+ */
1811
+ async chat(request) {
1812
+ const start = Date.now();
1813
+ try {
1814
+ const anthropicRequest = this.transformToAnthropic(request);
1815
+ const response = await this.makeRequest("/v1/messages", {
1816
+ method: "POST",
1817
+ body: JSON.stringify(anthropicRequest)
1818
+ });
1819
+ if (!response.ok) {
1820
+ const error = await this.parseError(response);
1821
+ throw error;
1822
+ }
1823
+ const data = await response.json();
1824
+ const result = this.transformFromAnthropic(data, request.model);
1825
+ this.updateHealth(true, Date.now() - start);
1826
+ return result;
1827
+ } catch (error) {
1828
+ this.updateHealth(false, Date.now() - start);
1829
+ throw this.wrapError(error);
1830
+ }
1831
+ }
1832
+ /**
1833
+ * Execute a streaming chat completion request
1834
+ */
1835
+ async *chatStream(request) {
1836
+ const start = Date.now();
1837
+ try {
1838
+ const anthropicRequest = this.transformToAnthropic(request);
1839
+ const response = await this.makeRequest("/v1/messages", {
1840
+ method: "POST",
1841
+ body: JSON.stringify({
1842
+ ...anthropicRequest,
1843
+ stream: true
1844
+ })
1845
+ });
1846
+ if (!response.ok) {
1847
+ const error = await this.parseError(response);
1848
+ throw error;
1849
+ }
1850
+ const reader = response.body?.getReader();
1851
+ if (!reader) {
1852
+ throw new ProviderError("No response body", "anthropic");
1853
+ }
1854
+ const decoder = new TextDecoder();
1855
+ let buffer = "";
1856
+ const requestId = generateRequestId();
1857
+ let inputTokens = 0;
1858
+ let outputTokens = 0;
1859
+ while (true) {
1860
+ const { done, value } = await reader.read();
1861
+ if (done) break;
1862
+ buffer += decoder.decode(value, { stream: true });
1863
+ const lines = buffer.split("\n");
1864
+ buffer = lines.pop() || "";
1865
+ for (const line of lines) {
1866
+ const trimmed = line.trim();
1867
+ if (!trimmed || !trimmed.startsWith("data: ")) continue;
1868
+ try {
1869
+ const event = JSON.parse(trimmed.slice(6));
1870
+ const chunk = this.transformStreamEvent(
1871
+ event,
1872
+ request.model,
1873
+ requestId
1874
+ );
1875
+ if (event.type === "message_start" && event.message?.usage) {
1876
+ inputTokens = event.message.usage.input_tokens || 0;
1877
+ }
1878
+ if (event.type === "message_delta" && event.usage) {
1879
+ outputTokens = event.usage.output_tokens || 0;
1880
+ }
1881
+ if (chunk) {
1882
+ if (event.type === "message_stop") {
1883
+ chunk.usage = {
1884
+ prompt_tokens: inputTokens,
1885
+ completion_tokens: outputTokens,
1886
+ total_tokens: inputTokens + outputTokens
1887
+ };
1888
+ }
1889
+ yield chunk;
1890
+ }
1891
+ } catch {
1892
+ }
1893
+ }
1894
+ }
1895
+ this.updateHealth(true, Date.now() - start);
1896
+ } catch (error) {
1897
+ this.updateHealth(false, Date.now() - start);
1898
+ throw this.wrapError(error);
1899
+ }
1900
+ }
1901
+ /**
1902
+ * Get model information
1903
+ */
1904
+ getModelInfo(model) {
1905
+ if (!this.supportsModel(model)) {
1906
+ return null;
1907
+ }
1908
+ return getModelInfo(model, "anthropic");
1909
+ }
1910
+ /**
1911
+ * Make an HTTP request to the Anthropic API
1912
+ */
1913
+ async makeRequest(path, options) {
1914
+ const headers = {
1915
+ "Content-Type": "application/json",
1916
+ "x-api-key": this.apiKey,
1917
+ "anthropic-version": ANTHROPIC_API_VERSION,
1918
+ ...this.config.headers
1919
+ };
1920
+ if (this.anthropicBeta && this.anthropicBeta.length > 0) {
1921
+ headers["anthropic-beta"] = this.anthropicBeta.join(",");
1922
+ }
1923
+ const controller = new AbortController();
1924
+ const timeoutId = setTimeout(() => controller.abort(), this.timeout);
1925
+ try {
1926
+ return await fetch(`${this.baseUrl}${path}`, {
1927
+ ...options,
1928
+ headers,
1929
+ signal: controller.signal
1930
+ });
1931
+ } finally {
1932
+ clearTimeout(timeoutId);
1933
+ }
1934
+ }
1935
+ /**
1936
+ * Transform OpenAI-compatible request to Anthropic format
1937
+ */
1938
+ transformToAnthropic(request) {
1939
+ const { messages, systemPrompt } = this.extractSystemPrompt(
1940
+ request.messages
1941
+ );
1942
+ const transformed = {
1943
+ model: request.model,
1944
+ messages: messages.map((m) => this.transformMessage(m)),
1945
+ max_tokens: request.max_tokens || 4096
1946
+ };
1947
+ if (systemPrompt) {
1948
+ transformed.system = systemPrompt;
1949
+ }
1950
+ if (request.temperature !== void 0) {
1951
+ transformed.temperature = request.temperature;
1952
+ }
1953
+ if (request.top_p !== void 0) {
1954
+ transformed.top_p = request.top_p;
1955
+ }
1956
+ if (request.stop !== void 0) {
1957
+ transformed.stop_sequences = Array.isArray(request.stop) ? request.stop : [request.stop];
1958
+ }
1959
+ if (request.tools && request.tools.length > 0) {
1960
+ transformed.tools = request.tools.map((tool) => ({
1961
+ name: tool.function.name,
1962
+ description: tool.function.description,
1963
+ input_schema: tool.function.parameters || { type: "object" }
1964
+ }));
1965
+ if (request.tool_choice) {
1966
+ if (request.tool_choice === "auto") {
1967
+ transformed.tool_choice = { type: "auto" };
1968
+ } else if (request.tool_choice === "required") {
1969
+ transformed.tool_choice = { type: "any" };
1970
+ } else if (request.tool_choice === "none") {
1971
+ delete transformed.tools;
1972
+ } else if (typeof request.tool_choice === "object" && request.tool_choice.function) {
1973
+ transformed.tool_choice = {
1974
+ type: "tool",
1975
+ name: request.tool_choice.function.name
1976
+ };
1977
+ }
1978
+ }
1979
+ }
1980
+ return transformed;
1981
+ }
1982
+ /**
1983
+ * Extract system prompt from messages
1984
+ */
1985
+ extractSystemPrompt(messages) {
1986
+ const systemMessages = messages.filter((m) => m.role === "system");
1987
+ const otherMessages = messages.filter((m) => m.role !== "system");
1988
+ const systemPrompt = systemMessages.length > 0 ? systemMessages.map(
1989
+ (m) => typeof m.content === "string" ? m.content : JSON.stringify(m.content)
1990
+ ).join("\n") : null;
1991
+ return { messages: otherMessages, systemPrompt };
1992
+ }
1993
+ /**
1994
+ * Transform a single message to Anthropic format
1995
+ */
1996
+ transformMessage(message) {
1997
+ if (message.role === "tool") {
1998
+ return {
1999
+ role: "user",
2000
+ content: [
2001
+ {
2002
+ type: "tool_result",
2003
+ tool_use_id: message.tool_call_id,
2004
+ content: typeof message.content === "string" ? message.content : JSON.stringify(message.content)
2005
+ }
2006
+ ]
2007
+ };
2008
+ }
2009
+ if (message.role === "assistant" && message.tool_calls) {
2010
+ const content = [];
2011
+ if (message.content) {
2012
+ content.push({
2013
+ type: "text",
2014
+ text: typeof message.content === "string" ? message.content : JSON.stringify(message.content)
2015
+ });
2016
+ }
2017
+ for (const toolCall of message.tool_calls) {
2018
+ content.push({
2019
+ type: "tool_use",
2020
+ id: toolCall.id,
2021
+ name: toolCall.function.name,
2022
+ input: JSON.parse(toolCall.function.arguments)
2023
+ });
2024
+ }
2025
+ return { role: "assistant", content };
2026
+ }
2027
+ return {
2028
+ role: message.role,
2029
+ content: typeof message.content === "string" ? message.content : JSON.stringify(message.content)
2030
+ };
2031
+ }
2032
+ /**
2033
+ * Transform Anthropic response to OpenAI-compatible format
2034
+ */
2035
+ transformFromAnthropic(data, model) {
2036
+ const content = data.content;
2037
+ let textContent = "";
2038
+ const toolCalls = [];
2039
+ for (const block of content) {
2040
+ if (block.type === "text") {
2041
+ textContent += block.text;
2042
+ } else if (block.type === "tool_use") {
2043
+ toolCalls.push({
2044
+ id: block.id,
2045
+ type: "function",
2046
+ function: {
2047
+ name: block.name,
2048
+ arguments: JSON.stringify(block.input)
2049
+ }
2050
+ });
2051
+ }
2052
+ }
2053
+ const message = {
2054
+ role: "assistant",
2055
+ content: textContent || null
2056
+ };
2057
+ if (toolCalls.length > 0) {
2058
+ message.tool_calls = toolCalls;
2059
+ }
2060
+ const stopReason = data.stop_reason;
2061
+ let finishReason = null;
2062
+ if (stopReason === "end_turn") {
2063
+ finishReason = "stop";
2064
+ } else if (stopReason === "max_tokens") {
2065
+ finishReason = "length";
2066
+ } else if (stopReason === "tool_use") {
2067
+ finishReason = "tool_calls";
2068
+ }
2069
+ const usage = data.usage;
2070
+ return {
2071
+ id: data.id || generateRequestId(),
2072
+ object: "chat.completion",
2073
+ created: Math.floor(Date.now() / 1e3),
2074
+ model: data.model || model,
2075
+ choices: [
2076
+ {
2077
+ index: 0,
2078
+ message,
2079
+ finish_reason: finishReason
2080
+ }
2081
+ ],
2082
+ usage: {
2083
+ prompt_tokens: usage.input_tokens,
2084
+ completion_tokens: usage.output_tokens,
2085
+ total_tokens: usage.input_tokens + usage.output_tokens
2086
+ }
2087
+ };
2088
+ }
2089
+ /**
2090
+ * Transform Anthropic stream event to OpenAI-compatible chunk
2091
+ */
2092
+ transformStreamEvent(event, model, requestId) {
2093
+ const eventType = event.type;
2094
+ if (eventType === "content_block_delta") {
2095
+ const delta = event.delta;
2096
+ if (delta.type === "text_delta") {
2097
+ return {
2098
+ id: requestId,
2099
+ object: "chat.completion.chunk",
2100
+ created: Math.floor(Date.now() / 1e3),
2101
+ model,
2102
+ choices: [
2103
+ {
2104
+ index: 0,
2105
+ delta: {
2106
+ content: delta.text
2107
+ },
2108
+ finish_reason: null
2109
+ }
2110
+ ]
2111
+ };
2112
+ }
2113
+ if (delta.type === "input_json_delta") {
2114
+ return {
2115
+ id: requestId,
2116
+ object: "chat.completion.chunk",
2117
+ created: Math.floor(Date.now() / 1e3),
2118
+ model,
2119
+ choices: [
2120
+ {
2121
+ index: 0,
2122
+ delta: {},
2123
+ finish_reason: null
2124
+ }
2125
+ ]
2126
+ };
2127
+ }
2128
+ }
2129
+ if (eventType === "content_block_start") {
2130
+ const contentBlock = event.content_block;
2131
+ if (contentBlock?.type === "tool_use") {
2132
+ return {
2133
+ id: requestId,
2134
+ object: "chat.completion.chunk",
2135
+ created: Math.floor(Date.now() / 1e3),
2136
+ model,
2137
+ choices: [
2138
+ {
2139
+ index: 0,
2140
+ delta: {
2141
+ tool_calls: [
2142
+ {
2143
+ id: contentBlock.id,
2144
+ type: "function",
2145
+ function: {
2146
+ name: contentBlock.name,
2147
+ arguments: ""
2148
+ }
2149
+ }
2150
+ ]
2151
+ },
2152
+ finish_reason: null
2153
+ }
2154
+ ]
2155
+ };
2156
+ }
2157
+ }
2158
+ if (eventType === "message_stop") {
2159
+ return {
2160
+ id: requestId,
2161
+ object: "chat.completion.chunk",
2162
+ created: Math.floor(Date.now() / 1e3),
2163
+ model,
2164
+ choices: [
2165
+ {
2166
+ index: 0,
2167
+ delta: {},
2168
+ finish_reason: "stop"
2169
+ }
2170
+ ]
2171
+ };
2172
+ }
2173
+ return null;
2174
+ }
2175
+ /**
2176
+ * Parse error response from Anthropic
2177
+ */
2178
+ async parseError(response) {
2179
+ let message = `Anthropic API error: ${response.status}`;
2180
+ const retryable = response.status >= 500 || response.status === 429;
2181
+ try {
2182
+ const data = await response.json();
2183
+ if (data.error?.message) {
2184
+ message = data.error.message;
2185
+ }
2186
+ } catch {
2187
+ }
2188
+ return new ProviderError(message, "anthropic", void 0, retryable);
2189
+ }
2190
+ /**
2191
+ * Wrap unknown errors
2192
+ */
2193
+ wrapError(error) {
2194
+ if (error instanceof ProviderError) {
2195
+ return error;
2196
+ }
2197
+ if (error instanceof Error) {
2198
+ const isTimeout = error.name === "AbortError" || error.message.includes("timeout");
2199
+ return new ProviderError(error.message, "anthropic", error, isTimeout);
2200
+ }
2201
+ return new ProviderError("Unknown error", "anthropic", void 0, true);
2202
+ }
2203
+ };
2204
+
2205
+ // src/providers/registry/GoogleProvider.ts
2206
// Model identifiers the Google provider advertises when the caller supplies no list.
var DEFAULT_GEMINI_MODELS = [
  "gemini-1.5-pro",
  "gemini-1.5-pro-latest",
  "gemini-1.5-flash",
  "gemini-1.5-flash-latest",
  "gemini-2.0-flash-exp",
  "gemini-pro"
];
/**
 * Provider adapter that exposes the Google Gemini REST API through the
 * gateway's OpenAI-compatible request/response shapes.
 */
var GoogleProvider = class extends Provider {
  apiKey;
  baseUrl;
  timeout;
  /**
   * @param options Optional settings: apiKey (falls back to the
   *   GOOGLE_API_KEY environment variable), baseUrl, models, timeout (ms),
   *   maxRetries and extra request headers.
   * @throws {Error} when no API key is available.
   */
  constructor(options = {}) {
    const apiKey = options.apiKey || process.env.GOOGLE_API_KEY;
    if (!apiKey) {
      throw new Error("Google API key is required");
    }
    const config = {
      name: "google",
      apiKey,
      baseUrl: options.baseUrl || "https://generativelanguage.googleapis.com/v1beta",
      models: options.models || DEFAULT_GEMINI_MODELS,
      timeout: options.timeout || 6e4,
      // `??` keeps an explicit `maxRetries: 0` instead of replacing it with 3.
      maxRetries: options.maxRetries ?? 3,
      headers: options.headers
    };
    super(config);
    this.apiKey = apiKey;
    this.baseUrl = config.baseUrl;
    this.timeout = config.timeout;
  }
  /**
   * Execute a chat completion request.
   * @param request OpenAI-compatible chat request.
   * @returns OpenAI-compatible chat completion.
   * @throws {ProviderError} for HTTP errors, empty responses and transport failures.
   */
  async chat(request) {
    const start = Date.now();
    try {
      const response = await this.makeRequest(`/models/${request.model}:generateContent`, {
        method: "POST",
        body: JSON.stringify(this.transformToGemini(request))
      });
      if (!response.ok) {
        throw await this.parseError(response);
      }
      const result = this.transformFromGemini(await response.json(), request.model);
      this.updateHealth(true, Date.now() - start);
      return result;
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    }
  }
  /**
   * Execute a streaming chat completion request.
   *
   * The stream is requested with `alt=sse` so every event arrives as a single
   * `data: {...}` line. The line parser also accepts one-object-per-line JSON
   * array output. After the upstream stream ends, a final chunk carrying the
   * finish reason and the token usage is yielded.
   *
   * @param request OpenAI-compatible chat request.
   * @yields OpenAI-compatible chat completion chunks.
   * @throws {ProviderError} for HTTP errors, a missing body and transport failures.
   */
  async *chatStream(request) {
    const start = Date.now();
    try {
      const response = await this.makeRequest(
        `/models/${request.model}:streamGenerateContent?alt=sse`,
        {
          method: "POST",
          body: JSON.stringify(this.transformToGemini(request))
        }
      );
      if (!response.ok) {
        throw await this.parseError(response);
      }
      const reader = response.body?.getReader();
      if (!reader) {
        throw new ProviderError("No response body", "google");
      }
      const decoder = new TextDecoder();
      const requestId = generateRequestId();
      let buffer = "";
      let totalInputTokens = 0;
      let totalOutputTokens = 0;
      let toolCallCount = 0;
      let lastFinishReason;
      let streamDone = false;
      while (!streamDone) {
        const { done, value } = await reader.read();
        streamDone = done;
        buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        // While more data may arrive, the last piece can be an incomplete
        // line and stays buffered; at end of stream everything is processed.
        buffer = done ? "" : lines.pop() ?? "";
        for (const line of lines) {
          const data = this.parseStreamLine(line);
          if (!data) continue;
          if (data.usageMetadata) {
            totalInputTokens = data.usageMetadata.promptTokenCount || 0;
            totalOutputTokens = data.usageMetadata.candidatesTokenCount || 0;
          }
          lastFinishReason = data.candidates?.[0]?.finishReason ?? lastFinishReason;
          const chunk = this.transformStreamChunk(data, request.model, requestId, toolCallCount);
          if (chunk) {
            toolCallCount += chunk.choices[0].delta.tool_calls?.length ?? 0;
            yield chunk;
          }
        }
      }
      yield {
        id: requestId,
        object: "chat.completion.chunk",
        created: Math.floor(Date.now() / 1e3),
        model: request.model,
        choices: [
          {
            index: 0,
            delta: {},
            finish_reason: this.mapFinishReason(lastFinishReason, toolCallCount > 0) ?? "stop"
          }
        ],
        usage: {
          prompt_tokens: totalInputTokens,
          completion_tokens: totalOutputTokens,
          total_tokens: totalInputTokens + totalOutputTokens
        }
      };
      this.updateHealth(true, Date.now() - start);
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    }
  }
  /**
   * Get model information.
   * @returns Model info, or null when this provider does not support the model.
   */
  getModelInfo(model) {
    if (!this.supportsModel(model)) {
      return null;
    }
    return getModelInfo(model, "google");
  }
  /**
   * Make an HTTP request to the Google API.
   *
   * The API key travels in the `x-goog-api-key` header rather than in the
   * query string, so it does not end up in URLs that proxies or logs record.
   * The timeout covers the time until response headers arrive; reading the
   * body afterwards is not limited by it.
   *
   * @param path Path (and optional query) appended to the base URL.
   * @param options Extra fetch options such as method and body.
   */
  async makeRequest(path, options) {
    const url = `${this.baseUrl}${path}`;
    const headers = {
      "Content-Type": "application/json",
      "x-goog-api-key": this.apiKey,
      ...this.config.headers
    };
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);
    try {
      return await fetch(url, {
        ...options,
        headers,
        signal: controller.signal
      });
    } finally {
      clearTimeout(timeoutId);
    }
  }
  /**
   * Transform an OpenAI-compatible request to Gemini format.
   * System messages become `systemInstruction`; sampling options go into
   * `generationConfig`; tools become `functionDeclarations`.
   */
  transformToGemini(request) {
    const transformed = {
      contents: this.transformMessages(request.messages)
    };
    const generationConfig = {};
    if (request.temperature !== void 0) {
      generationConfig.temperature = request.temperature;
    }
    if (request.max_tokens !== void 0) {
      generationConfig.maxOutputTokens = request.max_tokens;
    }
    if (request.top_p !== void 0) {
      generationConfig.topP = request.top_p;
    }
    if (request.stop !== void 0) {
      generationConfig.stopSequences = Array.isArray(request.stop) ? request.stop : [request.stop];
    }
    if (Object.keys(generationConfig).length > 0) {
      transformed.generationConfig = generationConfig;
    }
    const systemMessages = request.messages.filter((m) => m.role === "system");
    if (systemMessages.length > 0) {
      const systemText = systemMessages.map((m) => this.contentToText(m.content)).join("\n");
      transformed.systemInstruction = { parts: [{ text: systemText }] };
    }
    if (request.tools && request.tools.length > 0) {
      transformed.tools = [
        {
          functionDeclarations: request.tools.map((tool) => ({
            name: tool.function.name,
            description: tool.function.description,
            parameters: tool.function.parameters || { type: "object" }
          }))
        }
      ];
    }
    return transformed;
  }
  /**
   * Transform messages to Gemini `contents`.
   *
   * System messages are skipped (they are sent as `systemInstruction`).
   * A tool result becomes a single `functionResponse` part. Its function name
   * is taken from `message.name`, or else from the earlier assistant tool call
   * with the same id, with "tool_result" as the last resort.
   */
  transformMessages(messages) {
    const toolNamesById = new Map();
    const contents = [];
    for (const message of messages) {
      if (message.role === "system") continue;
      const role = message.role === "assistant" ? "model" : "user";
      const parts = [];
      if (message.role === "tool" && message.tool_call_id) {
        // Only the functionResponse part is sent; repeating the same payload
        // as a text part would present the tool output twice.
        parts.push({
          functionResponse: {
            name: message.name || toolNamesById.get(message.tool_call_id) || "tool_result",
            response: {
              result: this.contentToText(message.content)
            }
          }
        });
      } else if (message.content) {
        parts.push({ text: this.contentToText(message.content) });
      }
      if (message.tool_calls) {
        for (const toolCall of message.tool_calls) {
          if (toolCall.id) {
            toolNamesById.set(toolCall.id, toolCall.function.name);
          }
          parts.push({
            functionCall: {
              name: toolCall.function.name,
              args: this.parseToolArguments(toolCall.function.arguments)
            }
          });
        }
      }
      if (parts.length > 0) {
        contents.push({ role, parts });
      }
    }
    return contents;
  }
  /**
   * Transform a Gemini response to OpenAI-compatible format.
   * @throws {ProviderError} when the response has no candidates.
   */
  transformFromGemini(data, model) {
    const candidate = data.candidates?.[0];
    if (!candidate) {
      throw new ProviderError("No response candidates", "google");
    }
    const { text, toolCalls } = this.collectParts(candidate.content?.parts || []);
    const message = {
      role: "assistant",
      content: text || null
    };
    if (toolCalls.length > 0) {
      message.tool_calls = toolCalls;
    }
    const usageMetadata = data.usageMetadata;
    const promptTokens = usageMetadata?.promptTokenCount || 0;
    const completionTokens = usageMetadata?.candidatesTokenCount || 0;
    return {
      id: generateRequestId(),
      object: "chat.completion",
      created: Math.floor(Date.now() / 1e3),
      model,
      choices: [
        {
          index: 0,
          message,
          finish_reason: this.mapFinishReason(candidate.finishReason, toolCalls.length > 0)
        }
      ],
      usage: {
        prompt_tokens: promptTokens,
        completion_tokens: completionTokens,
        // Fall back to the sum when the API omits the total.
        total_tokens: usageMetadata?.totalTokenCount || promptTokens + completionTokens
      }
    };
  }
  /**
   * Transform a Gemini stream chunk to OpenAI-compatible format.
   *
   * @param data Parsed Gemini stream object.
   * @param model Model name reported on the chunk.
   * @param requestId Id shared by every chunk of the stream.
   * @param toolCallOffset Number of tool calls already emitted on this stream;
   *   used so tool-call indexes stay unique across chunks.
   * @returns A chunk, or null when the object carries neither text nor function calls.
   */
  transformStreamChunk(data, model, requestId, toolCallOffset = 0) {
    const candidate = data.candidates?.[0];
    if (!candidate) {
      return null;
    }
    const { text, toolCalls } = this.collectParts(candidate.content?.parts || []);
    if (!text && toolCalls.length === 0) {
      return null;
    }
    const delta = {};
    if (text) {
      delta.content = text;
    }
    if (toolCalls.length > 0) {
      delta.tool_calls = toolCalls.map((call, position) => ({
        index: toolCallOffset + position,
        ...call
      }));
    }
    return {
      id: requestId,
      object: "chat.completion.chunk",
      created: Math.floor(Date.now() / 1e3),
      model,
      choices: [
        {
          index: 0,
          delta,
          finish_reason: null
        }
      ]
    };
  }
  /**
   * Parse an error response from Google into a ProviderError.
   * 429 and 5xx responses are flagged as retryable.
   */
  async parseError(response) {
    let message = `Google API error: ${response.status}`;
    const retryable = response.status >= 500 || response.status === 429;
    try {
      const data = await response.json();
      if (data.error?.message) {
        message = data.error.message;
      }
    } catch {
      // Best effort only: an unreadable or non-JSON body keeps the generic message.
    }
    return new ProviderError(message, "google", void 0, retryable);
  }
  /**
   * Wrap unknown errors in a ProviderError. Timeouts and aborts are retryable;
   * other Error instances are not; non-Error values are treated as retryable.
   */
  wrapError(error) {
    if (error instanceof ProviderError) {
      return error;
    }
    if (error instanceof Error) {
      const isTimeout = error.name === "AbortError" || error.message.includes("timeout");
      return new ProviderError(error.message, "google", error, isTimeout);
    }
    return new ProviderError("Unknown error", "google", void 0, true);
  }
  /** Serialise message content: strings pass through, anything else is JSON. */
  contentToText(content) {
    return typeof content === "string" ? content : JSON.stringify(content);
  }
  /**
   * Decode OpenAI-style tool-call arguments (a JSON string). Empty or missing
   * arguments become {}; an already-parsed object is returned unchanged.
   */
  parseToolArguments(raw) {
    if (raw === void 0 || raw === null) {
      return {};
    }
    if (typeof raw !== "string") {
      return raw;
    }
    return raw.trim() === "" ? {} : JSON.parse(raw);
  }
  /**
   * Extract one JSON object from a line of the streaming response. Handles
   * SSE `data:` lines as well as elements of a JSON array written one object
   * per line. Returns null for separators, blank lines and malformed JSON.
   */
  parseStreamLine(line) {
    let text = line.trim();
    if (text.startsWith("data:")) text = text.slice(5).trim();
    if (text.startsWith(",")) text = text.slice(1).trim();
    if (text.endsWith(",")) text = text.slice(0, -1).trim();
    if (!text.startsWith("{")) {
      return null;
    }
    try {
      return JSON.parse(text);
    } catch {
      // A malformed fragment is skipped so one bad line does not abort the stream.
      return null;
    }
  }
  /**
   * Collect the text and the function calls of a list of Gemini parts.
   * Function calls get a generated id because Gemini does not supply one.
   */
  collectParts(parts) {
    let text = "";
    const toolCalls = [];
    for (const part of parts) {
      if (part.text) {
        text += part.text;
      }
      if (part.functionCall) {
        toolCalls.push({
          id: `call_${Math.random().toString(36).substring(2, 11)}`,
          type: "function",
          function: {
            name: part.functionCall.name,
            // A call without arguments serialises as "{}" rather than undefined.
            arguments: JSON.stringify(part.functionCall.args ?? {})
          }
        });
      }
    }
    return { text, toolCalls };
  }
  /**
   * Map a Gemini finish reason onto an OpenAI finish reason. Function calls
   * are reported as "tool_calls" even when the candidate finished with STOP.
   * Returns null for unknown or missing reasons.
   */
  mapFinishReason(finishReason, hasToolCalls) {
    if (finishReason === "MAX_TOKENS") {
      return "length";
    }
    if (hasToolCalls) {
      return "tool_calls";
    }
    switch (finishReason) {
      case "STOP":
        return "stop";
      case "SAFETY":
      case "RECITATION":
      case "BLOCKLIST":
      case "PROHIBITED_CONTENT":
        return "content_filter";
      default:
        return null;
    }
  }
};
2588
/**
 * Gateway that routes OpenAI-compatible chat requests to registered
 * providers, with retries across providers, an optional response cache,
 * health tracking and aggregate metrics.
 *
 * Events emitted: "request:start", "request:complete", "request:error"
 * and "provider:unhealthy".
 */
var Gateway = class extends EventEmitter {
  _config;
  registry;
  router;
  healthMonitor;
  cache;
  logger;
  metrics;
  /**
   * Chat completions API interface (OpenAI-compatible)
   */
  chat = {
    completions: {
      create: this.createCompletion.bind(this)
    }
  };
  /**
   * @param config Gateway configuration: `providers` (required list of
   *   provider configs) plus optional `routing`, `cache` and `telemetry`.
   */
  constructor(config) {
    super();
    this._config = config;
    this.logger = pino({
      level: config.telemetry?.logging?.level || "info",
      transport: process.env.NODE_ENV !== "production" ? { target: "pino-pretty" } : void 0
    });
    this.registry = new ProviderRegistry();
    for (const providerConfig of config.providers) {
      const provider = this.createProvider(providerConfig);
      if (provider) {
        this.registry.register(provider);
      }
    }
    this.router = this.createRouter(config);
    this.healthMonitor = new HealthMonitor({
      checkInterval: 6e4,
      unhealthyThreshold: 3,
      degradedThreshold: 1,
      circuitBreaker: {
        failureThreshold: 5,
        successThreshold: 3,
        timeout: 3e4
      }
    });
    if (config.cache?.enabled) {
      this.cache = new LRUCache({
        max: config.cache.maxEntries || 1e3,
        // `ttl` is configured in seconds; the cache is given milliseconds.
        ttl: (config.cache.ttl || 3600) * 1e3
      });
    } else {
      this.cache = null;
    }
    this.metrics = this.createInitialMetrics();
    this.healthMonitor.on("unhealthy", (provider) => {
      this.logger.warn({ provider }, "Provider marked unhealthy");
      this.emit("provider:unhealthy", provider);
    });
    this.healthMonitor.on("circuit-open", (provider) => {
      this.logger.warn({ provider }, "Circuit breaker opened");
    });
  }
  /**
   * Create a chat completion (main API).
   * @returns A completion object, or an async generator of chunks when
   *   `request.stream` is set.
   * @throws {ValidationError} when the request is malformed.
   */
  async createCompletion(request) {
    this.validateRequest(request);
    if (request.stream) {
      return this.createStreamingCompletion(request);
    }
    return this.createNonStreamingCompletion(request);
  }
  /**
   * Non-streaming completion.
   *
   * Serves from the cache when allowed, otherwise tries up to three routing
   * decisions, excluding each provider that failed, was missing or had its
   * circuit open. A failed request is counted and reported through
   * "request:error" exactly once.
   */
  async createNonStreamingCompletion(request) {
    const requestId = request._gateway?.requestId || generateRequestId();
    const start = Date.now();
    this.emit("request:start", {
      requestId,
      model: request.model
    });
    try {
      const useCache = Boolean(this.cache) && request._gateway?.cachePolicy !== "no-cache";
      const cacheKey = useCache ? hashRequest(request) : null;
      if (useCache) {
        const cached = this.cache.get(cacheKey);
        if (cached) {
          this.metrics.cache.hits++;
          this.metrics.requests.total++;
          this.metrics.requests.cached++;
          this.updateCacheHitRate();
          this.logger.debug({ requestId, cacheKey }, "Cache hit");
          const response = {
            ...cached,
            _gateway: {
              ...cached._gateway,
              cached: true,
              cacheKey,
              latencyMs: Date.now() - start
            }
          };
          this.emit("request:complete", {
            requestId,
            provider: cached._gateway?.provider || "cache",
            model: cached.model,
            latencyMs: Date.now() - start,
            cost: 0,
            cached: true,
            tokens: { input: 0, output: 0 }
          });
          return response;
        }
        this.metrics.cache.misses++;
      }
      const routingContext = {
        excludeProviders: request._gateway?.excludeProviders,
        preferredProvider: request._gateway?.preferredProvider,
        maxCost: request._gateway?.maxCost,
        maxLatency: request._gateway?.maxLatency
      };
      // Every unsuccessful decision is recorded with its real provider and
      // model, and the provider is excluded from later routing decisions.
      const previousAttempts = [];
      const giveUpOn = (decision, reason) => {
        previousAttempts.push({
          provider: decision.provider,
          model: decision.model,
          error: reason
        });
        routingContext.excludeProviders = [
          ...routingContext.excludeProviders || [],
          decision.provider
        ];
      };
      let lastError = null;
      const maxAttempts = 3;
      for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        const decision = this.router.route(request, this.registry, {
          ...routingContext,
          previousAttempts: previousAttempts.length > 0 ? [...previousAttempts] : void 0
        });
        const provider = this.registry.get(decision.provider);
        if (!provider) {
          lastError = new GatewayError(
            `Provider not found: ${decision.provider}`,
            "PROVIDER_NOT_FOUND",
            500
          );
          giveUpOn(decision, lastError.message);
          continue;
        }
        if (!this.healthMonitor.isRequestAllowed(decision.provider)) {
          this.logger.debug(
            { provider: decision.provider },
            "Circuit breaker open, skipping"
          );
          giveUpOn(decision, "Circuit breaker open");
          continue;
        }
        const providerStart = Date.now();
        let response;
        try {
          response = await provider.chat({
            ...request,
            model: decision.model
          });
        } catch (error) {
          lastError = error instanceof Error ? error : new Error(String(error));
          this.healthMonitor.recordRequest(
            decision.provider,
            false,
            Date.now() - providerStart
          );
          if (error instanceof ProviderError && !error.retryable) {
            throw error;
          }
          this.logger.warn(
            {
              provider: decision.provider,
              error: lastError.message,
              attempt
            },
            "Request failed, retrying"
          );
          giveUpOn(decision, lastError.message);
          continue;
        }
        const latencyMs = Date.now() - providerStart;
        this.healthMonitor.recordRequest(decision.provider, true, latencyMs);
        const cost = calculateCost(decision.model, response.usage);
        this.updateMetrics(decision, response, latencyMs, cost);
        const gatewayResponse = {
          ...response,
          _gateway: {
            provider: decision.provider,
            originalModel: request.model,
            latencyMs,
            cost,
            cached: false,
            retries: attempt - 1,
            routingDecision: decision
          }
        };
        if (useCache) {
          this.cache.set(cacheKey, gatewayResponse);
        }
        this.emit("request:complete", {
          requestId,
          provider: decision.provider,
          model: decision.model,
          latencyMs: Date.now() - start,
          cost,
          cached: false,
          tokens: {
            input: response.usage.prompt_tokens,
            output: response.usage.completion_tokens
          }
        });
        return gatewayResponse;
      }
      // Failure accounting happens once, in the catch below.
      throw lastError || new GatewayError("All attempts failed", "ALL_ATTEMPTS_FAILED", 502);
    } catch (error) {
      this.metrics.requests.total++;
      this.metrics.requests.failed++;
      this.emit("request:error", {
        requestId,
        error: error instanceof Error ? error : new Error(String(error))
      });
      throw error;
    }
  }
  /**
   * Streaming completion.
   *
   * Routes once (no retries), forwards provider chunks as they arrive and,
   * when the stream finishes, records health, metrics and a
   * "request:complete" event using the last usage figures seen.
   */
  async *createStreamingCompletion(request) {
    const requestId = request._gateway?.requestId || generateRequestId();
    const start = Date.now();
    this.emit("request:start", { requestId, model: request.model });
    let decision = null;
    let providerInvoked = false;
    try {
      decision = this.router.route(request, this.registry, {
        excludeProviders: request._gateway?.excludeProviders,
        preferredProvider: request._gateway?.preferredProvider
      });
      const provider = this.registry.get(decision.provider);
      if (!provider) {
        throw new GatewayError(
          `Provider not found: ${decision.provider}`,
          "PROVIDER_NOT_FOUND",
          500
        );
      }
      providerInvoked = true;
      let totalInputTokens = 0;
      let totalOutputTokens = 0;
      for await (const chunk of provider.chatStream({ ...request, model: decision.model })) {
        if (chunk.usage) {
          totalInputTokens = chunk.usage.prompt_tokens;
          totalOutputTokens = chunk.usage.completion_tokens;
        }
        yield chunk;
      }
      const latencyMs = Date.now() - start;
      const usage = {
        prompt_tokens: totalInputTokens,
        completion_tokens: totalOutputTokens,
        total_tokens: totalInputTokens + totalOutputTokens
      };
      const cost = calculateCost(decision.model, usage);
      this.healthMonitor.recordRequest(decision.provider, true, latencyMs);
      this.updateMetrics(decision, { usage }, latencyMs, cost);
      this.emit("request:complete", {
        requestId,
        provider: decision.provider,
        model: decision.model,
        latencyMs,
        cost,
        cached: false,
        tokens: { input: totalInputTokens, output: totalOutputTokens }
      });
    } catch (error) {
      // Only a provider that was actually called gets a failure recorded.
      if (providerInvoked) {
        this.healthMonitor.recordRequest(
          decision.provider,
          false,
          Date.now() - start
        );
      }
      this.metrics.requests.total++;
      this.metrics.requests.failed++;
      this.emit("request:error", {
        requestId,
        provider: decision?.provider,
        error: error instanceof Error ? error : new Error(String(error))
      });
      throw error;
    }
  }
  /**
   * Validate a request.
   * @throws {ValidationError} when the model is missing, the messages are
   *   not a non-empty array, or a message has a missing or unknown role.
   */
  validateRequest(request) {
    if (!request || !request.model) {
      throw new ValidationError("Model is required");
    }
    if (!Array.isArray(request.messages) || request.messages.length === 0) {
      throw new ValidationError(
        "Messages array is required and cannot be empty"
      );
    }
    for (const message of request.messages) {
      if (!message || !message.role) {
        throw new ValidationError("Message role is required");
      }
      if (!["system", "user", "assistant", "tool"].includes(message.role)) {
        throw new ValidationError(`Invalid message role: ${message.role}`);
      }
    }
  }
  /**
   * Create a provider instance from config.
   * @returns The provider, or null (after a warning) for an unknown name.
   */
  createProvider(config) {
    const providerOptions = {
      apiKey: config.apiKey,
      baseUrl: config.baseUrl,
      models: config.models,
      timeout: config.timeout
    };
    switch (config.name) {
      case "openai":
        return new OpenAIProvider(providerOptions);
      case "anthropic":
        return new AnthropicProvider(providerOptions);
      case "google":
        return new GoogleProvider(providerOptions);
      default:
        this.logger.warn({ provider: config.name }, "Unknown provider type");
        return null;
    }
  }
  /**
   * Create router from config. The strategy defaults to round-robin.
   */
  createRouter(config) {
    const strategyName = config.routing?.strategy || "round-robin";
    let strategy;
    switch (strategyName) {
      case "failover":
        strategy = new FailoverStrategy({
          chain: config.routing?.fallbackChain || [
            "openai",
            "anthropic",
            "google"
          ]
        });
        break;
      case "cost-optimized":
        strategy = new CostOptimizedStrategy();
        break;
      case "latency-optimized":
        strategy = new LatencyOptimizedStrategy();
        break;
      case "round-robin":
      default:
        strategy = new RoundRobinStrategy({
          weights: config.routing?.weights
        });
        break;
    }
    return new Router(strategy, {
      fallbackChain: config.routing?.fallbackChain
    });
  }
  /**
   * Create initial metrics object (all counters at zero).
   */
  createInitialMetrics() {
    return {
      requests: { total: 0, successful: 0, failed: 0, cached: 0 },
      latency: { avg: 0, p50: 0, p95: 0, p99: 0 },
      tokens: { input: 0, output: 0, total: 0 },
      cost: { total: 0, byProvider: {}, byModel: {} },
      cache: { hits: 0, misses: 0, hitRate: 0 },
      providers: {}
    };
  }
  /**
   * Update metrics after a successful provider request: request counters,
   * running average latency, token totals, cost breakdowns, cache hit rate
   * and the provider health snapshot.
   */
  updateMetrics(decision, response, latencyMs, cost) {
    this.metrics.requests.total++;
    this.metrics.requests.successful++;
    this.metrics.latency.avg = (this.metrics.latency.avg * (this.metrics.requests.successful - 1) + latencyMs) / this.metrics.requests.successful;
    this.metrics.tokens.input += response.usage.prompt_tokens;
    this.metrics.tokens.output += response.usage.completion_tokens;
    this.metrics.tokens.total += response.usage.total_tokens;
    this.metrics.cost.total += cost;
    this.metrics.cost.byProvider[decision.provider] = (this.metrics.cost.byProvider[decision.provider] || 0) + cost;
    this.metrics.cost.byModel[decision.model] = (this.metrics.cost.byModel[decision.model] || 0) + cost;
    this.updateCacheHitRate();
    this.metrics.providers = this.registry.getHealthStatus();
  }
  /**
   * Update cache hit rate metric (hits / (hits + misses), 0 when unused).
   */
  updateCacheHitRate() {
    const totalCacheOps = this.metrics.cache.hits + this.metrics.cache.misses;
    this.metrics.cache.hitRate = totalCacheOps > 0 ? this.metrics.cache.hits / totalCacheOps : 0;
  }
  /**
   * Get current metrics (shallow copy; nested objects are shared).
   */
  getMetrics() {
    return { ...this.metrics };
  }
  /**
   * Get gateway configuration (shallow copy).
   */
  getConfig() {
    return { ...this._config };
  }
  /**
   * Get provider registry
   */
  getRegistry() {
    return this.registry;
  }
  /**
   * Get router
   */
  getRouter() {
    return this.router;
  }
  /**
   * Check health of all providers.
   * @returns Map of provider name to true when its status is "healthy".
   */
  async checkHealth() {
    const healthStatus = await this.registry.checkHealth();
    const result = {};
    for (const [name, health] of Object.entries(healthStatus)) {
      result[name] = health.status === "healthy";
    }
    return result;
  }
  /**
   * Shut down the gateway: stop registry health checks and clear the cache.
   */
  shutdown() {
    this.registry.stopHealthChecks();
    this.cache?.clear();
    this.logger.info("Gateway shut down");
  }
};
3032
/**
 * Build the Hono application that exposes the gateway over HTTP.
 *
 * Routes (all prefixed with `basePath`):
 *   GET  /health               - provider health, 503 when any is unhealthy
 *   GET  /metrics              - gateway metrics
 *   GET  /v1/models            - OpenAI-style model list
 *   POST /v1/chat/completions  - chat completions, JSON or SSE when `stream` is set
 *   POST /v1/completions       - always 400 (deprecated endpoint)
 * Every other request gets a 404 JSON error.
 *
 * @param options `gateway` (required), optional `basePath` and `cors` settings.
 * @returns The configured Hono app.
 */
function createHTTPServer(options) {
  const { gateway, basePath = "" } = options;
  const app = new Hono();
  app.use("*", logger());
  if (options.cors) {
    app.use(
      "*",
      cors({
        origin: options.cors.origin || "*",
        allowMethods: options.cors.methods || ["GET", "POST", "OPTIONS"],
        allowHeaders: options.cors.headers || [
          "Content-Type",
          "Authorization",
          "X-Request-Id"
        ]
      })
    );
  }
  app.get(`${basePath}/health`, async (c) => {
    const health = await gateway.checkHealth();
    const allHealthy = Object.values(health).every((h) => h);
    return c.json(
      {
        status: allHealthy ? "healthy" : "degraded",
        providers: health,
        timestamp: (/* @__PURE__ */ new Date()).toISOString()
      },
      allHealthy ? 200 : 503
    );
  });
  app.get(`${basePath}/metrics`, (c) => {
    const metrics = gateway.getMetrics();
    return c.json(metrics);
  });
  app.get(`${basePath}/v1/models`, (c) => {
    const registry = gateway.getRegistry();
    const models = registry.getAllModels();
    const data = models.map((model) => {
      const modelInfo = registry.getModelInfo(model);
      return {
        id: model,
        object: "model",
        created: Math.floor(Date.now() / 1e3),
        owned_by: modelInfo?.provider || "unknown"
      };
    });
    return c.json({
      object: "list",
      data
    });
  });
  app.post(`${basePath}/v1/chat/completions`, async (c) => {
    // A body that is not a JSON object is a client error, so it is answered
    // with 400 here instead of going through the generic error formatter.
    let body;
    try {
      body = await c.req.json();
    } catch {
      body = void 0;
    }
    if (body === void 0 || body === null || typeof body !== "object" || Array.isArray(body)) {
      return c.json(
        {
          error: {
            message: "Request body must be a valid JSON object",
            type: "invalid_request_error",
            code: "invalid_json"
          }
        },
        400
      );
    }
    try {
      const requestId = c.req.header("X-Request-Id");
      if (requestId) {
        body._gateway = { ...body._gateway, requestId };
      }
      if (body.stream) {
        // Create the generator before the SSE response starts, so request
        // validation errors are returned with their HTTP status instead of
        // inside a 200 event stream.
        const generator = await gateway.chat.completions.create(body);
        return streamSSE(c, async (stream) => {
          try {
            for await (const chunk of generator) {
              await stream.writeSSE({
                data: JSON.stringify(chunk)
              });
            }
            await stream.writeSSE({ data: "[DONE]" });
          } catch (error) {
            // Send the same `{ error: {...} }` payload the JSON path returns;
            // the HTTP status cannot change once streaming has begun.
            const { body: errorBody } = formatError(error);
            await stream.writeSSE({
              data: JSON.stringify(errorBody)
            });
          }
        });
      }
      const response = await gateway.chat.completions.create(body);
      return c.json(response);
    } catch (error) {
      const { status, body: errorBody } = formatError(error);
      return c.json(errorBody, status);
    }
  });
  app.post(`${basePath}/v1/completions`, (c) => {
    return c.json(
      {
        error: {
          message: "The completions endpoint is deprecated. Please use /v1/chat/completions instead.",
          type: "invalid_request_error",
          code: "deprecated_endpoint"
        }
      },
      400
    );
  });
  app.all("*", (c) => {
    return c.json(
      {
        error: {
          message: `Unknown endpoint: ${c.req.method} ${c.req.path}`,
          type: "invalid_request_error",
          code: "unknown_endpoint"
        }
      },
      404
    );
  });
  return app;
}
3145
/**
 * Start a Node HTTP server for the given Hono app.
 *
 * @param {object} app - App exposing a `fetch` handler (as returned by createHTTPServer).
 * @param {object} [options] - Listen options.
 * @param {number} [options.port] - Port to bind; defaults to 3000. An explicit
 *   `0` is passed through unchanged instead of being replaced by the default.
 * @param {string} [options.host] - Hostname to bind; defaults to "0.0.0.0".
 * @returns {object} Whatever `serve` returns for the started server.
 */
function startServer(app, options = {}) {
  // `??` rather than `||`: only a missing port falls back to the default,
  // so a caller-supplied 0 is not silently rewritten to 3000.
  const port = options.port ?? 3e3;
  // An empty host string is not a usable bind address, so `||` is kept here.
  const host = options.host || "0.0.0.0";
  const server = serve({
    fetch: app.fetch,
    port,
    hostname: host
  });
  // Logs the configured values; with port 0 the actual bound port is not shown.
  console.log(`Gateway server running on http://${host}:${port}`);
  return server;
}
3156
/**
 * Build the `{ status, body }` pair used for every error response.
 *
 * @param {number} status - HTTP status code.
 * @param {string} message - Human-readable message.
 * @param {string} type - Error category placed in `error.type`.
 * @param {*} code - Machine-readable code placed in `error.code`.
 * @returns {{status: number, body: {error: {message: string, type: string, code: *}}}}
 */
function buildErrorResponse(status, message, type, code) {
  return {
    status,
    body: {
      error: { message, type, code }
    }
  };
}

/**
 * Return `statusCode` when it is an integer a Response can carry (200-599),
 * otherwise 500, so a malformed error object cannot break response creation.
 *
 * @param {*} statusCode - Candidate status taken from an error object.
 * @returns {number} A usable HTTP status code.
 */
function normalizeErrorStatus(statusCode) {
  const usable = Number.isInteger(statusCode) && statusCode >= 200 && statusCode <= 599;
  return usable ? statusCode : 500;
}

/**
 * Duck-type check for gateway errors that fail `instanceof` (for example when
 * they were created by another copy of this module). Matches the base class
 * name and the names of the subclasses this module exports.
 *
 * @param {Error} error - Error to inspect.
 * @returns {boolean} True when the error looks like a gateway error.
 */
function isGatewayErrorLike(error) {
  const gatewayErrorNames = [
    "GatewayError",
    "ProviderError",
    "RateLimitError",
    "AuthenticationError"
  ];
  return gatewayErrorNames.includes(error.name) && "statusCode" in error && "code" in error;
}

/**
 * Convert any thrown value into an HTTP status plus a JSON error body.
 *
 * Mapping, in order:
 *  - ValidationError (by class, or by name plus a `code` property): 400, "invalid_request_error"
 *  - GatewayError (by class, or a gateway-like error by name): its `statusCode`, "gateway_error"
 *  - any other Error: 500, "internal_error"
 *  - non-Error values: 500, "internal_error" with code "unknown_error"
 *
 * @param {*} error - The thrown value.
 * @returns {{status: number, body: {error: {message: string, type: string, code: *}}}}
 */
function formatError(error) {
  if (error instanceof ValidationError) {
    return buildErrorResponse(400, error.message, "invalid_request_error", error.code);
  }
  if (error instanceof GatewayError) {
    return buildErrorResponse(
      normalizeErrorStatus(error.statusCode),
      error.message,
      "gateway_error",
      error.code
    );
  }
  if (error instanceof Error) {
    // Name-based fallbacks for errors whose class identity does not match.
    if (error.name === "ValidationError" && "code" in error) {
      return buildErrorResponse(400, error.message, "invalid_request_error", error.code);
    }
    if (isGatewayErrorLike(error)) {
      return buildErrorResponse(
        normalizeErrorStatus(error.statusCode),
        error.message,
        "gateway_error",
        error.code
      );
    }
    return buildErrorResponse(500, error.message, "internal_error", "internal_error");
  }
  return buildErrorResponse(500, "An unknown error occurred", "internal_error", "unknown_error");
}
3228
+
3229
+ // src/telemetry/Metrics.ts
3230
/**
 * In-memory metrics store with counters, gauges and cumulative histograms.
 *
 * Every series is stored under a string key of the form `name` or
 * `name{k1="v1",k2="v2"}` (labels sorted by key), see `formatKey`.
 */
var MetricsCollector = class {
  prefix;
  counters = /* @__PURE__ */ new Map();
  gauges = /* @__PURE__ */ new Map();
  histograms = /* @__PURE__ */ new Map();
  latencyBuckets;
  tokenBuckets;
  /**
   * @param {object} [config]
   * @param {string} [config.prefix] - Prefix for exported metric names.
   * @param {{latency?: number[], tokens?: number[]}} [config.histogramBuckets]
   *   Upper bounds for latency and token histograms.
   */
  constructor(config = {}) {
    this.prefix = config.prefix || "agentsea_gateway";
    this.latencyBuckets = config.histogramBuckets?.latency || [
      50,
      100,
      250,
      500,
      1e3,
      2500,
      5e3,
      1e4
    ];
    this.tokenBuckets = config.histogramBuckets?.tokens || [
      100,
      500,
      1e3,
      2e3,
      5e3,
      1e4,
      5e4
    ];
  }
  /**
   * Get token histogram buckets
   * @returns {number[]} A copy of the configured token bucket bounds.
   */
  getTokenBuckets() {
    return [...this.tokenBuckets];
  }
  /**
   * Increment a counter
   * @param {string} name - Metric name.
   * @param {number} [value=1] - Amount to add.
   * @param {object} [labels] - Label key/value pairs.
   */
  incrementCounter(name, value = 1, labels) {
    const key = this.formatKey(name, labels);
    const current = this.counters.get(key) || 0;
    this.counters.set(key, current + value);
  }
  /**
   * Set a gauge value
   * @param {string} name - Metric name.
   * @param {number} value - New value.
   * @param {object} [labels] - Label key/value pairs.
   */
  setGauge(name, value, labels) {
    const key = this.formatKey(name, labels);
    this.gauges.set(key, value);
  }
  /**
   * Record a histogram observation
   *
   * Buckets are cumulative: an observation increments every bucket whose
   * upper bound is >= the value, including the implicit +Inf bucket.
   *
   * @param {string} name - Metric name.
   * @param {number} value - Observed value.
   * @param {object} [labels] - Label key/value pairs.
   * @param {number[]} [buckets] - Bounds used when the series is first
   *   created; defaults to the latency buckets.
   */
  recordHistogram(name, value, labels, buckets) {
    const key = this.formatKey(name, labels);
    let histogram = this.histograms.get(key);
    if (!histogram) {
      histogram = {
        count: 0,
        sum: 0,
        buckets: /* @__PURE__ */ new Map()
      };
      const bucketsToUse = buckets || this.latencyBuckets;
      for (const bucket of bucketsToUse) {
        histogram.buckets.set(bucket, 0);
      }
      histogram.buckets.set(Infinity, 0);
      this.histograms.set(key, histogram);
    }
    histogram.count++;
    histogram.sum += value;
    for (const [bucket, count] of histogram.buckets) {
      if (value <= bucket) {
        histogram.buckets.set(bucket, count + 1);
      }
    }
  }
  /**
   * Record request metrics
   *
   * Missing or non-finite numeric fields are treated as 0 (tokens, cost) or
   * skipped (latency) so one incomplete record cannot turn a counter into
   * NaN or be counted as 1 through `incrementCounter`'s default value.
   *
   * @param {object} data
   * @param {string} data.provider
   * @param {string} data.model
   * @param {string} data.status - For example "success" or "error".
   * @param {boolean} data.cached
   * @param {number} [data.latencyMs]
   * @param {number} [data.inputTokens]
   * @param {number} [data.outputTokens]
   * @param {number} [data.cost] - Cost in dollars; stored as microdollars.
   */
  recordRequest(data) {
    const finiteOrZero = (n) => Number.isFinite(n) ? n : 0;
    const labels = { provider: data.provider, model: data.model };
    this.incrementCounter("requests_total", 1, {
      ...labels,
      status: data.status,
      cached: String(data.cached)
    });
    if (Number.isFinite(data.latencyMs)) {
      this.recordHistogram("request_latency_ms", data.latencyMs, labels);
    }
    this.incrementCounter("tokens_input_total", finiteOrZero(data.inputTokens), labels);
    this.incrementCounter("tokens_output_total", finiteOrZero(data.outputTokens), labels);
    this.incrementCounter(
      "cost_microdollars_total",
      Math.round(finiteOrZero(data.cost) * 1e6),
      labels
    );
    if (data.cached) {
      this.incrementCounter("cache_hits_total", 1);
    }
  }
  /**
   * Get counter value
   * @returns {number} The stored value, or 0 when the series does not exist.
   */
  getCounter(name, labels) {
    const key = this.formatKey(name, labels);
    return this.counters.get(key) || 0;
  }
  /**
   * Get gauge value
   * @returns {number} The stored value, or 0 when the series does not exist.
   */
  getGauge(name, labels) {
    const key = this.formatKey(name, labels);
    return this.gauges.get(key) || 0;
  }
  /**
   * Get histogram data
   * @returns {{count: number, sum: number, buckets: Map<number, number>}|undefined}
   */
  getHistogram(name, labels) {
    const key = this.formatKey(name, labels);
    return this.histograms.get(key);
  }
  /**
   * Get all metrics as a summary object
   * @returns {object} Aggregated request, latency, token, cost and cache figures.
   */
  getSummary() {
    const requestsTotal = this.sumAllCounters("requests_total");
    const requestsSuccess = this.sumCountersByLabel(
      "requests_total",
      "status",
      "success"
    );
    const requestsError = this.sumCountersByLabel(
      "requests_total",
      "status",
      "error"
    );
    const requestsCached = this.sumCountersByLabel(
      "requests_total",
      "cached",
      "true"
    );
    const latencyHistogram = this.aggregateHistograms("request_latency_ms");
    const avgLatency = latencyHistogram.count > 0 ? latencyHistogram.sum / latencyHistogram.count : 0;
    const inputTokens = this.sumAllCounters("tokens_input_total");
    const outputTokens = this.sumAllCounters("tokens_output_total");
    const totalCostMicro = this.sumAllCounters("cost_microdollars_total");
    const cacheHits = this.getCounter("cache_hits_total");
    const cacheMisses = requestsTotal - cacheHits;
    return {
      requests: {
        total: requestsTotal,
        successful: requestsSuccess,
        failed: requestsError,
        cached: requestsCached
      },
      latency: {
        avg: avgLatency,
        p50: this.calculatePercentile("request_latency_ms", 0.5),
        p95: this.calculatePercentile("request_latency_ms", 0.95),
        p99: this.calculatePercentile("request_latency_ms", 0.99)
      },
      tokens: {
        input: inputTokens,
        output: outputTokens,
        total: inputTokens + outputTokens
      },
      cost: {
        total: totalCostMicro / 1e6,
        byProvider: this.getCostByLabel("provider"),
        byModel: this.getCostByLabel("model")
      },
      cache: {
        hits: cacheHits,
        misses: cacheMisses,
        hitRate: requestsTotal > 0 ? cacheHits / requestsTotal : 0
      },
      providers: {}
    };
  }
  /**
   * Export metrics in Prometheus format
   *
   * Histogram series put the `_bucket` / `_sum` / `_count` suffix on the
   * metric name and merge `le` into the existing label set, e.g.
   * `x_bucket{provider="a",le="50"}`.
   *
   * @returns {string} One sample per line.
   */
  toPrometheusFormat() {
    const lines = [];
    for (const [key, value] of this.counters) {
      lines.push(`${this.prefix}_${key} ${value}`);
    }
    for (const [key, value] of this.gauges) {
      lines.push(`${this.prefix}_${key} ${value}`);
    }
    for (const [key, histogram] of this.histograms) {
      const { name, labels } = this.splitKey(key);
      const metric = `${this.prefix}_${name}`;
      const labelPrefix = labels ? `${labels},` : "";
      const labelBlock = labels ? `{${labels}}` : "";
      for (const [bucket, count] of histogram.buckets) {
        const le = bucket === Infinity ? "+Inf" : bucket;
        lines.push(`${metric}_bucket{${labelPrefix}le="${le}"} ${count}`);
      }
      lines.push(`${metric}_sum${labelBlock} ${histogram.sum}`);
      lines.push(`${metric}_count${labelBlock} ${histogram.count}`);
    }
    return lines.join("\n");
  }
  /**
   * Reset all metrics
   */
  reset() {
    this.counters.clear();
    this.gauges.clear();
    this.histograms.clear();
  }
  /**
   * Format metric key with labels
   * @returns {string} `name`, or `name{k="v",...}` with labels sorted by key.
   */
  formatKey(name, labels) {
    if (!labels || Object.keys(labels).length === 0) {
      return name;
    }
    const labelStr = Object.entries(labels).sort(([a], [b]) => a.localeCompare(b)).map(([k, v]) => `${k}="${v}"`).join(",");
    return `${name}{${labelStr}}`;
  }
  /**
   * Split a key produced by `formatKey` into its metric name and the raw
   * label string (without the surrounding braces; empty when unlabelled).
   * @param {string} key
   * @returns {{name: string, labels: string}}
   */
  splitKey(key) {
    const brace = key.indexOf("{");
    if (brace === -1) {
      return { name: key, labels: "" };
    }
    return { name: key.slice(0, brace), labels: key.slice(brace + 1, -1) };
  }
  /**
   * Sum counters by a specific label value
   */
  sumCountersByLabel(name, labelKey, labelValue) {
    let sum = 0;
    for (const [key, value] of this.counters) {
      if (key.startsWith(name) && key.includes(`${labelKey}="${labelValue}"`)) {
        sum += value;
      }
    }
    return sum;
  }
  /**
   * Sum all counters with a given name prefix
   */
  sumAllCounters(namePrefix) {
    let sum = 0;
    for (const [key, value] of this.counters) {
      if (key.startsWith(namePrefix)) {
        sum += value;
      }
    }
    return sum;
  }
  /**
   * Get cost breakdown by label
   * @returns {Object<string, number>} Dollars per distinct value of `labelKey`.
   */
  getCostByLabel(labelKey) {
    const result = {};
    const prefix = "cost_microdollars_total";
    // Built once: the pattern does not change between counters.
    const labelPattern = new RegExp(`${labelKey}="([^"]+)"`);
    for (const [key, value] of this.counters) {
      if (key.startsWith(prefix)) {
        const match = key.match(labelPattern);
        if (match) {
          const labelValue = match[1];
          result[labelValue] = (result[labelValue] || 0) + value / 1e6;
        }
      }
    }
    return result;
  }
  /**
   * Aggregate histograms for a metric name
   *
   * Bucket counts are added per upper bound across every series whose key
   * starts with `name`.
   */
  aggregateHistograms(name) {
    const result = {
      count: 0,
      sum: 0,
      buckets: /* @__PURE__ */ new Map()
    };
    for (const [key, histogram] of this.histograms) {
      if (key.startsWith(name)) {
        result.count += histogram.count;
        result.sum += histogram.sum;
        for (const [bucket, count] of histogram.buckets) {
          const existing = result.buckets.get(bucket) || 0;
          result.buckets.set(bucket, existing + count);
        }
      }
    }
    return result;
  }
  /**
   * Calculate percentile from histogram (approximate)
   *
   * Interpolates linearly inside the first bucket whose cumulative count
   * reaches the target rank. When that bucket is +Inf there is no upper
   * bound to interpolate towards, so the highest finite bound is returned
   * instead of Infinity/NaN.
   *
   * @param {string} name - Metric name.
   * @param {number} percentile - Fraction in [0, 1], e.g. 0.95.
   * @returns {number} Estimated value, or 0 when there are no observations.
   */
  calculatePercentile(name, percentile) {
    const histogram = this.aggregateHistograms(name);
    if (histogram.count === 0) return 0;
    const sortedBuckets = Array.from(histogram.buckets.entries()).sort(
      ([a], [b]) => a - b
    );
    const targetCount = histogram.count * percentile;
    let prevBucket = 0;
    let prevCount = 0;
    for (const [bucket, count] of sortedBuckets) {
      if (count >= targetCount) {
        if (bucket === Infinity) {
          return prevBucket;
        }
        const bucketRange = bucket - prevBucket;
        const bucketCount = count - prevCount;
        if (bucketCount === 0) {
          return prevBucket;
        }
        const positionInBucket = targetCount - prevCount;
        const fraction = positionInBucket / bucketCount;
        return prevBucket + bucketRange * Math.max(0, Math.min(1, fraction));
      }
      prevBucket = bucket;
      prevCount = count;
    }
    return prevBucket;
  }
};
3538
+
3539
+ export { AnthropicProvider, AuthenticationError, CircuitBreaker, CostOptimizedStrategy, DEFAULT_MODEL_MAPPINGS, FailoverStrategy, Gateway, GatewayError, GoogleProvider, HealthMonitor, LatencyOptimizedStrategy, MODEL_CONTEXT_WINDOWS, MODEL_MAX_OUTPUT, MODEL_PRICING, MetricsCollector, OpenAIProvider, Provider, ProviderError, ProviderRegistry, RateLimitError, RoundRobinStrategy, Router, VIRTUAL_MODELS, ValidationError, calculateCost, countMessageTokens, countTokens, createHTTPServer, createRouterConfig, createSystemFingerprint, estimateCost, estimateRequestTokens, findCheapestModel, freeEncoder, generateCacheKey, generateId, generateRequestId, getModelCapabilities, getModelInfo, getModelPricing, hash, hashRequest, sortModelsByCost, startServer, truncateToTokenLimit };
3540
+ //# sourceMappingURL=index.mjs.map
3541
+ //# sourceMappingURL=index.mjs.map