@lov3kaizen/agentsea-gateway 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,3592 @@
1
+ 'use strict';
2
+
3
+ var events = require('events');
4
+ var tiktoken = require('tiktoken');
5
+ var murmurhash = require('murmurhash');
6
+ var lruCache = require('lru-cache');
7
+ var pino = require('pino');
8
+ var hono = require('hono');
9
+ var cors = require('hono/cors');
10
+ var logger = require('hono/logger');
11
+ var streaming = require('hono/streaming');
12
+ var nodeServer = require('@hono/node-server');
13
+
14
/**
 * Wrap a required module so its main export can always be read via `.default`.
 *
 * @param {*} e - The value returned by `require`.
 * @returns {*} `e` itself when it is truthy and flagged `__esModule`,
 *   otherwise a new `{ default: e }` wrapper.
 */
function _interopDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
15
+
16
+ var murmurhash__default = /*#__PURE__*/_interopDefault(murmurhash);
17
+ var pino__default = /*#__PURE__*/_interopDefault(pino);
18
+
19
+ // src/core/Gateway.ts
20
+
21
+ // src/core/types.ts
22
/**
 * Base error for the gateway. Carries a machine-readable code, an HTTP-style
 * status code, the provider involved (if any) and a retry hint.
 */
var GatewayError = class extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} code - Machine-readable error code.
   * @param {number} [statusCode=500] - Status code associated with the error.
   * @param {string} [provider] - Name of the provider involved, if any.
   * @param {boolean} [retryable=false] - Whether the operation may be retried.
   */
  constructor(message, code, statusCode = 500, provider, retryable = false) {
    super(message);
    Object.assign(this, {
      code,
      statusCode,
      provider,
      retryable,
      name: "GatewayError"
    });
  }
};
32
/**
 * Error for a failed provider call. Always uses code "PROVIDER_ERROR" and
 * status 502, and keeps the underlying error for inspection.
 */
var ProviderError = class extends GatewayError {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} provider - Name of the provider that failed.
   * @param {*} originalError - The underlying error, stored as `originalError`.
   * @param {boolean} [retryable=true] - Whether the operation may be retried.
   */
  constructor(message, provider, originalError, retryable = true) {
    super(message, "PROVIDER_ERROR", 502, provider, retryable);
    Object.assign(this, { originalError, name: "ProviderError" });
  }
};
39
/**
 * Error for an exceeded rate limit. Always uses code "RATE_LIMIT_EXCEEDED",
 * status 429, and is marked retryable.
 */
var RateLimitError = class extends GatewayError {
  /**
   * @param {string} message - Human-readable description.
   * @param {*} retryAfter - Retry hint, stored as-is on `retryAfter`.
   * @param {string} [provider] - Name of the provider involved, if any.
   */
  constructor(message, retryAfter, provider) {
    super(message, "RATE_LIMIT_EXCEEDED", 429, provider, true);
    Object.assign(this, { retryAfter, name: "RateLimitError" });
  }
};
46
/**
 * Error for failed authentication. Always uses code "AUTHENTICATION_FAILED",
 * status 401, no provider, and is not retryable.
 */
var AuthenticationError = class extends GatewayError {
  /**
   * @param {string} message - Human-readable description.
   */
  constructor(message) {
    const noProvider = void 0;
    super(message, "AUTHENTICATION_FAILED", 401, noProvider, false);
    this.name = "AuthenticationError";
  }
};
52
/**
 * Error for invalid input. Always uses code "VALIDATION_ERROR", status 400,
 * no provider, and is not retryable.
 */
var ValidationError = class extends GatewayError {
  /**
   * @param {string} message - Human-readable description.
   */
  constructor(message) {
    const noProvider = void 0;
    super(message, "VALIDATION_ERROR", 400, noProvider, false);
    this.name = "ValidationError";
  }
};
58
+
59
+ // src/providers/ProviderRegistry.ts
60
/**
 * Keeps track of registered providers by name and maintains a reverse index
 * from model identifier to the names of the providers that list it.
 */
var ProviderRegistry = class {
  /** Provider name -> provider object. */
  providers = new Map();
  /** Model identifier -> ordered list of provider names listing that model. */
  modelToProvider = new Map();
  /** Handle returned by setInterval while periodic checks run, else null. */
  healthCheckInterval = null;
  /**
   * @param {Array} [providers=[]] - Providers to register immediately.
   */
  constructor(providers = []) {
    for (const provider of providers) {
      this.register(provider);
    }
  }
  /**
   * Register a provider and index the models it reports via `getModels()`.
   *
   * Registering a name that already exists replaces the provider. Index
   * entries for models the replacement no longer lists are removed, and the
   * name keeps its position in the lists of models it still serves.
   *
   * @param {object} provider - Object exposing `name` and `getModels()`.
   */
  register(provider) {
    const models = [...provider.getModels()];
    if (this.providers.has(provider.name)) {
      // Drop stale index entries left behind by the previous registration.
      const stillServed = new Set(models);
      for (const [model, names] of this.modelToProvider) {
        if (stillServed.has(model) || !names.includes(provider.name)) {
          continue;
        }
        const remaining = names.filter((n) => n !== provider.name);
        if (remaining.length > 0) {
          this.modelToProvider.set(model, remaining);
        } else {
          this.modelToProvider.delete(model);
        }
      }
    }
    this.providers.set(provider.name, provider);
    for (const model of models) {
      const existing = this.modelToProvider.get(model) || [];
      if (!existing.includes(provider.name)) {
        existing.push(provider.name);
        this.modelToProvider.set(model, existing);
      }
    }
  }
  /**
   * Unregister a provider and remove it from every model's provider list.
   *
   * The whole index is scanned rather than trusting the provider's current
   * `getModels()`, so entries are removed even if that list has changed
   * since registration.
   *
   * @param {string} name - Provider name.
   * @returns {boolean} false when no provider with that name is registered.
   */
  unregister(name) {
    if (!this.providers.has(name)) {
      return false;
    }
    for (const [model, names] of this.modelToProvider) {
      if (!names.includes(name)) {
        continue;
      }
      const remaining = names.filter((n) => n !== name);
      if (remaining.length > 0) {
        this.modelToProvider.set(model, remaining);
      } else {
        this.modelToProvider.delete(model);
      }
    }
    this.providers.delete(name);
    return true;
  }
  /**
   * Get a provider by name.
   * @param {string} name
   * @returns {object|undefined}
   */
  get(name) {
    return this.providers.get(name);
  }
  /**
   * Get all registered providers.
   * @returns {Array}
   */
  getAll() {
    return Array.from(this.providers.values());
  }
  /**
   * Get all provider names.
   * @returns {string[]}
   */
  getNames() {
    return Array.from(this.providers.keys());
  }
  /**
   * Get the registered providers indexed under a specific model.
   * @param {string} model
   * @returns {Array} Providers in index order; empty when none.
   */
  getProvidersForModel(model) {
    const names = this.modelToProvider.get(model) || [];
    return names.map((name) => this.providers.get(name)).filter((p) => p !== void 0);
  }
  /**
   * Get the first provider for a model whose `isAvailable()` returns truthy.
   * @param {string} model
   * @returns {object|undefined}
   */
  getProviderForModel(model) {
    return this.getProvidersForModel(model).find((p) => p.isAvailable());
  }
  /**
   * Check whether the model index has an entry for a model.
   * @param {string} model
   * @returns {boolean}
   */
  hasModel(model) {
    return this.modelToProvider.has(model);
  }
  /**
   * Get every model identifier present in the index.
   * @returns {string[]}
   */
  getAllModels() {
    return Array.from(this.modelToProvider.keys());
  }
  /**
   * Get model info from the first available provider for the model.
   * @param {string} model
   * @returns {*} The provider's info, or null when unavailable or nullish.
   */
  getModelInfo(model) {
    const provider = this.getProviderForModel(model);
    return provider?.getModelInfo(model) ?? null;
  }
  /**
   * Collect `getHealth()` from every provider.
   * @returns {Object} Provider name -> health value.
   */
  getHealthStatus() {
    const status = {};
    for (const [name, provider] of this.providers) {
      status[name] = provider.getHealth();
    }
    return status;
  }
  /**
   * Get providers whose `isHealthy()` returns truthy.
   * @returns {Array}
   */
  getHealthyProviders() {
    return this.getAll().filter((p) => p.isHealthy());
  }
  /**
   * Get providers whose `isAvailable()` returns truthy.
   * @returns {Array}
   */
  getAvailableProviders() {
    return this.getAll().filter((p) => p.isAvailable());
  }
  /**
   * Run `healthCheck()` on all providers concurrently.
   *
   * Uses Promise.all, so a single rejected check rejects the whole call.
   *
   * @returns {Promise<Object>} Provider name -> health check result.
   */
  async checkHealth() {
    const results = {};
    await Promise.all(
      this.getAll().map(async (provider) => {
        results[provider.name] = await provider.healthCheck();
      })
    );
    return results;
  }
  /**
   * Start periodic health checks; a no-op when they are already running.
   * Failures of a run are reported through console.error.
   * @param {number} [intervalMs=60000] - Delay between runs.
   */
  startHealthChecks(intervalMs = 6e4) {
    if (this.healthCheckInterval) {
      return;
    }
    this.healthCheckInterval = setInterval(() => {
      this.checkHealth().catch(console.error);
    }, intervalMs);
  }
  /**
   * Stop periodic health checks if they are running.
   */
  stopHealthChecks() {
    if (this.healthCheckInterval) {
      clearInterval(this.healthCheckInterval);
      this.healthCheckInterval = null;
    }
  }
  /**
   * Number of registered providers.
   * @returns {number}
   */
  get size() {
    return this.providers.size;
  }
};
216
/**
 * Per-provider circuit breaker with three states: "closed", "open" and
 * "half-open". Thresholds and the open timeout are read from `config`
 * (`failureThreshold`, `successThreshold`, `timeout`).
 */
var CircuitBreaker = class {
  state = "closed";
  failures = 0;
  successes = 0;
  lastFailure = null;
  nextAttempt = null;
  /**
   * @param {string} providerName - Name reported in `getStatus()`.
   * @param {object} config - Object with `failureThreshold`,
   *   `successThreshold` and `timeout` (added to `Date.now()` when tripping).
   */
  constructor(providerName, config) {
    this.providerName = providerName;
    this.config = config;
  }
  /**
   * Check if requests are allowed. An open breaker whose `nextAttempt` time
   * has been reached moves to "half-open" and allows the request.
   * @returns {boolean}
   */
  isAllowed() {
    if (this.state !== "open") {
      return true;
    }
    const retryDue = this.nextAttempt && new Date() >= this.nextAttempt;
    if (!retryDue) {
      return false;
    }
    this.state = "half-open";
    return true;
  }
  /**
   * Record a successful request. In "half-open" this counts toward
   * `successThreshold` and resets the breaker once reached; in "closed" it
   * decrements the failure count, not going below zero.
   */
  recordSuccess() {
    switch (this.state) {
      case "half-open":
        this.successes += 1;
        if (this.successes >= this.config.successThreshold) {
          this.reset();
        }
        break;
      case "closed":
        this.failures = Math.max(0, this.failures - 1);
        break;
    }
  }
  /**
   * Record a failed request. Trips the breaker on any failure while
   * "half-open", or once `failureThreshold` is reached while "closed".
   */
  recordFailure() {
    this.failures += 1;
    this.lastFailure = new Date();
    const thresholdHit = this.state === "closed" && this.failures >= this.config.failureThreshold;
    if (this.state === "half-open" || thresholdHit) {
      this.trip();
    }
  }
  /**
   * Trip the circuit breaker (open it) and schedule the next attempt.
   */
  trip() {
    this.state = "open";
    this.nextAttempt = new Date(Date.now() + this.config.timeout);
    this.successes = 0;
  }
  /**
   * Reset the circuit breaker to "closed" and clear its counters.
   */
  reset() {
    this.state = "closed";
    this.failures = 0;
    this.successes = 0;
    this.nextAttempt = null;
  }
  /**
   * Get the current state.
   * @returns {string}
   */
  getState() {
    return this.state;
  }
  /**
   * Get a snapshot of the circuit status.
   * @returns {{providerName: string, state: string, failures: number, nextAttempt: (Date|null), lastFailure: (Date|null)}}
   */
  getStatus() {
    const { providerName, state, failures, nextAttempt, lastFailure } = this;
    return { providerName, state, failures, nextAttempt, lastFailure };
  }
};
303
/**
 * Tracks recent health check results per provider and owns the per-provider
 * circuit breakers. Emits "unhealthy", "degraded", "circuit-open" and
 * "circuit-reset" events.
 */
var HealthMonitor = class extends events.EventEmitter {
  healthHistory = new Map();
  circuitBreakers = new Map();
  maxHistorySize = 100;
  /**
   * @param {object} config - Monitor configuration; `config.circuitBreaker`
   *   (if set) is passed to each created CircuitBreaker.
   */
  constructor(config) {
    super();
    this.config = config;
  }
  /**
   * Record a health check result, dropping the oldest entry once the history
   * grows past `maxHistorySize`, and emit an event for non-healthy statuses.
   * @param {string} providerName
   * @param {object} health - Result with at least a `status` field.
   */
  recordHealth(providerName, health) {
    const entries = this.healthHistory.get(providerName) || [];
    entries.push(health);
    if (entries.length > this.maxHistorySize) {
      entries.shift();
    }
    this.healthHistory.set(providerName, entries);
    const { status } = health;
    if (status === "unhealthy" || status === "degraded") {
      // The event name is the status itself.
      this.emit(status, providerName, health);
    }
  }
  /**
   * Record a request result on the provider's circuit breaker and emit
   * "circuit-open" when the breaker is open after a failure.
   * @param {string} providerName
   * @param {boolean} success
   * @param {number} _latencyMs - Currently unused.
   */
  recordRequest(providerName, success, _latencyMs) {
    const breaker = this.getOrCreateCircuitBreaker(providerName);
    if (success) {
      breaker.recordSuccess();
      return;
    }
    breaker.recordFailure();
    if (breaker.getState() === "open") {
      this.emit("circuit-open", providerName);
    }
  }
  /**
   * Check if requests are allowed for a provider. Providers without a stored
   * breaker are always allowed.
   * @param {string} providerName
   * @returns {boolean}
   */
  isRequestAllowed(providerName) {
    const breaker = this.circuitBreakers.get(providerName);
    if (!breaker) {
      return true;
    }
    return breaker.isAllowed();
  }
  /**
   * Get or create a circuit breaker for a provider.
   *
   * A breaker is created and stored only when `config.circuitBreaker` is
   * set. Otherwise a fresh, unstored breaker with built-in defaults is
   * returned on every call.
   * NOTE(review): the unstored fallback means failures never accumulate when
   * no circuit breaker config is given — confirm this is intended.
   * @param {string} providerName
   * @returns {CircuitBreaker}
   */
  getOrCreateCircuitBreaker(providerName) {
    const known = this.circuitBreakers.get(providerName);
    if (known) {
      return known;
    }
    const settings = this.config.circuitBreaker;
    if (settings) {
      const created = new CircuitBreaker(providerName, settings);
      this.circuitBreakers.set(providerName, created);
      return created;
    }
    return new CircuitBreaker(providerName, {
      failureThreshold: 5,
      successThreshold: 3,
      timeout: 3e4
    });
  }
  /**
   * Get health history for a provider.
   * @param {string} providerName
   * @returns {Array} Stored entries, or an empty array when none.
   */
  getHistory(providerName) {
    return this.healthHistory.get(providerName) || [];
  }
  /**
   * Get the mean `latencyMs` over the stored history of a provider.
   * @param {string} providerName
   * @returns {number} 0 when there is no history.
   */
  getAverageLatency(providerName) {
    const entries = this.healthHistory.get(providerName) || [];
    if (entries.length === 0) return 0;
    let total = 0;
    for (const entry of entries) {
      total = total + entry.latencyMs;
    }
    return total / entries.length;
  }
  /**
   * Get the `errorRate` of the most recent health entry for a provider.
   * @param {string} providerName
   * @returns {number} 0 when there is no history.
   */
  getErrorRate(providerName) {
    const entries = this.healthHistory.get(providerName) || [];
    if (entries.length === 0) return 0;
    const latest = entries[entries.length - 1];
    return latest.errorRate;
  }
  /**
   * Get circuit breaker status for a provider.
   * @param {string} providerName
   * @returns {object|null} null when no breaker is stored.
   */
  getCircuitStatus(providerName) {
    const breaker = this.circuitBreakers.get(providerName);
    if (!breaker) {
      return null;
    }
    return breaker.getStatus();
  }
  /**
   * Get all circuit breaker statuses keyed by provider name.
   * @returns {Object}
   */
  getAllCircuitStatuses() {
    const byProvider = {};
    this.circuitBreakers.forEach((breaker, name) => {
      byProvider[name] = breaker.getStatus();
    });
    return byProvider;
  }
  /**
   * Reset the stored circuit breaker for a provider and emit "circuit-reset".
   * Does nothing when no breaker is stored.
   * @param {string} providerName
   */
  resetCircuit(providerName) {
    const breaker = this.circuitBreakers.get(providerName);
    if (!breaker) {
      return;
    }
    breaker.reset();
    this.emit("circuit-reset", providerName);
  }
  /**
   * Clear all health history and stored circuit breakers.
   */
  clear() {
    this.healthHistory.clear();
    this.circuitBreakers.clear();
  }
};
422
+
423
+ // src/routing/Router.ts
424
/**
 * Default cross-provider equivalents: model id -> list of { provider, model }
 * alternatives on the other providers. Built from tiers of equivalent models,
 * so each model maps to the remaining members of its tier, in tier order.
 */
var DEFAULT_MODEL_MAPPINGS = Object.fromEntries(
  [
    // GPT-4 class
    [
      { provider: "openai", model: "gpt-4o" },
      { provider: "anthropic", model: "claude-3-5-sonnet-20241022" },
      { provider: "google", model: "gemini-1.5-pro" }
    ],
    // GPT-4 mini class
    [
      { provider: "openai", model: "gpt-4o-mini" },
      { provider: "anthropic", model: "claude-3-5-haiku-20241022" },
      { provider: "google", model: "gemini-1.5-flash" }
    ]
  ].flatMap(
    (tier) => tier.map((entry) => [
      entry.model,
      // Fresh copies so no two lists share the same entry object.
      tier.filter((peer) => peer !== entry).map((peer) => ({ ...peer }))
    ])
  )
);
/** Model names that are routed by goal rather than by a concrete model. */
var VIRTUAL_MODELS = [
  "best",
  "cheapest",
  "fastest"
];
453
/**
 * Routes requests to a provider/model pair. Concrete model names are handed
 * to the configured strategy; the virtual models ("best", "cheapest",
 * "fastest") are resolved here against the available providers.
 */
var Router = class {
  strategy;
  modelMappings;
  fallbackChain;
  /**
   * @param {object} strategy - Routing strategy exposing `route()` and `name`.
   * @param {object} [config] - Optional `modelMappings` (merged over the
   *   defaults) and `fallbackChain` (defaults to openai, anthropic, google).
   */
  constructor(strategy, config) {
    this.strategy = strategy;
    this.modelMappings = {
      ...DEFAULT_MODEL_MAPPINGS,
      ...config?.modelMappings
    };
    this.fallbackChain = config?.fallbackChain || [
      "openai",
      "anthropic",
      "google"
    ];
  }
  /**
   * Route a request to a provider.
   * @param {object} request - Request with a `model` field.
   * @param {object} registry - Provider registry.
   * @param {object} [context] - Optional routing context.
   * @returns {object} Routing decision.
   */
  route(request, registry, context) {
    if (this.isVirtualModel(request.model)) {
      return this.routeVirtualModel(request.model, request, registry, context);
    }
    return this.strategy.route(request, registry, context);
  }
  /**
   * Check if a model is a virtual model.
   * @param {string} model
   * @returns {boolean}
   */
  isVirtualModel(model) {
    return VIRTUAL_MODELS.includes(model);
  }
  /**
   * Route a virtual model to an actual provider/model.
   * @param {string} virtualModel - "best", "cheapest" or "fastest".
   * @param {object} _request - Unused.
   * @param {object} registry - Provider registry.
   * @param {object} [context] - `excludeProviders` is honoured here; the
   *   remaining fields are used by the individual routes.
   * @returns {object} Routing decision.
   * @throws {Error} When no provider is available or the name is unknown.
   */
  routeVirtualModel(virtualModel, _request, registry, context) {
    const availableProviders = registry.getAvailableProviders().filter((p) => !context?.excludeProviders?.includes(p.name));
    if (availableProviders.length === 0) {
      throw new Error("No available providers");
    }
    switch (virtualModel) {
      case "best":
        return this.routeBest(availableProviders, context);
      case "cheapest":
        return this.routeCheapest(availableProviders, context);
      case "fastest":
        return this.routeFastest(availableProviders, context);
      default:
        throw new Error(`Unknown virtual model: ${String(virtualModel)}`);
    }
  }
  /**
   * Route to the best quality model, using a built-in ranking (unranked
   * models score 50). When `context.preferredProvider` has a candidate, its
   * highest-ranked model is chosen instead.
   * @param {Array} providers - Candidate providers.
   * @param {object} [context]
   * @returns {object} Routing decision with up to three alternatives.
   * @throws {Error} When the providers expose no models at all.
   */
  routeBest(providers, context) {
    const qualityRanking = {
      "claude-3-5-sonnet-20241022": 95,
      "claude-sonnet-4-20250514": 96,
      "gpt-4o": 94,
      "gemini-1.5-pro": 92,
      "claude-3-opus-20240229": 93,
      "gpt-4-turbo": 91,
      o1: 97,
      "o1-preview": 96
    };
    const candidates = [];
    for (const provider of providers) {
      for (const model of provider.getModels()) {
        const score = qualityRanking[model] || 50;
        candidates.push({
          provider: provider.name,
          model,
          score
        });
      }
    }
    if (candidates.length === 0) {
      throw new Error("No models available from the available providers");
    }
    candidates.sort((a, b) => b.score - a.score);
    if (context?.preferredProvider) {
      const preferred = candidates.find(
        (c) => c.provider === context.preferredProvider
      );
      if (preferred) {
        return {
          provider: preferred.provider,
          model: preferred.model,
          reason: `Best quality model from preferred provider`,
          // The selected candidate must not be listed as its own alternative.
          alternatives: candidates.filter((c) => c !== preferred).slice(0, 3),
          timestamp: new Date()
        };
      }
    }
    const best = candidates[0];
    return {
      provider: best.provider,
      model: best.model,
      reason: `Highest quality model available`,
      alternatives: candidates.slice(1, 4),
      timestamp: new Date()
    };
  }
  /**
   * Route to the cheapest model, ranked by the average of the input and
   * output price per million. Models without model info rank last; free
   * models rank first. With `context.maxCost`, candidates are first limited
   * to those whose estimate for 1000 input / 500 output tokens fits the
   * budget (models without info pass the filter); if none fit, the overall
   * cheapest is returned.
   * @param {Array} providers - Candidate providers.
   * @param {object} [context]
   * @returns {object} Routing decision with up to three alternatives.
   * @throws {Error} When the providers expose no models at all.
   */
  routeCheapest(providers, context) {
    const candidates = [];
    for (const provider of providers) {
      for (const model of provider.getModels()) {
        const modelInfo = provider.getModelInfo(model);
        const avgCost = modelInfo ? (modelInfo.inputPricePerMillion + modelInfo.outputPricePerMillion) / 2 : Infinity;
        candidates.push({
          provider: provider.name,
          model,
          // Higher score = cheaper. A free model gets the largest finite
          // score (finite so it survives JSON serialization) instead of 0,
          // which would rank it alongside models with unknown pricing.
          score: avgCost === 0 ? Number.MAX_VALUE : 1 / avgCost
        });
      }
    }
    if (candidates.length === 0) {
      throw new Error("No models available from the available providers");
    }
    candidates.sort((a, b) => b.score - a.score);
    if (context?.maxCost !== void 0) {
      const filtered = candidates.filter((c) => {
        const provider = providers.find((p) => p.name === c.provider);
        const modelInfo = provider?.getModelInfo(c.model);
        if (!modelInfo) return true;
        const estimatedCost = 1e3 / 1e6 * modelInfo.inputPricePerMillion + 500 / 1e6 * modelInfo.outputPricePerMillion;
        return estimatedCost <= context.maxCost;
      });
      if (filtered.length > 0) {
        const cheapest2 = filtered[0];
        return {
          provider: cheapest2.provider,
          model: cheapest2.model,
          reason: `Cheapest model within budget`,
          alternatives: filtered.slice(1, 4),
          timestamp: new Date()
        };
      }
    }
    const cheapest = candidates[0];
    return {
      provider: cheapest.provider,
      model: cheapest.model,
      reason: `Cheapest available model`,
      alternatives: candidates.slice(1, 4),
      timestamp: new Date()
    };
  }
  /**
   * Route to the fastest model, based on each provider's reported
   * `getHealth().latencyMs` (a falsy latency is treated as 1000). With
   * `context.maxLatency`, candidates within the limit are preferred; if none
   * qualify, the overall fastest is returned.
   * @param {Array} providers - Candidate providers.
   * @param {object} [context]
   * @returns {object} Routing decision with up to three alternatives.
   * @throws {Error} When the providers expose no models at all.
   */
  routeFastest(providers, context) {
    const candidates = [];
    for (const provider of providers) {
      const health = provider.getHealth();
      const latency = health.latencyMs || 1e3;
      for (const model of provider.getModels()) {
        candidates.push({
          provider: provider.name,
          model,
          score: 1 / latency
          // Higher score = lower latency
        });
      }
    }
    if (candidates.length === 0) {
      throw new Error("No models available from the available providers");
    }
    candidates.sort((a, b) => b.score - a.score);
    if (context?.maxLatency !== void 0) {
      const filtered = candidates.filter((c) => {
        const provider = providers.find((p) => p.name === c.provider);
        const health = provider?.getHealth();
        return (health?.latencyMs || 1e3) <= context.maxLatency;
      });
      if (filtered.length > 0) {
        const fastest2 = filtered[0];
        return {
          provider: fastest2.provider,
          model: fastest2.model,
          reason: `Fastest model within latency limit`,
          alternatives: filtered.slice(1, 4),
          timestamp: new Date()
        };
      }
    }
    const fastest = candidates[0];
    return {
      provider: fastest.provider,
      model: fastest.model,
      reason: `Fastest available provider`,
      alternatives: candidates.slice(1, 4),
      timestamp: new Date()
    };
  }
  /**
   * Get equivalent models across providers.
   * @param {string} model
   * @returns {Array} Configured equivalents, or an empty array.
   */
  getEquivalentModels(model) {
    return this.modelMappings[model] || [];
  }
  /**
   * Set the routing strategy.
   * @param {object} strategy
   */
  setStrategy(strategy) {
    this.strategy = strategy;
  }
  /**
   * Get the current strategy name.
   * @returns {string}
   */
  getStrategyName() {
    return this.strategy.name;
  }
  /**
   * Get a copy of the fallback chain.
   * @returns {string[]}
   */
  getFallbackChain() {
    return [...this.fallbackChain];
  }
};
667
/**
 * Build a router configuration object from loose options.
 *
 * @param {object} [options={}] - May carry `strategy`, `fallbackChain`,
 *   `weights` and `rules`. Omitting the argument is allowed.
 * @returns {{strategy: string, fallbackChain: *, weights: *, rules: *}}
 *   The options with `strategy` defaulting to "round-robin" when falsy.
 */
function createRouterConfig(options = {}) {
  const { strategy, fallbackChain, weights, rules } = options;
  return {
    strategy: strategy || "round-robin",
    fallbackChain,
    weights,
    rules
  };
}
675
+
676
+ // src/routing/strategies/RoundRobin.ts
677
/**
 * Routing strategy that rotates through the eligible providers, optionally
 * repeating a provider in the rotation according to a per-provider weight.
 */
var RoundRobinStrategy = class {
  name = "round-robin";
  currentIndex = 0;
  weights;
  /**
   * @param {object} [config={}] - Optional `weights` map of provider name to
   *   weight; missing or falsy weights count as 1.
   */
  constructor(config = {}) {
    this.weights = config.weights || {};
  }
  /**
   * Pick the next provider in the rotation.
   *
   * Providers registered for `request.model` are used when there are any,
   * otherwise all available providers. Excluded and unavailable providers
   * are removed. `context.preferredProvider` wins when it is in the
   * remaining set, and does not advance the rotation.
   *
   * @param {object} request - Request with a `model` field.
   * @param {object} registry - Provider registry.
   * @param {object} [context] - Optional `excludeProviders` / `preferredProvider`.
   * @returns {object} Routing decision with up to three alternatives.
   * @throws {Error} When no provider remains.
   */
  route(request, registry, context) {
    // A provider that does not serve the requested model falls back to the
    // first model it lists.
    const modelFor = (p) => p.supportsModel(request.model) ? request.model : p.getModels()[0];

    let pool = registry.getProvidersForModel(request.model);
    if (pool.length === 0) {
      pool = registry.getAvailableProviders();
    }
    const excluded = context?.excludeProviders;
    pool = pool.filter((p) => !(excluded && excluded.includes(p.name)) && p.isAvailable());
    if (pool.length === 0) {
      throw new Error(`No available providers for model: ${request.model}`);
    }

    const decide = (chosen, reason) => {
      const model = modelFor(chosen);
      const alternatives = pool
        .filter((p) => p.name !== chosen.name)
        .slice(0, 3)
        .map((p) => ({ provider: p.name, model: modelFor(p), score: 1 }));
      return {
        provider: chosen.name,
        model,
        reason,
        alternatives,
        timestamp: new Date()
      };
    };

    if (context?.preferredProvider) {
      const preferred = pool.find((p) => p.name === context.preferredProvider);
      if (preferred) {
        return decide(preferred, "Preferred provider selected");
      }
    }

    // Expand the pool so each provider appears once per unit of weight.
    const rotation = [];
    for (const candidate of pool) {
      const weight = this.weights[candidate.name] || 1;
      let copies = 0;
      while (copies < weight) {
        rotation.push(candidate);
        copies += 1;
      }
    }
    const slot = this.currentIndex % rotation.length;
    const selected = rotation[slot];
    this.currentIndex = slot + 1;
    return decide(selected, `Round-robin selection (index: ${slot})`);
  }
  /**
   * Reset the rotation index.
   */
  reset() {
    this.currentIndex = 0;
  }
};
747
+
748
+ // src/routing/strategies/Failover.ts
749
/**
 * Routing strategy that walks an ordered chain of provider names and picks
 * the first one that is registered, available and not already tried.
 */
var FailoverStrategy = class {
  name = "failover";
  chain;
  modelMappings;
  /**
   * @param {object} config - `chain` is the ordered list of provider names;
   *   `modelMappings` optionally maps requested model -> provider name ->
   *   substitute model.
   */
  constructor(config) {
    // Copy so later mutation of the caller's array cannot change routing,
    // matching what setChain() and getChain() already do.
    this.chain = [...(config.chain ?? [])];
    this.modelMappings = config.modelMappings || {};
  }
  /**
   * Choose the model to use on a provider for a requested model: the
   * requested model when served, else a configured mapping that the provider
   * actually serves, else the first model the provider lists.
   * @param {object} provider - Provider object.
   * @param {string} providerName - Name used to look up the mapping.
   * @param {string} requestedModel - Model named in the request.
   * @returns {string} Model identifier to use.
   */
  resolveModel(provider, providerName, requestedModel) {
    if (provider.supportsModel(requestedModel)) {
      return requestedModel;
    }
    const mappedModel = this.modelMappings[requestedModel]?.[providerName];
    if (mappedModel && provider.supportsModel(mappedModel)) {
      return mappedModel;
    }
    return provider.getModels()[0];
  }
  /**
   * Pick the first usable provider in the chain.
   *
   * Providers named in `context.previousAttempts` or
   * `context.excludeProviders` are skipped. Up to three other usable chain
   * members are reported as alternatives, scored 1, 0.9, 0.8 in chain order.
   *
   * @param {object} request - Request with a `model` field.
   * @param {object} registry - Provider registry exposing `get(name)`.
   * @param {object} [context] - Optional routing context.
   * @returns {object} Routing decision.
   * @throws {Error} When every chain member is unusable.
   */
  route(request, registry, context) {
    const previousProviders = new Set(
      context?.previousAttempts?.map((a) => a.provider) || []
    );
    if (context?.excludeProviders) {
      for (const p of context.excludeProviders) {
        previousProviders.add(p);
      }
    }
    for (const providerName of this.chain) {
      const provider = registry.get(providerName);
      if (!provider) continue;
      if (!provider.isAvailable()) continue;
      if (previousProviders.has(providerName)) continue;
      const model = this.resolveModel(provider, providerName, request.model);
      const alternatives = [];
      for (const altName of this.chain) {
        if (altName === providerName) continue;
        if (previousProviders.has(altName)) continue;
        const altProvider = registry.get(altName);
        if (!altProvider?.isAvailable()) continue;
        alternatives.push({
          provider: altName,
          // Same validation as the primary: never advertise a mapped model
          // the alternative provider does not serve.
          model: this.resolveModel(altProvider, altName, request.model),
          score: 1 - alternatives.length * 0.1
        });
        if (alternatives.length >= 3) break;
      }
      return {
        provider: providerName,
        model,
        reason: previousProviders.size > 0 ? `Failover to ${providerName} after ${previousProviders.size} failures` : `Primary provider in failover chain`,
        alternatives,
        timestamp: new Date()
      };
    }
    throw new Error(
      `All providers in failover chain exhausted: ${this.chain.join(", ")}`
    );
  }
  /**
   * Get the next provider in the chain after the given one.
   * @param {string} currentProvider
   * @returns {string|null} null when the provider is last or not in the chain.
   */
  getNextProvider(currentProvider) {
    const index = this.chain.indexOf(currentProvider);
    if (index === -1 || index >= this.chain.length - 1) {
      return null;
    }
    return this.chain[index + 1];
  }
  /**
   * Get a copy of the current failover chain.
   * @returns {string[]}
   */
  getChain() {
    return [...this.chain];
  }
  /**
   * Replace the failover chain with a copy of the given one.
   * @param {string[]} chain
   */
  setChain(chain) {
    this.chain = [...chain];
  }
};
832
/** Lazily created tokenizer shared by the token helpers; null until first use. */
var encoder = null;
/**
 * Return the shared "cl100k_base" encoder, creating it on first call.
 * @returns {object} The tiktoken encoder instance.
 */
function getEncoder() {
  encoder ??= tiktoken.get_encoding("cl100k_base");
  return encoder;
}
839
/**
 * Count the tokens in a string using the shared encoder.
 *
 * Best effort: if obtaining the encoder or encoding throws, the count falls
 * back to one token per four characters, rounded up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Token count, or the length-based estimate on failure.
 */
function countTokens(text) {
  let tokenCount;
  try {
    tokenCount = getEncoder().encode(text).length;
  } catch {
    tokenCount = Math.ceil(text.length / 4);
  }
  return tokenCount;
}
847
/**
 * Estimate the token count of a list of chat messages.
 *
 * Each message adds a fixed overhead of 4 plus the tokens of its content
 * (non-string content is JSON-serialized first); a further 2 is added once
 * for the whole list.
 *
 * @param {Iterable<object>} messages - Messages with an optional `content`.
 * @returns {number} Estimated token count.
 */
function countMessageTokens(messages) {
  const PER_MESSAGE_OVERHEAD = 4;
  const FIXED_OVERHEAD = 2;
  let total = 0;
  for (const { content } of messages) {
    total += PER_MESSAGE_OVERHEAD;
    if (!content) {
      continue;
    }
    const text = typeof content === "string" ? content : JSON.stringify(content);
    total += countTokens(text);
  }
  return total + FIXED_OVERHEAD;
}
860
/**
 * Estimate the input tokens of a request: the message estimate plus, for
 * each tool, the tokens of its function name, description and
 * JSON-serialized parameters (when present) and a fixed overhead of 10.
 *
 * @param {Iterable<object>} messages - Chat messages.
 * @param {Array<object>} [tools] - Tool definitions with a `function` object.
 * @returns {number} Estimated token count.
 */
function estimateRequestTokens(messages, tools) {
  const PER_TOOL_OVERHEAD = 10;
  const messageTokens = countMessageTokens(messages);
  const hasTools = tools && tools.length > 0;
  if (!hasTools) {
    return messageTokens;
  }
  let total = messageTokens;
  for (const tool of tools) {
    const fn = tool.function;
    total += countTokens(fn.name);
    if (fn.description) {
      total += countTokens(fn.description);
    }
    if (fn.parameters) {
      total += countTokens(JSON.stringify(fn.parameters));
    }
    total += PER_TOOL_OVERHEAD;
  }
  return total;
}
876
/**
 * Cut a string down to at most `maxTokens` tokens.
 *
 * @param {string} text - Text to truncate.
 * @param {number} maxTokens - Maximum number of tokens to keep.
 * @returns {string} `text` unchanged when it already fits, otherwise the
 *   decoded text of the first `maxTokens` tokens.
 */
function truncateToTokenLimit(text, maxTokens) {
  const enc = getEncoder();
  const tokens = enc.encode(text);
  if (tokens.length > maxTokens) {
    // The encoder's decode() result is passed through TextDecoder to get a string.
    const bytes = enc.decode(tokens.slice(0, maxTokens));
    return new TextDecoder().decode(bytes);
  }
  return text;
}
886
/**
 * Release the shared encoder, if one was created, by calling its `free()`
 * and clearing the cached reference so the next use creates a new one.
 */
function freeEncoder() {
  if (!encoder) {
    return;
  }
  encoder.free();
  encoder = null;
}
892
+
893
+ // src/routing/strategies/CostOptimized.ts
894
/**
 * Quality score per model id, on a 0-100 scale, grouped by tier.
 */
var MODEL_QUALITY_SCORES = Object.fromEntries([
  // Top tier
  ["o1", 98],
  ["o1-preview", 96],
  ["claude-3-opus-20240229", 95],
  ["claude-3-5-sonnet-20241022", 94],
  ["claude-sonnet-4-20250514", 95],
  ["gpt-4o", 93],
  ["gemini-1.5-pro", 91],
  // Mid tier
  ["gpt-4-turbo", 88],
  ["gpt-4", 87],
  ["claude-3-sonnet-20240229", 85],
  ["o1-mini", 84],
  // Fast/cheap tier
  ["gpt-4o-mini", 80],
  ["claude-3-5-haiku-20241022", 79],
  ["claude-3-haiku-20240307", 75],
  ["gemini-1.5-flash", 78],
  ["gpt-3.5-turbo", 70],
  // Local models
  ["llama3", 65],
  ["llama3.1", 67],
  ["llama3.2", 68],
  ["mistral", 62]
]);
920
/**
 * Routing strategy that picks the cheapest model meeting a quality
 * threshold, using an estimated per-request cost for each candidate.
 */
var CostOptimizedStrategy = class {
  name = "cost-optimized";
  config;
  /**
   * @param {object} [config={}] - Overrides for the defaults
   *   `preferLocal: false`, `qualityThreshold: 0.6`,
   *   `fallbackOnBudget: "cheapest"`; may also carry `maxCostPerRequest`.
   */
  constructor(config = {}) {
    this.config = {
      preferLocal: false,
      qualityThreshold: 0.6,
      fallbackOnBudget: "cheapest",
      ...config
    };
  }
  /**
   * Pick the cheapest suitable model across the available providers.
   *
   * Cost is estimated from the request's estimated input tokens and
   * `request.max_tokens` (1000 when falsy) against each model's per-million
   * prices. Models without model info are ignored. Candidates below the
   * quality threshold are dropped unless that leaves nothing. A budget
   * (`context.maxCost`, else `config.maxCostPerRequest`) narrows the set
   * when at least one candidate fits; otherwise the budget is ignored unless
   * `fallbackOnBudget` is "error".
   *
   * @param {object} request - Request with `messages`, optional `tools` and `max_tokens`.
   * @param {object} registry - Provider registry.
   * @param {object} [context] - Optional `excludeProviders` / `maxCost`.
   * @returns {object} Routing decision with up to three alternatives.
   * @throws {Error} When no provider is available, no model exposes pricing
   *   info, or nothing fits the budget with `fallbackOnBudget: "error"`.
   */
  route(request, registry, context) {
    let providers = registry.getAvailableProviders();
    if (context?.excludeProviders) {
      providers = providers.filter(
        (p) => !context.excludeProviders.includes(p.name)
      );
    }
    if (providers.length === 0) {
      throw new Error("No available providers");
    }
    const estimatedInputTokens = estimateRequestTokens(
      request.messages,
      request.tools
    );
    const estimatedOutputTokens = request.max_tokens || 1e3;
    const candidates = [];
    for (const provider of providers) {
      // Providers with these names are treated as local for `preferLocal`.
      const isLocal = provider.name === "ollama" || provider.name === "lmstudio";
      for (const model of provider.getModels()) {
        const modelInfo = provider.getModelInfo(model);
        if (!modelInfo) continue;
        const inputCost = estimatedInputTokens / 1e6 * modelInfo.inputPricePerMillion;
        const outputCost = estimatedOutputTokens / 1e6 * modelInfo.outputPricePerMillion;
        const totalCost = inputCost + outputCost;
        // Unknown models get a neutral quality of 0.5.
        const quality = (MODEL_QUALITY_SCORES[model] || 50) / 100;
        candidates.push({
          provider: provider.name,
          model,
          cost: totalCost,
          quality,
          isLocal
        });
      }
    }
    if (candidates.length === 0) {
      // Without this guard the selection below would dereference undefined.
      throw new Error("No models with pricing information available");
    }
    const minQuality = this.config.qualityThreshold || 0;
    let filtered = candidates.filter((c) => c.quality >= minQuality);
    if (filtered.length === 0) {
      filtered = candidates;
    }
    const maxCost = context?.maxCost ?? this.config.maxCostPerRequest;
    if (maxCost !== void 0) {
      const withinBudget = filtered.filter((c) => c.cost <= maxCost);
      if (withinBudget.length > 0) {
        filtered = withinBudget;
      } else if (this.config.fallbackOnBudget === "error") {
        throw new Error(`No models within budget of $${maxCost.toFixed(4)}`);
      }
    }
    filtered.sort((a, b) => {
      if (this.config.preferLocal) {
        if (a.isLocal && !b.isLocal) return -1;
        if (!a.isLocal && b.isLocal) return 1;
      }
      return a.cost - b.cost;
    });
    const selected = filtered[0];
    return {
      provider: selected.provider,
      model: selected.model,
      reason: `Cheapest model meeting quality threshold (${(selected.quality * 100).toFixed(0)}% quality, $${selected.cost.toFixed(6)}/req)`,
      alternatives: filtered.slice(1, 4).map((c) => ({
        provider: c.provider,
        model: c.model,
        // Higher score = cheaper; the small offset avoids division by zero.
        score: 1 / (c.cost + 1e-4)
      })),
      timestamp: new Date()
    };
  }
  /**
   * Merge the given values into the current configuration.
   * @param {object} config - Partial configuration.
   */
  setConfig(config) {
    this.config = { ...this.config, ...config };
  }
  /**
   * Get a shallow copy of the current configuration.
   * @returns {object}
   */
  getConfig() {
    return { ...this.config };
  }
};
1013
+
1014
+ // src/routing/strategies/LatencyOptimized.ts
1015
/**
 * Routing strategy that prefers the provider with the lowest latency.
 *
 * Latency comes from this strategy's own observations (see `recordLatency`)
 * once a provider has at least 5 samples and `adaptiveRouting` is enabled;
 * otherwise the provider's reported health latency is used, falling back to
 * 1000 ms when that is 0/unset.
 */
var LatencyOptimizedStrategy = class {
  name = "latency-optimized";
  config;
  latencyStats = /* @__PURE__ */ new Map();
  maxSamples = 100;
  /**
   * @param {object} [config] - Overrides for `warmupRequests`, `adaptiveRouting`
   *   and (optionally) `maxLatencyMs`.
   */
  constructor(config = {}) {
    this.config = {
      warmupRequests: 10,
      adaptiveRouting: true,
      ...config
    };
  }
  /**
   * Pick a provider/model for a request.
   *
   * @param {object} request - Request whose `model` is either a concrete model id
   *   or the sentinel "fastest" (any model is acceptable).
   * @param {object} registry - Object exposing `getAvailableProviders()`.
   * @param {object} [context] - Optional `excludeProviders` and `maxLatency`.
   * @returns {object} Routing decision with up to three alternatives.
   * @throws {Error} When no provider is available, or none can serve the model.
   */
  route(request, registry, context) {
    let providers = registry.getAvailableProviders();
    if (context?.excludeProviders) {
      providers = providers.filter(
        (p) => !context.excludeProviders.includes(p.name)
      );
    }
    if (providers.length === 0) {
      throw new Error("No available providers");
    }
    const totalRequests = Array.from(this.latencyStats.values()).reduce(
      (sum, stats) => sum + stats.count,
      0
    );
    const warmupThreshold = this.config.warmupRequests ?? 10;
    const isWarmup = warmupThreshold > 0 && totalRequests < warmupThreshold;
    const candidates = [];
    for (const provider of providers) {
      const stats = this.latencyStats.get(provider.name);
      const health = provider.getHealth();
      let latency;
      let confidence;
      if (stats && stats.count >= 5 && this.config.adaptiveRouting) {
        latency = stats.avg;
        confidence = Math.min(stats.count / 50, 1);
      } else {
        latency = health.latencyMs || 1e3;
        confidence = 0.3;
      }
      // At most one candidate per provider: the first acceptable model.
      for (const model of provider.getModels()) {
        if (request.model !== "fastest" && !provider.supportsModel(request.model) && model !== request.model) {
          continue;
        }
        candidates.push({
          provider: provider.name,
          model: request.model !== "fastest" && provider.supportsModel(request.model) ? request.model : model,
          latency,
          confidence
        });
        break;
      }
    }
    // Providers may be available yet unable to serve the requested model (or
    // expose no models at all); fail with a clear message instead of crashing
    // on an undefined selection below.
    if (candidates.length === 0) {
      throw new Error(`No available provider supports model "${request.model}"`);
    }
    const maxLatency = context?.maxLatency ?? this.config.maxLatencyMs;
    let filtered = candidates;
    if (maxLatency !== void 0) {
      // The latency cap is a preference: if nobody meets it, keep everyone.
      const withinLimit = candidates.filter((c) => c.latency <= maxLatency);
      if (withinLimit.length > 0) {
        filtered = withinLimit;
      }
    }
    filtered.sort((a, b) => a.latency - b.latency);
    // During warmup, occasionally pick among the top three so that slower-looking
    // providers still accumulate samples.
    const shouldExplore = isWarmup && Math.random() < 0.3 && filtered.length > 1;
    if (shouldExplore) {
      const randomIndex = Math.floor(
        Math.random() * Math.min(3, filtered.length)
      );
      const selected2 = filtered[randomIndex];
      return {
        provider: selected2.provider,
        model: selected2.model,
        reason: `Warmup exploration (${totalRequests}/${warmupThreshold} requests)`,
        alternatives: filtered.filter((c) => c.provider !== selected2.provider).slice(0, 3).map((c) => ({
          provider: c.provider,
          model: c.model,
          score: 1 / c.latency
        })),
        timestamp: /* @__PURE__ */ new Date()
      };
    }
    const selected = filtered[0];
    return {
      provider: selected.provider,
      model: selected.model,
      reason: `Fastest provider (${selected.latency.toFixed(0)}ms avg, ${(selected.confidence * 100).toFixed(0)}% confidence)`,
      alternatives: filtered.slice(1, 4).map((c) => ({
        provider: c.provider,
        model: c.model,
        score: 1 / c.latency
      })),
      timestamp: /* @__PURE__ */ new Date()
    };
  }
  /**
   * Record a latency observation
   *
   * Non-finite or negative values are ignored so a single bad measurement
   * cannot turn the moving average into NaN.
   *
   * @param {string} provider - Provider name used as the stats key.
   * @param {number} latencyMs - Observed latency in milliseconds.
   */
  recordLatency(provider, latencyMs) {
    if (!Number.isFinite(latencyMs) || latencyMs < 0) {
      return;
    }
    let stats = this.latencyStats.get(provider);
    if (!stats) {
      stats = {
        count: 0,
        total: 0,
        min: Infinity,
        max: 0,
        avg: 0,
        p95: 0,
        samples: []
      };
      this.latencyStats.set(provider, stats);
    }
    stats.count++;
    stats.total += latencyMs;
    stats.min = Math.min(stats.min, latencyMs);
    stats.max = Math.max(stats.max, latencyMs);
    stats.samples.push(latencyMs);
    if (stats.samples.length > this.maxSamples) {
      stats.samples.shift();
    }
    // `avg` is an exponential moving average (weight 0.2 on the newest sample),
    // not total / count.
    const alpha = 0.2;
    if (stats.count === 1) {
      stats.avg = latencyMs;
    } else {
      stats.avg = alpha * latencyMs + (1 - alpha) * stats.avg;
    }
    // p95 is only computed once the sample window holds at least 20 values.
    if (stats.samples.length >= 20) {
      const sorted = [...stats.samples].sort((a, b) => a - b);
      const p95Index = Math.floor(sorted.length * 0.95);
      stats.p95 = sorted[p95Index];
    }
  }
  /**
   * Get latency statistics for a provider
   *
   * @returns {object|undefined} The live stats record (not a copy), or
   *   undefined when nothing has been recorded for the provider.
   */
  getStats(provider) {
    return this.latencyStats.get(provider);
  }
  /**
   * Get all latency statistics
   *
   * @returns {object} Map of provider name to a copy of its stats.
   */
  getAllStats() {
    const result = {};
    for (const [provider, stats] of this.latencyStats) {
      result[provider] = { ...stats, samples: [...stats.samples] };
    }
    return result;
  }
  /**
   * Clear latency statistics
   */
  clearStats() {
    this.latencyStats.clear();
  }
  /**
   * Update configuration
   */
  setConfig(config) {
    this.config = { ...this.config, ...config };
  }
  /**
   * Get current configuration
   *
   * @returns {object} Shallow copy of the active configuration.
   */
  getConfig() {
    return { ...this.config };
  }
};
1175
+
1176
+ // src/utils/pricing.ts
1177
/**
 * Price table keyed by model id. Each entry holds an `input` and an `output`
 * rate; the cost helpers in this module divide token counts by 1e6 before
 * applying them, i.e. the rates are per million tokens.
 */
var MODEL_PRICING = (() => {
  // Small constructor so every row reads as (input rate, output rate).
  const rate = (input, output) => ({ input, output });
  return {
    // OpenAI Models
    "gpt-4o": rate(2.5, 10),
    "gpt-4o-2024-11-20": rate(2.5, 10),
    "gpt-4o-mini": rate(0.15, 0.6),
    "gpt-4o-mini-2024-07-18": rate(0.15, 0.6),
    "gpt-4-turbo": rate(10, 30),
    "gpt-4-turbo-preview": rate(10, 30),
    "gpt-4": rate(30, 60),
    "gpt-4-32k": rate(60, 120),
    "gpt-3.5-turbo": rate(0.5, 1.5),
    "gpt-3.5-turbo-0125": rate(0.5, 1.5),
    "o1": rate(15, 60),
    "o1-preview": rate(15, 60),
    "o1-mini": rate(3, 12),
    // Anthropic Models
    "claude-3-5-sonnet-20241022": rate(3, 15),
    "claude-3-5-sonnet-latest": rate(3, 15),
    "claude-sonnet-4-20250514": rate(3, 15),
    "claude-3-5-haiku-20241022": rate(0.8, 4),
    "claude-3-haiku-20240307": rate(0.25, 1.25),
    "claude-3-opus-20240229": rate(15, 75),
    "claude-3-sonnet-20240229": rate(3, 15),
    // Google Gemini Models
    "gemini-1.5-pro": rate(1.25, 5),
    "gemini-1.5-pro-latest": rate(1.25, 5),
    "gemini-1.5-flash": rate(0.075, 0.3),
    "gemini-1.5-flash-latest": rate(0.075, 0.3),
    "gemini-2.0-flash-exp": rate(0.1, 0.4),
    "gemini-pro": rate(0.5, 1.5),
    // Mistral Models
    "mistral-large-latest": rate(2, 6),
    "mistral-medium-latest": rate(2.7, 8.1),
    "mistral-small-latest": rate(0.2, 0.6),
    "open-mistral-7b": rate(0.25, 0.25),
    "open-mixtral-8x7b": rate(0.7, 0.7),
    "open-mixtral-8x22b": rate(2, 6),
    // Cohere Models
    "command-r-plus": rate(2.5, 10),
    "command-r": rate(0.5, 1.5),
    "command": rate(1, 2),
    // Groq Models (significantly cheaper)
    "llama-3.3-70b-versatile": rate(0.59, 0.79),
    "llama-3.1-70b-versatile": rate(0.59, 0.79),
    "llama-3.1-8b-instant": rate(0.05, 0.08),
    "mixtral-8x7b-32768": rate(0.24, 0.24),
    // Together AI Models
    "meta-llama/Llama-3.3-70B-Instruct-Turbo": rate(0.88, 0.88),
    "meta-llama/Llama-3.1-70B-Instruct-Turbo": rate(0.88, 0.88),
    "meta-llama/Llama-3.1-8B-Instruct-Turbo": rate(0.18, 0.18),
    "mistralai/Mixtral-8x7B-Instruct-v0.1": rate(0.6, 0.6),
    // Local models (free)
    "llama3": rate(0, 0),
    "llama3.1": rate(0, 0),
    "llama3.2": rate(0, 0),
    "mistral": rate(0, 0),
    "codellama": rate(0, 0),
    "phi3": rate(0, 0),
    "qwen2.5": rate(0, 0)
  };
})();
1237
/**
 * Context window size (in tokens) keyed by model id. Models missing from this
 * table fall back to a default chosen by the caller.
 *
 * Dated/alias ids that appear in the pricing table are listed here next to
 * their base model with the same value, so alias lookups do not silently drop
 * to the fallback.
 */
var MODEL_CONTEXT_WINDOWS = {
  // OpenAI
  "gpt-4o": 128e3,
  "gpt-4o-2024-11-20": 128e3,
  "gpt-4o-mini": 128e3,
  "gpt-4o-mini-2024-07-18": 128e3,
  "gpt-4-turbo": 128e3,
  "gpt-4-turbo-preview": 128e3,
  "gpt-4": 8192,
  "gpt-4-32k": 32768,
  "gpt-3.5-turbo": 16385,
  "gpt-3.5-turbo-0125": 16385,
  o1: 2e5,
  "o1-preview": 128e3,
  "o1-mini": 128e3,
  // Anthropic
  "claude-3-5-sonnet-20241022": 2e5,
  "claude-3-5-sonnet-latest": 2e5,
  "claude-sonnet-4-20250514": 2e5,
  "claude-3-5-haiku-20241022": 2e5,
  "claude-3-opus-20240229": 2e5,
  "claude-3-sonnet-20240229": 2e5,
  "claude-3-haiku-20240307": 2e5,
  // Google
  "gemini-1.5-pro": 2e6,
  "gemini-1.5-pro-latest": 2e6,
  "gemini-1.5-flash": 1e6,
  "gemini-1.5-flash-latest": 1e6,
  "gemini-2.0-flash-exp": 1e6,
  "gemini-pro": 32e3,
  // Mistral
  "mistral-large-latest": 128e3,
  "mistral-medium-latest": 32e3,
  "mistral-small-latest": 32e3,
  // Groq
  "llama-3.3-70b-versatile": 128e3,
  "llama-3.1-70b-versatile": 131072,
  "llama-3.1-8b-instant": 131072,
  "mixtral-8x7b-32768": 32768
};
1270
/**
 * Maximum output tokens keyed by model id. Models missing from this table fall
 * back to a default chosen by the caller.
 *
 * Dated/alias ids that appear in the pricing table are listed next to their
 * base model with the same limit.
 */
var MODEL_MAX_OUTPUT = {
  // OpenAI
  "gpt-4o": 16384,
  "gpt-4o-2024-11-20": 16384,
  "gpt-4o-mini": 16384,
  "gpt-4o-mini-2024-07-18": 16384,
  "gpt-4-turbo": 4096,
  "gpt-4-turbo-preview": 4096,
  "gpt-4": 8192,
  "gpt-3.5-turbo": 4096,
  "gpt-3.5-turbo-0125": 4096,
  o1: 1e5,
  "o1-preview": 32768,
  "o1-mini": 65536,
  // Anthropic
  "claude-3-5-sonnet-20241022": 8192,
  "claude-3-5-sonnet-latest": 8192,
  "claude-sonnet-4-20250514": 16384,
  "claude-3-opus-20240229": 4096,
  // Google
  "gemini-1.5-pro": 8192,
  "gemini-1.5-pro-latest": 8192,
  "gemini-1.5-flash": 8192,
  "gemini-1.5-flash-latest": 8192
};
1288
/**
 * Compute the cost of a completed request from its token usage.
 *
 * @param {string} model - Model id to look up in MODEL_PRICING.
 * @param {{prompt_tokens?: number, completion_tokens?: number}} usage - Token
 *   counts; a missing count is treated as 0.
 * @returns {number} Cost using the table's per-million-token rates, or 0 when
 *   the model has no pricing entry.
 */
function calculateCost(model, usage) {
  // Own-property check: ids such as "constructor" or "__proto__" must not
  // resolve to inherited Object members (which would yield NaN).
  if (!Object.hasOwn(MODEL_PRICING, model)) {
    return 0;
  }
  const pricing = MODEL_PRICING[model];
  const promptTokens = usage?.prompt_tokens ?? 0;
  const completionTokens = usage?.completion_tokens ?? 0;
  const inputCost = promptTokens / 1e6 * pricing.input;
  const outputCost = completionTokens / 1e6 * pricing.output;
  return inputCost + outputCost;
}
1297
/**
 * Estimate the cost of a request before it is sent.
 *
 * @param {string} model - Model id to look up in MODEL_PRICING.
 * @param {number} [estimatedInputTokens=0] - Expected prompt tokens.
 * @param {number} [estimatedOutputTokens=0] - Expected completion tokens.
 * @returns {number} Estimated cost using the table's per-million-token rates,
 *   or 0 when the model has no pricing entry.
 */
function estimateCost(model, estimatedInputTokens = 0, estimatedOutputTokens = 0) {
  // Own-property check so inherited Object members are never read as pricing.
  if (!Object.hasOwn(MODEL_PRICING, model)) {
    return 0;
  }
  const pricing = MODEL_PRICING[model];
  const inputCost = estimatedInputTokens / 1e6 * pricing.input;
  const outputCost = estimatedOutputTokens / 1e6 * pricing.output;
  return inputCost + outputCost;
}
1306
/**
 * Look up the pricing entry for a model.
 *
 * @param {string} model - Model id.
 * @returns {{input: number, output: number}|null} The pricing entry, or null
 *   when the model is not in MODEL_PRICING.
 */
function getModelPricing(model) {
  // Own-property check: "constructor"/"toString"/... are not pricing entries.
  return Object.hasOwn(MODEL_PRICING, model) ? MODEL_PRICING[model] : null;
}
1309
/**
 * Assemble a model descriptor from the static tables in this module.
 *
 * Unknown models get zero pricing and 4096 for both the context window and the
 * maximum output.
 *
 * @param {string} model - Model id.
 * @param {string} provider - Provider name, forwarded to getModelCapabilities.
 * @returns {object} Descriptor with id, provider, limits, prices and capabilities.
 */
function getModelInfo(model, provider) {
  // Read only own entries so ids like "constructor" cannot leak inherited
  // Object members into the descriptor.
  const lookup = (table) => Object.hasOwn(table, model) ? table[model] : void 0;
  const pricing = lookup(MODEL_PRICING) || { input: 0, output: 0 };
  const contextWindow = lookup(MODEL_CONTEXT_WINDOWS) || 4096;
  const maxOutput = lookup(MODEL_MAX_OUTPUT) || 4096;
  return {
    id: model,
    provider,
    contextWindow,
    maxOutputTokens: maxOutput,
    inputPricePerMillion: pricing.input,
    outputPricePerMillion: pricing.output,
    capabilities: getModelCapabilities(model, provider)
  };
}
1323
/**
 * Derive a capability set from the model id (substring heuristics) and, for
 * Ollama, from the provider name. Checks run in order; the first match wins.
 *
 * @param {string} model - Model id.
 * @param {string} [provider] - Provider name; only "ollama" is special-cased.
 * @returns {{streaming: boolean, tools: boolean, vision: boolean,
 *   json_mode: boolean, system_prompts: boolean}} Capability flags.
 */
function getModelCapabilities(model, provider) {
  const defaults = {
    streaming: true,
    tools: true,
    vision: false,
    json_mode: true,
    system_prompts: true
  };
  if (model.includes("gpt-4o") || model.includes("gpt-4-turbo")) {
    return { ...defaults, vision: true };
  }
  if (model.includes("o1")) {
    return {
      streaming: false,
      // o1 doesn't support streaming
      tools: false,
      vision: false,
      json_mode: false,
      system_prompts: false
      // o1 uses developer messages
    };
  }
  // Claude 3.x ids contain "claude-3"; newer ids put the tier before the
  // version (e.g. "claude-sonnet-4-20250514") and must be matched separately,
  // otherwise they would be reported without vision support.
  if (model.includes("claude-3") || /claude-(?:opus|sonnet|haiku)-\d/.test(model)) {
    return { ...defaults, vision: true };
  }
  if (model.includes("gemini")) {
    return { ...defaults, vision: true };
  }
  if (provider === "ollama") {
    return {
      streaming: true,
      tools: false,
      // Most Ollama models don't support tools natively
      vision: model.includes("llava") || model.includes("bakllava"),
      json_mode: true,
      system_prompts: true
    };
  }
  return defaults;
}
1363
/**
 * Return the model with the lowest mean of input and output rate.
 *
 * Models without an entry in MODEL_PRICING are ignored; on a tie the earliest
 * model wins. The second parameter is accepted but not used.
 *
 * @param {Iterable<string>} models - Candidate model ids.
 * @param {*} [_requiredCapabilities] - Unused.
 * @returns {string|null} Cheapest priced model, or null if none is priced.
 */
function findCheapestModel(models, _requiredCapabilities) {
  const priced = [...models]
    .map((id) => ({ id, rates: MODEL_PRICING[id] }))
    .filter((entry) => entry.rates);
  let best = null;
  for (const { id, rates } of priced) {
    const mean = (rates.input + rates.output) / 2;
    if (best === null || mean < best.cost) {
      best = { model: id, cost: mean };
    }
  }
  return best?.model || null;
}
1375
/**
 * Return a sorted copy of `models`, ordered by the mean of input and output
 * rate. Models without a MODEL_PRICING entry are treated as costing 0.
 *
 * @param {Iterable<string>} models - Model ids; the input is not mutated.
 * @param {string} [direction="asc"] - "asc" for cheapest first; any other value
 *   sorts most expensive first.
 * @returns {string[]} New array of model ids.
 */
function sortModelsByCost(models, direction = "asc") {
  const meanRate = (id) => {
    const rates = MODEL_PRICING[id] || { input: 0, output: 0 };
    return (rates.input + rates.output) / 2;
  };
  const ordered = [...models];
  ordered.sort((left, right) => {
    const leftCost = meanRate(left);
    const rightCost = meanRate(right);
    if (direction === "asc") {
      return leftCost - rightCost;
    }
    return rightCost - leftCost;
  });
  return ordered;
}
1384
/**
 * Fingerprint a chat request (murmurhash v3, hex string).
 *
 * Every request field that the provider adapters in this file forward to the
 * upstream API is part of the fingerprint, so two requests that can produce
 * different completions never share a hash merely because they differ in, say,
 * `top_p` or `response_format`. Fields that are undefined are dropped by
 * JSON.stringify, so requests that only use model/messages/temperature/
 * max_tokens/tools/tool_choice hash exactly as they did before.
 *
 * @param {object} request - Chat completion request.
 * @returns {string} Hexadecimal hash of the normalized request.
 */
function hashRequest(request) {
  const normalized = {
    model: request.model,
    messages: request.messages.map((m) => ({
      role: m.role,
      content: normalizeContent(m.content),
      // Tool traffic distinguishes otherwise identical (often empty) messages.
      tool_calls: m.tool_calls ? JSON.stringify(m.tool_calls) : void 0,
      tool_call_id: m.tool_call_id
    })),
    temperature: request.temperature ?? 1,
    max_tokens: request.max_tokens,
    tools: request.tools ? JSON.stringify(request.tools) : void 0,
    tool_choice: request.tool_choice ? JSON.stringify(request.tool_choice) : void 0,
    top_p: request.top_p,
    frequency_penalty: request.frequency_penalty,
    presence_penalty: request.presence_penalty,
    stop: request.stop,
    response_format: request.response_format,
    seed: request.seed
  };
  const str = JSON.stringify(normalized);
  return murmurhash__default.default.v3(str).toString(16);
}
1399
/**
 * Reduce message content to a string: strings pass through, null/undefined
 * become the empty string, anything else is JSON-serialized.
 *
 * @param {*} content - Message content.
 * @returns {string} String form of the content.
 */
function normalizeContent(content) {
  if (content == null) {
    return "";
  }
  return typeof content === "string" ? content : JSON.stringify(content);
}
1408
/**
 * Generate a short, mostly-unique identifier: base-36 timestamp followed by six
 * base-36 random characters, optionally prefixed with `<prefix>-`.
 *
 * Not suitable for security purposes (uses Math.random).
 *
 * @param {string} [prefix="gw"] - Prefix; when empty no separator is emitted,
 *   so callers composing their own prefix do not end up with a double dash.
 * @returns {string} The identifier.
 */
function generateId(prefix = "gw") {
  const timestamp = Date.now().toString(36);
  // Math.random().toString(36) can have fewer than six fractional digits
  // (e.g. 0.5 -> "0.i"); pad so the suffix length is always six.
  const random = Math.random().toString(36).substring(2, 8).padEnd(6, "0");
  const body = `${timestamp}${random}`;
  return prefix ? `${prefix}-${body}` : body;
}
1413
/**
 * Generate an id for a chat completion: the literal "chatcmpl-" followed by
 * whatever generateId returns for an empty prefix.
 *
 * @returns {string} The request id.
 */
function generateRequestId() {
  const suffix = generateId("");
  return "chatcmpl-".concat(suffix);
}
1416
/**
 * Build the cache key for a request: "gw:cache:<provider>:<model>:<hash>".
 *
 * @param {string} provider - Provider name.
 * @param {string} model - Model id.
 * @param {string} requestHash - Hash of the normalized request.
 * @returns {string} Namespaced cache key.
 */
function generateCacheKey(provider, model, requestHash) {
  return "gw:cache:".concat(provider, ":", model, ":", requestHash);
}
1419
/**
 * Hash a string with murmurhash v3 and return the result in hexadecimal.
 *
 * @param {string} str - Input string.
 * @returns {string} Hex digest.
 */
function hash(str) {
  const digest = murmurhash__default.default.v3(str);
  return digest.toString(16);
}
1422
/**
 * Derive a system fingerprint from a configuration object: "fp_" followed by
 * the hexadecimal murmurhash v3 of its JSON serialization.
 *
 * @param {*} config - JSON-serializable configuration.
 * @returns {string} Fingerprint string.
 */
function createSystemFingerprint(config) {
  const serialized = JSON.stringify(config);
  const hex = murmurhash__default.default.v3(serialized).toString(16);
  return "fp_" + hex;
}
1426
+
1427
+ // src/providers/Provider.ts
1428
/**
 * Base class for upstream model providers. Holds the provider configuration
 * and a rolling health record; subclasses supply `chat(request)`.
 *
 * Health status moves to "degraded" on a failure and to "unhealthy" once a
 * failure is recorded while two or more consecutive failures were already
 * counted; any recorded success resets it to "healthy".
 */
var Provider = class {
  name;
  config;
  health;
  /**
   * @param {object} config - Provider configuration; `name` and `models` are
   *   read by this class.
   */
  constructor(config) {
    this.name = config.name;
    this.config = config;
    this.health = {
      status: "healthy",
      latencyMs: 0,
      lastCheck: /* @__PURE__ */ new Date(),
      errorRate: 0,
      consecutiveFailures: 0
    };
  }
  /**
   * Check if the provider supports a specific model
   */
  supportsModel(model) {
    return this.config.models.includes(model);
  }
  /**
   * Get all supported models
   *
   * @returns {string[]} Copy of the configured model list.
   */
  getModels() {
    return [...this.config.models];
  }
  /**
   * Get the current health status
   *
   * @returns {object} Shallow copy of the health record.
   */
  getHealth() {
    return { ...this.health };
  }
  /**
   * Perform a health check
   *
   * Sends a one-token chat request for the first configured model and records
   * the outcome. A provider with no configured models is recorded as a failed
   * check without issuing a request.
   *
   * NOTE(review): the `chat` implementations visible in this file also call
   * `updateHealth`, so a failed probe may be counted twice — confirm intent.
   *
   * @returns {Promise<object>} Copy of the updated health record.
   */
  async healthCheck() {
    const start = Date.now();
    try {
      const probeModel = this.config.models[0];
      if (probeModel === void 0) {
        // Nothing to probe with; treat as a failed check rather than sending
        // a request with an undefined model.
        throw new Error("No models configured for health check");
      }
      await this.chat({
        model: probeModel,
        messages: [{ role: "user", content: "hi" }],
        max_tokens: 1
      });
      const latencyMs = Date.now() - start;
      this.health = {
        status: "healthy",
        latencyMs,
        lastCheck: /* @__PURE__ */ new Date(),
        errorRate: Math.max(0, this.health.errorRate - 0.1),
        consecutiveFailures: 0
      };
    } catch {
      const latencyMs = Date.now() - start;
      this.health = {
        status: this.health.consecutiveFailures >= 2 ? "unhealthy" : "degraded",
        latencyMs,
        lastCheck: /* @__PURE__ */ new Date(),
        errorRate: Math.min(1, this.health.errorRate + 0.2),
        consecutiveFailures: this.health.consecutiveFailures + 1
      };
    }
    // Hand out a copy, like getHealth(), so callers cannot mutate internal state.
    return this.getHealth();
  }
  /**
   * Update health status after a request
   *
   * @param {boolean} success - Whether the request succeeded.
   * @param {number} latencyMs - Request duration; only used on success, where
   *   it is averaged with the previous latency (or adopted if none yet).
   */
  updateHealth(success, latencyMs) {
    if (success) {
      this.health = {
        status: "healthy",
        latencyMs: this.health.latencyMs > 0 ? (this.health.latencyMs + latencyMs) / 2 : latencyMs,
        lastCheck: /* @__PURE__ */ new Date(),
        errorRate: Math.max(0, this.health.errorRate - 0.05),
        consecutiveFailures: 0
      };
    } else {
      this.health = {
        ...this.health,
        status: this.health.consecutiveFailures >= 2 ? "unhealthy" : "degraded",
        lastCheck: /* @__PURE__ */ new Date(),
        errorRate: Math.min(1, this.health.errorRate + 0.1),
        consecutiveFailures: this.health.consecutiveFailures + 1
      };
    }
  }
  /**
   * Check if the provider is currently healthy
   */
  isHealthy() {
    return this.health.status === "healthy";
  }
  /**
   * Check if the provider is available (healthy or degraded)
   */
  isAvailable() {
    return this.health.status !== "unhealthy";
  }
};
1527
+
1528
+ // src/providers/registry/OpenAIProvider.ts
1529
/**
 * Model ids an OpenAI provider serves when none are configured explicitly.
 */
var DEFAULT_OPENAI_MODELS = [
  // GPT family
  "gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo",
  // o1 family
  "o1", "o1-mini", "o1-preview"
];
1539
/**
 * Provider adapter for the OpenAI chat completions HTTP API.
 *
 * Requests and responses are already in the gateway's (OpenAI-compatible)
 * shape, so the transforms here mostly copy fields through.
 */
var OpenAIProvider = class extends Provider {
  apiKey;
  baseUrl;
  organization;
  project;
  timeout;
  /**
   * @param {object} [options] - `apiKey` (falls back to OPENAI_API_KEY),
   *   `baseUrl`, `models`, `timeout` (ms), `maxRetries`, `headers`,
   *   `organization`, `project`.
   * @throws {Error} When no API key is available.
   */
  constructor(options = {}) {
    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
    if (!apiKey) {
      throw new Error("OpenAI API key is required");
    }
    const config = {
      name: "openai",
      apiKey,
      baseUrl: options.baseUrl || "https://api.openai.com/v1",
      // Copy the defaults so instances never share one mutable array.
      models: options.models || [...DEFAULT_OPENAI_MODELS],
      timeout: options.timeout || 6e4,
      // `??` rather than `||`: an explicit 0 (no retries) must be honoured.
      maxRetries: options.maxRetries ?? 3,
      headers: options.headers
    };
    super(config);
    this.apiKey = apiKey;
    this.baseUrl = config.baseUrl;
    this.organization = options.organization;
    this.project = options.project;
    this.timeout = config.timeout;
  }
  /**
   * Execute a chat completion request
   *
   * @param {object} request - Gateway chat request.
   * @returns {Promise<object>} Chat completion in gateway format.
   * @throws {ProviderError} On HTTP errors, timeouts and network failures.
   */
  async chat(request) {
    const start = Date.now();
    try {
      const response = await this.makeRequest("/chat/completions", {
        method: "POST",
        body: JSON.stringify(this.transformRequest(request))
      });
      if (!response.ok) {
        const error = await this.parseError(response);
        throw error;
      }
      const data = await response.json();
      const result = this.transformResponse(data, request.model);
      this.updateHealth(true, Date.now() - start);
      return result;
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    }
  }
  /**
   * Execute a streaming chat completion request
   *
   * Reads the server-sent-event body line by line and yields one gateway
   * chunk per parsable `data: {...}` line. Malformed lines are skipped.
   *
   * @param {object} request - Gateway chat request.
   * @yields {object} Chat completion chunks in gateway format.
   * @throws {ProviderError} On HTTP errors, timeouts and network failures.
   */
  async *chatStream(request) {
    const start = Date.now();
    try {
      const response = await this.makeRequest("/chat/completions", {
        method: "POST",
        body: JSON.stringify({
          ...this.transformRequest(request),
          stream: true,
          stream_options: { include_usage: true }
        })
      });
      if (!response.ok) {
        const error = await this.parseError(response);
        throw error;
      }
      const reader = response.body?.getReader();
      if (!reader) {
        throw new ProviderError("No response body", "openai");
      }
      const decoder = new TextDecoder();
      let buffer = "";
      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          buffer += decoder.decode(value, { stream: true });
          const lines = buffer.split("\n");
          // The last element is a (possibly empty) partial line; keep it for
          // the next read.
          buffer = lines.pop() || "";
          for (const line of lines) {
            const payload = this.parseStreamLine(line);
            if (payload !== null) {
              yield this.transformChunk(payload, request.model);
            }
          }
        }
        // Flush the decoder and process a final line that was not terminated
        // by a newline; otherwise the last event could be lost.
        buffer += decoder.decode();
        const tail = this.parseStreamLine(buffer);
        if (tail !== null) {
          yield this.transformChunk(tail, request.model);
        }
      } finally {
        // Runs on completion, on error, and when the consumer stops iterating
        // early; cancelling releases the underlying connection.
        try {
          await reader.cancel();
        } catch {
          // Already closed or errored: nothing left to release.
        }
      }
      this.updateHealth(true, Date.now() - start);
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    }
  }
  /**
   * Parse one line of the event stream.
   *
   * @param {string} line - Raw line from the response body.
   * @returns {object|null} The decoded JSON object of a `data: ` line, or null
   *   for blank lines, the `[DONE]` sentinel, non-data lines and payloads
   *   that are not valid JSON objects.
   */
  parseStreamLine(line) {
    const trimmed = line.trim();
    if (!trimmed || trimmed === "data: [DONE]") return null;
    if (!trimmed.startsWith("data: ")) return null;
    try {
      const json = JSON.parse(trimmed.slice(6));
      return typeof json === "object" && json !== null ? json : null;
    } catch {
      // Best effort: a malformed event is skipped rather than aborting the stream.
      return null;
    }
  }
  /**
   * Get model information
   *
   * @returns {object|null} Model descriptor, or null for unsupported models.
   */
  getModelInfo(model) {
    if (!this.supportsModel(model)) {
      return null;
    }
    return getModelInfo(model, "openai");
  }
  /**
   * Make an HTTP request to the OpenAI API
   *
   * The abort timer is cleared as soon as `fetch` settles, so the timeout
   * bounds the time until the response is available, not the body read.
   *
   * @param {string} path - Path appended to the base URL.
   * @param {object} options - Extra `fetch` options (method, body, ...).
   * @returns {Promise<Response>} The fetch response.
   */
  async makeRequest(path, options) {
    const headers = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`,
      ...this.config.headers
    };
    if (this.organization) {
      headers["OpenAI-Organization"] = this.organization;
    }
    if (this.project) {
      headers["OpenAI-Project"] = this.project;
    }
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);
    try {
      return await fetch(`${this.baseUrl}${path}`, {
        ...options,
        headers,
        signal: controller.signal
      });
    } finally {
      clearTimeout(timeoutId);
    }
  }
  /**
   * Transform gateway request to OpenAI format
   *
   * Copies `model` and `messages` plus every optional parameter that is
   * defined on the request; undefined parameters are omitted.
   */
  transformRequest(request) {
    const transformed = {
      model: request.model,
      messages: request.messages
    };
    const passthrough = [
      "temperature",
      "max_tokens",
      "top_p",
      "frequency_penalty",
      "presence_penalty",
      "stop",
      "tools",
      "tool_choice",
      "response_format",
      "seed",
      "user"
    ];
    for (const key of passthrough) {
      if (request[key] !== void 0) {
        transformed[key] = request[key];
      }
    }
    return transformed;
  }
  /**
   * Transform OpenAI response to gateway format
   */
  transformResponse(data, model) {
    return {
      id: data.id || generateRequestId(),
      object: "chat.completion",
      created: data.created || Math.floor(Date.now() / 1e3),
      model: data.model || model,
      choices: data.choices,
      usage: data.usage,
      system_fingerprint: data.system_fingerprint
    };
  }
  /**
   * Transform OpenAI stream chunk to gateway format
   */
  transformChunk(data, model) {
    return {
      id: data.id || generateRequestId(),
      object: "chat.completion.chunk",
      created: data.created || Math.floor(Date.now() / 1e3),
      model: data.model || model,
      choices: data.choices,
      system_fingerprint: data.system_fingerprint,
      usage: data.usage
    };
  }
  /**
   * Parse error response from OpenAI
   *
   * Uses `error.message` from the JSON body when present; 5xx and 429
   * responses are flagged retryable.
   */
  async parseError(response) {
    let message = `OpenAI API error: ${response.status}`;
    const retryable = response.status >= 500 || response.status === 429;
    try {
      const data = await response.json();
      if (data.error?.message) {
        message = data.error.message;
      }
    } catch {
      // Body was not JSON; keep the generic status message.
    }
    return new ProviderError(message, "openai", void 0, retryable);
  }
  /**
   * Wrap unknown errors
   *
   * ProviderErrors pass through unchanged; other Errors are wrapped and are
   * retryable only when they look like a timeout/abort.
   */
  wrapError(error) {
    if (error instanceof ProviderError) {
      return error;
    }
    if (error instanceof Error) {
      const isTimeout = error.name === "AbortError" || error.message.includes("timeout");
      return new ProviderError(
        error.message,
        "openai",
        error,
        isTimeout
        // Timeouts are retryable
      );
    }
    return new ProviderError("Unknown error", "openai", void 0, true);
  }
};
1778
+
1779
+ // src/providers/registry/AnthropicProvider.ts
1780
/**
 * Model ids an Anthropic provider serves when none are configured explicitly.
 */
var DEFAULT_ANTHROPIC_MODELS = [
  // Sonnet (3.5 and 4)
  "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-latest", "claude-sonnet-4-20250514",
  // Haiku 3.5
  "claude-3-5-haiku-20241022",
  // Claude 3 generation
  "claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"
];
/** Value sent in the `anthropic-version` request header. */
var ANTHROPIC_API_VERSION = "2023-06-01";
1790
+ var AnthropicProvider = class extends Provider {
1791
+ apiKey;
1792
+ baseUrl;
1793
+ timeout;
1794
+ anthropicBeta;
1795
+ constructor(options = {}) {
1796
+ const apiKey = options.apiKey || process.env.ANTHROPIC_API_KEY;
1797
+ if (!apiKey) {
1798
+ throw new Error("Anthropic API key is required");
1799
+ }
1800
+ const config = {
1801
+ name: "anthropic",
1802
+ apiKey,
1803
+ baseUrl: options.baseUrl || "https://api.anthropic.com",
1804
+ models: options.models || DEFAULT_ANTHROPIC_MODELS,
1805
+ timeout: options.timeout || 6e4,
1806
+ maxRetries: options.maxRetries || 3,
1807
+ headers: options.headers
1808
+ };
1809
+ super(config);
1810
+ this.apiKey = apiKey;
1811
+ this.baseUrl = config.baseUrl;
1812
+ this.timeout = config.timeout;
1813
+ this.anthropicBeta = options.anthropicBeta;
1814
+ }
1815
+ /**
1816
+ * Execute a chat completion request
1817
+ */
1818
+ async chat(request) {
1819
+ const start = Date.now();
1820
+ try {
1821
+ const anthropicRequest = this.transformToAnthropic(request);
1822
+ const response = await this.makeRequest("/v1/messages", {
1823
+ method: "POST",
1824
+ body: JSON.stringify(anthropicRequest)
1825
+ });
1826
+ if (!response.ok) {
1827
+ const error = await this.parseError(response);
1828
+ throw error;
1829
+ }
1830
+ const data = await response.json();
1831
+ const result = this.transformFromAnthropic(data, request.model);
1832
+ this.updateHealth(true, Date.now() - start);
1833
+ return result;
1834
+ } catch (error) {
1835
+ this.updateHealth(false, Date.now() - start);
1836
+ throw this.wrapError(error);
1837
+ }
1838
+ }
1839
+ /**
1840
+ * Execute a streaming chat completion request
1841
+ */
1842
+ async *chatStream(request) {
1843
+ const start = Date.now();
1844
+ try {
1845
+ const anthropicRequest = this.transformToAnthropic(request);
1846
+ const response = await this.makeRequest("/v1/messages", {
1847
+ method: "POST",
1848
+ body: JSON.stringify({
1849
+ ...anthropicRequest,
1850
+ stream: true
1851
+ })
1852
+ });
1853
+ if (!response.ok) {
1854
+ const error = await this.parseError(response);
1855
+ throw error;
1856
+ }
1857
+ const reader = response.body?.getReader();
1858
+ if (!reader) {
1859
+ throw new ProviderError("No response body", "anthropic");
1860
+ }
1861
+ const decoder = new TextDecoder();
1862
+ let buffer = "";
1863
+ const requestId = generateRequestId();
1864
+ let inputTokens = 0;
1865
+ let outputTokens = 0;
1866
+ while (true) {
1867
+ const { done, value } = await reader.read();
1868
+ if (done) break;
1869
+ buffer += decoder.decode(value, { stream: true });
1870
+ const lines = buffer.split("\n");
1871
+ buffer = lines.pop() || "";
1872
+ for (const line of lines) {
1873
+ const trimmed = line.trim();
1874
+ if (!trimmed || !trimmed.startsWith("data: ")) continue;
1875
+ try {
1876
+ const event = JSON.parse(trimmed.slice(6));
1877
+ const chunk = this.transformStreamEvent(
1878
+ event,
1879
+ request.model,
1880
+ requestId
1881
+ );
1882
+ if (event.type === "message_start" && event.message?.usage) {
1883
+ inputTokens = event.message.usage.input_tokens || 0;
1884
+ }
1885
+ if (event.type === "message_delta" && event.usage) {
1886
+ outputTokens = event.usage.output_tokens || 0;
1887
+ }
1888
+ if (chunk) {
1889
+ if (event.type === "message_stop") {
1890
+ chunk.usage = {
1891
+ prompt_tokens: inputTokens,
1892
+ completion_tokens: outputTokens,
1893
+ total_tokens: inputTokens + outputTokens
1894
+ };
1895
+ }
1896
+ yield chunk;
1897
+ }
1898
+ } catch {
1899
+ }
1900
+ }
1901
+ }
1902
+ this.updateHealth(true, Date.now() - start);
1903
+ } catch (error) {
1904
+ this.updateHealth(false, Date.now() - start);
1905
+ throw this.wrapError(error);
1906
+ }
1907
+ }
1908
+ /**
1909
+ * Get model information
1910
+ */
1911
+ getModelInfo(model) {
1912
+ if (!this.supportsModel(model)) {
1913
+ return null;
1914
+ }
1915
+ return getModelInfo(model, "anthropic");
1916
+ }
1917
+ /**
1918
+ * Make an HTTP request to the Anthropic API
1919
+ */
1920
+ async makeRequest(path, options) {
1921
+ const headers = {
1922
+ "Content-Type": "application/json",
1923
+ "x-api-key": this.apiKey,
1924
+ "anthropic-version": ANTHROPIC_API_VERSION,
1925
+ ...this.config.headers
1926
+ };
1927
+ if (this.anthropicBeta && this.anthropicBeta.length > 0) {
1928
+ headers["anthropic-beta"] = this.anthropicBeta.join(",");
1929
+ }
1930
+ const controller = new AbortController();
1931
+ const timeoutId = setTimeout(() => controller.abort(), this.timeout);
1932
+ try {
1933
+ return await fetch(`${this.baseUrl}${path}`, {
1934
+ ...options,
1935
+ headers,
1936
+ signal: controller.signal
1937
+ });
1938
+ } finally {
1939
+ clearTimeout(timeoutId);
1940
+ }
1941
+ }
1942
+ /**
1943
+ * Transform OpenAI-compatible request to Anthropic format
1944
+ */
1945
+ transformToAnthropic(request) {
1946
+ const { messages, systemPrompt } = this.extractSystemPrompt(
1947
+ request.messages
1948
+ );
1949
+ const transformed = {
1950
+ model: request.model,
1951
+ messages: messages.map((m) => this.transformMessage(m)),
1952
+ max_tokens: request.max_tokens || 4096
1953
+ };
1954
+ if (systemPrompt) {
1955
+ transformed.system = systemPrompt;
1956
+ }
1957
+ if (request.temperature !== void 0) {
1958
+ transformed.temperature = request.temperature;
1959
+ }
1960
+ if (request.top_p !== void 0) {
1961
+ transformed.top_p = request.top_p;
1962
+ }
1963
+ if (request.stop !== void 0) {
1964
+ transformed.stop_sequences = Array.isArray(request.stop) ? request.stop : [request.stop];
1965
+ }
1966
+ if (request.tools && request.tools.length > 0) {
1967
+ transformed.tools = request.tools.map((tool) => ({
1968
+ name: tool.function.name,
1969
+ description: tool.function.description,
1970
+ input_schema: tool.function.parameters || { type: "object" }
1971
+ }));
1972
+ if (request.tool_choice) {
1973
+ if (request.tool_choice === "auto") {
1974
+ transformed.tool_choice = { type: "auto" };
1975
+ } else if (request.tool_choice === "required") {
1976
+ transformed.tool_choice = { type: "any" };
1977
+ } else if (request.tool_choice === "none") {
1978
+ delete transformed.tools;
1979
+ } else if (typeof request.tool_choice === "object" && request.tool_choice.function) {
1980
+ transformed.tool_choice = {
1981
+ type: "tool",
1982
+ name: request.tool_choice.function.name
1983
+ };
1984
+ }
1985
+ }
1986
+ }
1987
+ return transformed;
1988
+ }
1989
+ /**
1990
+ * Extract system prompt from messages
1991
+ */
1992
+ extractSystemPrompt(messages) {
1993
+ const systemMessages = messages.filter((m) => m.role === "system");
1994
+ const otherMessages = messages.filter((m) => m.role !== "system");
1995
+ const systemPrompt = systemMessages.length > 0 ? systemMessages.map(
1996
+ (m) => typeof m.content === "string" ? m.content : JSON.stringify(m.content)
1997
+ ).join("\n") : null;
1998
+ return { messages: otherMessages, systemPrompt };
1999
+ }
2000
+ /**
2001
+ * Transform a single message to Anthropic format
2002
+ */
2003
+ transformMessage(message) {
2004
+ if (message.role === "tool") {
2005
+ return {
2006
+ role: "user",
2007
+ content: [
2008
+ {
2009
+ type: "tool_result",
2010
+ tool_use_id: message.tool_call_id,
2011
+ content: typeof message.content === "string" ? message.content : JSON.stringify(message.content)
2012
+ }
2013
+ ]
2014
+ };
2015
+ }
2016
+ if (message.role === "assistant" && message.tool_calls) {
2017
+ const content = [];
2018
+ if (message.content) {
2019
+ content.push({
2020
+ type: "text",
2021
+ text: typeof message.content === "string" ? message.content : JSON.stringify(message.content)
2022
+ });
2023
+ }
2024
+ for (const toolCall of message.tool_calls) {
2025
+ content.push({
2026
+ type: "tool_use",
2027
+ id: toolCall.id,
2028
+ name: toolCall.function.name,
2029
+ input: JSON.parse(toolCall.function.arguments)
2030
+ });
2031
+ }
2032
+ return { role: "assistant", content };
2033
+ }
2034
+ return {
2035
+ role: message.role,
2036
+ content: typeof message.content === "string" ? message.content : JSON.stringify(message.content)
2037
+ };
2038
+ }
2039
+ /**
2040
+ * Transform Anthropic response to OpenAI-compatible format
2041
+ */
2042
+ transformFromAnthropic(data, model) {
2043
+ const content = data.content;
2044
+ let textContent = "";
2045
+ const toolCalls = [];
2046
+ for (const block of content) {
2047
+ if (block.type === "text") {
2048
+ textContent += block.text;
2049
+ } else if (block.type === "tool_use") {
2050
+ toolCalls.push({
2051
+ id: block.id,
2052
+ type: "function",
2053
+ function: {
2054
+ name: block.name,
2055
+ arguments: JSON.stringify(block.input)
2056
+ }
2057
+ });
2058
+ }
2059
+ }
2060
+ const message = {
2061
+ role: "assistant",
2062
+ content: textContent || null
2063
+ };
2064
+ if (toolCalls.length > 0) {
2065
+ message.tool_calls = toolCalls;
2066
+ }
2067
+ const stopReason = data.stop_reason;
2068
+ let finishReason = null;
2069
+ if (stopReason === "end_turn") {
2070
+ finishReason = "stop";
2071
+ } else if (stopReason === "max_tokens") {
2072
+ finishReason = "length";
2073
+ } else if (stopReason === "tool_use") {
2074
+ finishReason = "tool_calls";
2075
+ }
2076
+ const usage = data.usage;
2077
+ return {
2078
+ id: data.id || generateRequestId(),
2079
+ object: "chat.completion",
2080
+ created: Math.floor(Date.now() / 1e3),
2081
+ model: data.model || model,
2082
+ choices: [
2083
+ {
2084
+ index: 0,
2085
+ message,
2086
+ finish_reason: finishReason
2087
+ }
2088
+ ],
2089
+ usage: {
2090
+ prompt_tokens: usage.input_tokens,
2091
+ completion_tokens: usage.output_tokens,
2092
+ total_tokens: usage.input_tokens + usage.output_tokens
2093
+ }
2094
+ };
2095
+ }
2096
+ /**
2097
+ * Transform Anthropic stream event to OpenAI-compatible chunk
2098
+ */
2099
+ transformStreamEvent(event, model, requestId) {
2100
+ const eventType = event.type;
2101
+ if (eventType === "content_block_delta") {
2102
+ const delta = event.delta;
2103
+ if (delta.type === "text_delta") {
2104
+ return {
2105
+ id: requestId,
2106
+ object: "chat.completion.chunk",
2107
+ created: Math.floor(Date.now() / 1e3),
2108
+ model,
2109
+ choices: [
2110
+ {
2111
+ index: 0,
2112
+ delta: {
2113
+ content: delta.text
2114
+ },
2115
+ finish_reason: null
2116
+ }
2117
+ ]
2118
+ };
2119
+ }
2120
+ if (delta.type === "input_json_delta") {
2121
+ return {
2122
+ id: requestId,
2123
+ object: "chat.completion.chunk",
2124
+ created: Math.floor(Date.now() / 1e3),
2125
+ model,
2126
+ choices: [
2127
+ {
2128
+ index: 0,
2129
+ delta: {},
2130
+ finish_reason: null
2131
+ }
2132
+ ]
2133
+ };
2134
+ }
2135
+ }
2136
+ if (eventType === "content_block_start") {
2137
+ const contentBlock = event.content_block;
2138
+ if (contentBlock?.type === "tool_use") {
2139
+ return {
2140
+ id: requestId,
2141
+ object: "chat.completion.chunk",
2142
+ created: Math.floor(Date.now() / 1e3),
2143
+ model,
2144
+ choices: [
2145
+ {
2146
+ index: 0,
2147
+ delta: {
2148
+ tool_calls: [
2149
+ {
2150
+ id: contentBlock.id,
2151
+ type: "function",
2152
+ function: {
2153
+ name: contentBlock.name,
2154
+ arguments: ""
2155
+ }
2156
+ }
2157
+ ]
2158
+ },
2159
+ finish_reason: null
2160
+ }
2161
+ ]
2162
+ };
2163
+ }
2164
+ }
2165
+ if (eventType === "message_stop") {
2166
+ return {
2167
+ id: requestId,
2168
+ object: "chat.completion.chunk",
2169
+ created: Math.floor(Date.now() / 1e3),
2170
+ model,
2171
+ choices: [
2172
+ {
2173
+ index: 0,
2174
+ delta: {},
2175
+ finish_reason: "stop"
2176
+ }
2177
+ ]
2178
+ };
2179
+ }
2180
+ return null;
2181
+ }
2182
+ /**
2183
+ * Parse error response from Anthropic
2184
+ */
2185
+ async parseError(response) {
2186
+ let message = `Anthropic API error: ${response.status}`;
2187
+ const retryable = response.status >= 500 || response.status === 429;
2188
+ try {
2189
+ const data = await response.json();
2190
+ if (data.error?.message) {
2191
+ message = data.error.message;
2192
+ }
2193
+ } catch {
2194
+ }
2195
+ return new ProviderError(message, "anthropic", void 0, retryable);
2196
+ }
2197
+ /**
2198
+ * Wrap unknown errors
2199
+ */
2200
+ wrapError(error) {
2201
+ if (error instanceof ProviderError) {
2202
+ return error;
2203
+ }
2204
+ if (error instanceof Error) {
2205
+ const isTimeout = error.name === "AbortError" || error.message.includes("timeout");
2206
+ return new ProviderError(error.message, "anthropic", error, isTimeout);
2207
+ }
2208
+ return new ProviderError("Unknown error", "anthropic", void 0, true);
2209
+ }
2210
+ };
2211
+
2212
+ // src/providers/registry/GoogleProvider.ts
2213
// Gemini models served when the provider options do not list any.
var DEFAULT_GEMINI_MODELS = [
  // The 1.5 family: each tier has a pinned and a "-latest" alias.
  ...["pro", "flash"].flatMap((tier) => [
    `gemini-1.5-${tier}`,
    `gemini-1.5-${tier}-latest`
  ]),
  "gemini-2.0-flash-exp",
  "gemini-pro"
];
2221
/**
 * Provider that exposes the Google Gemini `generateContent` API through the
 * gateway's OpenAI-compatible request/response shapes.
 */
var GoogleProvider = class extends Provider {
  apiKey;
  baseUrl;
  timeout;
  /**
   * @param {object} [options]
   * @param {string} [options.apiKey] - Falls back to `process.env.GOOGLE_API_KEY`.
   * @param {string} [options.baseUrl] - API root; defaults to the v1beta endpoint.
   * @param {string[]} [options.models] - Defaults to `DEFAULT_GEMINI_MODELS`.
   * @param {number} [options.timeout] - Request timeout in ms (default 60000).
   * @param {number} [options.maxRetries] - Default 3.
   * @param {object} [options.headers] - Extra request headers.
   * @throws {Error} When no API key is available.
   */
  constructor(options = {}) {
    const apiKey = options.apiKey || process.env.GOOGLE_API_KEY;
    if (!apiKey) {
      throw new Error("Google API key is required");
    }
    const config = {
      name: "google",
      apiKey,
      baseUrl: options.baseUrl || "https://generativelanguage.googleapis.com/v1beta",
      models: options.models || DEFAULT_GEMINI_MODELS,
      timeout: options.timeout || 6e4,
      maxRetries: options.maxRetries || 3,
      headers: options.headers
    };
    super(config);
    this.apiKey = apiKey;
    this.baseUrl = config.baseUrl;
    this.timeout = config.timeout;
  }
  /**
   * Execute a chat completion request.
   *
   * @param {object} request - OpenAI-compatible chat request.
   * @returns {Promise<object>} OpenAI-compatible chat completion.
   * @throws {ProviderError} On HTTP, network or response-shape failures.
   */
  async chat(request) {
    const start = Date.now();
    try {
      const geminiRequest = this.transformToGemini(request);
      const endpoint = `/models/${request.model}:generateContent`;
      const response = await this.makeRequest(endpoint, {
        method: "POST",
        body: JSON.stringify(geminiRequest)
      });
      if (!response.ok) {
        throw await this.parseError(response);
      }
      const data = await response.json();
      const result = this.transformFromGemini(data, request.model);
      this.updateHealth(true, Date.now() - start);
      return result;
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    }
  }
  /**
   * Execute a streaming chat completion request.
   *
   * The endpoint is called with `alt=sse` so every response object arrives on
   * a single `data:` line. The legacy JSON-array framing (one object per
   * line, separated by commas) is still accepted by `parseStreamLine`.
   * A terminal chunk carrying the finish reason and token usage is always
   * yielded last.
   *
   * @param {object} request - OpenAI-compatible chat request.
   * @yields {object} OpenAI-compatible `chat.completion.chunk` objects.
   * @throws {ProviderError} On HTTP or network failures.
   */
  async *chatStream(request) {
    const start = Date.now();
    try {
      const geminiRequest = this.transformToGemini(request);
      const endpoint = `/models/${request.model}:streamGenerateContent?alt=sse`;
      const response = await this.makeRequest(endpoint, {
        method: "POST",
        body: JSON.stringify(geminiRequest)
      });
      if (!response.ok) {
        throw await this.parseError(response);
      }
      const reader = response.body?.getReader();
      if (!reader) {
        throw new ProviderError("No response body", "google");
      }
      const decoder = new TextDecoder();
      const requestId = generateRequestId();
      let buffer = "";
      let totalInputTokens = 0;
      let totalOutputTokens = 0;
      let toolCallCount = 0;
      let finishReason = "stop";
      let streamEnded = false;
      try {
        while (!streamEnded) {
          const { done, value } = await reader.read();
          streamEnded = done;
          buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
          const lines = buffer.split("\n");
          // Keep the trailing partial line for the next read; once the
          // stream has ended everything left in the buffer is processed.
          buffer = done ? "" : lines.pop() || "";
          for (const line of lines) {
            const data = this.parseStreamLine(line);
            if (!data) continue;
            if (data.usageMetadata) {
              totalInputTokens = data.usageMetadata.promptTokenCount || 0;
              totalOutputTokens = data.usageMetadata.candidatesTokenCount || 0;
            }
            if (data.candidates?.[0]?.finishReason === "MAX_TOKENS") {
              finishReason = "length";
            }
            const chunk = this.transformStreamChunk(
              data,
              request.model,
              requestId,
              toolCallCount
            );
            if (chunk) {
              toolCallCount += chunk.choices[0].delta.tool_calls?.length ?? 0;
              yield chunk;
            }
          }
        }
      } finally {
        // Also runs when the consumer stops iterating early, so the
        // underlying connection is not left open.
        await reader.cancel().catch(() => {
        });
      }
      if (finishReason === "stop" && toolCallCount > 0) {
        finishReason = "tool_calls";
      }
      yield {
        id: requestId,
        object: "chat.completion.chunk",
        created: Math.floor(Date.now() / 1e3),
        model: request.model,
        choices: [
          {
            index: 0,
            delta: {},
            finish_reason: finishReason
          }
        ],
        usage: {
          prompt_tokens: totalInputTokens,
          completion_tokens: totalOutputTokens,
          total_tokens: totalInputTokens + totalOutputTokens
        }
      };
      this.updateHealth(true, Date.now() - start);
    } catch (error) {
      this.updateHealth(false, Date.now() - start);
      throw this.wrapError(error);
    }
  }
  /**
   * Get model information.
   *
   * @param {string} model
   * @returns {(object|null)} `null` when this provider does not support the model.
   */
  getModelInfo(model) {
    if (!this.supportsModel(model)) {
      return null;
    }
    return getModelInfo(model, "google");
  }
  /**
   * Make an HTTP request to the Google API.
   *
   * The API key is appended as the `key` query parameter; `path` may already
   * contain a query string. The abort timer only covers the time until
   * `fetch` resolves (response headers), not the reading of the body.
   *
   * @param {string} path - Path (and optional query) relative to `baseUrl`.
   * @param {object} options - `fetch` options (method, body, ...).
   * @returns {Promise<Response>}
   */
  async makeRequest(path, options) {
    const separator = path.includes("?") ? "&" : "?";
    const url = `${this.baseUrl}${path}${separator}key=${encodeURIComponent(this.apiKey)}`;
    const headers = {
      "Content-Type": "application/json",
      ...this.config.headers
    };
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);
    try {
      return await fetch(url, {
        ...options,
        headers,
        signal: controller.signal
      });
    } finally {
      clearTimeout(timeoutId);
    }
  }
  /**
   * Extract the JSON object carried by one line of a streaming response.
   *
   * Accepts SSE `data: {...}` lines as well as single-line objects from the
   * JSON-array framing (with optional leading/trailing commas). Lines that do
   * not carry an object, or whose JSON is malformed, yield `null` so the
   * stream is processed on a best-effort basis.
   *
   * @param {string} line
   * @returns {(object|null)}
   */
  parseStreamLine(line) {
    let text = line.trim();
    if (text.startsWith("data:")) text = text.slice(5).trim();
    if (text.startsWith(",")) text = text.slice(1).trim();
    if (text.endsWith(",")) text = text.slice(0, -1).trim();
    if (!text.startsWith("{")) return null;
    try {
      return JSON.parse(text);
    } catch {
      return null;
    }
  }
  /**
   * Decode the `arguments` of an OpenAI tool call.
   *
   * Empty or missing arguments become `{}` (parameterless calls commonly send
   * `""`); values that are already objects are passed through.
   *
   * @param {unknown} rawArguments
   * @returns {object}
   * @throws {SyntaxError} When a non-empty string is not valid JSON.
   */
  parseToolArguments(rawArguments) {
    if (typeof rawArguments !== "string") {
      return rawArguments ?? {};
    }
    return rawArguments.trim() === "" ? {} : JSON.parse(rawArguments);
  }
  /**
   * Transform an OpenAI-compatible request to Gemini format.
   *
   * Maps sampling options to `generationConfig`, system messages to
   * `systemInstruction` and tools to `functionDeclarations`.
   *
   * @param {object} request - OpenAI-compatible chat request.
   * @returns {object} Gemini `generateContent` request body.
   */
  transformToGemini(request) {
    const asText = (value) => typeof value === "string" ? value : JSON.stringify(value);
    const transformed = {
      contents: this.transformMessages(request.messages)
    };
    const generationConfig = {};
    if (request.temperature !== void 0) {
      generationConfig.temperature = request.temperature;
    }
    if (request.max_tokens !== void 0) {
      generationConfig.maxOutputTokens = request.max_tokens;
    }
    if (request.top_p !== void 0) {
      generationConfig.topP = request.top_p;
    }
    if (request.stop !== void 0) {
      generationConfig.stopSequences = Array.isArray(request.stop) ? request.stop : [request.stop];
    }
    if (Object.keys(generationConfig).length > 0) {
      transformed.generationConfig = generationConfig;
    }
    const systemMessages = request.messages.filter((m) => m.role === "system");
    if (systemMessages.length > 0) {
      const systemText = systemMessages.map((m) => asText(m.content)).join("\n");
      transformed.systemInstruction = { parts: [{ text: systemText }] };
    }
    if (request.tools && request.tools.length > 0) {
      transformed.tools = [
        {
          functionDeclarations: request.tools.map((tool) => ({
            name: tool.function.name,
            description: tool.function.description,
            parameters: tool.function.parameters || { type: "object" }
          }))
        }
      ];
    }
    return transformed;
  }
  /**
   * Transform messages to Gemini `contents`.
   *
   * System messages are skipped (they are sent as `systemInstruction`).
   * Assistant messages map to role `model`, everything else to `user`.
   * A tool result is sent only as a `functionResponse` part (not additionally
   * as text); its function name is `message.name`, else the name of the
   * earlier tool call with the same id, else `"tool_result"`.
   *
   * @param {Array<object>} messages - OpenAI-style chat messages.
   * @returns {Array<{role: string, parts: Array<object>}>}
   */
  transformMessages(messages) {
    const asText = (value) => typeof value === "string" ? value : JSON.stringify(value);
    const contents = [];
    const toolNamesById = new Map();
    for (const message of messages) {
      if (message.role === "system") continue;
      const role = message.role === "assistant" ? "model" : "user";
      const parts = [];
      const isToolResult = message.role === "tool" && Boolean(message.tool_call_id);
      if (isToolResult) {
        parts.push({
          functionResponse: {
            name: message.name || toolNamesById.get(message.tool_call_id) || "tool_result",
            response: {
              result: asText(message.content)
            }
          }
        });
      } else if (message.content) {
        parts.push({ text: asText(message.content) });
      }
      if (message.tool_calls) {
        for (const toolCall of message.tool_calls) {
          if (toolCall.id) {
            toolNamesById.set(toolCall.id, toolCall.function.name);
          }
          parts.push({
            functionCall: {
              name: toolCall.function.name,
              args: this.parseToolArguments(toolCall.function.arguments)
            }
          });
        }
      }
      if (parts.length > 0) {
        contents.push({ role, parts });
      }
    }
    return contents;
  }
  /**
   * Transform a Gemini response to OpenAI-compatible format.
   *
   * Only the first candidate is used. `finish_reason` is `length` for
   * `MAX_TOKENS`, otherwise `tool_calls` when the candidate contains function
   * calls, otherwise `stop` for `STOP`, otherwise `null`.
   *
   * @param {object} data - Parsed Gemini response body.
   * @param {string} model - Model name to report.
   * @returns {object} OpenAI-compatible chat completion.
   * @throws {ProviderError} When the response has no candidates.
   */
  transformFromGemini(data, model) {
    const candidate = data.candidates?.[0];
    if (!candidate) {
      throw new ProviderError("No response candidates", "google");
    }
    const parts = candidate.content?.parts || [];
    let textContent = "";
    const toolCalls = [];
    for (const part of parts) {
      if (part.text) {
        textContent += part.text;
      }
      if (part.functionCall) {
        const fc = part.functionCall;
        toolCalls.push({
          id: `call_${Math.random().toString(36).substring(2, 11)}`,
          type: "function",
          function: {
            name: fc.name,
            // `args` may be omitted for parameterless calls.
            arguments: JSON.stringify(fc.args ?? {})
          }
        });
      }
    }
    const message = {
      role: "assistant",
      content: textContent || null
    };
    if (toolCalls.length > 0) {
      message.tool_calls = toolCalls;
    }
    const finishReason = candidate.finishReason;
    let mappedReason = null;
    if (finishReason === "MAX_TOKENS") {
      mappedReason = "length";
    } else if (toolCalls.length > 0) {
      // Checked before STOP so a response that carries function calls is
      // reported as `tool_calls` rather than a plain stop.
      mappedReason = "tool_calls";
    } else if (finishReason === "STOP") {
      mappedReason = "stop";
    }
    const usageMetadata = data.usageMetadata;
    const promptTokens = usageMetadata?.promptTokenCount || 0;
    const completionTokens = usageMetadata?.candidatesTokenCount || 0;
    const usage = {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      total_tokens: usageMetadata?.totalTokenCount || promptTokens + completionTokens
    };
    return {
      id: generateRequestId(),
      object: "chat.completion",
      created: Math.floor(Date.now() / 1e3),
      model,
      choices: [
        {
          index: 0,
          message,
          finish_reason: mappedReason
        }
      ],
      usage
    };
  }
  /**
   * Transform one Gemini stream object to an OpenAI-compatible chunk.
   *
   * Text parts are concatenated into `delta.content`; function-call parts
   * become complete `delta.tool_calls` entries.
   *
   * @param {object} data - Parsed Gemini stream object.
   * @param {string} model - Model name to report.
   * @param {string} requestId - Id shared by all chunks of the stream.
   * @param {number} [toolCallOffset=0] - Number of tool calls already emitted
   *   on this stream; used as the starting `index` for new tool calls.
   * @returns {(object|null)} `null` when the object carries neither text nor
   *   function calls.
   */
  transformStreamChunk(data, model, requestId, toolCallOffset = 0) {
    const candidate = data.candidates?.[0];
    if (!candidate) {
      return null;
    }
    const parts = candidate.content?.parts || [];
    let textContent = "";
    const toolCalls = [];
    for (const part of parts) {
      if (part.text) {
        textContent += part.text;
      }
      if (part.functionCall) {
        toolCalls.push({
          index: toolCallOffset + toolCalls.length,
          id: `call_${Math.random().toString(36).substring(2, 11)}`,
          type: "function",
          function: {
            name: part.functionCall.name,
            arguments: JSON.stringify(part.functionCall.args ?? {})
          }
        });
      }
    }
    if (!textContent && toolCalls.length === 0) {
      return null;
    }
    const delta = {};
    if (textContent) {
      delta.content = textContent;
    }
    if (toolCalls.length > 0) {
      delta.tool_calls = toolCalls;
    }
    return {
      id: requestId,
      object: "chat.completion.chunk",
      created: Math.floor(Date.now() / 1e3),
      model,
      choices: [
        {
          index: 0,
          delta,
          finish_reason: null
        }
      ]
    };
  }
  /**
   * Build a ProviderError from a failed Google HTTP response.
   *
   * Uses `error.message` from the JSON body when present; retryable for 429
   * and 5xx statuses.
   *
   * @param {Response} response - Non-OK fetch response.
   * @returns {Promise<ProviderError>}
   */
  async parseError(response) {
    let message = `Google API error: ${response.status}`;
    const retryable = response.status >= 500 || response.status === 429;
    try {
      const data = await response.json();
      if (data.error?.message) {
        message = data.error.message;
      }
    } catch {
      // Body is missing or not JSON: keep the generic message.
    }
    return new ProviderError(message, "google", void 0, retryable);
  }
  /**
   * Wrap unknown errors in a ProviderError.
   *
   * ProviderErrors pass through; other Errors are retryable only when they
   * look like a timeout; non-Error values become a retryable "Unknown error".
   *
   * @param {unknown} error
   * @returns {ProviderError}
   */
  wrapError(error) {
    if (error instanceof ProviderError) {
      return error;
    }
    if (error instanceof Error) {
      const isTimeout = error.name === "AbortError" || error.message.includes("timeout");
      return new ProviderError(error.message, "google", error, isTimeout);
    }
    return new ProviderError("Unknown error", "google", void 0, true);
  }
};
2595
/**
 * Gateway that routes OpenAI-compatible chat requests to the configured
 * providers, with optional response caching, health tracking and metrics.
 *
 * Events emitted: `request:start`, `request:complete`, `request:error`,
 * `provider:unhealthy`.
 */
var Gateway = class extends events.EventEmitter {
  _config;
  registry;
  router;
  healthMonitor;
  cache;
  logger;
  metrics;
  /**
   * Chat completions API interface (OpenAI-compatible)
   */
  chat = {
    completions: {
      create: this.createCompletion.bind(this)
    }
  };
  /**
   * @param {object} config - Gateway configuration: `providers` (required),
   *   and optional `routing`, `cache` and `telemetry` sections.
   */
  constructor(config) {
    super();
    this._config = config;
    this.logger = pino__default.default({
      level: config.telemetry?.logging?.level || "info",
      transport: process.env.NODE_ENV !== "production" ? { target: "pino-pretty" } : void 0
    });
    this.registry = new ProviderRegistry();
    for (const providerConfig of config.providers) {
      const provider = this.createProvider(providerConfig);
      if (provider) {
        this.registry.register(provider);
      }
    }
    this.router = this.createRouter(config);
    this.healthMonitor = new HealthMonitor({
      checkInterval: 6e4,
      unhealthyThreshold: 3,
      degradedThreshold: 1,
      circuitBreaker: {
        failureThreshold: 5,
        successThreshold: 3,
        timeout: 3e4
      }
    });
    if (config.cache?.enabled) {
      this.cache = new lruCache.LRUCache({
        max: config.cache.maxEntries || 1e3,
        // config.cache.ttl is in seconds; the cache expects milliseconds.
        ttl: (config.cache.ttl || 3600) * 1e3
      });
    } else {
      this.cache = null;
    }
    this.metrics = this.createInitialMetrics();
    this.healthMonitor.on("unhealthy", (provider) => {
      this.logger.warn({ provider }, "Provider marked unhealthy");
      this.emit("provider:unhealthy", provider);
    });
    this.healthMonitor.on("circuit-open", (provider) => {
      this.logger.warn({ provider }, "Circuit breaker opened");
    });
  }
  /**
   * Create a chat completion (main API).
   *
   * @param {object} request - OpenAI-compatible chat request.
   * @returns {Promise<object|AsyncGenerator<object>>} A completion, or an
   *   async generator of chunks when `request.stream` is truthy.
   * @throws {ValidationError} When the request is malformed.
   */
  async createCompletion(request) {
    this.validateRequest(request);
    if (request.stream) {
      return this.createStreamingCompletion(request);
    }
    return this.createNonStreamingCompletion(request);
  }
  /**
   * Non-streaming completion.
   *
   * Serves from the cache when allowed, otherwise makes up to three routing
   * attempts, excluding every provider that was skipped or failed. Each
   * failed request is counted and reported through `request:error` exactly
   * once.
   *
   * @param {object} request - Validated chat request.
   * @returns {Promise<object>} Completion with a `_gateway` metadata section.
   * @throws {Error} The last provider error, a non-retryable ProviderError,
   *   or a GatewayError when no attempt reached a provider.
   */
  async createNonStreamingCompletion(request) {
    const requestId = request._gateway?.requestId || generateRequestId();
    const start = Date.now();
    this.emit("request:start", {
      requestId,
      model: request.model
    });
    try {
      const useCache = Boolean(this.cache) && request._gateway?.cachePolicy !== "no-cache";
      const cacheKey = useCache ? hashRequest(request) : null;
      if (useCache) {
        const cached = this.cache.get(cacheKey);
        if (cached) {
          this.metrics.requests.total++;
          this.metrics.requests.cached++;
          this.metrics.cache.hits++;
          this.updateCacheHitRate();
          this.logger.debug({ requestId, cacheKey }, "Cache hit");
          const response = {
            ...cached,
            _gateway: {
              ...cached._gateway,
              cached: true,
              cacheKey,
              latencyMs: Date.now() - start
            }
          };
          this.emit("request:complete", {
            requestId,
            provider: cached._gateway?.provider || "cache",
            model: cached.model,
            latencyMs: Date.now() - start,
            cost: 0,
            cached: true,
            tokens: { input: 0, output: 0 }
          });
          return response;
        }
        this.metrics.cache.misses++;
        this.updateCacheHitRate();
      }
      const routingContext = {
        excludeProviders: request._gateway?.excludeProviders,
        preferredProvider: request._gateway?.preferredProvider,
        maxCost: request._gateway?.maxCost,
        maxLatency: request._gateway?.maxLatency
      };
      const previousAttempts = [];
      // Remember the attempt and keep the router from picking the same
      // provider again. A new array is built so the caller's
      // `excludeProviders` is never mutated.
      const markAttempted = (decision, reason) => {
        previousAttempts.push({
          provider: decision.provider,
          model: decision.model,
          error: reason
        });
        routingContext.excludeProviders = [
          ...routingContext.excludeProviders || [],
          decision.provider
        ];
      };
      let lastError = null;
      const maxAttempts = 3;
      for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        const decision = this.router.route(request, this.registry, {
          ...routingContext,
          previousAttempts: previousAttempts.length > 0 ? [...previousAttempts] : void 0
        });
        const provider = this.registry.get(decision.provider);
        if (!provider) {
          lastError = new GatewayError(
            `Provider not found: ${decision.provider}`,
            "PROVIDER_NOT_FOUND",
            500
          );
          markAttempted(decision, lastError.message);
          continue;
        }
        if (!this.healthMonitor.isRequestAllowed(decision.provider)) {
          this.logger.debug(
            { provider: decision.provider },
            "Circuit breaker open, skipping"
          );
          markAttempted(decision, "Circuit breaker open");
          continue;
        }
        const providerRequest = {
          ...request,
          model: decision.model
        };
        const providerStart = Date.now();
        let response;
        // Only the provider call is guarded: an exception from metrics,
        // cache or event listeners must not trigger a retry against another
        // provider.
        try {
          response = await provider.chat(providerRequest);
        } catch (error) {
          lastError = error instanceof Error ? error : new Error(String(error));
          this.healthMonitor.recordRequest(
            decision.provider,
            false,
            Date.now() - providerStart
          );
          if (error instanceof ProviderError && !error.retryable) {
            throw error;
          }
          this.logger.warn(
            {
              provider: decision.provider,
              error: lastError.message,
              attempt
            },
            "Request failed, retrying"
          );
          markAttempted(decision, lastError.message);
          continue;
        }
        const latencyMs = Date.now() - providerStart;
        this.healthMonitor.recordRequest(decision.provider, true, latencyMs);
        const cost = calculateCost(decision.model, response.usage);
        this.updateMetrics(decision, response, latencyMs, cost);
        const gatewayResponse = {
          ...response,
          _gateway: {
            provider: decision.provider,
            originalModel: request.model,
            latencyMs,
            cost,
            cached: false,
            retries: attempt - 1,
            routingDecision: decision
          }
        };
        if (useCache) {
          this.cache.set(cacheKey, gatewayResponse);
        }
        this.emit("request:complete", {
          requestId,
          provider: decision.provider,
          model: decision.model,
          latencyMs: Date.now() - start,
          cost,
          cached: false,
          tokens: {
            input: response.usage.prompt_tokens,
            output: response.usage.completion_tokens
          }
        });
        return gatewayResponse;
      }
      throw lastError || new GatewayError("All attempts failed", "ALL_ATTEMPTS_FAILED", 502);
    } catch (error) {
      // Single place where a failed request is counted and reported.
      this.metrics.requests.total++;
      this.metrics.requests.failed++;
      this.emit("request:error", {
        requestId,
        error: error instanceof Error ? error : new Error(String(error))
      });
      throw error;
    }
  }
  /**
   * Streaming completion.
   *
   * Routes once (no retries) and forwards the provider's chunks. Token usage
   * is taken from the last chunk that carries a `usage` field.
   *
   * @param {object} request - Validated chat request with `stream` set.
   * @yields {object} OpenAI-compatible `chat.completion.chunk` objects.
   * @throws {Error} Routing errors or errors raised by the provider stream.
   */
  async *createStreamingCompletion(request) {
    const requestId = request._gateway?.requestId || generateRequestId();
    const start = Date.now();
    this.emit("request:start", { requestId, model: request.model });
    const reportFailure = (error, provider) => {
      this.metrics.requests.total++;
      this.metrics.requests.failed++;
      const payload = {
        requestId,
        error: error instanceof Error ? error : new Error(String(error))
      };
      if (provider !== void 0) {
        payload.provider = provider;
      }
      this.emit("request:error", payload);
    };
    let decision;
    let provider;
    try {
      decision = this.router.route(request, this.registry, {
        excludeProviders: request._gateway?.excludeProviders,
        preferredProvider: request._gateway?.preferredProvider
      });
      provider = this.registry.get(decision.provider);
      if (!provider) {
        throw new GatewayError(
          `Provider not found: ${decision.provider}`,
          "PROVIDER_NOT_FOUND",
          500
        );
      }
    } catch (error) {
      reportFailure(error);
      throw error;
    }
    const providerRequest = { ...request, model: decision.model };
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    try {
      for await (const chunk of provider.chatStream(providerRequest)) {
        if (chunk.usage) {
          totalInputTokens = chunk.usage.prompt_tokens;
          totalOutputTokens = chunk.usage.completion_tokens;
        }
        yield chunk;
      }
    } catch (error) {
      this.healthMonitor.recordRequest(
        decision.provider,
        false,
        Date.now() - start
      );
      reportFailure(error, decision.provider);
      throw error;
    }
    const latencyMs = Date.now() - start;
    const usage = {
      prompt_tokens: totalInputTokens,
      completion_tokens: totalOutputTokens,
      total_tokens: totalInputTokens + totalOutputTokens
    };
    const cost = calculateCost(decision.model, usage);
    this.healthMonitor.recordRequest(decision.provider, true, latencyMs);
    // Keep streaming requests visible in the same metrics as non-streaming.
    this.updateMetrics(decision, { usage }, latencyMs, cost);
    this.emit("request:complete", {
      requestId,
      provider: decision.provider,
      model: decision.model,
      latencyMs,
      cost,
      cached: false,
      tokens: { input: totalInputTokens, output: totalOutputTokens }
    });
  }
  /**
   * Validate a request.
   *
   * @param {object} request
   * @throws {ValidationError} When the model is missing, `messages` is not a
   *   non-empty array, or a message has a missing/unknown role.
   */
  validateRequest(request) {
    if (!request?.model) {
      throw new ValidationError("Model is required");
    }
    if (!Array.isArray(request.messages) || request.messages.length === 0) {
      throw new ValidationError(
        "Messages array is required and cannot be empty"
      );
    }
    const allowedRoles = ["system", "user", "assistant", "tool"];
    for (const message of request.messages) {
      if (!message?.role) {
        throw new ValidationError("Message role is required");
      }
      if (!allowedRoles.includes(message.role)) {
        throw new ValidationError(`Invalid message role: ${message.role}`);
      }
    }
  }
  /**
   * Create a provider instance from config.
   *
   * @param {object} config - Provider configuration; `name` selects the type.
   * @returns {(object|null)} Provider instance, or `null` (with a warning)
   *   for an unknown provider name.
   */
  createProvider(config) {
    const providerOptions = {
      apiKey: config.apiKey,
      baseUrl: config.baseUrl,
      models: config.models,
      timeout: config.timeout,
      // Forwarded so per-provider retry/header settings are not dropped.
      maxRetries: config.maxRetries,
      headers: config.headers
    };
    switch (config.name) {
      case "openai":
        return new OpenAIProvider(providerOptions);
      case "anthropic":
        return new AnthropicProvider(providerOptions);
      case "google":
        return new GoogleProvider(providerOptions);
      default:
        this.logger.warn({ provider: config.name }, "Unknown provider type");
        return null;
    }
  }
  /**
   * Create router from config.
   *
   * @param {object} config - Gateway configuration; `routing.strategy`
   *   defaults to `round-robin`, which is also used for unknown names.
   * @returns {Router}
   */
  createRouter(config) {
    const strategyName = config.routing?.strategy || "round-robin";
    let strategy;
    switch (strategyName) {
      case "failover":
        strategy = new FailoverStrategy({
          chain: config.routing?.fallbackChain || [
            "openai",
            "anthropic",
            "google"
          ]
        });
        break;
      case "cost-optimized":
        strategy = new CostOptimizedStrategy();
        break;
      case "latency-optimized":
        strategy = new LatencyOptimizedStrategy();
        break;
      case "round-robin":
      default:
        strategy = new RoundRobinStrategy({
          weights: config.routing?.weights
        });
        break;
    }
    return new Router(strategy, {
      fallbackChain: config.routing?.fallbackChain
    });
  }
  /**
   * Create initial metrics object.
   *
   * @returns {object} Zeroed metrics.
   */
  createInitialMetrics() {
    return {
      requests: { total: 0, successful: 0, failed: 0, cached: 0 },
      latency: { avg: 0, p50: 0, p95: 0, p99: 0 },
      tokens: { input: 0, output: 0, total: 0 },
      cost: { total: 0, byProvider: {}, byModel: {} },
      cache: { hits: 0, misses: 0, hitRate: 0 },
      providers: {}
    };
  }
  /**
   * Update metrics after a successful provider request.
   *
   * @param {{provider: string, model: string}} decision - Routing decision.
   * @param {{usage: object}} response - Response carrying token usage.
   * @param {number} latencyMs - Provider latency in milliseconds.
   * @param {number} cost - Cost of the request.
   */
  updateMetrics(decision, response, latencyMs, cost) {
    const { requests, latency, tokens, cost: costs } = this.metrics;
    requests.total++;
    requests.successful++;
    // Running mean over successful requests.
    latency.avg = (latency.avg * (requests.successful - 1) + latencyMs) / requests.successful;
    tokens.input += response.usage.prompt_tokens;
    tokens.output += response.usage.completion_tokens;
    tokens.total += response.usage.total_tokens;
    costs.total += cost;
    costs.byProvider[decision.provider] = (costs.byProvider[decision.provider] || 0) + cost;
    costs.byModel[decision.model] = (costs.byModel[decision.model] || 0) + cost;
    this.updateCacheHitRate();
    this.metrics.providers = this.registry.getHealthStatus();
  }
  /**
   * Update cache hit rate metric (hits / (hits + misses), 0 when unused).
   */
  updateCacheHitRate() {
    const { hits, misses } = this.metrics.cache;
    const totalCacheOps = hits + misses;
    this.metrics.cache.hitRate = totalCacheOps > 0 ? hits / totalCacheOps : 0;
  }
  /**
   * Get current metrics.
   *
   * @returns {object} Shallow copy; nested sections are shared with the
   *   gateway's live metrics.
   */
  getMetrics() {
    return { ...this.metrics };
  }
  /**
   * Get gateway configuration.
   *
   * @returns {object} Shallow copy of the configuration.
   */
  getConfig() {
    return { ...this._config };
  }
  /**
   * Get provider registry.
   */
  getRegistry() {
    return this.registry;
  }
  /**
   * Get router.
   */
  getRouter() {
    return this.router;
  }
  /**
   * Check health of all providers.
   *
   * @returns {Promise<Object<string, boolean>>} Provider name -> whether its
   *   status is exactly `"healthy"`.
   */
  async checkHealth() {
    const healthStatus = await this.registry.checkHealth();
    const result = {};
    for (const [name, health] of Object.entries(healthStatus)) {
      result[name] = health.status === "healthy";
    }
    return result;
  }
  /**
   * Shut down the gateway: stop registry health checks and clear the cache.
   */
  shutdown() {
    this.registry.stopHealthChecks();
    this.cache?.clear();
    this.logger.info("Gateway shut down");
  }
};
3039
/**
 * Build the Hono application that exposes the gateway over HTTP.
 *
 * Routes (all prefixed with `basePath`):
 * - `GET  /health`               provider health; 503 when any is not healthy
 * - `GET  /metrics`              gateway metrics
 * - `GET  /v1/models`            OpenAI-style model list
 * - `POST /v1/chat/completions`  chat completions, streamed over SSE when
 *                                `stream` is set in the body
 * - `POST /v1/completions`       always 400 (deprecated endpoint)
 * Any other request yields a 404 JSON error.
 *
 * @param {object} options
 * @param {object} options.gateway - Gateway instance to serve.
 * @param {string} [options.basePath=""] - Prefix for every route.
 * @param {object} [options.cors] - Enables CORS; `origin`, `methods` and
 *   `headers` override the defaults.
 * @returns {object} The configured Hono app.
 */
function createHTTPServer(options) {
  const { gateway, basePath = "" } = options;
  const app = new hono.Hono();
  const invalidRequest = (c, message, code) => c.json(
    {
      error: {
        message,
        type: "invalid_request_error",
        code
      }
    },
    400
  );
  app.use("*", logger.logger());
  if (options.cors) {
    app.use(
      "*",
      cors.cors({
        origin: options.cors.origin || "*",
        allowMethods: options.cors.methods || ["GET", "POST", "OPTIONS"],
        allowHeaders: options.cors.headers || [
          "Content-Type",
          "Authorization",
          "X-Request-Id"
        ]
      })
    );
  }
  app.get(`${basePath}/health`, async (c) => {
    const health = await gateway.checkHealth();
    const allHealthy = Object.values(health).every((h) => h);
    return c.json(
      {
        status: allHealthy ? "healthy" : "degraded",
        providers: health,
        timestamp: (/* @__PURE__ */ new Date()).toISOString()
      },
      allHealthy ? 200 : 503
    );
  });
  app.get(`${basePath}/metrics`, (c) => {
    const metrics = gateway.getMetrics();
    return c.json(metrics);
  });
  app.get(`${basePath}/v1/models`, (c) => {
    const registry = gateway.getRegistry();
    const models = registry.getAllModels();
    const data = models.map((model) => {
      const modelInfo = registry.getModelInfo(model);
      return {
        id: model,
        object: "model",
        created: Math.floor(Date.now() / 1e3),
        owned_by: modelInfo?.provider || "unknown"
      };
    });
    return c.json({
      object: "list",
      data
    });
  });
  app.post(`${basePath}/v1/chat/completions`, async (c) => {
    // A malformed body is a client error; answer 400 here instead of letting
    // the parse failure reach the generic error formatter.
    let body;
    try {
      body = await c.req.json();
    } catch {
      return invalidRequest(c, "Request body must be valid JSON", "invalid_json");
    }
    if (body === null || typeof body !== "object" || Array.isArray(body)) {
      return invalidRequest(c, "Request body must be a JSON object", "invalid_body");
    }
    try {
      const requestId = c.req.header("X-Request-Id");
      if (requestId) {
        body._gateway = { ...body._gateway, requestId };
      }
      if (body.stream) {
        return streaming.streamSSE(c, async (stream) => {
          try {
            const generator = await gateway.chat.completions.create(
              body
            );
            for await (const chunk of generator) {
              await stream.writeSSE({
                data: JSON.stringify(chunk)
              });
            }
            await stream.writeSSE({ data: "[DONE]" });
          } catch (error) {
            // Send the same error body the non-streaming path returns,
            // rather than wrapping the whole `{ status, body }` pair.
            const { body: errorBody } = formatError(error);
            await stream.writeSSE({
              data: JSON.stringify(errorBody)
            });
          }
        });
      }
      const response = await gateway.chat.completions.create(
        body
      );
      return c.json(response);
    } catch (error) {
      const { status, body: errorBody } = formatError(error);
      return c.json(errorBody, status);
    }
  });
  app.post(`${basePath}/v1/completions`, (c) => {
    return invalidRequest(
      c,
      "The completions endpoint is deprecated. Please use /v1/chat/completions instead.",
      "deprecated_endpoint"
    );
  });
  app.all("*", (c) => {
    return c.json(
      {
        error: {
          message: `Unknown endpoint: ${c.req.method} ${c.req.path}`,
          type: "invalid_request_error",
          code: "unknown_endpoint"
        }
      },
      404
    );
  });
  return app;
}
3152
/**
 * Start listening for HTTP requests with the given Hono app.
 *
 * @param {object} app - App whose `fetch` handler serves requests.
 * @param {object} [options]
 * @param {number} [options.port=3000] - Port to bind. `0` is passed through
 *   unchanged instead of being replaced by the default.
 * @param {string} [options.host="0.0.0.0"] - Hostname to bind.
 * @returns {object} The server returned by `nodeServer.serve`.
 */
function startServer(app, options = {}) {
  // `??` rather than `||`: only a missing port falls back to the default,
  // so an explicit 0 is preserved.
  const port = options.port ?? 3e3;
  const host = options.host || "0.0.0.0";
  return nodeServer.serve(
    {
      fetch: app.fetch,
      port,
      hostname: host
    },
    (info) => {
      // Log from the listening callback so the message reports the port that
      // was actually bound and is not printed when binding fails.
      console.log(`Gateway server running on http://${host}:${info.port}`);
    }
  );
}
3163
/**
 * Translate a thrown value into an HTTP status plus a JSON error body.
 *
 * Resolution order:
 *   1. `ValidationError` instance          -> 400, "invalid_request_error"
 *   2. `GatewayError` instance             -> error.statusCode, "gateway_error"
 *   3. Error named "ValidationError" that carries `code` -> same as (1)
 *   4. Error named "GatewayError" that carries `statusCode` and `code` -> same as (2)
 *   5. any other Error                     -> 500, "internal_error"
 *   6. non-Error value                     -> 500, code "unknown_error"
 *
 * @param {unknown} error - The caught value.
 * @returns {{status: number, body: {error: {message: string, type: string, code: *}}}}
 */
function formatError(error) {
  // Single place that builds the response envelope; key order is fixed here.
  const envelope = (status, message, type, code) => ({
    status,
    body: {
      error: { message, type, code }
    }
  });

  if (error instanceof ValidationError) {
    return envelope(400, error.message, "invalid_request_error", error.code);
  }
  if (error instanceof GatewayError) {
    return envelope(error.statusCode, error.message, "gateway_error", error.code);
  }
  if (!(error instanceof Error)) {
    return envelope(500, "An unknown error occurred", "internal_error", "unknown_error");
  }

  // Name-based fallbacks for errors that did not satisfy the `instanceof`
  // checks above but still carry the expected fields.
  const carriesCode = "code" in error;
  if (error.name === "ValidationError" && carriesCode) {
    return envelope(400, error.message, "invalid_request_error", error.code);
  }
  if (error.name === "GatewayError" && "statusCode" in error && carriesCode) {
    return envelope(error.statusCode, error.message, "gateway_error", error.code);
  }

  return envelope(500, error.message, "internal_error", "internal_error");
}
3235
+
3236
+ // src/telemetry/Metrics.ts
3237
/**
 * In-memory collector for counters, gauges and cumulative histograms.
 *
 * Every series is stored under a string key of the form `name` or
 * `name{k1="v1",k2="v2"}` (labels sorted by key, see `formatKey`).
 */
var MetricsCollector = class {
  prefix;
  counters = new Map();
  gauges = new Map();
  histograms = new Map();
  latencyBuckets;
  tokenBuckets;

  /**
   * @param {object} [config]
   * @param {string} [config.prefix="agentsea_gateway"] - Prefix used by `toPrometheusFormat`.
   * @param {{latency?: number[], tokens?: number[]}} [config.histogramBuckets]
   *   Upper bounds for the latency and token histograms.
   */
  constructor(config = {}) {
    this.prefix = config.prefix || "agentsea_gateway";
    this.latencyBuckets = config.histogramBuckets?.latency || [
      50,
      100,
      250,
      500,
      1e3,
      2500,
      5e3,
      1e4
    ];
    this.tokenBuckets = config.histogramBuckets?.tokens || [
      100,
      500,
      1e3,
      2e3,
      5e3,
      1e4,
      5e4
    ];
  }

  /**
   * Get token histogram buckets.
   * @returns {number[]} A copy of the configured token bucket bounds.
   */
  getTokenBuckets() {
    return [...this.tokenBuckets];
  }

  /**
   * Increment a counter.
   * @param {string} name
   * @param {number} [value=1] - Amount to add.
   * @param {Object<string, *>} [labels]
   */
  incrementCounter(name, value = 1, labels) {
    const key = this.formatKey(name, labels);
    const current = this.counters.get(key) || 0;
    this.counters.set(key, current + value);
  }

  /**
   * Set a gauge value, replacing any previous value for the series.
   * @param {string} name
   * @param {number} value
   * @param {Object<string, *>} [labels]
   */
  setGauge(name, value, labels) {
    const key = this.formatKey(name, labels);
    this.gauges.set(key, value);
  }

  /**
   * Record a histogram observation.
   *
   * Buckets are cumulative: an observation increments every bucket whose
   * upper bound is >= the value, plus the implicit `Infinity` bucket.
   *
   * @param {string} name
   * @param {number} value
   * @param {Object<string, *>} [labels]
   * @param {number[]} [buckets] - Bucket bounds; only consulted the first time
   *   a series is seen. Defaults to the latency buckets.
   */
  recordHistogram(name, value, labels, buckets) {
    const key = this.formatKey(name, labels);
    let histogram = this.histograms.get(key);
    if (!histogram) {
      histogram = {
        count: 0,
        sum: 0,
        buckets: new Map()
      };
      const bucketsToUse = buckets || this.latencyBuckets;
      for (const bucket of bucketsToUse) {
        histogram.buckets.set(bucket, 0);
      }
      histogram.buckets.set(Infinity, 0);
      this.histograms.set(key, histogram);
    }
    histogram.count++;
    histogram.sum += value;
    for (const [bucket, count] of histogram.buckets) {
      if (value <= bucket) {
        histogram.buckets.set(bucket, count + 1);
      }
    }
  }

  /**
   * Record request metrics: request count, latency, token counts, cost
   * (stored as whole micro-dollars) and, when `data.cached`, a cache hit.
   *
   * @param {object} data
   * @param {string} data.provider
   * @param {string} data.model
   * @param {string} data.status
   * @param {boolean} data.cached
   * @param {number} data.latencyMs
   * @param {number} data.inputTokens
   * @param {number} data.outputTokens
   * @param {number} data.cost
   */
  recordRequest(data) {
    const labels = { provider: data.provider, model: data.model };
    this.incrementCounter("requests_total", 1, {
      ...labels,
      status: data.status,
      cached: String(data.cached)
    });
    this.recordHistogram("request_latency_ms", data.latencyMs, labels);
    this.incrementCounter("tokens_input_total", data.inputTokens, labels);
    this.incrementCounter("tokens_output_total", data.outputTokens, labels);
    this.incrementCounter(
      "cost_microdollars_total",
      Math.round(data.cost * 1e6),
      labels
    );
    if (data.cached) {
      this.incrementCounter("cache_hits_total", 1);
    }
  }

  /**
   * Get counter value.
   * @returns {number} The stored value, or 0 when the series does not exist.
   */
  getCounter(name, labels) {
    const key = this.formatKey(name, labels);
    return this.counters.get(key) || 0;
  }

  /**
   * Get gauge value.
   * @returns {number} The stored value, or 0 when the series does not exist.
   */
  getGauge(name, labels) {
    const key = this.formatKey(name, labels);
    return this.gauges.get(key) || 0;
  }

  /**
   * Get histogram data.
   * @returns {{count: number, sum: number, buckets: Map<number, number>}|undefined}
   */
  getHistogram(name, labels) {
    const key = this.formatKey(name, labels);
    return this.histograms.get(key);
  }

  /**
   * Get all metrics as a summary object with `requests`, `latency`,
   * `tokens`, `cost`, `cache` and (always empty) `providers` sections.
   */
  getSummary() {
    const requestsTotal = this.sumAllCounters("requests_total");
    const requestsSuccess = this.sumCountersByLabel(
      "requests_total",
      "status",
      "success"
    );
    const requestsError = this.sumCountersByLabel(
      "requests_total",
      "status",
      "error"
    );
    const requestsCached = this.sumCountersByLabel(
      "requests_total",
      "cached",
      "true"
    );
    const latencyHistogram = this.aggregateHistograms("request_latency_ms");
    const avgLatency = latencyHistogram.count > 0 ? latencyHistogram.sum / latencyHistogram.count : 0;
    const inputTokens = this.sumAllCounters("tokens_input_total");
    const outputTokens = this.sumAllCounters("tokens_output_total");
    const totalCostMicro = this.sumAllCounters("cost_microdollars_total");
    const cacheHits = this.getCounter("cache_hits_total");
    const cacheMisses = requestsTotal - cacheHits;
    return {
      requests: {
        total: requestsTotal,
        successful: requestsSuccess,
        failed: requestsError,
        cached: requestsCached
      },
      latency: {
        avg: avgLatency,
        p50: this.calculatePercentile("request_latency_ms", 0.5),
        p95: this.calculatePercentile("request_latency_ms", 0.95),
        p99: this.calculatePercentile("request_latency_ms", 0.99)
      },
      tokens: {
        input: inputTokens,
        output: outputTokens,
        total: inputTokens + outputTokens
      },
      cost: {
        total: totalCostMicro / 1e6,
        byProvider: this.getCostByLabel("provider"),
        byModel: this.getCostByLabel("model")
      },
      cache: {
        hits: cacheHits,
        misses: cacheMisses,
        hitRate: requestsTotal > 0 ? cacheHits / requestsTotal : 0
      },
      providers: {}
    };
  }

  /**
   * Export metrics in Prometheus text format.
   *
   * For histograms the `_bucket` / `_sum` / `_count` suffix is attached to
   * the metric name and the series labels are kept inside a single label
   * set (with `le` appended for buckets), e.g.
   * `prefix_name_bucket{model="m",le="50"} 3`.
   *
   * @returns {string} One sample per line, joined with "\n".
   */
  toPrometheusFormat() {
    const lines = [];
    for (const [key, value] of this.counters) {
      lines.push(`${this.prefix}_${key} ${value}`);
    }
    for (const [key, value] of this.gauges) {
      lines.push(`${this.prefix}_${key} ${value}`);
    }
    for (const [key, histogram] of this.histograms) {
      const { name, labels } = this.splitKey(key);
      const series = `${this.prefix}_${name}`;
      const labelSet = labels ? `{${labels}}` : "";
      const bucketLabelPrefix = labels ? `${labels},` : "";
      for (const [bucket, count] of histogram.buckets) {
        const le = bucket === Infinity ? "+Inf" : bucket;
        lines.push(`${series}_bucket{${bucketLabelPrefix}le="${le}"} ${count}`);
      }
      lines.push(`${series}_sum${labelSet} ${histogram.sum}`);
      lines.push(`${series}_count${labelSet} ${histogram.count}`);
    }
    return lines.join("\n");
  }

  /**
   * Reset all metrics.
   */
  reset() {
    this.counters.clear();
    this.gauges.clear();
    this.histograms.clear();
  }

  /**
   * Format metric key with labels. Labels are sorted by key so the same
   * label set always yields the same key regardless of property order.
   * @returns {string} `name` or `name{k="v",...}`.
   */
  formatKey(name, labels) {
    if (!labels || Object.keys(labels).length === 0) {
      return name;
    }
    const labelStr = Object.entries(labels).sort(([a], [b]) => a.localeCompare(b)).map(([k, v]) => `${k}="${v}"`).join(",");
    return `${name}{${labelStr}}`;
  }

  /**
   * Split a key produced by `formatKey` back into its metric name and the
   * raw label text between the braces ("" when the key has no labels).
   * @param {string} key
   * @returns {{name: string, labels: string}}
   */
  splitKey(key) {
    const brace = key.indexOf("{");
    if (brace === -1) {
      return { name: key, labels: "" };
    }
    return { name: key.slice(0, brace), labels: key.slice(brace + 1, -1) };
  }

  /**
   * Sum counters whose key starts with `name` and carries the given label value.
   */
  sumCountersByLabel(name, labelKey, labelValue) {
    let sum = 0;
    for (const [key, value] of this.counters) {
      if (key.startsWith(name) && key.includes(`${labelKey}="${labelValue}"`)) {
        sum += value;
      }
    }
    return sum;
  }

  /**
   * Sum all counters with a given name prefix.
   */
  sumAllCounters(namePrefix) {
    let sum = 0;
    for (const [key, value] of this.counters) {
      if (key.startsWith(namePrefix)) {
        sum += value;
      }
    }
    return sum;
  }

  /**
   * Get cost breakdown by label, converted from micro-dollars to dollars.
   * @param {string} labelKey - Label to group by.
   * @returns {Object<string, number>}
   */
  getCostByLabel(labelKey) {
    const result = {};
    const prefix = "cost_microdollars_total";
    for (const [key, value] of this.counters) {
      if (key.startsWith(prefix)) {
        const match = key.match(new RegExp(`${labelKey}="([^"]+)"`));
        if (match) {
          const labelValue = match[1];
          result[labelValue] = (result[labelValue] || 0) + value / 1e6;
        }
      }
    }
    return result;
  }

  /**
   * Aggregate histograms for a metric name by summing count, sum and each
   * bucket across every series whose key starts with `name`.
   */
  aggregateHistograms(name) {
    const result = {
      count: 0,
      sum: 0,
      buckets: new Map()
    };
    for (const [key, histogram] of this.histograms) {
      if (key.startsWith(name)) {
        result.count += histogram.count;
        result.sum += histogram.sum;
        for (const [bucket, count] of histogram.buckets) {
          const existing = result.buckets.get(bucket) || 0;
          result.buckets.set(bucket, existing + count);
        }
      }
    }
    return result;
  }

  /**
   * Calculate percentile from histogram (approximate).
   *
   * Linearly interpolates inside the first bucket whose cumulative count
   * reaches the target rank. When that bucket is the open-ended `Infinity`
   * bucket there is no upper bound to interpolate towards, so the largest
   * finite bound is returned instead of `Infinity`/`NaN`.
   *
   * @param {string} name
   * @param {number} percentile - Fraction in [0, 1], e.g. 0.95.
   * @returns {number} Estimated value, or 0 when nothing was recorded.
   */
  calculatePercentile(name, percentile) {
    const histogram = this.aggregateHistograms(name);
    if (histogram.count === 0) return 0;
    const sortedBuckets = Array.from(histogram.buckets.entries()).sort(
      ([a], [b]) => a - b
    );
    const targetCount = histogram.count * percentile;
    let prevBucket = 0;
    let prevCount = 0;
    for (const [bucket, count] of sortedBuckets) {
      if (count >= targetCount) {
        if (bucket === Infinity) {
          return prevBucket;
        }
        const bucketRange = bucket - prevBucket;
        const bucketCount = count - prevCount;
        if (bucketCount === 0) {
          return prevBucket;
        }
        const positionInBucket = targetCount - prevCount;
        const fraction = positionInBucket / bucketCount;
        return prevBucket + bucketRange * Math.max(0, Math.min(1, fraction));
      }
      prevBucket = bucket;
      prevCount = count;
    }
    return prevBucket;
  }
};
3545
+
3546
// CommonJS export surface of the bundle: each statement below assigns one
// binding onto `exports` under the same name.
+ exports.AnthropicProvider = AnthropicProvider;
3547
+ exports.AuthenticationError = AuthenticationError;
3548
+ exports.CircuitBreaker = CircuitBreaker;
3549
+ exports.CostOptimizedStrategy = CostOptimizedStrategy;
3550
+ exports.DEFAULT_MODEL_MAPPINGS = DEFAULT_MODEL_MAPPINGS;
3551
+ exports.FailoverStrategy = FailoverStrategy;
3552
+ exports.Gateway = Gateway;
3553
+ exports.GatewayError = GatewayError;
3554
+ exports.GoogleProvider = GoogleProvider;
3555
+ exports.HealthMonitor = HealthMonitor;
3556
+ exports.LatencyOptimizedStrategy = LatencyOptimizedStrategy;
3557
+ exports.MODEL_CONTEXT_WINDOWS = MODEL_CONTEXT_WINDOWS;
3558
+ exports.MODEL_MAX_OUTPUT = MODEL_MAX_OUTPUT;
3559
+ exports.MODEL_PRICING = MODEL_PRICING;
3560
+ exports.MetricsCollector = MetricsCollector;
3561
+ exports.OpenAIProvider = OpenAIProvider;
3562
+ exports.Provider = Provider;
3563
+ exports.ProviderError = ProviderError;
3564
+ exports.ProviderRegistry = ProviderRegistry;
3565
+ exports.RateLimitError = RateLimitError;
3566
+ exports.RoundRobinStrategy = RoundRobinStrategy;
3567
+ exports.Router = Router;
3568
+ exports.VIRTUAL_MODELS = VIRTUAL_MODELS;
3569
+ exports.ValidationError = ValidationError;
3570
+ exports.calculateCost = calculateCost;
3571
+ exports.countMessageTokens = countMessageTokens;
3572
+ exports.countTokens = countTokens;
3573
+ exports.createHTTPServer = createHTTPServer;
3574
+ exports.createRouterConfig = createRouterConfig;
3575
+ exports.createSystemFingerprint = createSystemFingerprint;
3576
+ exports.estimateCost = estimateCost;
3577
+ exports.estimateRequestTokens = estimateRequestTokens;
3578
+ exports.findCheapestModel = findCheapestModel;
3579
+ exports.freeEncoder = freeEncoder;
3580
+ exports.generateCacheKey = generateCacheKey;
3581
+ exports.generateId = generateId;
3582
+ exports.generateRequestId = generateRequestId;
3583
+ exports.getModelCapabilities = getModelCapabilities;
3584
+ exports.getModelInfo = getModelInfo;
3585
+ exports.getModelPricing = getModelPricing;
3586
+ exports.hash = hash;
3587
+ exports.hashRequest = hashRequest;
3588
+ exports.sortModelsByCost = sortModelsByCost;
3589
+ exports.startServer = startServer;
3590
+ exports.truncateToTokenLimit = truncateToTokenLimit;
3591
+ //# sourceMappingURL=index.js.map
3592
+ //# sourceMappingURL=index.js.map