@lov3kaizen/agentsea-gateway 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,759 @@
1
+ import { EventEmitter } from 'events';
2
+ import { Hono } from 'hono';
3
+ import { ServerType } from '@hono/node-server';
4
+
5
+ interface ChatMessage { // A single chat message: a role, its content, and optional tool-call fields.
6
+ role: 'system' | 'user' | 'assistant' | 'tool';
7
+ content: string | ContentPart[] | null; // Plain text, a list of content parts, or null.
8
+ name?: string;
9
+ tool_calls?: ToolCall[];
10
+ tool_call_id?: string; // presumably ties a 'tool' message to the ToolCall it answers — confirm
11
+ }
12
+ interface ContentPart { // One element of multi-part message content; `type` is either 'text' or 'image_url'.
13
+ type: 'text' | 'image_url';
14
+ text?: string; // presumably set when type is 'text'; the type itself does not enforce this
15
+ image_url?: { // presumably set when type is 'image_url'; not enforced by the type
16
+ url: string;
17
+ detail?: 'auto' | 'low' | 'high';
18
+ };
19
+ }
20
+ interface ToolCall { // A function call attached to a message (used by ChatMessage.tool_calls).
21
+ id: string;
22
+ type: 'function'; // 'function' is the only kind this type allows.
23
+ function: {
24
+ name: string;
25
+ arguments: string; // Typed as a string, not an object; presumably serialized JSON — confirm
26
+ };
27
+ }
28
+ interface Tool { // A function tool definition that a request can offer (used by ChatCompletionRequest.tools).
29
+ type: 'function'; // 'function' is the only kind this type allows.
30
+ function: {
31
+ name: string;
32
+ description?: string;
33
+ parameters?: Record<string, unknown>; // Free-form object; presumably a JSON-Schema-like description — confirm
34
+ };
35
+ }
36
+ interface ChatCompletionRequest { // Chat completion request body: model and messages are required, everything else is optional.
37
+ model: string;
38
+ messages: ChatMessage[];
39
+ temperature?: number;
40
+ max_tokens?: number;
41
+ top_p?: number;
42
+ frequency_penalty?: number;
43
+ presence_penalty?: number;
44
+ stop?: string | string[]; // Accepts a single string or a list of strings.
45
+ stream?: boolean; // presumably selects the AsyncGenerator result of Gateway.createCompletion — confirm
46
+ tools?: Tool[];
47
+ tool_choice?: 'none' | 'auto' | 'required' | { // Either a keyword or an object naming one specific function.
48
+ type: 'function';
49
+ function: {
50
+ name: string;
51
+ };
52
+ };
53
+ response_format?: {
54
+ type: 'text' | 'json_object';
55
+ };
56
+ seed?: number;
57
+ user?: string;
58
+ _gateway?: GatewayRequestMetadata; // Gateway-specific per-request options (see GatewayRequestMetadata).
59
+ }
60
+ interface GatewayRequestMetadata { // Optional per-request gateway options carried in ChatCompletionRequest._gateway.
61
+ tenantId?: string;
62
+ requestId?: string;
63
+ routingHint?: string;
64
+ preferredProvider?: string;
65
+ excludeProviders?: string[];
66
+ maxCost?: number; // unit/currency not stated in this declaration — confirm
67
+ maxLatency?: number; // unit not stated in this declaration — confirm
68
+ cachePolicy?: 'default' | 'no-cache' | 'force-cache';
69
+ tags?: Record<string, string>; // Arbitrary string-to-string labels.
70
+ }
71
+ interface ChatCompletionResponse { // Non-streaming completion result: choices plus token usage and optional gateway metadata.
72
+ id: string;
73
+ object: 'chat.completion'; // Literal tag; the streaming chunk type uses 'chat.completion.chunk' instead.
74
+ created: number; // presumably a Unix timestamp — unit not stated here
75
+ model: string;
76
+ choices: ChatCompletionChoice[];
77
+ usage: UsageInfo;
78
+ system_fingerprint?: string;
79
+ _gateway?: GatewayResponseMetadata; // Gateway-side details about how the request was served.
80
+ }
81
+ interface ChatCompletionChoice { // One entry of ChatCompletionResponse.choices: a full message plus its finish reason.
82
+ index: number;
83
+ message: ChatMessage;
84
+ finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null; // Required field, but its value may be null.
85
+ logprobs?: unknown; // Left untyped in this declaration.
86
+ }
87
+ interface UsageInfo { // Token counts for a completion; also the input type of calculateCost.
88
+ prompt_tokens: number;
89
+ completion_tokens: number;
90
+ total_tokens: number; // presumably prompt_tokens + completion_tokens — not enforced by the type
91
+ }
92
+ interface GatewayResponseMetadata { // Gateway-side details attached to a response via ChatCompletionResponse._gateway.
93
+ provider: string;
94
+ originalModel: string; // presumably the model name as requested, before routing — confirm
95
+ latencyMs: number;
96
+ cost: number; // unit/currency not stated in this declaration
97
+ cached: boolean;
98
+ cacheKey?: string;
99
+ retries: number;
100
+ routingDecision?: RoutingDecision;
101
+ }
102
+ interface ChatCompletionChunk { // One streamed piece of a completion; the element type yielded by Provider.chatStream.
103
+ id: string;
104
+ object: 'chat.completion.chunk'; // Literal tag distinguishing chunks from full responses.
105
+ created: number;
106
+ model: string;
107
+ choices: ChatCompletionChunkChoice[];
108
+ system_fingerprint?: string;
109
+ usage?: UsageInfo; // Optional here, unlike ChatCompletionResponse.usage which is required.
110
+ }
111
+ interface ChatCompletionChunkChoice { // One entry of ChatCompletionChunk.choices; carries a partial message instead of a full one.
112
+ index: number;
113
+ delta: Partial<ChatMessage>; // Every ChatMessage field is optional in a delta.
114
+ finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null;
115
+ logprobs?: unknown; // Left untyped in this declaration.
116
+ }
117
+ interface ProviderConfig { // Configuration for one provider; `name` and `models` are the only required fields.
118
+ name: string;
119
+ apiKey?: string;
120
+ baseUrl?: string;
121
+ models: string[];
122
+ timeout?: number; // unit not stated in this declaration — confirm
123
+ maxRetries?: number;
124
+ headers?: Record<string, string>;
125
+ metadata?: Record<string, unknown>; // Free-form extra data; no schema is declared.
126
+ }
127
+ interface ProviderHealth { // Health snapshot for a provider, as returned by Provider.getHealth / healthCheck.
128
+ status: 'healthy' | 'degraded' | 'unhealthy';
129
+ latencyMs: number;
130
+ lastCheck: Date;
131
+ errorRate: number; // range (fraction vs. percent) not stated here — confirm
132
+ consecutiveFailures: number;
133
+ }
134
+ interface ModelInfo { // Static description of a model: owning provider, size limits, pricing and capabilities.
135
+ id: string;
136
+ provider: string;
137
+ contextWindow: number;
138
+ maxOutputTokens: number;
139
+ inputPricePerMillion: number; // presumably price per million input tokens; currency not stated — confirm
140
+ outputPricePerMillion: number; // presumably price per million output tokens; currency not stated — confirm
141
+ capabilities: ModelCapabilities;
142
+ }
143
+ interface ModelCapabilities { // Boolean feature flags for a model; all five are required.
144
+ streaming: boolean;
145
+ tools: boolean;
146
+ vision: boolean;
147
+ json_mode: boolean;
148
+ system_prompts: boolean;
149
+ }
150
+ type RoutingStrategy = 'round-robin' | 'failover' | 'cost-optimized' | 'latency-optimized' | 'load-balanced' | 'conditional'; // Strategy names; this file declares strategy classes for only the first four.
151
+ interface RoutingDecision { // Result of routing a request: the chosen provider/model, a reason, and scored alternatives.
152
+ provider: string;
153
+ model: string;
154
+ reason: string;
155
+ alternatives: Array<{ // Other candidates; required, though the array may presumably be empty.
156
+ provider: string;
157
+ model: string;
158
+ score: number; // scale and ordering (higher vs. lower is better) not stated here — confirm
159
+ }>;
160
+ timestamp: Date;
161
+ }
162
+ interface RouterConfig { // Routing configuration: a required strategy name plus optional strategy-specific settings.
163
+ strategy: RoutingStrategy;
164
+ fallbackChain?: string[]; // presumably an ordered list of provider names — confirm
165
+ weights?: Record<string, number>; // presumably keyed by provider name — confirm
166
+ rules?: RoutingRule[];
167
+ }
168
+ interface RoutingRule { // One rule: a predicate over the request, a route, and a reason.
169
+ condition: (request: ChatCompletionRequest) => boolean; // Synchronous predicate over the request.
170
+ route: string; // whether this names a provider or a model is not stated here — confirm
171
+ reason: string;
172
+ }
173
+ interface CacheConfig { // Response cache settings; all fields except similarityThreshold are required.
174
+ enabled: boolean;
175
+ ttl: number; // unit (seconds vs. milliseconds) not stated here — confirm
176
+ maxEntries: number;
177
+ type: 'exact' | 'semantic';
178
+ similarityThreshold?: number; // presumably only relevant when type is 'semantic' — confirm
179
+ }
180
+ interface CacheEntry { // A cached response with its key, creation/expiry times, hit count and metadata.
181
+ key: string;
182
+ response: ChatCompletionResponse;
183
+ createdAt: Date;
184
+ expiresAt: Date;
185
+ hits: number;
186
+ metadata: Record<string, unknown>; // Free-form; no schema is declared.
187
+ }
188
+ interface CacheStats { // Aggregate cache counters.
189
+ hits: number;
190
+ misses: number;
191
+ hitRate: number; // range (0-1 vs. percent) not stated here — confirm
192
+ size: number;
193
+ evictions: number;
194
+ }
195
+ interface RateLimitConfig { // Rate limits by requests, tokens and concurrency; every field is optional.
196
+ requestsPerMinute?: number;
197
+ requestsPerHour?: number;
198
+ requestsPerDay?: number;
199
+ tokensPerMinute?: number;
200
+ tokensPerDay?: number;
201
+ maxConcurrent?: number;
202
+ }
203
+ interface RateLimitResult { // Outcome of a rate-limit check.
204
+ allowed: boolean;
205
+ remaining: number;
206
+ reset: Date;
207
+ limit: number;
208
+ retryAfter?: number; // unit not stated here; presumably set only when `allowed` is false — confirm
209
+ }
210
+ interface TelemetryConfig { // Optional switches for metrics and tracing, plus logging settings.
211
+ metrics?: boolean;
212
+ tracing?: boolean;
213
+ logging?: {
214
+ level: 'debug' | 'info' | 'warn' | 'error'; // Required whenever `logging` is supplied.
215
+ redact?: string[]; // presumably field names or paths to redact from logs — confirm
216
+ };
217
+ }
218
+ interface GatewayMetrics { // Metrics snapshot returned by Gateway.getMetrics and MetricsCollector.getSummary.
219
+ requests: { // Request counters.
220
+ total: number;
221
+ successful: number;
222
+ failed: number;
223
+ cached: number;
224
+ };
225
+ latency: { // Average and percentile latency; unit presumably milliseconds — confirm
226
+ avg: number;
227
+ p50: number;
228
+ p95: number;
229
+ p99: number;
230
+ };
231
+ tokens: { // Token totals.
232
+ input: number;
233
+ output: number;
234
+ total: number;
235
+ };
236
+ cost: { // Total cost plus per-provider and per-model breakdowns.
237
+ total: number;
238
+ byProvider: Record<string, number>;
239
+ byModel: Record<string, number>;
240
+ };
241
+ cache: { // Cache counters.
242
+ hits: number;
243
+ misses: number;
244
+ hitRate: number;
245
+ };
246
+ providers: Record<string, ProviderHealth>; // presumably keyed by provider name — confirm
247
+ }
248
+ interface GatewayConfig { // Top-level configuration passed to the Gateway constructor; only `providers` is required.
249
+ providers: ProviderConfig[];
250
+ routing?: RouterConfig;
251
+ cache?: CacheConfig;
252
+ rateLimit?: RateLimitConfig | { // Either a flat RateLimitConfig or a wrapper with a default and a per-tenant flag.
253
+ default: RateLimitConfig;
254
+ perTenant?: boolean;
255
+ };
256
+ telemetry?: TelemetryConfig;
257
+ auth?: AuthConfig;
258
+ server?: ServerConfig;
259
+ }
260
+ interface AuthConfig { // Authentication settings; `type` selects the scheme.
261
+ type: 'api-key' | 'jwt' | 'none';
262
+ keys?: string[]; // presumably the accepted keys when type is 'api-key' — confirm
263
+ jwtSecret?: string; // presumably used when type is 'jwt' — confirm
264
+ validateTenant?: (tenantId: string) => Promise<boolean>; // Async callback that resolves to a boolean for a tenant id.
265
+ }
266
+ interface ServerConfig { // HTTP server settings; every field is optional. Extended by HTTPServerOptions.
267
+ port?: number;
268
+ host?: string;
269
+ cors?: {
270
+ origin?: string | string[]; // A single origin or a list of origins.
271
+ methods?: string[];
272
+ headers?: string[];
273
+ };
274
+ basePath?: string;
275
+ }
276
+ declare class GatewayError extends Error { // Base error class; ProviderError, RateLimitError, AuthenticationError and ValidationError extend it.
277
+ code: string;
278
+ statusCode: number;
279
+ provider?: string | undefined;
280
+ retryable: boolean;
281
+ constructor(message: string, code: string, statusCode?: number, provider?: string | undefined, retryable?: boolean); // statusCode and retryable are optional here; their defaults are not visible in this declaration.
282
+ }
283
+ declare class ProviderError extends GatewayError { // GatewayError subclass that requires a provider name and can carry the underlying Error.
284
+ originalError?: Error | undefined;
285
+ constructor(message: string, provider: string, originalError?: Error | undefined, retryable?: boolean); // Takes no code/statusCode; the values passed to the base class are not visible here.
286
+ }
287
+ declare class RateLimitError extends GatewayError { // GatewayError subclass that adds a required retryAfter value.
288
+ retryAfter: number; // unit not stated in this declaration — confirm
289
+ constructor(message: string, retryAfter: number, provider?: string);
290
+ }
291
+ declare class AuthenticationError extends GatewayError { // GatewayError subclass constructed from a message only.
292
+ constructor(message: string); // The code/statusCode it sets are not visible in this declaration.
293
+ }
294
+ declare class ValidationError extends GatewayError { // GatewayError subclass constructed from a message only.
295
+ constructor(message: string); // The code/statusCode it sets are not visible in this declaration.
296
+ }
297
+
298
+ declare abstract class Provider { // Abstract base for providers; subclasses must implement chat, chatStream and getModelInfo.
299
+ readonly name: string;
300
+ readonly config: ProviderConfig;
301
+ protected health: ProviderHealth;
302
+ constructor(config: ProviderConfig);
303
+ abstract chat(request: ChatCompletionRequest): Promise<ChatCompletionResponse>; // Non-streaming completion.
304
+ abstract chatStream(request: ChatCompletionRequest): AsyncGenerator<ChatCompletionChunk, void, unknown>; // Streaming completion; yields chunks and returns nothing.
305
+ supportsModel(model: string): boolean;
306
+ abstract getModelInfo(model: string): ModelInfo | null; // May return null; presumably for models the provider does not know — confirm
307
+ getModels(): string[];
308
+ getHealth(): ProviderHealth; // Synchronous, unlike healthCheck().
309
+ healthCheck(): Promise<ProviderHealth>;
310
+ protected updateHealth(success: boolean, latencyMs: number): void; // Visible to subclasses only.
311
+ isHealthy(): boolean;
312
+ isAvailable(): boolean; // how this differs from isHealthy() is not visible in this declaration
313
+ }
314
+ interface ProviderOptions { // Base options type extended by the OpenAI, Anthropic and Google provider options; every field is optional.
315
+ apiKey?: string;
316
+ baseUrl?: string;
317
+ models?: string[]; // Optional here, whereas ProviderConfig.models is required.
318
+ timeout?: number;
319
+ maxRetries?: number;
320
+ headers?: Record<string, string>;
321
+ }
322
+
323
+ interface ProviderWithModels { // Pairs a Provider instance with a list of model names; exported but not referenced elsewhere in this file.
324
+ provider: Provider;
325
+ models: string[];
326
+ }
327
+ declare class ProviderRegistry { // Registry of Provider instances with lookup by name or model and health-check controls; implementation not shown.
328
+ private providers;
329
+ private modelToProvider;
330
+ private healthCheckInterval;
331
+ constructor(providers?: Provider[]); // Providers may be supplied up front or added later with register().
332
+ register(provider: Provider): void;
333
+ unregister(name: string): boolean; // presumably true when a provider was removed — confirm
334
+ get(name: string): Provider | undefined;
335
+ getAll(): Provider[];
336
+ getNames(): string[];
337
+ getProvidersForModel(model: string): Provider[]; // Plural form: returns a list.
338
+ getProviderForModel(model: string): Provider | undefined; // Singular form: at most one; which one is chosen is not visible here.
339
+ hasModel(model: string): boolean;
340
+ getAllModels(): string[];
341
+ getModelInfo(model: string): ModelInfo | null;
342
+ getHealthStatus(): Record<string, ProviderHealth>; // Synchronous, unlike checkHealth().
343
+ getHealthyProviders(): Provider[];
344
+ getAvailableProviders(): Provider[];
345
+ checkHealth(): Promise<Record<string, ProviderHealth>>;
346
+ startHealthChecks(intervalMs?: number): void; // The default interval is not visible in this declaration.
347
+ stopHealthChecks(): void;
348
+ get size(): number; // Read-only accessor; presumably the number of registered providers — confirm
349
+ }
350
+
351
+ interface RoutingStrategyInterface { // Contract implemented by the strategy classes: a strategy name plus a synchronous route().
352
+ readonly name: RoutingStrategy;
353
+ route(request: ChatCompletionRequest, registry: ProviderRegistry, context?: RoutingContext): RoutingDecision; // Returns a decision directly, not a Promise.
354
+ }
355
+ interface RoutingContext { // Optional extra inputs to route(); several fields share names with GatewayRequestMetadata.
356
+ excludeProviders?: string[];
357
+ preferredProvider?: string;
358
+ maxCost?: number;
359
+ maxLatency?: number;
360
+ previousAttempts?: Array<{ // Earlier attempts for this request, each with an optional error string.
361
+ provider: string;
362
+ model: string;
363
+ error?: string;
364
+ }>;
365
+ }
366
+ interface ModelMapping { // Maps a model name to a list of provider/model pairs.
367
+ [model: string]: Array<{ // Index signature: any string key is accepted.
368
+ provider: string;
369
+ model: string;
370
+ }>;
371
+ }
372
+ declare const DEFAULT_MODEL_MAPPINGS: ModelMapping; // Exported ModelMapping constant; its contents are not visible in this declaration.
373
+ declare const VIRTUAL_MODELS: readonly ["best", "cheapest", "fastest"]; // Readonly tuple of the three virtual model names.
374
+ type VirtualModel = (typeof VIRTUAL_MODELS)[number]; // Union of the VIRTUAL_MODELS elements: "best" | "cheapest" | "fastest".
375
+ declare class Router { // Router built from a RoutingStrategyInterface plus optional model mappings and fallback chain; implementation not shown.
376
+ private strategy;
377
+ private modelMappings;
378
+ private fallbackChain;
379
+ constructor(strategy: RoutingStrategyInterface, config?: { // Both config fields are optional; their defaults are not visible here.
380
+ modelMappings?: ModelMapping;
381
+ fallbackChain?: string[];
382
+ });
383
+ route(request: ChatCompletionRequest, registry: ProviderRegistry, context?: RoutingContext): RoutingDecision; // Same signature as RoutingStrategyInterface.route.
384
+ isVirtualModel(model: string): model is VirtualModel; // Type guard: narrows `model` to VirtualModel when it returns true.
385
+ private routeVirtualModel;
386
+ private routeBest;
387
+ private routeCheapest;
388
+ private routeFastest;
389
+ getEquivalentModels(model: string): Array<{ // Returns provider/model pairs, the same element shape as ModelMapping values.
390
+ provider: string;
391
+ model: string;
392
+ }>;
393
+ setStrategy(strategy: RoutingStrategyInterface): void; // The strategy can be replaced after construction.
394
+ getStrategyName(): RoutingStrategy;
395
+ getFallbackChain(): string[];
396
+ }
397
+ declare function createRouterConfig(options: Partial<RouterConfig>): RouterConfig; // Takes a partial config and returns a complete one; the defaults it fills in are not visible here.
398
+
399
+ interface RoundRobinConfig { // Options for RoundRobinStrategy.
400
+ weights?: Record<string, number>; // presumably keyed by provider name — confirm
401
+ }
402
+ declare class RoundRobinStrategy implements RoutingStrategyInterface { // Strategy named "round-robin"; implementation not shown.
403
+ readonly name: "round-robin";
404
+ private currentIndex;
405
+ private weights;
406
+ constructor(config?: RoundRobinConfig);
407
+ route(request: ChatCompletionRequest, registry: ProviderRegistry, context?: RoutingContext): RoutingDecision;
408
+ reset(): void; // presumably resets the private currentIndex — confirm against implementation
409
+ }
410
+
411
+ interface FailoverConfig { // Options for FailoverStrategy; `chain` is required.
412
+ chain: string[]; // presumably provider names in failover order — confirm
413
+ modelMappings?: Record<string, Record<string, string>>; // Two-level string map; this shape differs from the ModelMapping interface. Key meaning not stated here.
414
+ }
415
+ declare class FailoverStrategy implements RoutingStrategyInterface { // Strategy named "failover"; unlike the other strategies, its constructor config is required.
416
+ readonly name: "failover";
417
+ private chain;
418
+ private modelMappings;
419
+ constructor(config: FailoverConfig);
420
+ route(request: ChatCompletionRequest, registry: ProviderRegistry, context?: RoutingContext): RoutingDecision;
421
+ getNextProvider(currentProvider: string): string | null; // May return null; presumably when no later chain entry exists — confirm
422
+ getChain(): string[];
423
+ setChain(chain: string[]): void;
424
+ }
425
+
426
+ interface CostOptimizedConfig { // Options for CostOptimizedStrategy; every field is optional.
427
+ maxCostPerRequest?: number; // unit/currency not stated here
428
+ preferLocal?: boolean;
429
+ qualityThreshold?: number; // scale not stated here — confirm
430
+ fallbackOnBudget?: 'cheapest' | 'error'; // presumably what to do when the budget cannot be met — confirm
431
+ }
432
+ declare class CostOptimizedStrategy implements RoutingStrategyInterface { // Strategy named "cost-optimized"; implementation not shown.
433
+ readonly name: "cost-optimized";
434
+ private config;
435
+ constructor(config?: CostOptimizedConfig);
436
+ route(request: ChatCompletionRequest, registry: ProviderRegistry, context?: RoutingContext): RoutingDecision;
437
+ setConfig(config: Partial<CostOptimizedConfig>): void; // Accepts a partial config; whether it merges or replaces is not visible here.
438
+ getConfig(): CostOptimizedConfig;
439
+ }
440
+
441
+ interface LatencyOptimizedConfig { // Options for LatencyOptimizedStrategy; every field is optional.
442
+ maxLatencyMs?: number;
443
+ warmupRequests?: number; // presumably a sample count required before stats are trusted — confirm
444
+ adaptiveRouting?: boolean;
445
+ }
446
+ interface LatencyStats { // Latency statistics returned by LatencyOptimizedStrategy.getStats / getAllStats.
447
+ count: number;
448
+ total: number;
449
+ min: number;
450
+ max: number;
451
+ avg: number;
452
+ p95: number;
453
+ samples: number[]; // Raw samples; presumably bounded by the strategy's private maxSamples — confirm
454
+ }
455
+ declare class LatencyOptimizedStrategy implements RoutingStrategyInterface { // Strategy named "latency-optimized" that also exposes latency recording and stats accessors.
456
+ readonly name: "latency-optimized";
457
+ private config;
458
+ private latencyStats;
459
+ private readonly maxSamples;
460
+ constructor(config?: LatencyOptimizedConfig);
461
+ route(request: ChatCompletionRequest, registry: ProviderRegistry, context?: RoutingContext): RoutingDecision;
462
+ recordLatency(provider: string, latencyMs: number): void; // Callers supply latency observations through this method.
463
+ getStats(provider: string): LatencyStats | undefined; // May return undefined; presumably when nothing was recorded for the provider — confirm
464
+ getAllStats(): Record<string, LatencyStats>;
465
+ clearStats(): void;
466
+ setConfig(config: Partial<LatencyOptimizedConfig>): void;
467
+ }
468
+
469
+ interface GatewayEvents { // Event-name to listener-signature map. The Gateway class declaration in this file does not reference it.
470
+ 'request:start': (event: { // `provider` is optional on this event.
471
+ requestId: string;
472
+ model: string;
473
+ provider?: string;
474
+ }) => void;
475
+ 'request:complete': (event: { // Carries provider, model, latency, cost, cache flag and token counts.
476
+ requestId: string;
477
+ provider: string;
478
+ model: string;
479
+ latencyMs: number;
480
+ cost: number;
481
+ cached: boolean;
482
+ tokens: {
483
+ input: number;
484
+ output: number;
485
+ };
486
+ }) => void;
487
+ 'request:error': (event: { // Carries the Error; `provider` is optional.
488
+ requestId: string;
489
+ provider?: string;
490
+ error: Error;
491
+ }) => void;
492
+ 'provider:unhealthy': (provider: string) => void; // Payload is a bare string, not an object.
493
+ 'provider:healthy': (provider: string) => void; // Payload is a bare string, not an object.
494
+ }
495
+ declare class Gateway extends EventEmitter { // Main gateway class, built from a GatewayConfig. NOTE(review): on/emit are not typed against GatewayEvents in this declaration.
496
+ private readonly _config;
497
+ private readonly registry;
498
+ private readonly router;
499
+ private readonly healthMonitor;
500
+ private readonly cache;
501
+ private readonly logger;
502
+ private metrics;
503
+ readonly chat: { // Nested chat.completions.create entry point; same signature as createCompletion.
504
+ completions: {
505
+ create: (request: ChatCompletionRequest) => Promise<ChatCompletionResponse | AsyncGenerator<ChatCompletionChunk>>;
506
+ };
507
+ };
508
+ constructor(config: GatewayConfig);
509
+ createCompletion(request: ChatCompletionRequest): Promise<ChatCompletionResponse | AsyncGenerator<ChatCompletionChunk>>; // Resolves to a response or a chunk generator; callers must narrow the union (presumably by request.stream — confirm).
510
+ private createNonStreamingCompletion;
511
+ private createStreamingCompletion;
512
+ private validateRequest;
513
+ private createProvider;
514
+ private createRouter;
515
+ private createInitialMetrics;
516
+ private updateMetrics;
517
+ private updateCacheHitRate;
518
+ getMetrics(): GatewayMetrics;
519
+ getConfig(): GatewayConfig;
520
+ getRegistry(): ProviderRegistry;
521
+ getRouter(): Router;
522
+ checkHealth(): Promise<Record<string, boolean>>; // Boolean per key, unlike ProviderRegistry.checkHealth which resolves to ProviderHealth values.
523
+ shutdown(): void; // Synchronous; what it releases is not visible in this declaration.
524
+ }
525
+
526
+ type CircuitState = 'closed' | 'open' | 'half-open'; // The three states reported by CircuitBreaker.getState.
527
+ interface CircuitBreakerConfig { // Thresholds and timeout for a CircuitBreaker.
528
+ failureThreshold: number; // presumably failures before the circuit opens — confirm
529
+ successThreshold: number; // presumably successes needed to close it again — confirm
530
+ timeout: number; // unit not stated here — confirm
531
+ volumeThreshold?: number;
532
+ }
533
+ interface HealthMonitorConfig { // Settings for HealthMonitor; circuit-breaker settings are optional.
534
+ checkInterval: number; // unit not stated here — confirm
535
+ unhealthyThreshold: number; // what is measured against this threshold is not visible here
536
+ degradedThreshold: number; // what is measured against this threshold is not visible here
537
+ circuitBreaker?: CircuitBreakerConfig;
538
+ }
539
+ declare class CircuitBreaker { // Circuit breaker for one named provider; state-transition logic is not visible in this declaration.
540
+ private readonly providerName;
541
+ private readonly config;
542
+ private state;
543
+ private failures;
544
+ private successes;
545
+ private lastFailure;
546
+ private nextAttempt;
547
+ constructor(providerName: string, config: CircuitBreakerConfig);
548
+ isAllowed(): boolean;
549
+ recordSuccess(): void;
550
+ recordFailure(): void;
551
+ private trip;
552
+ reset(): void;
553
+ getState(): CircuitState;
554
+ getStatus(): { // Status snapshot; the two Date fields may be null.
555
+ providerName: string;
556
+ state: CircuitState;
557
+ failures: number;
558
+ nextAttempt: Date | null;
559
+ lastFailure: Date | null;
560
+ };
561
+ }
562
+ declare class HealthMonitor extends EventEmitter { // Records per-provider health and request outcomes and exposes circuit-breaker status; emitted events are not declared here.
563
+ private readonly config;
564
+ private healthHistory;
565
+ private circuitBreakers;
566
+ private readonly maxHistorySize;
567
+ constructor(config: HealthMonitorConfig);
568
+ recordHealth(providerName: string, health: ProviderHealth): void;
569
+ recordRequest(providerName: string, success: boolean, _latencyMs: number): void; // NOTE(review): the underscore prefix suggests _latencyMs may be unused — confirm
570
+ isRequestAllowed(providerName: string): boolean;
571
+ private getOrCreateCircuitBreaker;
572
+ getHistory(providerName: string): ProviderHealth[];
573
+ getAverageLatency(providerName: string): number;
574
+ getErrorRate(providerName: string): number;
575
+ getCircuitStatus(providerName: string): ReturnType<CircuitBreaker['getStatus']> | null; // Same shape as CircuitBreaker.getStatus(), or null.
576
+ getAllCircuitStatuses(): Record<string, ReturnType<CircuitBreaker['getStatus']>>;
577
+ resetCircuit(providerName: string): void;
578
+ clear(): void;
579
+ }
580
+
581
+ interface OpenAIProviderOptions extends ProviderOptions { // ProviderOptions plus two optional OpenAI-specific fields.
582
+ organization?: string;
583
+ project?: string;
584
+ }
585
+ declare class OpenAIProvider extends Provider { // Concrete Provider for OpenAI; implements the three abstract Provider methods.
586
+ private readonly apiKey;
587
+ private readonly baseUrl;
588
+ private readonly organization?;
589
+ private readonly project?;
590
+ private readonly timeout;
591
+ constructor(options?: OpenAIProviderOptions); // Options are optional; where apiKey comes from when omitted is not visible here.
592
+ chat(request: ChatCompletionRequest): Promise<ChatCompletionResponse>;
593
+ chatStream(request: ChatCompletionRequest): AsyncGenerator<ChatCompletionChunk, void, unknown>;
594
+ getModelInfo(model: string): ModelInfo | null;
595
+ private makeRequest;
596
+ private transformRequest;
597
+ private transformResponse;
598
+ private transformChunk;
599
+ private parseError;
600
+ private wrapError;
601
+ }
602
+
603
+ interface AnthropicProviderOptions extends ProviderOptions { // ProviderOptions plus one optional Anthropic-specific field.
604
+ anthropicBeta?: string[]; // presumably beta feature identifiers sent with requests — confirm
605
+ }
606
+ declare class AnthropicProvider extends Provider { // Concrete Provider for Anthropic; implements the three abstract Provider methods.
607
+ private readonly apiKey;
608
+ private readonly baseUrl;
609
+ private readonly timeout;
610
+ private readonly anthropicBeta?;
611
+ constructor(options?: AnthropicProviderOptions); // Options are optional; where apiKey comes from when omitted is not visible here.
612
+ chat(request: ChatCompletionRequest): Promise<ChatCompletionResponse>;
613
+ chatStream(request: ChatCompletionRequest): AsyncGenerator<ChatCompletionChunk, void, unknown>;
614
+ getModelInfo(model: string): ModelInfo | null;
615
+ private makeRequest;
616
+ private transformToAnthropic;
617
+ private extractSystemPrompt;
618
+ private transformMessage;
619
+ private transformFromAnthropic;
620
+ private transformStreamEvent;
621
+ private parseError;
622
+ private wrapError;
623
+ }
624
+
625
+ interface GoogleProviderOptions extends ProviderOptions { // ProviderOptions plus two optional Google-specific fields.
626
+ projectId?: string;
627
+ location?: string;
628
+ }
629
+ declare class GoogleProvider extends Provider { // Concrete Provider for Google; private helper names refer to Gemini.
630
+ private readonly apiKey;
631
+ private readonly baseUrl;
632
+ private readonly timeout;
633
+ constructor(options?: GoogleProviderOptions); // No private members are declared for projectId/location; how they are used is not visible here.
634
+ chat(request: ChatCompletionRequest): Promise<ChatCompletionResponse>;
635
+ chatStream(request: ChatCompletionRequest): AsyncGenerator<ChatCompletionChunk, void, unknown>;
636
+ getModelInfo(model: string): ModelInfo | null;
637
+ private makeRequest;
638
+ private transformToGemini;
639
+ private transformMessages;
640
+ private transformFromGemini;
641
+ private transformStreamChunk;
642
+ private parseError;
643
+ private wrapError;
644
+ }
645
+
646
+ interface HTTPServerOptions extends ServerConfig { // ServerConfig plus the required Gateway instance; the parameter type of createHTTPServer.
647
+ gateway: Gateway;
648
+ }
649
+ declare function createHTTPServer(options: HTTPServerOptions): Hono; // Returns a Hono app; the return type implies it is not yet listening (see startServer). Routes are not visible here.
650
+ declare function startServer(app: Hono, options: { // Takes a Hono app plus port/host and returns a @hono/node-server ServerType.
651
+ port?: number; // Default not visible in this declaration.
652
+ host?: string; // Default not visible in this declaration.
653
+ }): ServerType;
654
+
655
+ interface MetricsConfig { // Options for MetricsCollector; every field is optional.
656
+ prefix?: string; // presumably prepended to metric names — confirm
657
+ labels?: string[];
658
+ histogramBuckets?: { // Optional bucket boundaries for the latency and token histograms.
659
+ latency?: number[];
660
+ tokens?: number[];
661
+ };
662
+ }
663
+ interface HistogramData { // Histogram state returned by MetricsCollector.getHistogram.
664
+ count: number;
665
+ sum: number;
666
+ buckets: Map<number, number>; // presumably bucket upper bound -> observation count; cumulative or not is not stated — confirm
667
+ }
668
+ declare class MetricsCollector { // Collector for counters, gauges and histograms, with a summary and a Prometheus-format export.
669
+ private readonly prefix;
670
+ private counters;
671
+ private gauges;
672
+ private histograms;
673
+ private readonly latencyBuckets;
674
+ private readonly tokenBuckets;
675
+ constructor(config?: MetricsConfig);
676
+ getTokenBuckets(): number[]; // No matching public getter is declared for latencyBuckets.
677
+ incrementCounter(name: string, value?: number, labels?: Record<string, string>): void; // `value` is optional; its default is not visible here.
678
+ setGauge(name: string, value: number, labels?: Record<string, string>): void;
679
+ recordHistogram(name: string, value: number, labels?: Record<string, string>, buckets?: number[]): void;
680
+ recordRequest(data: { // Takes one request's outcome in a single object; which metrics it updates is not visible here.
681
+ provider: string;
682
+ model: string;
683
+ status: 'success' | 'error';
684
+ latencyMs: number;
685
+ inputTokens: number;
686
+ outputTokens: number;
687
+ cost: number;
688
+ cached: boolean;
689
+ }): void;
690
+ getCounter(name: string, labels?: Record<string, string>): number;
691
+ getGauge(name: string, labels?: Record<string, string>): number;
692
+ getHistogram(name: string, labels?: Record<string, string>): HistogramData | undefined; // May return undefined, unlike getCounter/getGauge which always return a number.
693
+ getSummary(): GatewayMetrics;
694
+ toPrometheusFormat(): string;
695
+ reset(): void;
696
+ private formatKey;
697
+ private sumCountersByLabel;
698
+ private sumAllCounters;
699
+ private getCostByLabel;
700
+ private aggregateHistograms;
701
+ private calculatePercentile;
702
+ }
703
+
704
+ declare const MODEL_PRICING: Record<string, { // Per-key input/output prices; presumably keyed by model id. Units not stated here — confirm
705
+ input: number;
706
+ output: number;
707
+ }>;
708
+ declare const MODEL_CONTEXT_WINDOWS: Record<string, number>; // String-to-number table; presumably model id -> context window size — confirm
709
+ declare const MODEL_MAX_OUTPUT: Record<string, number>; // String-to-number table; presumably model id -> maximum output tokens — confirm
710
+ declare function calculateCost(model: string, usage: UsageInfo): number; // Cost for a model given a UsageInfo; the result for unknown models is not visible here.
711
+ declare function estimateCost(model: string, estimatedInputTokens: number, estimatedOutputTokens: number): number; // Like calculateCost, but takes separate estimated token counts instead of a UsageInfo.
712
+ declare function getModelPricing(model: string): { // Returns input/output prices, or null; same shape as MODEL_PRICING values.
713
+ input: number;
714
+ output: number;
715
+ } | null;
716
+ declare function getModelInfo(model: string, provider: string): ModelInfo; // Always returns a ModelInfo, unlike Provider.getModelInfo which may return null.
717
+ declare function getModelCapabilities(model: string, provider: string): ModelInfo['capabilities']; // Return type is ModelInfo['capabilities'], i.e. ModelCapabilities.
718
+ declare function findCheapestModel(models: string[], _requiredCapabilities?: Partial<ModelInfo['capabilities']>): string | null; // May return null. NOTE(review): the underscore prefix suggests _requiredCapabilities may be ignored — confirm
719
+ declare function sortModelsByCost(models: string[], direction?: 'asc' | 'desc'): string[]; // Default direction, and whether the input array is mutated, are not visible here.
720
+
721
+ declare function countTokens(text: string): number; // Token count for a string; the tokenizer used is not visible in this declaration.
722
+ declare function countMessageTokens(messages: Array<{ // Token count for a message list; accepts any object with role and content.
723
+ role: string;
724
+ content: unknown; // Typed as unknown, so this is looser than ChatMessage.content.
725
+ }>): number;
726
+ declare function estimateRequestTokens(messages: Array<{ // Token estimate for messages plus optional tool definitions.
727
+ role: string;
728
+ content: unknown;
729
+ }>, tools?: Array<{ // Structurally like Tool but without the `type` field.
730
+ function: {
731
+ name: string;
732
+ description?: string;
733
+ parameters?: unknown;
734
+ };
735
+ }>): number;
736
+ declare function truncateToTokenLimit(text: string, maxTokens: number): string; // Returns a string limited by maxTokens; which end is cut is not visible here.
737
+ declare function freeEncoder(): void; // presumably releases a cached tokenizer/encoder used by the token helpers — confirm against implementation
738
+
739
+ declare function hashRequest(request: { // Returns a string hash for these request fields; the algorithm is not visible here.
740
+ model: string;
741
+ messages: Array<{
742
+ role: string;
743
+ content: unknown;
744
+ }>;
745
+ temperature?: number;
746
+ max_tokens?: number;
747
+ tools?: unknown[];
748
+ tool_choice?: unknown;
749
+ }): string;
750
+ declare function generateId(prefix?: string): string; // Returns a string id; the format and the default prefix are not visible here.
751
+ declare function generateRequestId(): string; // Returns a string request id; the format is not visible here.
752
+ declare function generateCacheKey(provider: string, model: string, requestHash: string): string; // Builds a cache key string; requestHash presumably comes from hashRequest — confirm
753
+ declare function hash(str: string): string; // String-to-string hash; algorithm and output encoding are not visible here.
754
+ declare function createSystemFingerprint(config: { // Returns a fingerprint string from a version and provider list; presumably feeds system_fingerprint — confirm
755
+ version: string;
756
+ providers: string[];
757
+ }): string;
758
+
759
+ export { AnthropicProvider, type AnthropicProviderOptions, type AuthConfig, AuthenticationError, type CacheConfig, type CacheEntry, type CacheStats, type ChatCompletionChoice, type ChatCompletionChunk, type ChatCompletionChunkChoice, type ChatCompletionRequest, type ChatCompletionResponse, type ChatMessage, CircuitBreaker, type CircuitBreakerConfig, type CircuitState, type ContentPart, type CostOptimizedConfig, CostOptimizedStrategy, DEFAULT_MODEL_MAPPINGS, type FailoverConfig, FailoverStrategy, Gateway, type GatewayConfig, GatewayError, type GatewayEvents, type GatewayMetrics, type GatewayRequestMetadata, type GatewayResponseMetadata, GoogleProvider, type GoogleProviderOptions, type HTTPServerOptions, HealthMonitor, type HealthMonitorConfig, type HistogramData, type LatencyOptimizedConfig, LatencyOptimizedStrategy, MODEL_CONTEXT_WINDOWS, MODEL_MAX_OUTPUT, MODEL_PRICING, MetricsCollector, type MetricsConfig, type ModelCapabilities, type ModelInfo, type ModelMapping, OpenAIProvider, type OpenAIProviderOptions, Provider, type ProviderConfig, ProviderError, type ProviderHealth, type ProviderOptions, ProviderRegistry, type ProviderWithModels, type RateLimitConfig, RateLimitError, type RateLimitResult, type RoundRobinConfig, RoundRobinStrategy, Router, type RouterConfig, type RoutingContext, type RoutingDecision, type RoutingRule, type RoutingStrategy, type RoutingStrategyInterface, type ServerConfig, type TelemetryConfig, type Tool, type ToolCall, type UsageInfo, VIRTUAL_MODELS, ValidationError, type VirtualModel, calculateCost, countMessageTokens, countTokens, createHTTPServer, createRouterConfig, createSystemFingerprint, estimateCost, estimateRequestTokens, findCheapestModel, freeEncoder, generateCacheKey, generateId, generateRequestId, getModelCapabilities, getModelInfo, getModelPricing, hash, hashRequest, sortModelsByCost, startServer, truncateToTokenLimit };