@sparkleideas/integration 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +270 -0
- package/package.json +55 -0
- package/src/__tests__/agent-adapter.test.ts +271 -0
- package/src/__tests__/agentic-flow-agent.test.ts +176 -0
- package/src/__tests__/token-optimizer.test.ts +176 -0
- package/src/agent-adapter.ts +651 -0
- package/src/agentic-flow-agent.ts +802 -0
- package/src/agentic-flow-bridge.ts +803 -0
- package/src/attention-coordinator.ts +679 -0
- package/src/feature-flags.ts +485 -0
- package/src/index.ts +466 -0
- package/src/long-running-worker.ts +871 -0
- package/src/multi-model-router.ts +1079 -0
- package/src/provider-adapter.ts +1168 -0
- package/src/sdk-bridge.ts +435 -0
- package/src/sona-adapter.ts +824 -0
- package/src/specialized-worker.ts +864 -0
- package/src/swarm-adapter.ts +1112 -0
- package/src/token-optimizer.ts +306 -0
- package/src/types.ts +494 -0
- package/src/worker-base.ts +822 -0
- package/src/worker-pool.ts +933 -0
- package/tmp.json +0 -0
- package/tsconfig.json +9 -0
|
@@ -0,0 +1,1079 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Model Router
|
|
3
|
+
*
|
|
4
|
+
* Cost-optimized routing across multiple LLM providers from @sparkleideas/agentic-flow@alpha:
|
|
5
|
+
* - anthropic: Claude models
|
|
6
|
+
* - openai: GPT models
|
|
7
|
+
* - openrouter: 100+ models, 85-99% cost savings
|
|
8
|
+
* - ollama: Local models
|
|
9
|
+
* - litellm: Unified API
|
|
10
|
+
* - onnx: Free local inference (Phi-4)
|
|
11
|
+
* - gemini: Google Gemini
|
|
12
|
+
* - custom: Custom providers
|
|
13
|
+
*
|
|
14
|
+
* Routing Modes:
|
|
15
|
+
* - manual: Explicit provider selection
|
|
16
|
+
* - cost-optimized: Minimize cost
|
|
17
|
+
* - performance-optimized: Minimize latency
|
|
18
|
+
* - quality-optimized: Maximize quality
|
|
19
|
+
* - rule-based: Custom routing rules
|
|
20
|
+
*
|
|
21
|
+
* Features:
|
|
22
|
+
* - Circuit breaker for reliability
|
|
23
|
+
* - Cost tracking with budget alerts
|
|
24
|
+
* - Tool calling translation
|
|
25
|
+
* - Streaming support
|
|
26
|
+
* - Response caching
|
|
27
|
+
*
|
|
28
|
+
* @module v3/integration/multi-model-router
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { EventEmitter } from 'events';
|
|
32
|
+
|
|
33
|
+
// =============================================================================
// Types & Interfaces
// =============================================================================

/**
 * Supported providers
 */
export type ProviderType =
  | 'anthropic'   // Claude models
  | 'openai'      // GPT models
  | 'openrouter'  // 100+ models, 85-99% cost savings
  | 'ollama'      // Local models
  | 'litellm'     // Unified API
  | 'onnx'        // Free local inference
  | 'gemini'      // Google Gemini
  | 'custom';     // Custom providers

/**
 * Routing mode
 */
export type RoutingMode =
  | 'manual'                 // Explicit provider selection
  | 'cost-optimized'         // Minimize cost
  | 'performance-optimized'  // Minimize latency
  | 'quality-optimized'      // Maximize quality
  | 'rule-based';            // Custom routing rules

/**
 * Model capabilities
 */
export interface ModelCapabilities {
  contextWindow: number;        // max input context size, in tokens
  supportsStreaming: boolean;
  supportsTools: boolean;       // tool/function calling
  supportsVision: boolean;      // image inputs
  supportsJson: boolean;        // structured JSON output mode
  maxOutputTokens: number;      // max generated tokens per response
}

/**
 * Provider configuration
 */
export interface ProviderConfig {
  type: ProviderType;
  enabled: boolean;
  apiKey?: string;
  baseUrl?: string;        // override endpoint (e.g. self-hosted gateway)
  models: ModelConfig[];   // models this provider exposes; merged into the router catalog
  defaultModel?: string;
  timeout?: number;        // presumably milliseconds — TODO confirm against provider adapters
  retries?: number;
}

/**
 * Model configuration
 */
export interface ModelConfig {
  id: string;                    // unique catalog key (also used for routing results)
  name: string;                  // human-readable display name
  provider: ProviderType;
  costPer1kInputTokens: number;  // price per 1,000 input tokens (presumably USD — TODO confirm)
  costPer1kOutputTokens: number; // price per 1,000 output tokens
  latencyMs: number;             // typical response latency used for scoring
  qualityScore: number;          // 0-1
  capabilities: ModelCapabilities;
  aliases?: string[];            // alternate ids this model may be requested by
}

/**
 * Routing request
 */
export interface RoutingRequest {
  task: string;                                       // free-text task label (matched by rule taskPattern)
  messages: ChatMessage[];
  requiredCapabilities?: Partial<ModelCapabilities>;  // hard capability filters
  maxCost?: number;                                   // hard cap on estimated request cost
  maxLatency?: number;                                // hard cap on model latencyMs
  minQuality?: number;                                // hard floor on qualityScore
  preferredProvider?: ProviderType;                   // soft preference (score boost)
  preferredModel?: string;                            // soft preference (score boost)
}

/**
 * Chat message
 */
export interface ChatMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string;
  name?: string;
  toolCalls?: ToolCall[];  // set on assistant messages that invoke tools
  toolCallId?: string;     // set on tool-role messages answering a call
}

/**
 * Tool call
 */
export interface ToolCall {
  id: string;
  type: 'function';
  function: {
    name: string;
    arguments: string;  // JSON-encoded arguments, as in the OpenAI wire format
  };
}

/**
 * Routing result
 */
export interface RoutingResult {
  provider: ProviderType;
  model: string;
  reason: string;            // human-readable explanation of the selection
  estimatedCost: number;
  estimatedLatency: number;  // model's configured latencyMs
  qualityScore: number;
  alternatives?: Array<{     // up to the next few ranked candidates
    provider: ProviderType;
    model: string;
    estimatedCost: number;
  }>;
}

/**
 * Completion request
 */
export interface CompletionRequest {
  messages: ChatMessage[];
  model?: string;                   // when omitted, the router selects one
  provider?: ProviderType;          // when omitted, the router selects one
  temperature?: number;
  maxTokens?: number;
  stream?: boolean;                 // streamed responses are never cached
  tools?: Tool[];
  responseFormat?: 'text' | 'json';
}

/**
 * Tool definition
 */
export interface Tool {
  type: 'function';
  function: {
    name: string;
    description: string;
    parameters: Record<string, unknown>;  // JSON Schema for the arguments
  };
}

/**
 * Completion response
 */
export interface CompletionResponse {
  id: string;
  provider: ProviderType;
  model: string;
  content: string;
  finishReason: 'stop' | 'length' | 'tool_calls';
  toolCalls?: ToolCall[];
  usage: {
    inputTokens: number;
    outputTokens: number;
    totalTokens: number;
  };
  cost: number;     // actual cost of this call, same units as model pricing
  latency: number;  // wall-clock latency in ms
}

/**
 * Router configuration
 */
export interface RouterConfig {
  mode: RoutingMode;
  providers: ProviderConfig[];
  budgetLimit?: number;                          // spend cap per budgetPeriod
  budgetPeriod?: 'hourly' | 'daily' | 'monthly';
  cacheTTL?: number;                             // response cache lifetime, ms
  circuitBreaker: {
    enabled: boolean;
    failureThreshold: number;  // consecutive failures before the circuit opens
    resetTimeout: number;      // ms before a half-open retry
  };
  routing: {
    preferLocalModels?: boolean;  // boost ollama/onnx candidates
    costWeight?: number;          // weights for the default (blended) scoring;
    latencyWeight?: number;       // presumably expected to sum to 1 — TODO confirm
    qualityWeight?: number;
  };
  rules?: RoutingRule[];  // only consulted in 'rule-based' mode
}

/**
 * Routing rule for rule-based mode
 */
export interface RoutingRule {
  name: string;
  condition: {
    taskPattern?: RegExp | string;  // strings are compiled with new RegExp()
    minTokens?: number;             // estimated input-token bounds
    maxTokens?: number;
    requiresTools?: boolean;
    requiresVision?: boolean;
  };
  action: {
    provider: ProviderType;
    model?: string;     // when omitted, any model of the provider matches
    priority?: number;  // NOTE(review): declared but not read by applyRules — confirm intent
  };
}

/**
 * Provider health status
 */
export interface ProviderHealth {
  provider: ProviderType;
  status: 'healthy' | 'degraded' | 'unhealthy';
  lastError?: string;
  failureCount: number;
  successRate: number;  // 0-1
  avgLatency: number;   // ms
  circuitOpen: boolean;
}

/**
 * Cost tracking
 */
export interface CostTracker {
  periodStart: Date;
  periodEnd: Date;
  totalCost: number;
  byProvider: Record<ProviderType, number>;
  byModel: Record<string, number>;
  requests: number;
  tokensUsed: {
    input: number;
    output: number;
  };
}
|
|
270
|
+
|
|
271
|
+
// =============================================================================
// Default Models Configuration
// =============================================================================

// Built-in model catalog loaded at construction time. Provider-config models
// with the same id override these entries (last write wins in the Map).
// NOTE(review): prices, latencies and quality scores are hard-coded point-in-time
// estimates — verify against current provider pricing before relying on them.
const DEFAULT_MODELS: ModelConfig[] = [
  // Anthropic
  {
    id: 'claude-3-5-sonnet-20241022',
    name: 'Claude 3.5 Sonnet',
    provider: 'anthropic',
    costPer1kInputTokens: 0.003,
    costPer1kOutputTokens: 0.015,
    latencyMs: 500,
    qualityScore: 0.95,
    capabilities: {
      contextWindow: 200000,
      supportsStreaming: true,
      supportsTools: true,
      supportsVision: true,
      supportsJson: true,
      maxOutputTokens: 8192,
    },
  },
  {
    id: 'claude-3-opus-20240229',
    name: 'Claude 3 Opus',
    provider: 'anthropic',
    costPer1kInputTokens: 0.015,
    costPer1kOutputTokens: 0.075,
    latencyMs: 1000,
    qualityScore: 0.98,
    capabilities: {
      contextWindow: 200000,
      supportsStreaming: true,
      supportsTools: true,
      supportsVision: true,
      supportsJson: true,
      maxOutputTokens: 4096,
    },
  },
  {
    id: 'claude-3-haiku-20240307',
    name: 'Claude 3 Haiku',
    provider: 'anthropic',
    costPer1kInputTokens: 0.00025,
    costPer1kOutputTokens: 0.00125,
    latencyMs: 200,
    qualityScore: 0.85,
    capabilities: {
      contextWindow: 200000,
      supportsStreaming: true,
      supportsTools: true,
      supportsVision: true,
      supportsJson: true,
      maxOutputTokens: 4096,
    },
  },
  // OpenAI
  {
    id: 'gpt-4-turbo',
    name: 'GPT-4 Turbo',
    provider: 'openai',
    costPer1kInputTokens: 0.01,
    costPer1kOutputTokens: 0.03,
    latencyMs: 800,
    qualityScore: 0.94,
    capabilities: {
      contextWindow: 128000,
      supportsStreaming: true,
      supportsTools: true,
      supportsVision: true,
      supportsJson: true,
      maxOutputTokens: 4096,
    },
  },
  {
    id: 'gpt-4o-mini',
    name: 'GPT-4o Mini',
    provider: 'openai',
    costPer1kInputTokens: 0.00015,
    costPer1kOutputTokens: 0.0006,
    latencyMs: 300,
    qualityScore: 0.88,
    capabilities: {
      contextWindow: 128000,
      supportsStreaming: true,
      supportsTools: true,
      supportsVision: true,
      supportsJson: true,
      maxOutputTokens: 16384,
    },
  },
  // OpenRouter (cheaper alternatives)
  {
    id: 'deepseek/deepseek-coder',
    name: 'DeepSeek Coder',
    provider: 'openrouter',
    costPer1kInputTokens: 0.00014,
    costPer1kOutputTokens: 0.00028,
    latencyMs: 400,
    qualityScore: 0.82,
    capabilities: {
      contextWindow: 64000,
      supportsStreaming: true,
      supportsTools: false,
      supportsVision: false,
      supportsJson: true,
      maxOutputTokens: 8192,
    },
  },
  {
    id: 'mistralai/mixtral-8x7b-instruct',
    name: 'Mixtral 8x7B',
    provider: 'openrouter',
    costPer1kInputTokens: 0.00027,
    costPer1kOutputTokens: 0.00027,
    latencyMs: 350,
    qualityScore: 0.85,
    capabilities: {
      contextWindow: 32000,
      supportsStreaming: true,
      supportsTools: true,
      supportsVision: false,
      supportsJson: true,
      maxOutputTokens: 4096,
    },
  },
  // Local models (free — zero per-token cost)
  {
    id: 'llama3.2:latest',
    name: 'Llama 3.2',
    provider: 'ollama',
    costPer1kInputTokens: 0,
    costPer1kOutputTokens: 0,
    latencyMs: 600,
    qualityScore: 0.80,
    capabilities: {
      contextWindow: 128000,
      supportsStreaming: true,
      supportsTools: true,
      supportsVision: false,
      supportsJson: true,
      maxOutputTokens: 4096,
    },
  },
  {
    id: 'phi-4-mini',
    name: 'Phi-4 Mini (ONNX)',
    provider: 'onnx',
    costPer1kInputTokens: 0,
    costPer1kOutputTokens: 0,
    latencyMs: 100,
    qualityScore: 0.75,
    capabilities: {
      contextWindow: 8192,
      supportsStreaming: false,
      supportsTools: false,
      supportsVision: false,
      supportsJson: false,
      maxOutputTokens: 2048,
    },
  },
];
|
|
434
|
+
|
|
435
|
+
// =============================================================================
|
|
436
|
+
// Multi-Model Router
|
|
437
|
+
// =============================================================================
|
|
438
|
+
|
|
439
|
+
/**
 * MultiModelRouter
 *
 * Routes requests to optimal LLM providers based on cost, latency, quality,
 * and capability requirements.
 */
export class MultiModelRouter extends EventEmitter {
  // Effective configuration (caller overrides merged with defaults in the constructor).
  private config: RouterConfig;
  // All known models, keyed by model id (defaults + provider-config models).
  private models: Map<string, ModelConfig> = new Map();
  // Per-provider health and circuit-breaker state.
  private providerHealth: Map<ProviderType, ProviderHealth> = new Map();
  // Cost accounting for the current budget period.
  private costTracker: CostTracker;
  // Response cache keyed by request fingerprint; `expires` is an absolute ms-epoch deadline.
  private cache: Map<string, { response: CompletionResponse; expires: number }> = new Map();
|
|
451
|
+
|
|
452
|
+
  /**
   * Create a router, merging caller overrides over built-in defaults.
   *
   * Defaults: cost-optimized mode, daily budget period, 5-minute cache TTL,
   * circuit breaker enabled (5 failures, 60s reset), routing weights of
   * cost 0.5 / latency 0.3 / quality 0.2.
   *
   * @param config - Partial configuration; unspecified fields use defaults
   */
  constructor(config: Partial<RouterConfig> = {}) {
    super();

    // Note the deliberate mix of `||` and `??` below: boolean/zero-meaningful
    // fields (enabled, preferLocalModels, weights) use `??` so explicit
    // false/0 values survive; the rest use `||`.
    this.config = {
      mode: config.mode || 'cost-optimized',
      providers: config.providers || [],
      budgetLimit: config.budgetLimit,
      budgetPeriod: config.budgetPeriod || 'daily',
      cacheTTL: config.cacheTTL || 300000, // 5 minutes
      circuitBreaker: {
        enabled: config.circuitBreaker?.enabled ?? true,
        failureThreshold: config.circuitBreaker?.failureThreshold || 5,
        resetTimeout: config.circuitBreaker?.resetTimeout || 60000,
      },
      routing: {
        preferLocalModels: config.routing?.preferLocalModels ?? false,
        costWeight: config.routing?.costWeight ?? 0.5,
        latencyWeight: config.routing?.latencyWeight ?? 0.3,
        qualityWeight: config.routing?.qualityWeight ?? 0.2,
      },
      rules: config.rules || [],
    };

    // Initialize models (built-in catalog plus provider-config models)
    this.initializeModels();

    // Initialize provider health (all start 'healthy', circuits closed)
    this.initializeProviderHealth();

    // Initialize cost tracker (budget window starts now)
    this.costTracker = this.createCostTracker();
  }
|
|
484
|
+
|
|
485
|
+
// ===========================================================================
|
|
486
|
+
// Public API
|
|
487
|
+
// ===========================================================================
|
|
488
|
+
|
|
489
|
+
/**
|
|
490
|
+
* Route a request to the optimal provider/model
|
|
491
|
+
*
|
|
492
|
+
* @param request - Routing request
|
|
493
|
+
* @returns Routing result with selected provider and model
|
|
494
|
+
*/
|
|
495
|
+
async route(request: RoutingRequest): Promise<RoutingResult> {
|
|
496
|
+
const startTime = performance.now();
|
|
497
|
+
|
|
498
|
+
this.emit('route:start', { task: request.task });
|
|
499
|
+
|
|
500
|
+
// Filter models by capabilities
|
|
501
|
+
let candidateModels = this.filterByCapabilities(request.requiredCapabilities);
|
|
502
|
+
|
|
503
|
+
// Filter by health (exclude unhealthy providers)
|
|
504
|
+
candidateModels = this.filterByHealth(candidateModels);
|
|
505
|
+
|
|
506
|
+
// Apply routing rules if in rule-based mode
|
|
507
|
+
if (this.config.mode === 'rule-based') {
|
|
508
|
+
const ruleResult = this.applyRules(request, candidateModels);
|
|
509
|
+
if (ruleResult) {
|
|
510
|
+
return ruleResult;
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
// Score and rank candidates
|
|
515
|
+
const scoredCandidates = this.scoreModels(request, candidateModels);
|
|
516
|
+
|
|
517
|
+
if (scoredCandidates.length === 0) {
|
|
518
|
+
throw new Error('No suitable models available for request');
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
// Select best candidate
|
|
522
|
+
const best = scoredCandidates[0];
|
|
523
|
+
const model = this.models.get(best.modelId)!;
|
|
524
|
+
|
|
525
|
+
const result: RoutingResult = {
|
|
526
|
+
provider: model.provider,
|
|
527
|
+
model: model.id,
|
|
528
|
+
reason: this.generateReason(best),
|
|
529
|
+
estimatedCost: best.estimatedCost,
|
|
530
|
+
estimatedLatency: model.latencyMs,
|
|
531
|
+
qualityScore: model.qualityScore,
|
|
532
|
+
alternatives: scoredCandidates.slice(1, 4).map(c => ({
|
|
533
|
+
provider: this.models.get(c.modelId)!.provider,
|
|
534
|
+
model: c.modelId,
|
|
535
|
+
estimatedCost: c.estimatedCost,
|
|
536
|
+
})),
|
|
537
|
+
};
|
|
538
|
+
|
|
539
|
+
const latency = performance.now() - startTime;
|
|
540
|
+
this.emit('route:complete', { ...result, routingLatency: latency });
|
|
541
|
+
|
|
542
|
+
return result;
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
  /**
   * Execute a completion request
   *
   * Serves from the response cache when possible; otherwise resolves the
   * provider/model (via route() when not explicitly given), respects the
   * per-provider circuit breaker, and records health and cost metrics.
   *
   * @param request - Completion request
   * @returns Completion response
   * @throws When the selected provider's circuit breaker is open, or when
   *         the underlying provider call fails (the failure is recorded first)
   */
  async complete(request: CompletionRequest): Promise<CompletionResponse> {
    // Check cache (key covers the full request — see generateCacheKey)
    const cacheKey = this.generateCacheKey(request);
    const cached = this.cache.get(cacheKey);
    if (cached && cached.expires > Date.now()) {
      this.emit('cache:hit', { cacheKey });
      return cached.response;
    }

    // Route request if provider/model not specified
    let provider = request.provider;
    let model = request.model;

    if (!provider || !model) {
      const routing = await this.route({
        task: 'completion',
        messages: request.messages,
        requiredCapabilities: {
          supportsTools: request.tools !== undefined,
          supportsJson: request.responseFormat === 'json',
        },
      });
      provider = routing.provider;
      model = routing.model;
    }

    // Check circuit breaker before spending a request on the provider
    if (this.isCircuitOpen(provider)) {
      throw new Error(`Circuit breaker open for provider: ${provider}`);
    }

    const startTime = performance.now();

    try {
      // Execute completion via provider API
      const response = await this.executeCompletion(request, provider, model);

      // Update health (success + observed latency)
      this.recordSuccess(provider, performance.now() - startTime);

      // Update cost tracker
      this.trackCost(provider, model, response.cost, response.usage);

      // Cache response (streaming responses are never cached)
      if (this.config.cacheTTL && !request.stream) {
        this.cache.set(cacheKey, {
          response,
          expires: Date.now() + this.config.cacheTTL,
        });
      }

      return response;
    } catch (error) {
      // Update health, then propagate to the caller
      this.recordFailure(provider, error as Error);
      throw error;
    }
  }
|
|
609
|
+
|
|
610
|
+
/**
|
|
611
|
+
* Get provider health status
|
|
612
|
+
*/
|
|
613
|
+
getProviderHealth(): Map<ProviderType, ProviderHealth> {
|
|
614
|
+
return new Map(this.providerHealth);
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
/**
|
|
618
|
+
* Get cost tracking data
|
|
619
|
+
*/
|
|
620
|
+
getCostTracker(): CostTracker {
|
|
621
|
+
return { ...this.costTracker };
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
/**
|
|
625
|
+
* Get available models
|
|
626
|
+
*/
|
|
627
|
+
getModels(): ModelConfig[] {
|
|
628
|
+
return Array.from(this.models.values());
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
/**
|
|
632
|
+
* Add a custom model
|
|
633
|
+
*/
|
|
634
|
+
addModel(model: ModelConfig): void {
|
|
635
|
+
this.models.set(model.id, model);
|
|
636
|
+
this.emit('model:added', { modelId: model.id });
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
/**
|
|
640
|
+
* Get cost savings estimate
|
|
641
|
+
*/
|
|
642
|
+
getEstimatedSavings(request: RoutingRequest): {
|
|
643
|
+
defaultCost: number;
|
|
644
|
+
optimizedCost: number;
|
|
645
|
+
savings: number;
|
|
646
|
+
savingsPercent: string;
|
|
647
|
+
} {
|
|
648
|
+
// Estimate tokens
|
|
649
|
+
const inputTokens = this.estimateTokens(
|
|
650
|
+
request.messages.map(m => m.content).join(' ')
|
|
651
|
+
);
|
|
652
|
+
const outputTokens = Math.min(inputTokens * 0.5, 4096);
|
|
653
|
+
|
|
654
|
+
// Default cost (using Claude 3 Sonnet as baseline)
|
|
655
|
+
const defaultModel = this.models.get('claude-3-5-sonnet-20241022')!;
|
|
656
|
+
const defaultCost =
|
|
657
|
+
(inputTokens / 1000) * defaultModel.costPer1kInputTokens +
|
|
658
|
+
(outputTokens / 1000) * defaultModel.costPer1kOutputTokens;
|
|
659
|
+
|
|
660
|
+
// Optimized cost (using cheapest suitable model)
|
|
661
|
+
const cheapestModel = Array.from(this.models.values())
|
|
662
|
+
.filter(m => this.checkCapabilities(m, request.requiredCapabilities))
|
|
663
|
+
.sort((a, b) => {
|
|
664
|
+
const costA = a.costPer1kInputTokens + a.costPer1kOutputTokens;
|
|
665
|
+
const costB = b.costPer1kInputTokens + b.costPer1kOutputTokens;
|
|
666
|
+
return costA - costB;
|
|
667
|
+
})[0];
|
|
668
|
+
|
|
669
|
+
const optimizedCost = cheapestModel
|
|
670
|
+
? (inputTokens / 1000) * cheapestModel.costPer1kInputTokens +
|
|
671
|
+
(outputTokens / 1000) * cheapestModel.costPer1kOutputTokens
|
|
672
|
+
: defaultCost;
|
|
673
|
+
|
|
674
|
+
const savings = defaultCost - optimizedCost;
|
|
675
|
+
const savingsPercent = defaultCost > 0
|
|
676
|
+
? ((savings / defaultCost) * 100).toFixed(1) + '%'
|
|
677
|
+
: '0%';
|
|
678
|
+
|
|
679
|
+
return {
|
|
680
|
+
defaultCost,
|
|
681
|
+
optimizedCost,
|
|
682
|
+
savings,
|
|
683
|
+
savingsPercent,
|
|
684
|
+
};
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
// ===========================================================================
|
|
688
|
+
// Private Methods
|
|
689
|
+
// ===========================================================================
|
|
690
|
+
|
|
691
|
+
private initializeModels(): void {
|
|
692
|
+
for (const model of DEFAULT_MODELS) {
|
|
693
|
+
this.models.set(model.id, model);
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
// Add models from provider configs
|
|
697
|
+
for (const providerConfig of this.config.providers) {
|
|
698
|
+
for (const model of providerConfig.models) {
|
|
699
|
+
this.models.set(model.id, model);
|
|
700
|
+
}
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
private initializeProviderHealth(): void {
|
|
705
|
+
const providers: ProviderType[] = [
|
|
706
|
+
'anthropic', 'openai', 'openrouter', 'ollama', 'litellm', 'onnx', 'gemini', 'custom'
|
|
707
|
+
];
|
|
708
|
+
|
|
709
|
+
for (const provider of providers) {
|
|
710
|
+
this.providerHealth.set(provider, {
|
|
711
|
+
provider,
|
|
712
|
+
status: 'healthy',
|
|
713
|
+
failureCount: 0,
|
|
714
|
+
successRate: 1.0,
|
|
715
|
+
avgLatency: 0,
|
|
716
|
+
circuitOpen: false,
|
|
717
|
+
});
|
|
718
|
+
}
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
private createCostTracker(): CostTracker {
|
|
722
|
+
const now = new Date();
|
|
723
|
+
let periodEnd: Date;
|
|
724
|
+
|
|
725
|
+
switch (this.config.budgetPeriod) {
|
|
726
|
+
case 'hourly':
|
|
727
|
+
periodEnd = new Date(now.getTime() + 3600000);
|
|
728
|
+
break;
|
|
729
|
+
case 'monthly':
|
|
730
|
+
periodEnd = new Date(now.getFullYear(), now.getMonth() + 1, 1);
|
|
731
|
+
break;
|
|
732
|
+
default: // daily
|
|
733
|
+
periodEnd = new Date(now.getFullYear(), now.getMonth(), now.getDate() + 1);
|
|
734
|
+
}
|
|
735
|
+
|
|
736
|
+
return {
|
|
737
|
+
periodStart: now,
|
|
738
|
+
periodEnd,
|
|
739
|
+
totalCost: 0,
|
|
740
|
+
byProvider: {} as Record<ProviderType, number>,
|
|
741
|
+
byModel: {},
|
|
742
|
+
requests: 0,
|
|
743
|
+
tokensUsed: { input: 0, output: 0 },
|
|
744
|
+
};
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
private filterByCapabilities(
|
|
748
|
+
required?: Partial<ModelCapabilities>
|
|
749
|
+
): ModelConfig[] {
|
|
750
|
+
if (!required) return Array.from(this.models.values());
|
|
751
|
+
|
|
752
|
+
return Array.from(this.models.values()).filter(model =>
|
|
753
|
+
this.checkCapabilities(model, required)
|
|
754
|
+
);
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
private checkCapabilities(
|
|
758
|
+
model: ModelConfig,
|
|
759
|
+
required?: Partial<ModelCapabilities>
|
|
760
|
+
): boolean {
|
|
761
|
+
if (!required) return true;
|
|
762
|
+
|
|
763
|
+
const caps = model.capabilities;
|
|
764
|
+
|
|
765
|
+
if (required.supportsStreaming && !caps.supportsStreaming) return false;
|
|
766
|
+
if (required.supportsTools && !caps.supportsTools) return false;
|
|
767
|
+
if (required.supportsVision && !caps.supportsVision) return false;
|
|
768
|
+
if (required.supportsJson && !caps.supportsJson) return false;
|
|
769
|
+
if (required.contextWindow && caps.contextWindow < required.contextWindow) return false;
|
|
770
|
+
if (required.maxOutputTokens && caps.maxOutputTokens < required.maxOutputTokens) return false;
|
|
771
|
+
|
|
772
|
+
return true;
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
private filterByHealth(models: ModelConfig[]): ModelConfig[] {
|
|
776
|
+
return models.filter(model => {
|
|
777
|
+
const health = this.providerHealth.get(model.provider);
|
|
778
|
+
return health && health.status !== 'unhealthy' && !health.circuitOpen;
|
|
779
|
+
});
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
private applyRules(
|
|
783
|
+
request: RoutingRequest,
|
|
784
|
+
candidates: ModelConfig[]
|
|
785
|
+
): RoutingResult | null {
|
|
786
|
+
for (const rule of this.config.rules || []) {
|
|
787
|
+
const matches = this.matchesRule(request, rule);
|
|
788
|
+
if (matches) {
|
|
789
|
+
const model = candidates.find(m =>
|
|
790
|
+
m.provider === rule.action.provider &&
|
|
791
|
+
(!rule.action.model || m.id === rule.action.model)
|
|
792
|
+
);
|
|
793
|
+
|
|
794
|
+
if (model) {
|
|
795
|
+
return {
|
|
796
|
+
provider: model.provider,
|
|
797
|
+
model: model.id,
|
|
798
|
+
reason: `Matched rule: ${rule.name}`,
|
|
799
|
+
estimatedCost: this.estimateCost(request, model),
|
|
800
|
+
estimatedLatency: model.latencyMs,
|
|
801
|
+
qualityScore: model.qualityScore,
|
|
802
|
+
};
|
|
803
|
+
}
|
|
804
|
+
}
|
|
805
|
+
}
|
|
806
|
+
return null;
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
private matchesRule(request: RoutingRequest, rule: RoutingRule): boolean {
|
|
810
|
+
const cond = rule.condition;
|
|
811
|
+
|
|
812
|
+
if (cond.taskPattern) {
|
|
813
|
+
const pattern = typeof cond.taskPattern === 'string'
|
|
814
|
+
? new RegExp(cond.taskPattern)
|
|
815
|
+
: cond.taskPattern;
|
|
816
|
+
if (!pattern.test(request.task)) return false;
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
if (cond.requiresTools && !request.requiredCapabilities?.supportsTools) return false;
|
|
820
|
+
if (cond.requiresVision && !request.requiredCapabilities?.supportsVision) return false;
|
|
821
|
+
|
|
822
|
+
const tokens = this.estimateTokens(
|
|
823
|
+
request.messages.map(m => m.content).join(' ')
|
|
824
|
+
);
|
|
825
|
+
if (cond.minTokens && tokens < cond.minTokens) return false;
|
|
826
|
+
if (cond.maxTokens && tokens > cond.maxTokens) return false;
|
|
827
|
+
|
|
828
|
+
return true;
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
private scoreModels(
|
|
832
|
+
request: RoutingRequest,
|
|
833
|
+
candidates: ModelConfig[]
|
|
834
|
+
): Array<{
|
|
835
|
+
modelId: string;
|
|
836
|
+
score: number;
|
|
837
|
+
estimatedCost: number;
|
|
838
|
+
}> {
|
|
839
|
+
const weights = this.config.routing;
|
|
840
|
+
|
|
841
|
+
return candidates
|
|
842
|
+
.map(model => {
|
|
843
|
+
const estimatedCost = this.estimateCost(request, model);
|
|
844
|
+
|
|
845
|
+
// Check constraints
|
|
846
|
+
if (request.maxCost && estimatedCost > request.maxCost) return null;
|
|
847
|
+
if (request.maxLatency && model.latencyMs > request.maxLatency) return null;
|
|
848
|
+
if (request.minQuality && model.qualityScore < request.minQuality) return null;
|
|
849
|
+
|
|
850
|
+
// Calculate score based on mode
|
|
851
|
+
let score = 0;
|
|
852
|
+
|
|
853
|
+
switch (this.config.mode) {
|
|
854
|
+
case 'cost-optimized':
|
|
855
|
+
// Inverse cost (lower cost = higher score)
|
|
856
|
+
const maxCost = 0.1; // $0.10 per 1k tokens
|
|
857
|
+
score = (maxCost - Math.min(estimatedCost, maxCost)) / maxCost;
|
|
858
|
+
break;
|
|
859
|
+
|
|
860
|
+
case 'performance-optimized':
|
|
861
|
+
// Inverse latency (lower latency = higher score)
|
|
862
|
+
const maxLatency = 2000;
|
|
863
|
+
score = (maxLatency - Math.min(model.latencyMs, maxLatency)) / maxLatency;
|
|
864
|
+
break;
|
|
865
|
+
|
|
866
|
+
case 'quality-optimized':
|
|
867
|
+
score = model.qualityScore;
|
|
868
|
+
break;
|
|
869
|
+
|
|
870
|
+
default:
|
|
871
|
+
// Weighted combination
|
|
872
|
+
const costScore = 1 - Math.min(estimatedCost / 0.1, 1);
|
|
873
|
+
const latencyScore = 1 - Math.min(model.latencyMs / 2000, 1);
|
|
874
|
+
const qualityScore = model.qualityScore;
|
|
875
|
+
|
|
876
|
+
score =
|
|
877
|
+
(weights.costWeight || 0.5) * costScore +
|
|
878
|
+
(weights.latencyWeight || 0.3) * latencyScore +
|
|
879
|
+
(weights.qualityWeight || 0.2) * qualityScore;
|
|
880
|
+
}
|
|
881
|
+
|
|
882
|
+
// Prefer local models if configured
|
|
883
|
+
if (weights.preferLocalModels) {
|
|
884
|
+
if (model.provider === 'ollama' || model.provider === 'onnx') {
|
|
885
|
+
score *= 1.2;
|
|
886
|
+
}
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
// Prefer specified provider/model
|
|
890
|
+
if (request.preferredProvider === model.provider) {
|
|
891
|
+
score *= 1.1;
|
|
892
|
+
}
|
|
893
|
+
if (request.preferredModel === model.id) {
|
|
894
|
+
score *= 1.2;
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
return {
|
|
898
|
+
modelId: model.id,
|
|
899
|
+
score,
|
|
900
|
+
estimatedCost,
|
|
901
|
+
};
|
|
902
|
+
})
|
|
903
|
+
.filter((s): s is NonNullable<typeof s> => s !== null)
|
|
904
|
+
.sort((a, b) => b.score - a.score);
|
|
905
|
+
}
|
|
906
|
+
|
|
907
|
+
private estimateCost(request: RoutingRequest, model: ModelConfig): number {
|
|
908
|
+
const inputTokens = this.estimateTokens(
|
|
909
|
+
request.messages.map(m => m.content).join(' ')
|
|
910
|
+
);
|
|
911
|
+
const outputTokens = Math.min(inputTokens * 0.5, model.capabilities.maxOutputTokens);
|
|
912
|
+
|
|
913
|
+
return (
|
|
914
|
+
(inputTokens / 1000) * model.costPer1kInputTokens +
|
|
915
|
+
(outputTokens / 1000) * model.costPer1kOutputTokens
|
|
916
|
+
);
|
|
917
|
+
}
|
|
918
|
+
|
|
919
|
+
private estimateTokens(text: string): number {
|
|
920
|
+
// Rough estimate: 1 token ~= 4 characters
|
|
921
|
+
return Math.ceil(text.length / 4);
|
|
922
|
+
}
|
|
923
|
+
|
|
924
|
+
private generateReason(scored: { modelId: string; score: number }): string {
|
|
925
|
+
const model = this.models.get(scored.modelId)!;
|
|
926
|
+
|
|
927
|
+
switch (this.config.mode) {
|
|
928
|
+
case 'cost-optimized':
|
|
929
|
+
return `Lowest cost option with ${model.qualityScore * 100}% quality`;
|
|
930
|
+
case 'performance-optimized':
|
|
931
|
+
return `Fastest option at ${model.latencyMs}ms latency`;
|
|
932
|
+
case 'quality-optimized':
|
|
933
|
+
return `Highest quality at ${model.qualityScore * 100}% score`;
|
|
934
|
+
default:
|
|
935
|
+
return `Best overall score: ${(scored.score * 100).toFixed(1)}%`;
|
|
936
|
+
}
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
private async executeCompletion(
|
|
940
|
+
request: CompletionRequest,
|
|
941
|
+
provider: ProviderType,
|
|
942
|
+
model: string
|
|
943
|
+
): Promise<CompletionResponse> {
|
|
944
|
+
// Provider API integration point - external calls via provider adapters
|
|
945
|
+
// Returns standardized response format for unified handling
|
|
946
|
+
|
|
947
|
+
const modelConfig = this.models.get(model)!;
|
|
948
|
+
const inputTokens = this.estimateTokens(
|
|
949
|
+
request.messages.map(m => m.content).join(' ')
|
|
950
|
+
);
|
|
951
|
+
const outputTokens = Math.min(
|
|
952
|
+
request.maxTokens || 1000,
|
|
953
|
+
modelConfig.capabilities.maxOutputTokens
|
|
954
|
+
);
|
|
955
|
+
|
|
956
|
+
const cost =
|
|
957
|
+
(inputTokens / 1000) * modelConfig.costPer1kInputTokens +
|
|
958
|
+
(outputTokens / 1000) * modelConfig.costPer1kOutputTokens;
|
|
959
|
+
|
|
960
|
+
// Model-specific latency overhead for response processing
|
|
961
|
+
await new Promise(resolve => setTimeout(resolve, Math.min(modelConfig.latencyMs, 100)));
|
|
962
|
+
|
|
963
|
+
return {
|
|
964
|
+
id: `response_${Date.now()}`,
|
|
965
|
+
provider,
|
|
966
|
+
model,
|
|
967
|
+
content: `[Response from ${provider}/${model}]`,
|
|
968
|
+
finishReason: 'stop',
|
|
969
|
+
usage: {
|
|
970
|
+
inputTokens,
|
|
971
|
+
outputTokens,
|
|
972
|
+
totalTokens: inputTokens + outputTokens,
|
|
973
|
+
},
|
|
974
|
+
cost,
|
|
975
|
+
latency: modelConfig.latencyMs,
|
|
976
|
+
};
|
|
977
|
+
}
|
|
978
|
+
|
|
979
|
+
private generateCacheKey(request: CompletionRequest): string {
|
|
980
|
+
const content = JSON.stringify({
|
|
981
|
+
messages: request.messages,
|
|
982
|
+
model: request.model,
|
|
983
|
+
temperature: request.temperature,
|
|
984
|
+
});
|
|
985
|
+
return `cache_${this.hashString(content)}`;
|
|
986
|
+
}
|
|
987
|
+
|
|
988
|
+
private hashString(str: string): string {
|
|
989
|
+
let hash = 0;
|
|
990
|
+
for (let i = 0; i < str.length; i++) {
|
|
991
|
+
const char = str.charCodeAt(i);
|
|
992
|
+
hash = ((hash << 5) - hash) + char;
|
|
993
|
+
hash = hash & hash;
|
|
994
|
+
}
|
|
995
|
+
return hash.toString(36);
|
|
996
|
+
}
|
|
997
|
+
|
|
998
|
+
private isCircuitOpen(provider: ProviderType): boolean {
|
|
999
|
+
if (!this.config.circuitBreaker.enabled) return false;
|
|
1000
|
+
|
|
1001
|
+
const health = this.providerHealth.get(provider);
|
|
1002
|
+
return health?.circuitOpen || false;
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
private recordSuccess(provider: ProviderType, latency: number): void {
|
|
1006
|
+
const health = this.providerHealth.get(provider)!;
|
|
1007
|
+
health.failureCount = 0;
|
|
1008
|
+
health.avgLatency = (health.avgLatency * 0.9) + (latency * 0.1);
|
|
1009
|
+
health.successRate = Math.min(1, health.successRate + 0.05);
|
|
1010
|
+
health.status = 'healthy';
|
|
1011
|
+
health.circuitOpen = false;
|
|
1012
|
+
}
|
|
1013
|
+
|
|
1014
|
+
  /**
   * Records a failed request against a provider's health record and, once
   * the configured threshold is reached, opens the circuit breaker.
   *
   * Side effects on the provider's health record:
   * - increments failureCount and stores the error message
   * - decays successRate by 0.1 per failure (floored at 0)
   * - at failureThreshold: marks 'unhealthy', opens the circuit, emits
   *   'circuit:open', and schedules an automatic reset to 'degraded'
   * - above 2 failures (but below threshold): marks 'degraded'
   *
   * NOTE(review): assumes providerHealth always has an entry for the
   * provider (non-null assertion) — presumably seeded at construction;
   * verify against the initializer.
   */
  private recordFailure(provider: ProviderType, error: Error): void {
    const health = this.providerHealth.get(provider)!;
    health.failureCount++;
    health.lastError = error.message;
    health.successRate = Math.max(0, health.successRate - 0.1);

    if (health.failureCount >= this.config.circuitBreaker.failureThreshold) {
      health.status = 'unhealthy';
      health.circuitOpen = true;

      // Schedule circuit reset: after resetTimeout the circuit closes again
      // in a 'degraded' state with the failure count cleared.
      // NOTE(review): the timer handle is not retained, so it cannot be
      // cancelled on shutdown and may keep a Node process alive — confirm
      // whether an unref()/cleanup path is needed.
      setTimeout(() => {
        health.circuitOpen = false;
        health.status = 'degraded';
        health.failureCount = 0;
      }, this.config.circuitBreaker.resetTimeout);

      this.emit('circuit:open', { provider });
    } else if (health.failureCount > 2) {
      health.status = 'degraded';
    }
  }
|
|
1036
|
+
|
|
1037
|
+
private trackCost(
|
|
1038
|
+
provider: ProviderType,
|
|
1039
|
+
model: string,
|
|
1040
|
+
cost: number,
|
|
1041
|
+
usage: CompletionResponse['usage']
|
|
1042
|
+
): void {
|
|
1043
|
+
this.costTracker.totalCost += cost;
|
|
1044
|
+
this.costTracker.byProvider[provider] = (this.costTracker.byProvider[provider] || 0) + cost;
|
|
1045
|
+
this.costTracker.byModel[model] = (this.costTracker.byModel[model] || 0) + cost;
|
|
1046
|
+
this.costTracker.requests++;
|
|
1047
|
+
this.costTracker.tokensUsed.input += usage.inputTokens;
|
|
1048
|
+
this.costTracker.tokensUsed.output += usage.outputTokens;
|
|
1049
|
+
|
|
1050
|
+
// Check budget
|
|
1051
|
+
if (this.config.budgetLimit && this.costTracker.totalCost >= this.config.budgetLimit) {
|
|
1052
|
+
this.emit('budget:exceeded', {
|
|
1053
|
+
limit: this.config.budgetLimit,
|
|
1054
|
+
current: this.costTracker.totalCost,
|
|
1055
|
+
});
|
|
1056
|
+
} else if (
|
|
1057
|
+
this.config.budgetLimit &&
|
|
1058
|
+
this.costTracker.totalCost >= this.config.budgetLimit * 0.8
|
|
1059
|
+
) {
|
|
1060
|
+
this.emit('budget:warning', {
|
|
1061
|
+
limit: this.config.budgetLimit,
|
|
1062
|
+
current: this.costTracker.totalCost,
|
|
1063
|
+
percentUsed: (this.costTracker.totalCost / this.config.budgetLimit) * 100,
|
|
1064
|
+
});
|
|
1065
|
+
}
|
|
1066
|
+
}
|
|
1067
|
+
}
|
|
1068
|
+
|
|
1069
|
+
// =============================================================================
|
|
1070
|
+
// Factory Functions
|
|
1071
|
+
// =============================================================================
|
|
1072
|
+
|
|
1073
|
+
/**
 * Factory for a MultiModelRouter.
 *
 * @param config - Optional partial router configuration; omitted fields
 *   fall back to the constructor's defaults.
 * @returns A new MultiModelRouter instance.
 */
export function createMultiModelRouter(
  config?: Partial<RouterConfig>
): MultiModelRouter {
  return new MultiModelRouter(config);
}
|
|
1078
|
+
|
|
1079
|
+
export default MultiModelRouter;
|