elsabro 2.3.0 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +698 -20
  2. package/bin/install.js +0 -0
  3. package/flows/development-flow.json +452 -0
  4. package/flows/quick-flow.json +118 -0
  5. package/hooks/hooks-config-updated.json +285 -0
  6. package/hooks/skill-discovery.sh +539 -0
  7. package/package.json +3 -2
  8. package/references/SYSTEM_INDEX.md +400 -5
  9. package/references/agent-marketplace.md +2274 -0
  10. package/references/agent-protocol.md +1126 -0
  11. package/references/ai-code-suggestions.md +2413 -0
  12. package/references/checkpointing.md +595 -0
  13. package/references/collaboration-patterns.md +851 -0
  14. package/references/collaborative-sessions.md +1081 -0
  15. package/references/configuration-management.md +1810 -0
  16. package/references/cost-tracking.md +1095 -0
  17. package/references/enterprise-sso.md +2001 -0
  18. package/references/error-contracts-v2.md +968 -0
  19. package/references/event-driven.md +1031 -0
  20. package/references/flow-orchestration.md +940 -0
  21. package/references/flow-visualization.md +1557 -0
  22. package/references/ide-integrations.md +3513 -0
  23. package/references/interrupt-system.md +681 -0
  24. package/references/kubernetes-deployment.md +3099 -0
  25. package/references/memory-system.md +683 -0
  26. package/references/mobile-companion.md +3236 -0
  27. package/references/multi-llm-providers.md +2494 -0
  28. package/references/multi-project-memory.md +1182 -0
  29. package/references/observability.md +793 -0
  30. package/references/output-schemas.md +858 -0
  31. package/references/performance-profiler.md +955 -0
  32. package/references/plugin-system.md +1526 -0
  33. package/references/prompt-management.md +292 -0
  34. package/references/sandbox-execution.md +303 -0
  35. package/references/security-system.md +1253 -0
  36. package/references/skill-marketplace-integration.md +3901 -0
  37. package/references/streaming.md +696 -0
  38. package/references/testing-framework.md +1151 -0
  39. package/references/time-travel.md +802 -0
  40. package/references/tool-registry.md +886 -0
  41. package/references/voice-commands.md +3296 -0
  42. package/templates/agent-marketplace-config.json +220 -0
  43. package/templates/agent-protocol-config.json +136 -0
  44. package/templates/ai-suggestions-config.json +100 -0
  45. package/templates/checkpoint-state.json +61 -0
  46. package/templates/collaboration-config.json +157 -0
  47. package/templates/collaborative-sessions-config.json +153 -0
  48. package/templates/configuration-config.json +245 -0
  49. package/templates/cost-tracking-config.json +148 -0
  50. package/templates/enterprise-sso-config.json +438 -0
  51. package/templates/events-config.json +148 -0
  52. package/templates/flow-visualization-config.json +196 -0
  53. package/templates/ide-integrations-config.json +442 -0
  54. package/templates/kubernetes-config.json +764 -0
  55. package/templates/memory-state.json +84 -0
  56. package/templates/mobile-companion-config.json +600 -0
  57. package/templates/multi-llm-config.json +544 -0
  58. package/templates/multi-project-memory-config.json +145 -0
  59. package/templates/observability-config.json +109 -0
  60. package/templates/performance-profiler-config.json +125 -0
  61. package/templates/plugin-config.json +170 -0
  62. package/templates/prompt-management-config.json +86 -0
  63. package/templates/sandbox-config.json +185 -0
  64. package/templates/schemas-config.json +65 -0
  65. package/templates/security-config.json +120 -0
  66. package/templates/skill-marketplace-config.json +441 -0
  67. package/templates/streaming-config.json +72 -0
  68. package/templates/testing-config.json +81 -0
  69. package/templates/timetravel-config.json +62 -0
  70. package/templates/tool-registry-config.json +109 -0
  71. package/templates/voice-commands-config.json +658 -0
@@ -0,0 +1,2494 @@
+ # Multi-LLM Providers System (v3.6)
+
+ Unified system for managing multiple LLM providers, with intelligent routing, automatic fallback, and cost optimization.
+
+ ## Architecture
+
+ ```
+ ┌─────────────────────────────────────────────────────────────────────────────┐
+ │                         MULTI-LLM PROVIDERS SYSTEM                          │
+ ├─────────────────────────────────────────────────────────────────────────────┤
+ │                                                                             │
+ │  ┌──────────────────────────────────────────────────────────────────────┐   │
+ │  │                             UNIFIED API                              │   │
+ │  │    ┌──────────────┐ ┌──────────────┐ ┌──────────────────────────┐    │   │
+ │  │    │ complete()   │ │ stream()     │ │ countTokens()            │    │   │
+ │  │    │ chat()       │ │ embed()      │ │ estimateCost()           │    │   │
+ │  │    └──────────────┘ └──────────────┘ └──────────────────────────┘    │   │
+ │  └──────────────────────────────────────────────────────────────────────┘   │
+ │                                      │                                      │
+ │                                      ▼                                      │
+ │  ┌──────────────────────────────────────────────────────────────────────┐   │
+ │  │                             MODEL ROUTER                             │   │
+ │  │    ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌───────────┐     │   │
+ │  │    │ Task-Based  │ │ Cost        │ │ Load        │ │ Rate      │     │   │
+ │  │    │ Routing     │ │ Optimization│ │ Balancing   │ │ Limiting  │     │   │
+ │  │    └─────────────┘ └─────────────┘ └─────────────┘ └───────────┘     │   │
+ │  └──────────────────────────────────────────────────────────────────────┘   │
+ │                                      │                                      │
+ │                                      ▼                                      │
+ │  ┌──────────────────────────────────────────────────────────────────────┐   │
+ │  │                          PROVIDER REGISTRY                           │   │
+ │  │     ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐      │   │
+ │  │     │ Claude  │ │ OpenAI  │ │ Gemini  │ │ Local   │ │ Azure   │      │   │
+ │  │     │(default)│ │ GPT-4   │ │ Flash   │ │ Ollama  │ │ OpenAI  │      │   │
+ │  │     └─────────┘ └─────────┘ └─────────┘ └─────────┘ └─────────┘      │   │
+ │  └──────────────────────────────────────────────────────────────────────┘   │
+ │                                      │                                      │
+ │                                      ▼                                      │
+ │  ┌──────────────────────────────────────────────────────────────────────┐   │
+ │  │                         HEALTH & MONITORING                          │   │
+ │  │           ┌─────────────┐ ┌─────────────┐ ┌─────────────┐            │   │
+ │  │           │ Health      │ │ Status      │ │ Automatic   │            │   │
+ │  │           │ Checks      │ │ Monitoring  │ │ Fallback    │            │   │
+ │  │           └─────────────┘ └─────────────┘ └─────────────┘            │   │
+ │                                                                             │
+ └─────────────────────────────────────────────────────────────────────────────┘
+ ```
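+
+ A minimal sketch of how these layers compose at call time, assuming the `ProviderRegistry` and `ModelRouter` instances defined in the sections below (request and metrics shapes come from the interfaces in this document):
+
+ ```typescript
+ // Router picks a provider/model pair; the provider performs the actual call.
+ async function ask(
+   registry: ProviderRegistry,
+   router: ModelRouter,
+   prompt: string
+ ): Promise<string> {
+   const request: CompletionRequest = {
+     model: 'smart', // alias, resolved by the router
+     messages: [{ role: 'user', content: prompt }],
+     maxTokens: 1024
+   };
+
+   const { provider, model } = await router.route(request, {
+     taskType: 'conversation',
+     estimatedTokens: 2000
+   });
+
+   const response = await provider.complete({ ...request, model });
+
+   // Feed the health & monitoring layer (cost tracking omitted here)
+   registry.recordMetrics(
+     provider.id,
+     true,
+     response.metadata?.latencyMs ?? 0,
+     response.usage.totalTokens,
+     0
+   );
+   return response.content;
+ }
+ ```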
+
+ ---
+
+ ## ProviderRegistry
+
+ ### Base Interfaces
+
+ ```typescript
+ interface LLMProvider<TConfig extends ProviderConfig = ProviderConfig> {
+   readonly id: string;
+   readonly name: string;
+   readonly version: string;
+   readonly config: TConfig;
+
+   // Core methods
+   complete(request: CompletionRequest): Promise<CompletionResponse>;
+   stream(request: CompletionRequest): AsyncIterable<StreamChunk>;
+
+   // Optional capabilities
+   embed?(texts: string[]): Promise<EmbeddingResponse>;
+   countTokens(text: string, model?: string): Promise<number>;
+
+   // Health & status
+   healthCheck(): Promise<HealthStatus>;
+   getStatus(): ProviderStatus;
+
+   // Lifecycle
+   initialize(): Promise<void>;
+   shutdown(): Promise<void>;
+ }
+
+ interface ProviderConfig {
+   apiKey?: string;
+   baseUrl?: string;
+   timeout?: number;
+   maxRetries?: number;
+   retryDelay?: number;
+   rateLimit?: RateLimitConfig;
+   headers?: Record<string, string>;
+ }
+
+ interface RateLimitConfig {
+   requestsPerMinute: number;
+   tokensPerMinute: number;
+   tokensPerDay?: number;
+   concurrent?: number;
+ }
+
+ interface CompletionRequest {
+   model: string;
+   messages: Message[];
+   temperature?: number;
+   maxTokens?: number;
+   topP?: number;
+   stopSequences?: string[];
+   tools?: ToolDefinition[];
+   toolChoice?: 'auto' | 'none' | 'required' | { name: string };
+   responseFormat?: 'text' | 'json' | { schema: JSONSchema };
+   metadata?: Record<string, unknown>;
+ }
+
+ interface Message {
+   role: 'system' | 'user' | 'assistant' | 'tool';
+   content: string | ContentBlock[];
+   name?: string;
+   toolCallId?: string;
+   toolCalls?: ToolCall[];
+ }
+
+ interface ContentBlock {
+   type: 'text' | 'image' | 'file';
+   text?: string;
+   imageUrl?: string;
+   imageBase64?: string;
+   mimeType?: string;
+   fileName?: string;
+ }
+
+ interface CompletionResponse {
+   id: string;
+   model: string;
+   content: string;
+   toolCalls?: ToolCall[];
+   usage: TokenUsage;
+   finishReason: 'stop' | 'length' | 'tool_calls' | 'content_filter';
+   metadata?: {
+     provider: string;
+     latencyMs: number;
+     cached?: boolean;
+   };
+ }
+
+ interface StreamChunk {
+   type: 'content' | 'tool_call' | 'usage' | 'done';
+   content?: string;
+   toolCall?: Partial<ToolCall>;
+   usage?: Partial<TokenUsage>;
+   finishReason?: string;
+ }
+
+ interface TokenUsage {
+   promptTokens: number;
+   completionTokens: number;
+   totalTokens: number;
+   cacheReadTokens?: number;
+   cacheWriteTokens?: number;
+ }
+
+ interface ToolCall {
+   id: string;
+   name: string;
+   arguments: string;
+ }
+
+ interface HealthStatus {
+   healthy: boolean;
+   latencyMs: number;
+   lastCheck: string;
+   error?: string;
+   details?: Record<string, unknown>;
+ }
+
+ interface ProviderStatus {
+   id: string;
+   name: string;
+   healthy: boolean;
+   enabled: boolean;
+   availableModels: string[];
+   rateLimitRemaining?: {
+     requests: number;
+     tokens: number;
+   };
+   lastError?: {
+     message: string;
+     timestamp: string;
+     code?: string;
+   };
+ }
+ ```
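+
+ As an illustration (not part of the package), a provider-agnostic helper can be written purely against these interfaces; this sketch assembles streamed text and merges any usage chunks the provider emits:
+
+ ```typescript
+ async function collectStream(
+   provider: LLMProvider,
+   request: CompletionRequest
+ ): Promise<{ text: string; usage: Partial<TokenUsage> }> {
+   let text = '';
+   let usage: Partial<TokenUsage> = {};
+
+   for await (const chunk of provider.stream(request)) {
+     switch (chunk.type) {
+       case 'content':
+         text += chunk.content ?? '';
+         break;
+       case 'usage':
+         usage = { ...usage, ...chunk.usage };
+         break;
+       case 'tool_call':
+         // tool-call argument deltas would be accumulated here
+         break;
+       case 'done':
+         break;
+     }
+   }
+   return { text, usage };
+ }
+ ```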
+
+ ### ProviderRegistry Implementation
+
+ ```typescript
+ interface RegisteredProvider {
+   provider: LLMProvider;
+   priority: number;
+   enabled: boolean;
+   healthStatus: HealthStatus;
+   metrics: ProviderMetrics;
+ }
+
+ interface ProviderMetrics {
+   totalRequests: number;
+   successfulRequests: number;
+   failedRequests: number;
+   totalTokens: number;
+   totalCost: number;
+   avgLatencyMs: number;
+   p95LatencyMs: number;
+   lastUsed?: string;
+ }
+
+ class ProviderRegistry {
+   private providers: Map<string, RegisteredProvider> = new Map();
+   private healthCheckInterval: NodeJS.Timer | null = null;
+   private config: ProviderRegistryConfig;
+
+   constructor(config: ProviderRegistryConfig) {
+     this.config = config;
+   }
+
+   // Register a provider
+   async register(
+     provider: LLMProvider,
+     options: { priority?: number; enabled?: boolean } = {}
+   ): Promise<void> {
+     const { priority = 100, enabled = true } = options;
+
+     // Initialize provider
+     await provider.initialize();
+
+     // Initial health check
+     const healthStatus = await provider.healthCheck();
+
+     this.providers.set(provider.id, {
+       provider,
+       priority,
+       enabled,
+       healthStatus,
+       metrics: {
+         totalRequests: 0,
+         successfulRequests: 0,
+         failedRequests: 0,
+         totalTokens: 0,
+         totalCost: 0,
+         avgLatencyMs: 0,
+         p95LatencyMs: 0
+       }
+     });
+
+     EventBus.publish('provider.registered', {
+       id: provider.id,
+       name: provider.name,
+       priority,
+       healthy: healthStatus.healthy
+     });
+   }
+
+   // Unregister a provider
+   async unregister(providerId: string): Promise<boolean> {
+     const registered = this.providers.get(providerId);
+     if (!registered) return false;
+
+     await registered.provider.shutdown();
+     this.providers.delete(providerId);
+
+     EventBus.publish('provider.unregistered', { id: providerId });
+     return true;
+   }
+
+   // Get provider by ID
+   get(providerId: string): LLMProvider | undefined {
+     return this.providers.get(providerId)?.provider;
+   }
+
+   // Get all healthy providers sorted by priority
+   getHealthyProviders(): LLMProvider[] {
+     return Array.from(this.providers.values())
+       .filter(p => p.enabled && p.healthStatus.healthy)
+       .sort((a, b) => a.priority - b.priority)
+       .map(p => p.provider);
+   }
+
+   // Get provider for specific model
+   getProviderForModel(model: string): LLMProvider | undefined {
+     for (const registered of this.providers.values()) {
+       if (!registered.enabled || !registered.healthStatus.healthy) continue;
+
+       const status = registered.provider.getStatus();
+       if (status.availableModels.includes(model)) {
+         return registered.provider;
+       }
+     }
+     return undefined;
+   }
+
+   // List all providers with status
+   list(): ProviderStatus[] {
+     return Array.from(this.providers.values()).map(r => ({
+       ...r.provider.getStatus(),
+       healthy: r.healthStatus.healthy,
+       enabled: r.enabled
+     }));
+   }
+
+   // Enable/disable provider
+   setEnabled(providerId: string, enabled: boolean): void {
+     const registered = this.providers.get(providerId);
+     if (registered) {
+       registered.enabled = enabled;
+       EventBus.publish('provider.status_changed', { id: providerId, enabled });
+     }
+   }
+
+   // Update metrics after request
+   recordMetrics(
+     providerId: string,
+     success: boolean,
+     latencyMs: number,
+     tokens: number,
+     cost: number
+   ): void {
+     const registered = this.providers.get(providerId);
+     if (!registered) return;
+
+     const m = registered.metrics;
+     m.totalRequests++;
+     if (success) {
+       m.successfulRequests++;
+     } else {
+       m.failedRequests++;
+     }
+     m.totalTokens += tokens;
+     m.totalCost += cost;
+     m.lastUsed = new Date().toISOString();
+
+     // Rolling average for latency
+     m.avgLatencyMs = (m.avgLatencyMs * (m.totalRequests - 1) + latencyMs) / m.totalRequests;
+   }
+
+   // Get provider metrics
+   getMetrics(providerId: string): ProviderMetrics | undefined {
+     return this.providers.get(providerId)?.metrics;
+   }
+
+   // Start health check loop
+   startHealthChecks(intervalMs: number = 30000): void {
+     this.healthCheckInterval = setInterval(async () => {
+       for (const [id, registered] of this.providers) {
+         try {
+           const status = await registered.provider.healthCheck();
+           const wasHealthy = registered.healthStatus.healthy;
+           registered.healthStatus = status;
+
+           if (wasHealthy !== status.healthy) {
+             EventBus.publish('provider.health_changed', {
+               id,
+               healthy: status.healthy,
+               error: status.error
+             });
+           }
+         } catch (error) {
+           registered.healthStatus = {
+             healthy: false,
+             latencyMs: 0,
+             lastCheck: new Date().toISOString(),
+             error: error instanceof Error ? error.message : 'Health check failed'
+           };
+         }
+       }
+     }, intervalMs);
+   }
+
+   // Stop health checks
+   stopHealthChecks(): void {
+     if (this.healthCheckInterval) {
+       clearInterval(this.healthCheckInterval);
+       this.healthCheckInterval = null;
+     }
+   }
+
+   // Shutdown all providers
+   async shutdown(): Promise<void> {
+     this.stopHealthChecks();
+     for (const registered of this.providers.values()) {
+       await registered.provider.shutdown();
+     }
+     this.providers.clear();
+   }
+ }
+ ```
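+
+ A hypothetical wiring sketch (API keys and registry options are placeholders): register two providers, let the lower `priority` value win in `getHealthyProviders()`, and poll health every 30 seconds:
+
+ ```typescript
+ const registry = new ProviderRegistry({ /* ProviderRegistryConfig options */ });
+
+ await registry.register(
+   new ClaudeProvider({ apiKey: process.env.ANTHROPIC_API_KEY! }),
+   { priority: 10 } // preferred: getHealthyProviders() sorts ascending
+ );
+ await registry.register(new LocalProvider(), { priority: 50 });
+
+ registry.startHealthChecks(30_000);
+
+ const [primary] = registry.getHealthyProviders();
+ console.log(`primary provider: ${primary.id}`); // "claude" while healthy
+ ```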
+
+ ---
+
+ ## Provider Implementations
+
+ ### ClaudeProvider (Default)
+
+ ```typescript
+ interface ClaudeConfig extends ProviderConfig {
+   apiKey: string;
+   baseUrl?: string;
+   defaultModel?: string;
+   betaHeaders?: string[];
+ }
+
+ class ClaudeProvider implements LLMProvider<ClaudeConfig> {
+   readonly id = 'claude';
+   readonly name = 'Anthropic Claude';
+   readonly version = '1.0.0';
+   readonly config: ClaudeConfig;
+
+   private client: Anthropic;
+
+   static readonly MODELS = {
+     'claude-opus-4-5': {
+       id: 'claude-opus-4-5-20251101',
+       contextWindow: 200000,
+       maxOutput: 32000,
+       inputPrice: 15.0, // per 1M tokens
+       outputPrice: 75.0,
+       capabilities: ['vision', 'tools', 'extended_thinking']
+     },
+     'claude-sonnet-4': {
+       id: 'claude-sonnet-4-20250514',
+       contextWindow: 200000,
+       maxOutput: 64000,
+       inputPrice: 3.0,
+       outputPrice: 15.0,
+       capabilities: ['vision', 'tools', 'computer_use']
+     },
+     'claude-3-5-haiku': {
+       id: 'claude-3-5-haiku-20241022',
+       contextWindow: 200000,
+       maxOutput: 8192,
+       inputPrice: 0.80,
+       outputPrice: 4.0,
+       capabilities: ['vision', 'tools']
+     }
+   };
+
+   constructor(config: ClaudeConfig) {
+     this.config = {
+       baseUrl: 'https://api.anthropic.com',
+       defaultModel: 'claude-sonnet-4',
+       timeout: 120000,
+       maxRetries: 3,
+       ...config
+     };
+   }
+
+   async initialize(): Promise<void> {
+     this.client = new Anthropic({
+       apiKey: this.config.apiKey,
+       baseURL: this.config.baseUrl,
+       timeout: this.config.timeout,
+       maxRetries: this.config.maxRetries
+     });
+   }
+
+   async complete(request: CompletionRequest): Promise<CompletionResponse> {
+     const startTime = Date.now();
+     const modelId = this.resolveModel(request.model);
+
+     const response = await this.client.messages.create({
+       model: modelId,
+       max_tokens: request.maxTokens || 4096,
+       messages: this.convertMessages(request.messages),
+       system: this.extractSystemMessage(request.messages),
+       temperature: request.temperature,
+       top_p: request.topP,
+       stop_sequences: request.stopSequences,
+       tools: request.tools ? this.convertTools(request.tools) : undefined,
+       tool_choice: request.toolChoice ? this.convertToolChoice(request.toolChoice) : undefined
+     });
+
+     return {
+       id: response.id,
+       model: response.model,
+       content: this.extractContent(response),
+       toolCalls: this.extractToolCalls(response),
+       usage: {
+         promptTokens: response.usage.input_tokens,
+         completionTokens: response.usage.output_tokens,
+         totalTokens: response.usage.input_tokens + response.usage.output_tokens,
+         cacheReadTokens: response.usage.cache_read_input_tokens,
+         cacheWriteTokens: response.usage.cache_creation_input_tokens
+       },
+       finishReason: this.mapStopReason(response.stop_reason),
+       metadata: {
+         provider: this.id,
+         latencyMs: Date.now() - startTime
+       }
+     };
+   }
+
+   async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
+     const modelId = this.resolveModel(request.model);
+
+     const stream = await this.client.messages.stream({
+       model: modelId,
+       max_tokens: request.maxTokens || 4096,
+       messages: this.convertMessages(request.messages),
+       system: this.extractSystemMessage(request.messages),
+       temperature: request.temperature
+     });
+
+     for await (const event of stream) {
+       if (event.type === 'content_block_delta') {
+         if (event.delta.type === 'text_delta') {
+           yield { type: 'content', content: event.delta.text };
+         } else if (event.delta.type === 'input_json_delta') {
+           yield { type: 'tool_call', toolCall: { arguments: event.delta.partial_json } };
+         }
+       } else if (event.type === 'message_delta') {
+         yield {
+           type: 'usage',
+           usage: { completionTokens: event.usage?.output_tokens },
+           finishReason: event.delta.stop_reason
+         };
+       }
+     }
+
+     yield { type: 'done' };
+   }
+
+   async countTokens(text: string, model?: string): Promise<number> {
+     const response = await this.client.messages.countTokens({
+       model: this.resolveModel(model || this.config.defaultModel!),
+       messages: [{ role: 'user', content: text }]
+     });
+     return response.input_tokens;
+   }
+
+   async healthCheck(): Promise<HealthStatus> {
+     const startTime = Date.now();
+     try {
+       await this.client.messages.create({
+         model: 'claude-3-5-haiku-20241022',
+         max_tokens: 10,
+         messages: [{ role: 'user', content: 'Hi' }]
+       });
+       return {
+         healthy: true,
+         latencyMs: Date.now() - startTime,
+         lastCheck: new Date().toISOString()
+       };
+     } catch (error) {
+       return {
+         healthy: false,
+         latencyMs: Date.now() - startTime,
+         lastCheck: new Date().toISOString(),
+         error: error instanceof Error ? error.message : 'Unknown error'
+       };
+     }
+   }
+
+   getStatus(): ProviderStatus {
+     return {
+       id: this.id,
+       name: this.name,
+       healthy: true,
+       enabled: true,
+       availableModels: Object.keys(ClaudeProvider.MODELS)
+     };
+   }
+
+   async shutdown(): Promise<void> {
+     // Cleanup if needed
+   }
+
+   private resolveModel(model: string): string {
+     const modelInfo = ClaudeProvider.MODELS[model as keyof typeof ClaudeProvider.MODELS];
+     return modelInfo?.id || model;
+   }
+
+   private convertMessages(messages: Message[]): AnthropicMessage[] {
+     return messages
+       .filter(m => m.role !== 'system')
+       .map(m => ({
+         role: m.role === 'tool' ? 'user' : m.role,
+         content: this.convertContent(m)
+       }));
+   }
+
+   private extractSystemMessage(messages: Message[]): string | undefined {
+     const systemMsg = messages.find(m => m.role === 'system');
+     return typeof systemMsg?.content === 'string' ? systemMsg.content : undefined;
+   }
+
+   private convertContent(message: Message): string | AnthropicContentBlock[] {
+     if (typeof message.content === 'string') {
+       if (message.role === 'tool') {
+         return [{
+           type: 'tool_result',
+           tool_use_id: message.toolCallId!,
+           content: message.content
+         }];
+       }
+       return message.content;
+     }
+
+     return message.content.map(block => {
+       if (block.type === 'text') {
+         return { type: 'text', text: block.text! };
+       } else if (block.type === 'image') {
+         return {
+           type: 'image',
+           source: {
+             type: block.imageBase64 ? 'base64' : 'url',
+             media_type: block.mimeType || 'image/jpeg',
+             data: block.imageBase64,
+             url: block.imageUrl
+           }
+         };
+       }
+       return { type: 'text', text: '' };
+     });
+   }
+
+   private convertTools(tools: ToolDefinition[]): AnthropicTool[] {
+     return tools.map(t => ({
+       name: t.name,
+       description: t.description,
+       input_schema: t.parameters
+     }));
+   }
+
+   private convertToolChoice(choice: CompletionRequest['toolChoice']): AnthropicToolChoice {
+     if (choice === 'auto') return { type: 'auto' };
+     if (choice === 'none') return { type: 'none' };
+     if (choice === 'required') return { type: 'any' };
+     if (typeof choice === 'object') return { type: 'tool', name: choice.name };
+     return { type: 'auto' };
+   }
+
+   private extractContent(response: AnthropicResponse): string {
+     return response.content
+       .filter(b => b.type === 'text')
+       .map(b => b.text)
+       .join('');
+   }
+
+   private extractToolCalls(response: AnthropicResponse): ToolCall[] | undefined {
+     const toolUses = response.content.filter(b => b.type === 'tool_use');
+     if (toolUses.length === 0) return undefined;
+
+     return toolUses.map(t => ({
+       id: t.id,
+       name: t.name,
+       arguments: JSON.stringify(t.input)
+     }));
+   }
+
+   private mapStopReason(reason: string | null): CompletionResponse['finishReason'] {
+     switch (reason) {
+       case 'end_turn': return 'stop';
+       case 'max_tokens': return 'length';
+       case 'tool_use': return 'tool_calls';
+       default: return 'stop';
+     }
+   }
+ }
+ ```
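+
+ A short usage sketch (model key and prompt are illustrative). Note that `system` messages are lifted out of the array and sent through Anthropic's top-level `system` field, so callers can keep a single OpenAI-style message list:
+
+ ```typescript
+ const claude = new ClaudeProvider({ apiKey: process.env.ANTHROPIC_API_KEY! });
+ await claude.initialize();
+
+ const res = await claude.complete({
+   model: 'claude-sonnet-4', // friendly key, resolved to the dated model ID
+   messages: [
+     { role: 'system', content: 'You are a terse code reviewer.' },
+     { role: 'user', content: 'Review: function add(a, b) { return a - b; }' }
+   ],
+   maxTokens: 512
+ });
+
+ console.log(res.content, res.usage.totalTokens);
+ ```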
+
+ ### OpenAIProvider
+
+ ```typescript
+ interface OpenAIConfig extends ProviderConfig {
+   apiKey: string;
+   organization?: string;
+   baseUrl?: string;
+   defaultModel?: string;
+ }
+
+ class OpenAIProvider implements LLMProvider<OpenAIConfig> {
+   readonly id = 'openai';
+   readonly name = 'OpenAI';
+   readonly version = '1.0.0';
+   readonly config: OpenAIConfig;
+
+   private client: OpenAI;
+
+   static readonly MODELS = {
+     'gpt-4o': {
+       id: 'gpt-4o',
+       contextWindow: 128000,
+       maxOutput: 16384,
+       inputPrice: 2.50,
+       outputPrice: 10.0,
+       capabilities: ['vision', 'tools', 'json_mode']
+     },
+     'gpt-4-turbo': {
+       id: 'gpt-4-turbo',
+       contextWindow: 128000,
+       maxOutput: 4096,
+       inputPrice: 10.0,
+       outputPrice: 30.0,
+       capabilities: ['vision', 'tools', 'json_mode']
+     },
+     'gpt-3.5-turbo': {
+       id: 'gpt-3.5-turbo',
+       contextWindow: 16385,
+       maxOutput: 4096,
+       inputPrice: 0.50,
+       outputPrice: 1.50,
+       capabilities: ['tools', 'json_mode']
+     },
+     'gpt-4o-mini': {
+       id: 'gpt-4o-mini',
+       contextWindow: 128000,
+       maxOutput: 16384,
+       inputPrice: 0.15,
+       outputPrice: 0.60,
+       capabilities: ['vision', 'tools', 'json_mode']
+     }
+   };
+
+   constructor(config: OpenAIConfig) {
+     this.config = {
+       baseUrl: 'https://api.openai.com/v1',
+       defaultModel: 'gpt-4o',
+       timeout: 120000,
+       maxRetries: 3,
+       ...config
+     };
+   }
+
+   async initialize(): Promise<void> {
+     this.client = new OpenAI({
+       apiKey: this.config.apiKey,
+       organization: this.config.organization,
+       baseURL: this.config.baseUrl,
+       timeout: this.config.timeout,
+       maxRetries: this.config.maxRetries
+     });
+   }
+
+   async complete(request: CompletionRequest): Promise<CompletionResponse> {
+     const startTime = Date.now();
+
+     const response = await this.client.chat.completions.create({
+       model: request.model,
+       messages: this.convertMessages(request.messages),
+       temperature: request.temperature,
+       max_tokens: request.maxTokens,
+       top_p: request.topP,
+       stop: request.stopSequences,
+       tools: request.tools ? this.convertTools(request.tools) : undefined,
+       tool_choice: request.toolChoice as OpenAIToolChoice,
+       response_format: this.convertResponseFormat(request.responseFormat)
+     });
+
+     const choice = response.choices[0];
+
+     return {
+       id: response.id,
+       model: response.model,
+       content: choice.message.content || '',
+       toolCalls: choice.message.tool_calls?.map(tc => ({
+         id: tc.id,
+         name: tc.function.name,
+         arguments: tc.function.arguments
+       })),
+       usage: {
+         promptTokens: response.usage?.prompt_tokens || 0,
+         completionTokens: response.usage?.completion_tokens || 0,
+         totalTokens: response.usage?.total_tokens || 0
+       },
+       finishReason: this.mapFinishReason(choice.finish_reason),
+       metadata: {
+         provider: this.id,
+         latencyMs: Date.now() - startTime
+       }
+     };
+   }
+
+   async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
+     const stream = await this.client.chat.completions.create({
+       model: request.model,
+       messages: this.convertMessages(request.messages),
+       temperature: request.temperature,
+       max_tokens: request.maxTokens,
+       stream: true
+     });
+
+     for await (const chunk of stream) {
+       const choice = chunk.choices[0];
+       if (choice?.delta?.content) {
+         yield { type: 'content', content: choice.delta.content };
+       }
+       if (choice?.delta?.tool_calls) {
+         for (const tc of choice.delta.tool_calls) {
+           yield {
+             type: 'tool_call',
+             toolCall: {
+               id: tc.id,
+               name: tc.function?.name,
+               arguments: tc.function?.arguments
+             }
+           };
+         }
+       }
+       if (choice?.finish_reason) {
+         yield { type: 'done', finishReason: choice.finish_reason };
+       }
+     }
+   }
+
+   async embed(texts: string[]): Promise<EmbeddingResponse> {
+     const response = await this.client.embeddings.create({
+       model: 'text-embedding-3-small',
+       input: texts
+     });
+
+     return {
+       embeddings: response.data.map(d => d.embedding),
+       usage: {
+         totalTokens: response.usage.total_tokens
+       }
+     };
+   }
+
+   async countTokens(text: string, model?: string): Promise<number> {
+     // Use tiktoken for accurate counting; encodingForModel (js-tiktoken)
+     // maps a model name to its encoding
+     const encoding = encodingForModel(model || 'gpt-4o');
+     return encoding.encode(text).length;
+   }
+
+   async healthCheck(): Promise<HealthStatus> {
+     const startTime = Date.now();
+     try {
+       await this.client.models.list();
+       return {
+         healthy: true,
+         latencyMs: Date.now() - startTime,
+         lastCheck: new Date().toISOString()
+       };
+     } catch (error) {
+       return {
+         healthy: false,
+         latencyMs: Date.now() - startTime,
+         lastCheck: new Date().toISOString(),
+         error: error instanceof Error ? error.message : 'Unknown error'
+       };
+     }
+   }
+
+   getStatus(): ProviderStatus {
+     return {
+       id: this.id,
+       name: this.name,
+       healthy: true,
+       enabled: true,
+       availableModels: Object.keys(OpenAIProvider.MODELS)
+     };
+   }
+
+   async shutdown(): Promise<void> {}
+
+   private convertMessages(messages: Message[]): OpenAIChatMessage[] {
+     return messages.map(m => {
+       if (m.role === 'tool') {
+         return {
+           role: 'tool' as const,
+           content: typeof m.content === 'string' ? m.content : JSON.stringify(m.content),
+           tool_call_id: m.toolCallId!
+         };
+       }
+
+       const content = typeof m.content === 'string'
+         ? m.content
+         : m.content.map(b => {
+             if (b.type === 'text') return { type: 'text', text: b.text };
+             if (b.type === 'image') {
+               return {
+                 type: 'image_url',
+                 image_url: { url: b.imageUrl || `data:${b.mimeType};base64,${b.imageBase64}` }
+               };
+             }
+             return { type: 'text', text: '' };
+           });
+
+       return {
+         role: m.role,
+         content,
+         tool_calls: m.toolCalls?.map(tc => ({
+           id: tc.id,
+           type: 'function' as const,
+           function: { name: tc.name, arguments: tc.arguments }
+         }))
+       };
+     });
+   }
+
+   private convertTools(tools: ToolDefinition[]): OpenAITool[] {
+     return tools.map(t => ({
+       type: 'function' as const,
+       function: {
+         name: t.name,
+         description: t.description,
+         parameters: t.parameters
+       }
+     }));
+   }
+
+   private convertResponseFormat(format?: CompletionRequest['responseFormat']) {
+     if (!format) return undefined;
+     if (format === 'json') return { type: 'json_object' as const };
+     if (typeof format === 'object') {
+       return { type: 'json_schema' as const, json_schema: format.schema };
+     }
+     return undefined;
+   }
+
+   private mapFinishReason(reason: string | null): CompletionResponse['finishReason'] {
+     switch (reason) {
+       case 'stop': return 'stop';
+       case 'length': return 'length';
+       case 'tool_calls': return 'tool_calls';
+       case 'content_filter': return 'content_filter';
+       default: return 'stop';
+     }
+   }
+ }
+ ```
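+
+ Since `embed()` is optional on `LLMProvider`, callers can probe for the capability at runtime. A small sketch (the fallback order is up to the caller):
+
+ ```typescript
+ async function embedWithFallback(
+   providers: LLMProvider[],
+   texts: string[]
+ ): Promise<EmbeddingResponse> {
+   for (const p of providers) {
+     if (typeof p.embed === 'function') {
+       return p.embed(texts); // e.g. OpenAI's text-embedding-3-small above
+     }
+   }
+   throw new Error('No registered provider supports embeddings');
+ }
+ ```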
+
+ ### GeminiProvider
+
+ ```typescript
+ interface GeminiConfig extends ProviderConfig {
+   apiKey: string;
+   baseUrl?: string;
+   defaultModel?: string;
+ }
+
+ class GeminiProvider implements LLMProvider<GeminiConfig> {
+   readonly id = 'gemini';
+   readonly name = 'Google Gemini';
+   readonly version = '1.0.0';
+   readonly config: GeminiConfig;
+
+   private client: GoogleGenerativeAI;
+
+   static readonly MODELS = {
+     'gemini-2.0-flash': {
+       id: 'gemini-2.0-flash',
+       contextWindow: 1000000,
+       maxOutput: 8192,
+       inputPrice: 0.075,
+       outputPrice: 0.30,
+       capabilities: ['vision', 'tools', 'grounding']
+     },
+     'gemini-1.5-pro': {
+       id: 'gemini-1.5-pro',
+       contextWindow: 2000000,
+       maxOutput: 8192,
+       inputPrice: 1.25,
+       outputPrice: 5.0,
+       capabilities: ['vision', 'tools', 'grounding', 'code_execution']
+     },
+     'gemini-1.5-flash': {
+       id: 'gemini-1.5-flash',
+       contextWindow: 1000000,
+       maxOutput: 8192,
+       inputPrice: 0.075,
+       outputPrice: 0.30,
+       capabilities: ['vision', 'tools']
+     }
+   };
+
+   constructor(config: GeminiConfig) {
+     this.config = {
+       defaultModel: 'gemini-2.0-flash',
+       timeout: 120000,
+       maxRetries: 3,
+       ...config
+     };
+   }
+
+   async initialize(): Promise<void> {
+     this.client = new GoogleGenerativeAI(this.config.apiKey);
+   }
+
+   async complete(request: CompletionRequest): Promise<CompletionResponse> {
+     const startTime = Date.now();
+     const model = this.client.getGenerativeModel({ model: request.model });
+
+     const chat = model.startChat({
+       history: this.convertHistory(request.messages),
+       generationConfig: {
+         temperature: request.temperature,
+         maxOutputTokens: request.maxTokens,
+         topP: request.topP,
+         stopSequences: request.stopSequences
+       }
+     });
+
+     const lastMessage = request.messages[request.messages.length - 1];
+     const result = await chat.sendMessage(
+       typeof lastMessage.content === 'string'
+         ? lastMessage.content
+         : this.convertParts(lastMessage.content)
+     );
+
+     const response = result.response;
+     const usage = response.usageMetadata;
+
+     return {
+       id: `gemini-${Date.now()}`,
+       model: request.model,
+       content: response.text(),
+       toolCalls: this.extractToolCalls(response),
+       usage: {
+         promptTokens: usage?.promptTokenCount || 0,
+         completionTokens: usage?.candidatesTokenCount || 0,
+         totalTokens: usage?.totalTokenCount || 0
+       },
+       finishReason: this.mapFinishReason(response.candidates?.[0]?.finishReason),
+       metadata: {
+         provider: this.id,
+         latencyMs: Date.now() - startTime
+       }
+     };
+   }
+
+   async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
+     const model = this.client.getGenerativeModel({ model: request.model });
+
+     const chat = model.startChat({
+       history: this.convertHistory(request.messages),
+       generationConfig: {
+         temperature: request.temperature,
+         maxOutputTokens: request.maxTokens
+       }
+     });
+
+     const lastMessage = request.messages[request.messages.length - 1];
+     const result = await chat.sendMessageStream(
+       typeof lastMessage.content === 'string' ? lastMessage.content : ''
+     );
+
+     for await (const chunk of result.stream) {
+       const text = chunk.text();
+       if (text) {
+         yield { type: 'content', content: text };
+       }
+     }
+
+     yield { type: 'done' };
+   }
+
+   async embed(texts: string[]): Promise<EmbeddingResponse> {
+     const model = this.client.getGenerativeModel({ model: 'text-embedding-004' });
+
+     const embeddings: number[][] = [];
+     for (const text of texts) {
+       const result = await model.embedContent(text);
+       embeddings.push(result.embedding.values);
+     }
+
+     return {
+       embeddings,
+       usage: { totalTokens: 0 } // Gemini doesn't report token usage for embeddings
+     };
+   }
+
+   async countTokens(text: string, model?: string): Promise<number> {
+     const genModel = this.client.getGenerativeModel({
+       model: model || this.config.defaultModel!
+     });
+     const result = await genModel.countTokens(text);
+     return result.totalTokens;
+   }
+
+   async healthCheck(): Promise<HealthStatus> {
+     const startTime = Date.now();
+     try {
+       const model = this.client.getGenerativeModel({ model: 'gemini-1.5-flash' });
+       await model.countTokens('test');
+       return {
+         healthy: true,
+         latencyMs: Date.now() - startTime,
+         lastCheck: new Date().toISOString()
+       };
+     } catch (error) {
+       return {
+         healthy: false,
+         latencyMs: Date.now() - startTime,
+         lastCheck: new Date().toISOString(),
+         error: error instanceof Error ? error.message : 'Unknown error'
+       };
+     }
+   }
+
+   getStatus(): ProviderStatus {
+     return {
+       id: this.id,
+       name: this.name,
+       healthy: true,
+       enabled: true,
+       availableModels: Object.keys(GeminiProvider.MODELS)
+     };
+   }
+
+   async shutdown(): Promise<void> {}
+
+   private convertHistory(messages: Message[]): GeminiContent[] {
+     return messages.slice(0, -1).map(m => ({
+       role: m.role === 'assistant' ? 'model' : 'user',
+       parts: typeof m.content === 'string'
+         ? [{ text: m.content }]
+         : this.convertParts(m.content)
+     }));
+   }
+
+   private convertParts(content: ContentBlock[]): GeminiPart[] {
+     return content.map(b => {
+       if (b.type === 'text') return { text: b.text! };
+       if (b.type === 'image') {
+         return {
+           inlineData: {
+             mimeType: b.mimeType || 'image/jpeg',
+             data: b.imageBase64!
+           }
+         };
+       }
+       return { text: '' };
+     });
+   }
+
+   private extractToolCalls(response: GeminiResponse): ToolCall[] | undefined {
+     const functionCalls = response.functionCalls();
+     if (!functionCalls || functionCalls.length === 0) return undefined;
+
+     return functionCalls.map((fc, i) => ({
+       id: `call-${i}`,
+       name: fc.name,
+       arguments: JSON.stringify(fc.args)
+     }));
+   }
+
+   private mapFinishReason(reason?: string): CompletionResponse['finishReason'] {
+     switch (reason) {
+       case 'STOP': return 'stop';
+       case 'MAX_TOKENS': return 'length';
+       case 'SAFETY': return 'content_filter';
+       default: return 'stop';
+     }
+   }
+ }
+ ```
+
+ ### LocalProvider (Ollama)
+
+ ```typescript
+ interface LocalConfig extends ProviderConfig {
+   baseUrl?: string;
+   defaultModel?: string;
+ }
+
+ class LocalProvider implements LLMProvider<LocalConfig> {
+   readonly id = 'local';
+   readonly name = 'Ollama Local';
+   readonly version = '1.0.0';
+   readonly config: LocalConfig;
+
+   static readonly MODELS = {
+     'llama3': {
+       id: 'llama3:latest',
+       contextWindow: 8192,
+       maxOutput: 4096,
+       inputPrice: 0,
+       outputPrice: 0,
+       capabilities: ['tools']
+     },
+     'llama3.2': {
+       id: 'llama3.2:latest',
+       contextWindow: 128000,
+       maxOutput: 4096,
+       inputPrice: 0,
+       outputPrice: 0,
+       capabilities: ['tools', 'vision']
+     },
+     'mistral': {
+       id: 'mistral:latest',
+       contextWindow: 32768,
+       maxOutput: 4096,
+       inputPrice: 0,
+       outputPrice: 0,
+       capabilities: ['tools']
+     },
+     'codellama': {
+       id: 'codellama:latest',
+       contextWindow: 16384,
+       maxOutput: 4096,
+       inputPrice: 0,
+       outputPrice: 0,
+       capabilities: ['code']
+     },
+     'deepseek-coder': {
+       id: 'deepseek-coder:latest',
+       contextWindow: 16384,
+       maxOutput: 4096,
+       inputPrice: 0,
+       outputPrice: 0,
+       capabilities: ['code']
+     }
+   };
+
+   constructor(config: LocalConfig = {}) {
+     this.config = {
+       baseUrl: 'http://localhost:11434',
+       defaultModel: 'llama3',
+       timeout: 300000,
+       maxRetries: 1,
+       ...config
+     };
+   }
+
+   async initialize(): Promise<void> {
+     // Verify Ollama is running
+     await this.healthCheck();
+   }
+
+   async complete(request: CompletionRequest): Promise<CompletionResponse> {
+     const startTime = Date.now();
+
+     const response = await fetch(`${this.config.baseUrl}/api/chat`, {
+       method: 'POST',
+       headers: { 'Content-Type': 'application/json' },
+       body: JSON.stringify({
+         model: this.resolveModel(request.model),
+         messages: this.convertMessages(request.messages),
+         options: {
+           temperature: request.temperature,
+           num_predict: request.maxTokens,
+           top_p: request.topP,
+           stop: request.stopSequences
+         },
+         stream: false
+       })
+     });
+
+     if (!response.ok) {
+       throw new Error(`Ollama error: ${response.statusText}`);
+     }
+
+     const data = await response.json();
+
+     return {
+       id: `ollama-${Date.now()}`,
+       model: request.model,
+       content: data.message.content,
+       usage: {
+         promptTokens: data.prompt_eval_count || 0,
+         completionTokens: data.eval_count || 0,
+         totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
+       },
+       finishReason: 'stop',
+       metadata: {
+         provider: this.id,
+         latencyMs: Date.now() - startTime
+       }
+     };
+   }
+
+   async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
+     const response = await fetch(`${this.config.baseUrl}/api/chat`, {
+       method: 'POST',
+       headers: { 'Content-Type': 'application/json' },
+       body: JSON.stringify({
+         model: this.resolveModel(request.model),
+         messages: this.convertMessages(request.messages),
+         options: {
+           temperature: request.temperature,
+           num_predict: request.maxTokens
+         },
+         stream: true
+       })
+     });
+
+     const reader = response.body!.getReader();
+     const decoder = new TextDecoder();
+     let buffer = '';
+
+     while (true) {
+       const { done, value } = await reader.read();
+       if (done) break;
+
+       // NDJSON lines can be split across network chunks; buffer the partial tail
+       buffer += decoder.decode(value, { stream: true });
+       const lines = buffer.split('\n');
+       buffer = lines.pop() || '';
+
+       for (const line of lines) {
+         if (!line.trim()) continue;
+         const data = JSON.parse(line);
+         if (data.message?.content) {
+           yield { type: 'content', content: data.message.content };
+         }
+         if (data.done) {
+           yield {
+             type: 'usage',
+             usage: {
+               promptTokens: data.prompt_eval_count,
+               completionTokens: data.eval_count
+             }
+           };
+           yield { type: 'done' };
+         }
+       }
+     }
+   }
+
+   async embed(texts: string[]): Promise<EmbeddingResponse> {
+     const embeddings: number[][] = [];
+
+     for (const text of texts) {
+       const response = await fetch(`${this.config.baseUrl}/api/embeddings`, {
+         method: 'POST',
+         headers: { 'Content-Type': 'application/json' },
+         body: JSON.stringify({
+           model: 'nomic-embed-text',
+           prompt: text
+         })
+       });
+
+       const data = await response.json();
+       embeddings.push(data.embedding);
+     }
+
+     return { embeddings, usage: { totalTokens: 0 } };
+   }
+
+   async countTokens(text: string): Promise<number> {
+     // Approximate token count for local models
+     return Math.ceil(text.length / 4);
+   }
+
+   async healthCheck(): Promise<HealthStatus> {
+     const startTime = Date.now();
+     try {
+       const response = await fetch(`${this.config.baseUrl}/api/tags`);
+       if (!response.ok) throw new Error('Ollama not responding');
+
+       return {
+         healthy: true,
+         latencyMs: Date.now() - startTime,
+         lastCheck: new Date().toISOString()
+       };
+     } catch (error) {
+       return {
+         healthy: false,
+         latencyMs: Date.now() - startTime,
+         lastCheck: new Date().toISOString(),
+         error: error instanceof Error ? error.message : 'Ollama not available'
+       };
+     }
+   }
+
+   getStatus(): ProviderStatus {
+     return {
+       id: this.id,
+       name: this.name,
+       healthy: true,
+       enabled: true,
+       availableModels: Object.keys(LocalProvider.MODELS)
+     };
+   }
+
+   async shutdown(): Promise<void> {}
+
+   private resolveModel(model: string): string {
+     const modelInfo = LocalProvider.MODELS[model as keyof typeof LocalProvider.MODELS];
+     return modelInfo?.id || model;
+   }
+
+   private convertMessages(messages: Message[]): OllamaMessage[] {
+     return messages.map(m => ({
+       role: m.role === 'tool' ? 'assistant' : m.role,
+       content: typeof m.content === 'string' ? m.content : m.content.map(b => b.text).join('')
+     }));
+   }
+ }
+ ```
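+
+ Because local models are priced at zero, one possible pattern (sketched here with a hypothetical cloud fallback) is to try Ollama first and fall through when it is not running:
+
+ ```typescript
+ const local = new LocalProvider(); // defaults to http://localhost:11434
+ const cloud = new OpenAIProvider({ apiKey: process.env.OPENAI_API_KEY! });
+ await cloud.initialize();
+
+ const health = await local.healthCheck();
+ const provider: LLMProvider = health.healthy ? local : cloud;
+
+ const reply = await provider.complete({
+   model: health.healthy ? 'llama3.2' : 'gpt-4o-mini',
+   messages: [{ role: 'user', content: 'Summarize this diff in one line.' }]
+ });
+ ```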
+
+ ### AzureProvider
+
+ ```typescript
+ interface AzureConfig extends ProviderConfig {
+   apiKey: string;
+   endpoint: string;
+   apiVersion?: string;
+   deployments: Record<string, string>; // model -> deployment name
+ }
+
+ class AzureProvider implements LLMProvider<AzureConfig> {
+   readonly id = 'azure';
+   readonly name = 'Azure OpenAI';
+   readonly version = '1.0.0';
+   readonly config: AzureConfig;
+
+   private client: AzureOpenAI;
+
+   constructor(config: AzureConfig) {
+     this.config = {
+       apiVersion: '2024-02-01',
+       timeout: 120000,
+       maxRetries: 3,
+       ...config
+     };
+   }
+
+   async initialize(): Promise<void> {
+     this.client = new AzureOpenAI({
+       apiKey: this.config.apiKey,
+       endpoint: this.config.endpoint,
+       apiVersion: this.config.apiVersion
+     });
+   }
+
+   async complete(request: CompletionRequest): Promise<CompletionResponse> {
+     const startTime = Date.now();
+     const deployment = this.getDeployment(request.model);
+
+     const response = await this.client.chat.completions.create({
+       model: deployment,
+       messages: this.convertMessages(request.messages),
+       temperature: request.temperature,
+       max_tokens: request.maxTokens,
+       top_p: request.topP,
+       stop: request.stopSequences
+     });
+
+     const choice = response.choices[0];
+
+     return {
+       id: response.id,
+       model: request.model,
+       content: choice.message.content || '',
+       toolCalls: choice.message.tool_calls?.map(tc => ({
+         id: tc.id,
+         name: tc.function.name,
+         arguments: tc.function.arguments
+       })),
+       usage: {
+         promptTokens: response.usage?.prompt_tokens || 0,
+         completionTokens: response.usage?.completion_tokens || 0,
+         totalTokens: response.usage?.total_tokens || 0
+       },
+       finishReason: this.mapFinishReason(choice.finish_reason),
+       metadata: {
+         provider: this.id,
+         latencyMs: Date.now() - startTime
+       }
+     };
+   }
+
+   async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
+     const deployment = this.getDeployment(request.model);
+
+     const stream = await this.client.chat.completions.create({
+       model: deployment,
+       messages: this.convertMessages(request.messages),
+       temperature: request.temperature,
+       max_tokens: request.maxTokens,
+       stream: true
+     });
+
+     for await (const chunk of stream) {
+       const choice = chunk.choices[0];
+       if (choice?.delta?.content) {
+         yield { type: 'content', content: choice.delta.content };
+       }
+       if (choice?.finish_reason) {
+         yield { type: 'done', finishReason: choice.finish_reason };
+       }
+     }
+   }
+
+   async countTokens(text: string, model?: string): Promise<number> {
+     const encoding = getEncoding('cl100k_base');
+     return encoding.encode(text).length;
+   }
+
+   async healthCheck(): Promise<HealthStatus> {
+     const startTime = Date.now();
+     try {
+       await this.client.chat.completions.create({
+         model: Object.values(this.config.deployments)[0],
+         messages: [{ role: 'user', content: 'Hi' }],
+         max_tokens: 5
+       });
+       return {
+         healthy: true,
+         latencyMs: Date.now() - startTime,
+         lastCheck: new Date().toISOString()
+       };
+     } catch (error) {
+       return {
+         healthy: false,
+         latencyMs: Date.now() - startTime,
+         lastCheck: new Date().toISOString(),
+         error: error instanceof Error ? error.message : 'Unknown error'
+       };
+     }
+   }
+
+   getStatus(): ProviderStatus {
+     return {
+       id: this.id,
+       name: this.name,
+       healthy: true,
+       enabled: true,
+       availableModels: Object.keys(this.config.deployments)
+     };
+   }
+
+   async shutdown(): Promise<void> {}
+
+   private getDeployment(model: string): string {
+     const deployment = this.config.deployments[model];
+     if (!deployment) {
+       throw new Error(`No Azure deployment configured for model: ${model}`);
+     }
+     return deployment;
+   }
+
+   private convertMessages(messages: Message[]): AzureMessage[] {
+     return messages.map(m => ({
+       role: m.role,
+       content: typeof m.content === 'string' ? m.content : JSON.stringify(m.content)
+     }));
+   }
+
+   private mapFinishReason(reason: string | null): CompletionResponse['finishReason'] {
+     switch (reason) {
+       case 'stop': return 'stop';
+       case 'length': return 'length';
+       case 'tool_calls': return 'tool_calls';
+       case 'content_filter': return 'content_filter';
+       default: return 'stop';
+     }
+   }
+ }
+ ```
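+
+ A hypothetical Azure configuration: `deployments` maps the portable model names used elsewhere in this document to the deployment names created in your Azure resource (endpoint and names below are placeholders):
+
+ ```typescript
+ const azure = new AzureProvider({
+   apiKey: process.env.AZURE_OPENAI_API_KEY!,
+   endpoint: 'https://my-resource.openai.azure.com', // placeholder resource
+   deployments: {
+     'gpt-4o': 'prod-gpt4o',           // model -> deployment name
+     'gpt-4o-mini': 'prod-gpt4o-mini'
+   }
+ });
+ await azure.initialize();
+
+ console.log(azure.getStatus().availableModels); // ['gpt-4o', 'gpt-4o-mini']
+ ```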
+
+ ---
+
+ ## ModelRouter
+
+ ### Intelligent Routing
+
+ ```typescript
+ interface RoutingRule {
+   id: string;
+   name: string;
+   conditions: RoutingCondition[];
+   target: RoutingTarget;
+   priority: number;
+   enabled: boolean;
+ }
+
+ interface RoutingCondition {
+   type: 'task_type' | 'token_count' | 'cost_limit' | 'capability' | 'time_of_day' | 'custom';
+   operator: 'eq' | 'ne' | 'gt' | 'lt' | 'gte' | 'lte' | 'contains' | 'matches';
+   value: unknown;
+   field?: string;
+ }
+
+ interface RoutingTarget {
+   type: 'model' | 'provider' | 'alias' | 'fallback_chain';
+   value: string | string[];
+ }
+
+ interface TaskContext {
+   taskType: TaskType;
+   estimatedTokens: number;
+   maxCost?: number;
+   requiredCapabilities?: string[];
+   priority?: 'low' | 'normal' | 'high' | 'critical';
+   metadata?: Record<string, unknown>;
+ }
+
+ type TaskType =
+   | 'code_generation'
+   | 'code_review'
+   | 'documentation'
+   | 'analysis'
+   | 'conversation'
+   | 'summarization'
+   | 'translation'
+   | 'creative'
+   | 'reasoning'
+   | 'quick_task';
+
+ class ModelRouter {
+   private registry: ProviderRegistry;
+   private rules: RoutingRule[] = [];
+   private aliases: Map<string, string> = new Map();
+   private rateLimiters: Map<string, RateLimiter> = new Map();
+   private config: RouterConfig;
+
+   constructor(registry: ProviderRegistry, config: RouterConfig) {
+     this.registry = registry;
+     this.config = config;
+     this.loadDefaultRules();
+     this.loadAliases();
+   }
+
+   // Route request to best model/provider
+   async route(
+     request: CompletionRequest,
+     context: TaskContext
+   ): Promise<{ provider: LLMProvider; model: string }> {
+     // Resolve alias if present
+     const resolvedModel = this.resolveAlias(request.model);
+
+     // Find matching rules
+     const matchingRules = this.rules
+       .filter(r => r.enabled && this.evaluateConditions(r.conditions, context, request))
+       .sort((a, b) => b.priority - a.priority);
+
+     if (matchingRules.length > 0) {
+       const rule = matchingRules[0];
+       return this.resolveTarget(rule.target, context);
+     }
+
+     // Default routing: find provider for model
+     const provider = this.registry.getProviderForModel(resolvedModel);
+     if (!provider) {
+       // Try fallback chain
+       return this.tryFallbackChain(resolvedModel, context);
+     }
+
+     // Check rate limits
+     await this.checkRateLimit(provider.id, resolvedModel);
+
+     return { provider, model: resolvedModel };
+   }
+
+   // Cost-optimized routing
+   async routeOptimized(
+     request: CompletionRequest,
+     context: TaskContext
+   ): Promise<{ provider: LLMProvider; model: string }> {
+     const candidates = this.getCandidateModels(context);
+
+     // Sort by cost (ascending)
+     const sortedCandidates = candidates.sort((a, b) => {
+       const costA = this.estimateCost(a.model, context.estimatedTokens);
+       const costB = this.estimateCost(b.model, context.estimatedTokens);
+       return costA - costB;
+     });
+
+     // Find cheapest that meets requirements
+     for (const candidate of sortedCandidates) {
+       if (this.meetsRequirements(candidate.model, context)) {
+         const provider = this.registry.getProviderForModel(candidate.model);
+         if (provider) {
+           await this.checkRateLimit(provider.id, candidate.model);
+           return { provider, model: candidate.model };
+         }
+       }
+     }
+
+     // Fallback to default
+     return this.route(request, context);
+   }
+
+   // Load balancing across providers
+   async routeBalanced(
+     request: CompletionRequest,
+     context: TaskContext
+   ): Promise<{ provider: LLMProvider; model: string }> {
+     const resolvedModel = this.resolveAlias(request.model);
+     const compatibleProviders = this.getProvidersForModel(resolvedModel);
+
+     if (compatibleProviders.length === 0) {
+       return this.tryFallbackChain(resolvedModel, context);
+     }
+
+     // Round-robin with health awareness
+     const healthyProviders = compatibleProviders.filter(p => {
+       const status = this.registry.get(p.id)?.getStatus();
+       return status?.healthy;
+     });
+
+     if (healthyProviders.length === 0) {
+       throw new Error(`No healthy providers available for model: ${resolvedModel}`);
+     }
+
+     // Select based on current load
+     const selected = this.selectLeastLoaded(healthyProviders);
+     await this.checkRateLimit(selected.id, resolvedModel);
+
+     return { provider: selected, model: resolvedModel };
+   }
+
+   // Add routing rule
+   addRule(rule: RoutingRule): void {
+     this.rules.push(rule);
+     this.rules.sort((a, b) => b.priority - a.priority);
+   }
+
+   // Remove routing rule
+   removeRule(ruleId: string): boolean {
+     const index = this.rules.findIndex(r => r.id === ruleId);
+     if (index >= 0) {
+       this.rules.splice(index, 1);
+       return true;
+     }
+     return false;
+   }
+
+   // Set model alias
+   setAlias(alias: string, model: string): void {
+     this.aliases.set(alias, model);
+   }
+
+   // Resolve model alias
+   resolveAlias(modelOrAlias: string): string {
+     return this.aliases.get(modelOrAlias) || modelOrAlias;
+   }
+
+   // Estimate cost for request
+   estimateCost(model: string, tokens: number): number {
+     const modelInfo = this.getModelInfo(model);
+     if (!modelInfo) return 0;
+
+     const inputTokens = Math.ceil(tokens * 0.7);
+     const outputTokens = Math.ceil(tokens * 0.3);
+
+     return (
+       (inputTokens / 1_000_000) * modelInfo.inputPrice +
+       (outputTokens / 1_000_000) * modelInfo.outputPrice
+     );
+   }
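+
+   // Worked example (illustrative numbers): estimateCost('claude-sonnet-4', 10_000)
+   // with the fixed 70/30 input/output split above and $3 / $15 per 1M tokens:
+   //   input:  7,000 / 1,000,000 * 3.0  = $0.021
+   //   output: 3,000 / 1,000,000 * 15.0 = $0.045
+   //   total ≈ $0.066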
+
+   private loadDefaultRules(): void {
+     this.rules = [
+       {
+         id: 'quick-tasks',
+         name: 'Route quick tasks to fast models',
+         conditions: [
+           { type: 'task_type', operator: 'eq', value: 'quick_task' }
+         ],
+         target: { type: 'alias', value: 'fast' },
+         priority: 100,
+         enabled: true
+       },
+       {
+         id: 'code-generation',
+         name: 'Route code generation to capable models',
+         conditions: [
+           { type: 'task_type', operator: 'eq', value: 'code_generation' },
+           { type: 'token_count', operator: 'gt', value: 1000 }
+         ],
+         target: { type: 'alias', value: 'smart' },
+         priority: 90,
+         enabled: true
+       },
+       {
+         id: 'reasoning',
+         name: 'Route complex reasoning to best models',
+         conditions: [
+           { type: 'task_type', operator: 'eq', value: 'reasoning' }
+         ],
+         target: { type: 'model', value: 'claude-opus-4-5' },
+         priority: 95,
+         enabled: true
+       },
+       {
+         id: 'cost-sensitive',
+         name: 'Route cost-sensitive requests to cheap models',
+         conditions: [
+           { type: 'cost_limit', operator: 'lt', value: 0.01 }
+         ],
+         target: { type: 'alias', value: 'cheap' },
+         priority: 80,
+         enabled: true
+       }
+     ];
+   }
+
+   private loadAliases(): void {
+     // Model aliases for convenience
+     this.aliases.set('fast', 'claude-3-5-haiku');
+     this.aliases.set('smart', 'claude-sonnet-4');
+     this.aliases.set('best', 'claude-opus-4-5');
+     this.aliases.set('cheap', 'gpt-4o-mini');
+     this.aliases.set('local', 'llama3');
+     this.aliases.set('code', 'claude-sonnet-4');
+     this.aliases.set('vision', 'gpt-4o');
+   }
+
+   private evaluateConditions(
+     conditions: RoutingCondition[],
+     context: TaskContext,
+     request: CompletionRequest
+   ): boolean {
+     return conditions.every(c => this.evaluateCondition(c, context, request));
+   }
+
+   private evaluateCondition(
+     condition: RoutingCondition,
+     context: TaskContext,
+     request: CompletionRequest
+   ): boolean {
+     let value: unknown;
+
+     switch (condition.type) {
+       case 'task_type':
+         value = context.taskType;
+         break;
+       case 'token_count':
+         value = context.estimatedTokens;
+         break;
+       case 'cost_limit':
+         value = context.maxCost;
+         break;
+       case 'capability':
+         value = context.requiredCapabilities;
+         break;
+       case 'custom':
+         value = condition.field ? context.metadata?.[condition.field] : undefined;
+         break;
+       default:
+         return false;
+     }
+
+     return this.compareValues(value, condition.operator, condition.value);
+   }
+
+   private compareValues(actual: unknown, operator: string, expected: unknown): boolean {
+     switch (operator) {
+       case 'eq': return actual === expected;
+       case 'ne': return actual !== expected;
+       case 'gt': return Number(actual) > Number(expected);
+       case 'lt': return Number(actual) < Number(expected);
+       case 'gte': return Number(actual) >= Number(expected);
+       case 'lte': return Number(actual) <= Number(expected);
+       case 'contains':
+         return Array.isArray(actual) && actual.includes(expected);
+       case 'matches':
+         return typeof actual === 'string' && new RegExp(String(expected)).test(actual);
+       default:
+         return false;
+     }
+   }
+
+   private async resolveTarget(
+     target: RoutingTarget,
+     context: TaskContext
+   ): Promise<{ provider: LLMProvider; model: string }> {
+     switch (target.type) {
+       case 'model': {
+         const model = target.value as string;
+         const provider = this.registry.getProviderForModel(model);
+         if (!provider) throw new Error(`No provider for model: ${model}`);
+         return { provider, model };
+       }
+       case 'alias': {
+         const model = this.resolveAlias(target.value as string);
+         const provider = this.registry.getProviderForModel(model);
+         if (!provider) throw new Error(`No provider for alias: ${target.value}`);
+         return { provider, model };
+       }
+       case 'fallback_chain': {
+         const models = target.value as string[];
+         return this.tryFallbackChainFromList(models, context);
+       }
+       default:
+         throw new Error(`Unknown target type: ${target.type}`);
1871
+ }
1872
+ }
1873
+
1874
+ private async tryFallbackChain(
1875
+ model: string,
1876
+ context: TaskContext
1877
+ ): Promise<{ provider: LLMProvider; model: string }> {
1878
+ const fallbackChain = this.config.fallbackChains[model] || this.config.defaultFallbackChain;
1879
+ return this.tryFallbackChainFromList(fallbackChain, context);
1880
+ }
1881
+
1882
+ private async tryFallbackChainFromList(
1883
+ models: string[],
1884
+ context: TaskContext
1885
+ ): Promise<{ provider: LLMProvider; model: string }> {
1886
+ for (const model of models) {
1887
+ const provider = this.registry.getProviderForModel(model);
1888
+ if (provider) {
1889
+ const status = provider.getStatus();
1890
+ if (status.healthy) {
1891
+ return { provider, model };
1892
+ }
1893
+ }
1894
+ }
1895
+ throw new Error('All providers in fallback chain unavailable');
1896
+ }
1897
+
1898
+ private getCandidateModels(context: TaskContext): { model: string; provider: string }[] {
1899
+ const candidates: { model: string; provider: string }[] = [];
1900
+
1901
+ for (const status of this.registry.list()) {
1902
+ if (!status.healthy || !status.enabled) continue;
1903
+
1904
+ for (const model of status.availableModels) {
1905
+ if (this.meetsCapabilities(model, context.requiredCapabilities)) {
1906
+ candidates.push({ model, provider: status.id });
1907
+ }
1908
+ }
1909
+ }
1910
+
1911
+ return candidates;
1912
+ }
1913
+
1914
+ private meetsCapabilities(model: string, required?: string[]): boolean {
1915
+ if (!required || required.length === 0) return true;
1916
+
1917
+ const modelInfo = this.getModelInfo(model);
1918
+ if (!modelInfo) return false;
1919
+
1920
+ return required.every(cap => modelInfo.capabilities.includes(cap));
1921
+ }
1922
+
1923
+ private meetsRequirements(model: string, context: TaskContext): boolean {
1924
+ const modelInfo = this.getModelInfo(model);
1925
+ if (!modelInfo) return false;
1926
+
1927
+ // Check context window
1928
+ if (context.estimatedTokens > modelInfo.contextWindow) return false;
1929
+
1930
+ // Check cost limit
1931
+ if (context.maxCost) {
1932
+ const estimatedCost = this.estimateCost(model, context.estimatedTokens);
1933
+ if (estimatedCost > context.maxCost) return false;
1934
+ }
1935
+
1936
+ // Check capabilities
1937
+ if (!this.meetsCapabilities(model, context.requiredCapabilities)) return false;
1938
+
1939
+ return true;
1940
+ }
1941
+
1942
+ private getModelInfo(model: string): ModelInfo | undefined {
1943
+ // Check all providers for model info
1944
+ const allModels = {
1945
+ ...ClaudeProvider.MODELS,
1946
+ ...OpenAIProvider.MODELS,
1947
+ ...GeminiProvider.MODELS,
1948
+ ...LocalProvider.MODELS
1949
+ };
1950
+
1951
+ return allModels[model as keyof typeof allModels];
1952
+ }
1953
+
1954
+ private getProvidersForModel(model: string): LLMProvider[] {
1955
+ return this.registry.list()
1956
+ .filter(s => s.availableModels.includes(model) && s.healthy && s.enabled)
1957
+ .map(s => this.registry.get(s.id)!)
1958
+ .filter(Boolean);
1959
+ }
1960
+
1961
+ private selectLeastLoaded(providers: LLMProvider[]): LLMProvider {
1962
+ let minLoad = Infinity;
1963
+ let selected = providers[0];
1964
+
1965
+ for (const provider of providers) {
1966
+ const metrics = this.registry.getMetrics(provider.id);
1967
+ const load = metrics?.totalRequests || 0;
1968
+ if (load < minLoad) {
1969
+ minLoad = load;
1970
+ selected = provider;
1971
+ }
1972
+ }
1973
+
1974
+ return selected;
1975
+ }
1976
+
1977
+ private async checkRateLimit(providerId: string, model: string): Promise<void> {
1978
+ const key = `${providerId}:${model}`;
1979
+ let limiter = this.rateLimiters.get(key);
1980
+
1981
+ if (!limiter) {
1982
+ const modelInfo = this.getModelInfo(model);
1983
+ limiter = new RateLimiter({
1984
+ requestsPerMinute: modelInfo?.rateLimit?.requestsPerMinute || 60,
1985
+ tokensPerMinute: modelInfo?.rateLimit?.tokensPerMinute || 100000
1986
+ });
1987
+ this.rateLimiters.set(key, limiter);
1988
+ }
1989
+
1990
+ await limiter.acquire();
1991
+ }
1992
+ }
1993
+ ```
1994
+
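+ Rules and aliases are mutable at runtime through `addRule`, `removeRule`, and `setAlias`. A minimal sketch of both (the `router` instance and the `translation` task type are illustrative; the rule shape mirrors `loadDefaultRules` above):
+
+ ```typescript
+ // Hypothetical rule: prefer the local model for translation tasks
+ router.addRule({
+   id: 'translation-local',
+   name: 'Route translation tasks to the local model',
+   conditions: [
+     { type: 'task_type', operator: 'eq', value: 'translation' }
+   ],
+   target: { type: 'alias', value: 'local' },
+   priority: 85,
+   enabled: true
+ });
+
+ // Repoint the 'cheap' alias without touching any rules that reference it
+ router.setAlias('cheap', 'gemini-2.0-flash');
+ console.log(router.resolveAlias('cheap')); // 'gemini-2.0-flash'
+ ```
+
+ Because `addRule` re-sorts by descending priority, this rule is evaluated after `code-generation` (priority 90) but before the built-in `cost-sensitive` rule (priority 80).
+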
+ ---
+
+ ## UnifiedAPI
+
+ ### Consistent API
+
+ ```typescript
+ class UnifiedLLMAPI {
+   private registry: ProviderRegistry;
+   private router: ModelRouter;
+   private config: UnifiedAPIConfig;
+
+   constructor(config: UnifiedAPIConfig) {
+     this.config = config;
+     this.registry = new ProviderRegistry(config.registry);
+     this.router = new ModelRouter(this.registry, config.router);
+   }
+
+   // Initialize all providers
+   async initialize(): Promise<void> {
+     // Register configured providers
+     if (this.config.providers.claude?.enabled) {
+       await this.registry.register(
+         new ClaudeProvider(this.config.providers.claude),
+         { priority: 1 }
+       );
+     }
+
+     if (this.config.providers.openai?.enabled) {
+       await this.registry.register(
+         new OpenAIProvider(this.config.providers.openai),
+         { priority: 2 }
+       );
+     }
+
+     if (this.config.providers.gemini?.enabled) {
+       await this.registry.register(
+         new GeminiProvider(this.config.providers.gemini),
+         { priority: 3 }
+       );
+     }
+
+     if (this.config.providers.local?.enabled) {
+       await this.registry.register(
+         new LocalProvider(this.config.providers.local),
+         { priority: 10 }
+       );
+     }
+
+     if (this.config.providers.azure?.enabled) {
+       await this.registry.register(
+         new AzureProvider(this.config.providers.azure),
+         { priority: 2 }
+       );
+     }
+
+     // Start health checks
+     this.registry.startHealthChecks(this.config.healthCheckInterval);
+   }
+
+   // Main completion method
+   async complete(
+     request: CompletionRequest,
+     options: CompletionOptions = {}
+   ): Promise<CompletionResponse> {
+     const context = this.buildContext(request, options);
+
+     // Route to best provider/model
+     const { provider, model } = options.costOptimized
+       ? await this.router.routeOptimized(request, context)
+       : await this.router.route(request, context);
+
+     const startTime = Date.now();
+
+     try {
+       // Execute with retry and fallback
+       const response = await this.executeWithFallback(
+         provider,
+         { ...request, model },
+         context
+       );
+
+       // Record metrics
+       const latencyMs = Date.now() - startTime;
+       const cost = this.calculateCost(model, response.usage);
+
+       this.registry.recordMetrics(
+         provider.id,
+         true,
+         latencyMs,
+         response.usage.totalTokens,
+         cost
+       );
+
+       // Emit event
+       EventBus.publish('llm.completion', {
+         provider: provider.id,
+         model,
+         tokens: response.usage.totalTokens,
+         latencyMs,
+         cost
+       });
+
+       return response;
+
+     } catch (error) {
+       this.registry.recordMetrics(provider.id, false, Date.now() - startTime, 0, 0);
+       throw error;
+     }
+   }
+
+   // Streaming completion
+   async *stream(
+     request: CompletionRequest,
+     options: CompletionOptions = {}
+   ): AsyncIterable<StreamChunk> {
+     const context = this.buildContext(request, options);
+     const { provider, model } = await this.router.route(request, context);
+
+     yield* provider.stream({ ...request, model });
+   }
+
+   // Chat convenience method
+   async chat(
+     messages: Message[],
+     options: ChatOptions = {}
+   ): Promise<CompletionResponse> {
+     return this.complete({
+       model: options.model || this.config.defaultModel,
+       messages,
+       temperature: options.temperature,
+       maxTokens: options.maxTokens,
+       tools: options.tools,
+       toolChoice: options.toolChoice
+     }, options);
+   }
+
+   // Embed texts
+   async embed(
+     texts: string[],
+     options: EmbedOptions = {}
+   ): Promise<EmbeddingResponse> {
+     const providerId = options.provider || 'openai';
+     const provider = this.registry.get(providerId);
+
+     if (!provider?.embed) {
+       throw new Error(`Provider ${providerId} does not support embeddings`);
+     }
+
+     return provider.embed(texts);
+   }
+
+   // Count tokens
+   async countTokens(
+     text: string,
+     model?: string
+   ): Promise<number> {
+     const targetModel = model || this.config.defaultModel;
+     const provider = this.registry.getProviderForModel(targetModel);
+
+     if (!provider) {
+       // Fall back to an approximate count (~4 characters per token)
+       return Math.ceil(text.length / 4);
+     }
+
+     return provider.countTokens(text, targetModel);
+   }
+
+   // Estimate cost (delegates to the router, which re-applies its own
+   // 70/30 input/output split to the combined token count)
+   estimateCost(
+     model: string,
+     inputTokens: number,
+     outputTokens: number
+   ): number {
+     return this.router.estimateCost(model, inputTokens + outputTokens);
+   }
+
+   // Get provider status
+   getProviderStatus(): ProviderStatus[] {
+     return this.registry.list();
+   }
+
+   // Get available models
+   getAvailableModels(): ModelInfo[] {
+     const models: ModelInfo[] = [];
+
+     for (const status of this.registry.list()) {
+       if (!status.enabled) continue;
+
+       for (const model of status.availableModels) {
+         models.push({
+           id: model,
+           provider: status.id,
+           ...this.getModelDetails(model)
+         });
+       }
+     }
+
+     return models;
+   }
+
+   // Shutdown
+   async shutdown(): Promise<void> {
+     await this.registry.shutdown();
+   }
+
+   private buildContext(
+     request: CompletionRequest,
+     options: CompletionOptions
+   ): TaskContext {
+     return {
+       taskType: options.taskType || 'conversation',
+       estimatedTokens: options.estimatedTokens || this.estimateTokens(request),
+       maxCost: options.maxCost,
+       requiredCapabilities: options.requiredCapabilities,
+       priority: options.priority,
+       metadata: options.metadata
+     };
+   }
+
+   private estimateTokens(request: CompletionRequest): number {
+     let total = 0;
+     for (const msg of request.messages) {
+       const content = typeof msg.content === 'string'
+         ? msg.content
+         : msg.content.map(b => b.text || '').join('');
+       total += Math.ceil(content.length / 4);
+     }
+     return total + (request.maxTokens || 1000);
+   }
+
+   private async executeWithFallback(
+     provider: LLMProvider,
+     request: CompletionRequest,
+     context: TaskContext,
+     attempt: number = 1
+   ): Promise<CompletionResponse> {
+     try {
+       return await provider.complete(request);
+     } catch (error) {
+       if (attempt >= this.config.maxRetries) {
+         // Try a fallback provider before giving up
+         if (this.config.enableFallback) {
+           const fallbackResult = await this.router.routeBalanced(request, context);
+           if (fallbackResult.provider.id !== provider.id) {
+             return fallbackResult.provider.complete({
+               ...request,
+               model: fallbackResult.model
+             });
+           }
+         }
+         throw error;
+       }
+
+       // Retry with exponential backoff
+       await this.sleep(Math.pow(2, attempt) * 1000);
+       return this.executeWithFallback(provider, request, context, attempt + 1);
+     }
+   }
+
+   private calculateCost(model: string, usage: TokenUsage): number {
+     const modelInfo = this.getModelDetails(model);
+     if (!modelInfo) return 0;
+
+     return (
+       (usage.promptTokens / 1_000_000) * modelInfo.inputPrice +
+       (usage.completionTokens / 1_000_000) * modelInfo.outputPrice
+     );
+   }
+
+   private getModelDetails(model: string): ModelDetails | undefined {
+     const allModels = {
+       ...ClaudeProvider.MODELS,
+       ...OpenAIProvider.MODELS,
+       ...GeminiProvider.MODELS,
+       ...LocalProvider.MODELS
+     };
+     return allModels[model as keyof typeof allModels];
+   }
+
+   private sleep(ms: number): Promise<void> {
+     return new Promise(resolve => setTimeout(resolve, ms));
+   }
+ }
+ ```
+
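+ The retry and fallback path in `executeWithFallback` is driven by two fields read from the config (`maxRetries`, `enableFallback`). A minimal sketch of that slice of `UnifiedAPIConfig` (field names as referenced in the class; values illustrative):
+
+ ```typescript
+ const resilience: Partial<UnifiedAPIConfig> = {
+   maxRetries: 3,        // retries the same provider with exponential backoff (2 s, 4 s, ...)
+   enableFallback: true  // after exhausting retries, re-routes once via routeBalanced
+ };
+ ```
+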
+ ---
+
+ ## Capability Matrix
+
+ | Model | Provider | Context | Output | Vision | Tools | Streaming | Input $/1M | Output $/1M |
+ |-------|----------|---------|--------|--------|-------|-----------|------------|-------------|
+ | claude-opus-4-5 | Claude | 200K | 32K | Yes | Yes | Yes | $15.00 | $75.00 |
+ | claude-sonnet-4 | Claude | 200K | 64K | Yes | Yes | Yes | $3.00 | $15.00 |
+ | claude-3-5-haiku | Claude | 200K | 8K | Yes | Yes | Yes | $0.80 | $4.00 |
+ | gpt-4o | OpenAI | 128K | 16K | Yes | Yes | Yes | $2.50 | $10.00 |
+ | gpt-4-turbo | OpenAI | 128K | 4K | Yes | Yes | Yes | $10.00 | $30.00 |
+ | gpt-4o-mini | OpenAI | 128K | 16K | Yes | Yes | Yes | $0.15 | $0.60 |
+ | gpt-3.5-turbo | OpenAI | 16K | 4K | No | Yes | Yes | $0.50 | $1.50 |
+ | gemini-2.0-flash | Gemini | 1M | 8K | Yes | Yes | Yes | $0.075 | $0.30 |
+ | gemini-1.5-pro | Gemini | 2M | 8K | Yes | Yes | Yes | $1.25 | $5.00 |
+ | llama3 | Local | 8K | 4K | No | Yes | Yes | $0.00 | $0.00 |
+ | llama3.2 | Local | 128K | 4K | Yes | Yes | Yes | $0.00 | $0.00 |
+ | mistral | Local | 32K | 4K | No | Yes | Yes | $0.00 | $0.00 |
+ | codellama | Local | 16K | 4K | No | No | Yes | $0.00 | $0.00 |
+
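+ As a sanity check on the pricing columns, a short worked example (prices from the table; token counts illustrative). Note that `ModelRouter.estimateCost` applies its own 70/30 input/output split to a single combined token count, so its estimate can differ from an exact split like this one:
+
+ ```typescript
+ // Worked example against the table above: claude-sonnet-4
+ // at $3.00 input / $15.00 output per 1M tokens.
+ const inputTokens = 10_000;
+ const outputTokens = 2_000;
+ const costUsd =
+   (inputTokens / 1_000_000) * 3.0 +    // $0.03
+   (outputTokens / 1_000_000) * 15.0;   // $0.03
+ console.log(costUsd.toFixed(2)); // "0.06"
+ ```
+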
+ ---
+
+ ## Usage Examples
+
+ ### Basic Initialization
+
+ ```typescript
+ import { UnifiedLLMAPI } from '@elsabro/llm-providers';
+
+ const llm = new UnifiedLLMAPI({
+   defaultModel: 'claude-sonnet-4',
+   providers: {
+     claude: {
+       enabled: true,
+       apiKey: process.env.ANTHROPIC_API_KEY
+     },
+     openai: {
+       enabled: true,
+       apiKey: process.env.OPENAI_API_KEY
+     },
+     local: {
+       enabled: true,
+       baseUrl: 'http://localhost:11434'
+     }
+   },
+   router: {
+     defaultFallbackChain: ['claude-sonnet-4', 'gpt-4o', 'llama3']
+   }
+ });
+
+ await llm.initialize();
+ ```
+
+ ### Simple Completion
+
+ ```typescript
+ const response = await llm.complete({
+   model: 'claude-sonnet-4',
+   messages: [
+     { role: 'user', content: 'Explain quantum computing in simple terms' }
+   ],
+   maxTokens: 500
+ });
+
+ console.log(response.content);
+ console.log(`Tokens: ${response.usage.totalTokens}`);
+ ```
+
+ ### Routing by Task Type
+
+ ```typescript
+ // Code generation - routes to the smart model
+ const codeResponse = await llm.complete({
+   model: 'auto',
+   messages: [
+     { role: 'user', content: 'Write a binary search function in TypeScript' }
+   ]
+ }, {
+   taskType: 'code_generation',
+   estimatedTokens: 1500 // > 1000, so the code-generation rule matches
+ });
+
+ // Quick task - routes to the fast model
+ const quickResponse = await llm.complete({
+   model: 'auto',
+   messages: [
+     { role: 'user', content: 'What is 2+2?' }
+   ]
+ }, {
+   taskType: 'quick_task'
+ });
+ ```
+
+ ### Cost Optimization
+
+ ```typescript
+ const response = await llm.complete({
+   model: 'auto',
+   messages: [
+     { role: 'user', content: 'Summarize this document...' }
+   ]
+ }, {
+   costOptimized: true,
+   maxCost: 0.01, // Max $0.01 per request
+   taskType: 'summarization'
+ });
+ ```
+
+ ### Streaming
+
+ ```typescript
+ const stream = llm.stream({
+   model: 'claude-sonnet-4',
+   messages: [
+     { role: 'user', content: 'Write a short story about a robot' }
+   ]
+ });
+
+ for await (const chunk of stream) {
+   if (chunk.type === 'content') {
+     process.stdout.write(chunk.content || '');
+   }
+ }
+ ```
+
+ ### Using Model Aliases
+
+ ```typescript
+ // Use predefined aliases
+ const fastResponse = await llm.chat([
+   { role: 'user', content: 'Quick question: capital of France?' }
+ ], { model: 'fast' }); // Routes to claude-3-5-haiku
+
+ const smartResponse = await llm.chat([
+   { role: 'user', content: 'Complex reasoning task...' }
+ ], { model: 'smart' }); // Routes to claude-sonnet-4
+
+ const bestResponse = await llm.chat([
+   { role: 'user', content: 'Very complex task...' }
+ ], { model: 'best' }); // Routes to claude-opus-4-5
+ ```
+
+ ### Provider Health Check
+
+ ```typescript
+ const status = llm.getProviderStatus();
+
+ for (const provider of status) {
+   console.log(`${provider.name}: ${provider.healthy ? 'OK' : 'DOWN'}`);
+   console.log(`  Models: ${provider.availableModels.join(', ')}`);
+ }
+ ```
+
+ ### Tool Use
+
+ ```typescript
+ const response = await llm.complete({
+   model: 'claude-sonnet-4',
+   messages: [
+     { role: 'user', content: 'What is the weather in Paris?' }
+   ],
+   tools: [{
+     name: 'get_weather',
+     description: 'Get current weather for a location',
+     parameters: {
+       type: 'object',
+       properties: {
+         location: { type: 'string', description: 'City name' }
+       },
+       required: ['location']
+     }
+   }],
+   toolChoice: 'auto'
+ });
+
+ if (response.toolCalls) {
+   for (const call of response.toolCalls) {
+     console.log(`Tool: ${call.name}`);
+     console.log(`Args: ${JSON.stringify(call.arguments)}`);
+   }
+ }
+ ```
+
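+ Feeding a tool result back to the model typically takes a second `complete` call. A sketch only: `getWeather` is a hypothetical tool implementation, and the `role: 'tool'`, `toolCallId`, and `call.id` field names are assumptions not confirmed by this document:
+
+ ```typescript
+ const call = response.toolCalls![0];
+ const toolResult = await getWeather(call.arguments); // hypothetical tool implementation
+
+ const followUp = await llm.complete({
+   model: 'claude-sonnet-4',
+   messages: [
+     { role: 'user', content: 'What is the weather in Paris?' },
+     { role: 'assistant', content: '', toolCalls: response.toolCalls },
+     { role: 'tool', toolCallId: call.id, content: JSON.stringify(toolResult) }
+   ]
+ });
+
+ console.log(followUp.content);
+ ```
+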
+ ---
+
+ ## Commands
+
+ ```bash
+ /elsabro:llm status                    # Show provider status
+ /elsabro:llm models                    # List available models
+ /elsabro:llm test <provider>           # Test a provider
+ /elsabro:llm route <model>             # Show routing for a model
+ /elsabro:llm cost <model> <tokens>     # Estimate cost
+ /elsabro:llm alias set <name> <model>  # Create an alias
+ /elsabro:llm config                    # Show current configuration
+ ```
+
+ ---
+
+ ## Configuration
+
+ See `templates/multi-llm-config.json` for the full configuration.
+
+ ---
+
+ ## Changelog
+
+ - **v3.6.0**: Initial Multi-LLM Providers System
+   - ProviderRegistry with health checks
+   - 5 provider implementations (Claude, OpenAI, Gemini, Local, Azure)
+   - ModelRouter with intelligent routing
+   - UnifiedAPI with a consistent interface
+   - Cost optimization and load balancing
+   - Cross-provider streaming support