elsabro 2.3.0 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +698 -20
- package/bin/install.js +0 -0
- package/flows/development-flow.json +452 -0
- package/flows/quick-flow.json +118 -0
- package/hooks/hooks-config-updated.json +285 -0
- package/hooks/skill-discovery.sh +539 -0
- package/package.json +3 -2
- package/references/SYSTEM_INDEX.md +400 -5
- package/references/agent-marketplace.md +2274 -0
- package/references/agent-protocol.md +1126 -0
- package/references/ai-code-suggestions.md +2413 -0
- package/references/checkpointing.md +595 -0
- package/references/collaboration-patterns.md +851 -0
- package/references/collaborative-sessions.md +1081 -0
- package/references/configuration-management.md +1810 -0
- package/references/cost-tracking.md +1095 -0
- package/references/enterprise-sso.md +2001 -0
- package/references/error-contracts-v2.md +968 -0
- package/references/event-driven.md +1031 -0
- package/references/flow-orchestration.md +940 -0
- package/references/flow-visualization.md +1557 -0
- package/references/ide-integrations.md +3513 -0
- package/references/interrupt-system.md +681 -0
- package/references/kubernetes-deployment.md +3099 -0
- package/references/memory-system.md +683 -0
- package/references/mobile-companion.md +3236 -0
- package/references/multi-llm-providers.md +2494 -0
- package/references/multi-project-memory.md +1182 -0
- package/references/observability.md +793 -0
- package/references/output-schemas.md +858 -0
- package/references/performance-profiler.md +955 -0
- package/references/plugin-system.md +1526 -0
- package/references/prompt-management.md +292 -0
- package/references/sandbox-execution.md +303 -0
- package/references/security-system.md +1253 -0
- package/references/skill-marketplace-integration.md +3901 -0
- package/references/streaming.md +696 -0
- package/references/testing-framework.md +1151 -0
- package/references/time-travel.md +802 -0
- package/references/tool-registry.md +886 -0
- package/references/voice-commands.md +3296 -0
- package/templates/agent-marketplace-config.json +220 -0
- package/templates/agent-protocol-config.json +136 -0
- package/templates/ai-suggestions-config.json +100 -0
- package/templates/checkpoint-state.json +61 -0
- package/templates/collaboration-config.json +157 -0
- package/templates/collaborative-sessions-config.json +153 -0
- package/templates/configuration-config.json +245 -0
- package/templates/cost-tracking-config.json +148 -0
- package/templates/enterprise-sso-config.json +438 -0
- package/templates/events-config.json +148 -0
- package/templates/flow-visualization-config.json +196 -0
- package/templates/ide-integrations-config.json +442 -0
- package/templates/kubernetes-config.json +764 -0
- package/templates/memory-state.json +84 -0
- package/templates/mobile-companion-config.json +600 -0
- package/templates/multi-llm-config.json +544 -0
- package/templates/multi-project-memory-config.json +145 -0
- package/templates/observability-config.json +109 -0
- package/templates/performance-profiler-config.json +125 -0
- package/templates/plugin-config.json +170 -0
- package/templates/prompt-management-config.json +86 -0
- package/templates/sandbox-config.json +185 -0
- package/templates/schemas-config.json +65 -0
- package/templates/security-config.json +120 -0
- package/templates/skill-marketplace-config.json +441 -0
- package/templates/streaming-config.json +72 -0
- package/templates/testing-config.json +81 -0
- package/templates/timetravel-config.json +62 -0
- package/templates/tool-registry-config.json +109 -0
- package/templates/voice-commands-config.json +658 -0
+++ package/references/multi-llm-providers.md
@@ -0,0 +1,2494 @@

# Multi-LLM Providers System (v3.6)

Unified system for managing multiple LLM providers, with intelligent routing, automatic fallback, and cost optimization.

## Architecture

```
┌─────────────────────────────────────────────────────────────────────────────┐
│                         MULTI-LLM PROVIDERS SYSTEM                          │
├─────────────────────────────────────────────────────────────────────────────┤
│                                                                             │
│   ┌────────────────────────────────────────────────────────────────────┐    │
│   │                            UNIFIED API                             │    │
│   │  ┌──────────────┐ ┌──────────────┐ ┌──────────────────────────┐    │    │
│   │  │  complete()  │ │   stream()   │ │      countTokens()       │    │    │
│   │  │    chat()    │ │   embed()    │ │      estimateCost()      │    │    │
│   │  └──────────────┘ └──────────────┘ └──────────────────────────┘    │    │
│   └────────────────────────────────────────────────────────────────────┘    │
│                                      │                                      │
│                                      ▼                                      │
│   ┌────────────────────────────────────────────────────────────────────┐    │
│   │                            MODEL ROUTER                            │    │
│   │  ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌───────────┐     │    │
│   │  │ Task-Based  │ │    Cost     │ │    Load     │ │   Rate    │     │    │
│   │  │   Routing   │ │ Optimization│ │  Balancing  │ │ Limiting  │     │    │
│   │  └─────────────┘ └─────────────┘ └─────────────┘ └───────────┘     │    │
│   └────────────────────────────────────────────────────────────────────┘    │
│                                      │                                      │
│                                      ▼                                      │
│   ┌────────────────────────────────────────────────────────────────────┐    │
│   │                         PROVIDER REGISTRY                          │    │
│   │  ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐       │    │
│   │  │ Claude  │ │ OpenAI  │ │ Gemini  │ │  Local  │ │  Azure  │       │    │
│   │  │(default)│ │  GPT-4  │ │  Flash  │ │ Ollama  │ │ OpenAI  │       │    │
│   │  └─────────┘ └─────────┘ └─────────┘ └─────────┘ └─────────┘       │    │
│   └────────────────────────────────────────────────────────────────────┘    │
│                                      │                                      │
│                                      ▼                                      │
│   ┌────────────────────────────────────────────────────────────────────┐    │
│   │                        HEALTH & MONITORING                         │    │
│   │  ┌─────────────┐ ┌─────────────┐ ┌─────────────┐                   │    │
│   │  │   Health    │ │   Status    │ │  Automatic  │                   │    │
│   │  │   Checks    │ │ Monitoring  │ │  Fallback   │                   │    │
│   │  └─────────────┘ └─────────────┘ └─────────────┘                   │    │
│   └────────────────────────────────────────────────────────────────────┘    │
│                                                                             │
└─────────────────────────────────────────────────────────────────────────────┘
```

---

## ProviderRegistry

### Base Interfaces

```typescript
interface LLMProvider<TConfig extends ProviderConfig = ProviderConfig> {
  readonly id: string;
  readonly name: string;
  readonly version: string;
  readonly config: TConfig;

  // Core methods
  complete(request: CompletionRequest): Promise<CompletionResponse>;
  stream(request: CompletionRequest): AsyncIterable<StreamChunk>;

  // Optional capabilities
  embed?(texts: string[]): Promise<EmbeddingResponse>;
  countTokens(text: string, model?: string): Promise<number>;

  // Health & status
  healthCheck(): Promise<HealthStatus>;
  getStatus(): ProviderStatus;

  // Lifecycle
  initialize(): Promise<void>;
  shutdown(): Promise<void>;
}

interface ProviderConfig {
  apiKey?: string;
  baseUrl?: string;
  timeout?: number;
  maxRetries?: number;
  retryDelay?: number;
  rateLimit?: RateLimitConfig;
  headers?: Record<string, string>;
}

interface RateLimitConfig {
  requestsPerMinute: number;
  tokensPerMinute: number;
  tokensPerDay?: number;
  concurrent?: number;
}

interface CompletionRequest {
  model: string;
  messages: Message[];
  temperature?: number;
  maxTokens?: number;
  topP?: number;
  stopSequences?: string[];
  tools?: ToolDefinition[];
  toolChoice?: 'auto' | 'none' | 'required' | { name: string };
  responseFormat?: 'text' | 'json' | { schema: JSONSchema };
  metadata?: Record<string, unknown>;
}

interface Message {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string | ContentBlock[];
  name?: string;
  toolCallId?: string;
  toolCalls?: ToolCall[];
}

interface ContentBlock {
  type: 'text' | 'image' | 'file';
  text?: string;
  imageUrl?: string;
  imageBase64?: string;
  mimeType?: string;
  fileName?: string;
}

interface CompletionResponse {
  id: string;
  model: string;
  content: string;
  toolCalls?: ToolCall[];
  usage: TokenUsage;
  finishReason: 'stop' | 'length' | 'tool_calls' | 'content_filter';
  metadata?: {
    provider: string;
    latencyMs: number;
    cached?: boolean;
  };
}

interface StreamChunk {
  type: 'content' | 'tool_call' | 'usage' | 'done';
  content?: string;
  toolCall?: Partial<ToolCall>;
  usage?: Partial<TokenUsage>;
  finishReason?: string;
}

interface TokenUsage {
  promptTokens: number;
  completionTokens: number;
  totalTokens: number;
  cacheReadTokens?: number;
  cacheWriteTokens?: number;
}

interface ToolCall {
  id: string;
  name: string;
  arguments: string;
}

interface HealthStatus {
  healthy: boolean;
  latencyMs: number;
  lastCheck: string;
  error?: string;
  details?: Record<string, unknown>;
}

interface ProviderStatus {
  id: string;
  name: string;
  healthy: boolean;
  enabled: boolean;
  availableModels: string[];
  rateLimitRemaining?: {
    requests: number;
    tokens: number;
  };
  lastError?: {
    message: string;
    timestamp: string;
    code?: string;
  };
}
```
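
To make the shape of the unified API concrete, here is a minimal sketch of one request/response round trip against any `LLMProvider`. The `askOnce` helper and the log output are illustrative, not part of the system.

```typescript
// Minimal sketch: one round trip through the unified API. Works against any
// LLMProvider implementation (e.g. the providers defined below).
async function askOnce(provider: LLMProvider): Promise<void> {
  const request: CompletionRequest = {
    model: 'claude-sonnet-4',
    messages: [
      { role: 'system', content: 'You are a concise assistant.' },
      { role: 'user', content: 'Summarize the provider registry in one sentence.' }
    ],
    temperature: 0.2,
    maxTokens: 256
  };

  const response = await provider.complete(request);
  console.log(response.content);
  console.log(`tokens: ${response.usage.totalTokens}, finish: ${response.finishReason}`);
}
```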

### ProviderRegistry Implementation

```typescript
interface RegisteredProvider {
  provider: LLMProvider;
  priority: number;
  enabled: boolean;
  healthStatus: HealthStatus;
  metrics: ProviderMetrics;
}

interface ProviderMetrics {
  totalRequests: number;
  successfulRequests: number;
  failedRequests: number;
  totalTokens: number;
  totalCost: number;
  avgLatencyMs: number;
  p95LatencyMs: number;
  lastUsed?: string;
}

class ProviderRegistry {
  private providers: Map<string, RegisteredProvider> = new Map();
  // setInterval returns NodeJS.Timeout (NodeJS.Timer is deprecated)
  private healthCheckInterval: NodeJS.Timeout | null = null;
  private config: ProviderRegistryConfig;

  constructor(config: ProviderRegistryConfig) {
    this.config = config;
  }

  // Register a provider
  async register(
    provider: LLMProvider,
    options: { priority?: number; enabled?: boolean } = {}
  ): Promise<void> {
    const { priority = 100, enabled = true } = options;

    // Initialize provider
    await provider.initialize();

    // Initial health check
    const healthStatus = await provider.healthCheck();

    this.providers.set(provider.id, {
      provider,
      priority,
      enabled,
      healthStatus,
      metrics: {
        totalRequests: 0,
        successfulRequests: 0,
        failedRequests: 0,
        totalTokens: 0,
        totalCost: 0,
        avgLatencyMs: 0,
        p95LatencyMs: 0
      }
    });

    EventBus.publish('provider.registered', {
      id: provider.id,
      name: provider.name,
      priority,
      healthy: healthStatus.healthy
    });
  }

  // Unregister a provider
  async unregister(providerId: string): Promise<boolean> {
    const registered = this.providers.get(providerId);
    if (!registered) return false;

    await registered.provider.shutdown();
    this.providers.delete(providerId);

    EventBus.publish('provider.unregistered', { id: providerId });
    return true;
  }

  // Get provider by ID
  get(providerId: string): LLMProvider | undefined {
    return this.providers.get(providerId)?.provider;
  }

  // Get all healthy providers sorted by priority
  getHealthyProviders(): LLMProvider[] {
    return Array.from(this.providers.values())
      .filter(p => p.enabled && p.healthStatus.healthy)
      .sort((a, b) => a.priority - b.priority)
      .map(p => p.provider);
  }

  // Get provider for specific model
  getProviderForModel(model: string): LLMProvider | undefined {
    for (const registered of this.providers.values()) {
      if (!registered.enabled || !registered.healthStatus.healthy) continue;

      const status = registered.provider.getStatus();
      if (status.availableModels.includes(model)) {
        return registered.provider;
      }
    }
    return undefined;
  }

  // List all providers with status
  list(): ProviderStatus[] {
    return Array.from(this.providers.values()).map(r => ({
      ...r.provider.getStatus(),
      healthy: r.healthStatus.healthy,
      enabled: r.enabled
    }));
  }

  // Enable/disable provider
  setEnabled(providerId: string, enabled: boolean): void {
    const registered = this.providers.get(providerId);
    if (registered) {
      registered.enabled = enabled;
      EventBus.publish('provider.status_changed', { id: providerId, enabled });
    }
  }

  // Update metrics after request
  recordMetrics(
    providerId: string,
    success: boolean,
    latencyMs: number,
    tokens: number,
    cost: number
  ): void {
    const registered = this.providers.get(providerId);
    if (!registered) return;

    const m = registered.metrics;
    m.totalRequests++;
    if (success) {
      m.successfulRequests++;
    } else {
      m.failedRequests++;
    }
    m.totalTokens += tokens;
    m.totalCost += cost;
    m.lastUsed = new Date().toISOString();

    // Rolling average for latency
    m.avgLatencyMs = (m.avgLatencyMs * (m.totalRequests - 1) + latencyMs) / m.totalRequests;
  }

  // Get provider metrics
  getMetrics(providerId: string): ProviderMetrics | undefined {
    return this.providers.get(providerId)?.metrics;
  }

  // Start health check loop
  startHealthChecks(intervalMs: number = 30000): void {
    this.healthCheckInterval = setInterval(async () => {
      for (const [id, registered] of this.providers) {
        try {
          const status = await registered.provider.healthCheck();
          const wasHealthy = registered.healthStatus.healthy;
          registered.healthStatus = status;

          if (wasHealthy !== status.healthy) {
            EventBus.publish('provider.health_changed', {
              id,
              healthy: status.healthy,
              error: status.error
            });
          }
        } catch (error) {
          registered.healthStatus = {
            healthy: false,
            latencyMs: 0,
            lastCheck: new Date().toISOString(),
            error: error instanceof Error ? error.message : 'Health check failed'
          };
        }
      }
    }, intervalMs);
  }

  // Stop health checks
  stopHealthChecks(): void {
    if (this.healthCheckInterval) {
      clearInterval(this.healthCheckInterval);
      this.healthCheckInterval = null;
    }
  }

  // Shutdown all providers
  async shutdown(): Promise<void> {
    this.stopHealthChecks();
    for (const registered of this.providers.values()) {
      await registered.provider.shutdown();
    }
    this.providers.clear();
  }
}
```
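
A short usage sketch tying the registry pieces together. `ProviderRegistryConfig` is referenced but not defined in this document, so an empty placeholder is passed; the environment-variable names are illustrative.

```typescript
// Usage sketch (ProviderRegistryConfig's shape is not shown in this document,
// so an empty object stands in for it).
const registry = new ProviderRegistry({} as ProviderRegistryConfig);

await registry.register(new ClaudeProvider({ apiKey: process.env.ANTHROPIC_API_KEY! }), { priority: 10 });
await registry.register(new OpenAIProvider({ apiKey: process.env.OPENAI_API_KEY! }), { priority: 20 });

// Poll health every 30s; unhealthy providers drop out of
// getHealthyProviders() until they recover.
registry.startHealthChecks(30000);

// Route by model name: whichever healthy provider advertises the model wins.
const provider = registry.getProviderForModel('claude-sonnet-4');
```

Note that `getHealthyProviders()` sorts ascending, so lower `priority` values are tried first.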

---

## Provider Implementations

### ClaudeProvider (Default)

```typescript
interface ClaudeConfig extends ProviderConfig {
  apiKey: string;
  baseUrl?: string;
  defaultModel?: string;
  betaHeaders?: string[];
}

class ClaudeProvider implements LLMProvider<ClaudeConfig> {
  readonly id = 'claude';
  readonly name = 'Anthropic Claude';
  readonly version = '1.0.0';
  readonly config: ClaudeConfig;

  private client: Anthropic;

  static readonly MODELS = {
    'claude-opus-4-5': {
      id: 'claude-opus-4-5-20251101',
      contextWindow: 200000,
      maxOutput: 32000,
      inputPrice: 15.0, // per 1M tokens
      outputPrice: 75.0,
      capabilities: ['vision', 'tools', 'extended_thinking']
    },
    'claude-sonnet-4': {
      id: 'claude-sonnet-4-20250514',
      contextWindow: 200000,
      maxOutput: 64000,
      inputPrice: 3.0,
      outputPrice: 15.0,
      capabilities: ['vision', 'tools', 'computer_use']
    },
    'claude-3-5-haiku': {
      id: 'claude-3-5-haiku-20241022',
      contextWindow: 200000,
      maxOutput: 8192,
      inputPrice: 0.80,
      outputPrice: 4.0,
      capabilities: ['vision', 'tools']
    }
  };

  constructor(config: ClaudeConfig) {
    this.config = {
      baseUrl: 'https://api.anthropic.com',
      defaultModel: 'claude-sonnet-4',
      timeout: 120000,
      maxRetries: 3,
      ...config
    };
  }

  async initialize(): Promise<void> {
    this.client = new Anthropic({
      apiKey: this.config.apiKey,
      baseURL: this.config.baseUrl,
      timeout: this.config.timeout,
      maxRetries: this.config.maxRetries
    });
  }

  async complete(request: CompletionRequest): Promise<CompletionResponse> {
    const startTime = Date.now();
    const modelId = this.resolveModel(request.model);

    const response = await this.client.messages.create({
      model: modelId,
      max_tokens: request.maxTokens || 4096,
      messages: this.convertMessages(request.messages),
      system: this.extractSystemMessage(request.messages),
      temperature: request.temperature,
      top_p: request.topP,
      stop_sequences: request.stopSequences,
      tools: request.tools ? this.convertTools(request.tools) : undefined,
      tool_choice: request.toolChoice ? this.convertToolChoice(request.toolChoice) : undefined
    });

    return {
      id: response.id,
      model: response.model,
      content: this.extractContent(response),
      toolCalls: this.extractToolCalls(response),
      usage: {
        promptTokens: response.usage.input_tokens,
        completionTokens: response.usage.output_tokens,
        totalTokens: response.usage.input_tokens + response.usage.output_tokens,
        cacheReadTokens: response.usage.cache_read_input_tokens,
        cacheWriteTokens: response.usage.cache_creation_input_tokens
      },
      finishReason: this.mapStopReason(response.stop_reason),
      metadata: {
        provider: this.id,
        latencyMs: Date.now() - startTime
      }
    };
  }

  async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
    const modelId = this.resolveModel(request.model);

    const stream = await this.client.messages.stream({
      model: modelId,
      max_tokens: request.maxTokens || 4096,
      messages: this.convertMessages(request.messages),
      system: this.extractSystemMessage(request.messages),
      temperature: request.temperature
    });

    for await (const event of stream) {
      if (event.type === 'content_block_delta') {
        if (event.delta.type === 'text_delta') {
          yield { type: 'content', content: event.delta.text };
        } else if (event.delta.type === 'input_json_delta') {
          yield { type: 'tool_call', toolCall: { arguments: event.delta.partial_json } };
        }
      } else if (event.type === 'message_delta') {
        yield {
          type: 'usage',
          usage: { completionTokens: event.usage?.output_tokens },
          finishReason: event.delta.stop_reason
        };
      }
    }

    yield { type: 'done' };
  }

  async countTokens(text: string, model?: string): Promise<number> {
    const response = await this.client.messages.countTokens({
      model: this.resolveModel(model || this.config.defaultModel!),
      messages: [{ role: 'user', content: text }]
    });
    return response.input_tokens;
  }

  async healthCheck(): Promise<HealthStatus> {
    const startTime = Date.now();
    try {
      await this.client.messages.create({
        model: 'claude-3-5-haiku-20241022',
        max_tokens: 10,
        messages: [{ role: 'user', content: 'Hi' }]
      });
      return {
        healthy: true,
        latencyMs: Date.now() - startTime,
        lastCheck: new Date().toISOString()
      };
    } catch (error) {
      return {
        healthy: false,
        latencyMs: Date.now() - startTime,
        lastCheck: new Date().toISOString(),
        error: error instanceof Error ? error.message : 'Unknown error'
      };
    }
  }

  getStatus(): ProviderStatus {
    return {
      id: this.id,
      name: this.name,
      healthy: true,
      enabled: true,
      availableModels: Object.keys(ClaudeProvider.MODELS)
    };
  }

  async shutdown(): Promise<void> {
    // Cleanup if needed
  }

  private resolveModel(model: string): string {
    const modelInfo = ClaudeProvider.MODELS[model as keyof typeof ClaudeProvider.MODELS];
    return modelInfo?.id || model;
  }

  private convertMessages(messages: Message[]): AnthropicMessage[] {
    return messages
      .filter(m => m.role !== 'system')
      .map(m => ({
        role: m.role === 'tool' ? 'user' : m.role,
        content: this.convertContent(m)
      }));
  }

  private extractSystemMessage(messages: Message[]): string | undefined {
    const systemMsg = messages.find(m => m.role === 'system');
    return typeof systemMsg?.content === 'string' ? systemMsg.content : undefined;
  }

  private convertContent(message: Message): string | AnthropicContentBlock[] {
    if (typeof message.content === 'string') {
      if (message.role === 'tool') {
        return [{
          type: 'tool_result',
          tool_use_id: message.toolCallId!,
          content: message.content
        }];
      }
      return message.content;
    }

    return message.content.map(block => {
      if (block.type === 'text') {
        return { type: 'text', text: block.text! };
      } else if (block.type === 'image') {
        return {
          type: 'image',
          source: {
            type: block.imageBase64 ? 'base64' : 'url',
            media_type: block.mimeType || 'image/jpeg',
            data: block.imageBase64,
            url: block.imageUrl
          }
        };
      }
      return { type: 'text', text: '' };
    });
  }

  private convertTools(tools: ToolDefinition[]): AnthropicTool[] {
    return tools.map(t => ({
      name: t.name,
      description: t.description,
      input_schema: t.parameters
    }));
  }

  private convertToolChoice(choice: CompletionRequest['toolChoice']): AnthropicToolChoice {
    if (choice === 'auto') return { type: 'auto' };
    if (choice === 'none') return { type: 'none' };
    if (choice === 'required') return { type: 'any' };
    if (typeof choice === 'object') return { type: 'tool', name: choice.name };
    return { type: 'auto' };
  }

  private extractContent(response: AnthropicResponse): string {
    return response.content
      .filter(b => b.type === 'text')
      .map(b => b.text)
      .join('');
  }

  private extractToolCalls(response: AnthropicResponse): ToolCall[] | undefined {
    const toolUses = response.content.filter(b => b.type === 'tool_use');
    if (toolUses.length === 0) return undefined;

    return toolUses.map(t => ({
      id: t.id,
      name: t.name,
      arguments: JSON.stringify(t.input)
    }));
  }

  private mapStopReason(reason: string | null): CompletionResponse['finishReason'] {
    switch (reason) {
      case 'end_turn': return 'stop';
      case 'max_tokens': return 'length';
      case 'tool_use': return 'tool_calls';
      default: return 'stop';
    }
  }
}
```
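
Streaming through the adapter looks the same as for any other provider, since the Anthropic events are normalized to `StreamChunk`; the snippet below is illustrative only (prompt text and env var are placeholders).

```typescript
// Illustrative: stream a completion and print text deltas as they arrive.
const claude = new ClaudeProvider({ apiKey: process.env.ANTHROPIC_API_KEY! });
await claude.initialize();

for await (const chunk of claude.stream({
  model: 'claude-sonnet-4',
  messages: [{ role: 'user', content: 'Explain automatic fallback in one paragraph.' }],
  maxTokens: 512
})) {
  if (chunk.type === 'content' && chunk.content) process.stdout.write(chunk.content);
  if (chunk.type === 'done') process.stdout.write('\n');
}
```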

### OpenAIProvider

```typescript
interface OpenAIConfig extends ProviderConfig {
  apiKey: string;
  organization?: string;
  baseUrl?: string;
  defaultModel?: string;
}

class OpenAIProvider implements LLMProvider<OpenAIConfig> {
  readonly id = 'openai';
  readonly name = 'OpenAI';
  readonly version = '1.0.0';
  readonly config: OpenAIConfig;

  private client: OpenAI;

  static readonly MODELS = {
    'gpt-4o': {
      id: 'gpt-4o',
      contextWindow: 128000,
      maxOutput: 16384,
      inputPrice: 2.50,
      outputPrice: 10.0,
      capabilities: ['vision', 'tools', 'json_mode']
    },
    'gpt-4-turbo': {
      id: 'gpt-4-turbo',
      contextWindow: 128000,
      maxOutput: 4096,
      inputPrice: 10.0,
      outputPrice: 30.0,
      capabilities: ['vision', 'tools', 'json_mode']
    },
    'gpt-3.5-turbo': {
      id: 'gpt-3.5-turbo',
      contextWindow: 16385,
      maxOutput: 4096,
      inputPrice: 0.50,
      outputPrice: 1.50,
      capabilities: ['tools', 'json_mode']
    },
    'gpt-4o-mini': {
      id: 'gpt-4o-mini',
      contextWindow: 128000,
      maxOutput: 16384,
      inputPrice: 0.15,
      outputPrice: 0.60,
      capabilities: ['vision', 'tools', 'json_mode']
    }
  };

  constructor(config: OpenAIConfig) {
    this.config = {
      baseUrl: 'https://api.openai.com/v1',
      defaultModel: 'gpt-4o',
      timeout: 120000,
      maxRetries: 3,
      ...config
    };
  }

  async initialize(): Promise<void> {
    this.client = new OpenAI({
      apiKey: this.config.apiKey,
      organization: this.config.organization,
      baseURL: this.config.baseUrl,
      timeout: this.config.timeout,
      maxRetries: this.config.maxRetries
    });
  }

  async complete(request: CompletionRequest): Promise<CompletionResponse> {
    const startTime = Date.now();

    const response = await this.client.chat.completions.create({
      model: request.model,
      messages: this.convertMessages(request.messages),
      temperature: request.temperature,
      max_tokens: request.maxTokens,
      top_p: request.topP,
      stop: request.stopSequences,
      tools: request.tools ? this.convertTools(request.tools) : undefined,
      tool_choice: request.toolChoice as OpenAIToolChoice,
      response_format: this.convertResponseFormat(request.responseFormat)
    });

    const choice = response.choices[0];

    return {
      id: response.id,
      model: response.model,
      content: choice.message.content || '',
      toolCalls: choice.message.tool_calls?.map(tc => ({
        id: tc.id,
        name: tc.function.name,
        arguments: tc.function.arguments
      })),
      usage: {
        promptTokens: response.usage?.prompt_tokens || 0,
        completionTokens: response.usage?.completion_tokens || 0,
        totalTokens: response.usage?.total_tokens || 0
      },
      finishReason: this.mapFinishReason(choice.finish_reason),
      metadata: {
        provider: this.id,
        latencyMs: Date.now() - startTime
      }
    };
  }

  async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
    const stream = await this.client.chat.completions.create({
      model: request.model,
      messages: this.convertMessages(request.messages),
      temperature: request.temperature,
      max_tokens: request.maxTokens,
      stream: true
    });

    for await (const chunk of stream) {
      const choice = chunk.choices[0];
      if (choice?.delta?.content) {
        yield { type: 'content', content: choice.delta.content };
      }
      if (choice?.delta?.tool_calls) {
        for (const tc of choice.delta.tool_calls) {
          yield {
            type: 'tool_call',
            toolCall: {
              id: tc.id,
              name: tc.function?.name,
              arguments: tc.function?.arguments
            }
          };
        }
      }
      if (choice?.finish_reason) {
        yield { type: 'done', finishReason: choice.finish_reason };
      }
    }
  }

  async embed(texts: string[]): Promise<EmbeddingResponse> {
    const response = await this.client.embeddings.create({
      model: 'text-embedding-3-small',
      input: texts
    });

    return {
      embeddings: response.data.map(d => d.embedding),
      usage: {
        totalTokens: response.usage.total_tokens
      }
    };
  }

  async countTokens(text: string, model?: string): Promise<number> {
    // Use js-tiktoken for accurate counting. encodingForModel maps a model
    // name to its tokenizer; getEncoding expects an encoding name instead.
    const encoding = encodingForModel((model || 'gpt-4o') as TiktokenModel);
    return encoding.encode(text).length;
  }

  async healthCheck(): Promise<HealthStatus> {
    const startTime = Date.now();
    try {
      await this.client.models.list();
      return {
        healthy: true,
        latencyMs: Date.now() - startTime,
        lastCheck: new Date().toISOString()
      };
    } catch (error) {
      return {
        healthy: false,
        latencyMs: Date.now() - startTime,
        lastCheck: new Date().toISOString(),
        error: error instanceof Error ? error.message : 'Unknown error'
      };
    }
  }

  getStatus(): ProviderStatus {
    return {
      id: this.id,
      name: this.name,
      healthy: true,
      enabled: true,
      availableModels: Object.keys(OpenAIProvider.MODELS)
    };
  }

  async shutdown(): Promise<void> {}

  private convertMessages(messages: Message[]): OpenAIChatMessage[] {
    return messages.map(m => {
      if (m.role === 'tool') {
        return {
          role: 'tool' as const,
          content: typeof m.content === 'string' ? m.content : JSON.stringify(m.content),
          tool_call_id: m.toolCallId!
        };
      }

      const content = typeof m.content === 'string'
        ? m.content
        : m.content.map(b => {
            if (b.type === 'text') return { type: 'text', text: b.text };
            if (b.type === 'image') {
              return {
                type: 'image_url',
                image_url: { url: b.imageUrl || `data:${b.mimeType};base64,${b.imageBase64}` }
              };
            }
            return { type: 'text', text: '' };
          });

      return {
        role: m.role,
        content,
        tool_calls: m.toolCalls?.map(tc => ({
          id: tc.id,
          type: 'function' as const,
          function: { name: tc.name, arguments: tc.arguments }
        }))
      };
    });
  }

  private convertTools(tools: ToolDefinition[]): OpenAITool[] {
    return tools.map(t => ({
      type: 'function' as const,
      function: {
        name: t.name,
        description: t.description,
        parameters: t.parameters
      }
    }));
  }

  private convertResponseFormat(format?: CompletionRequest['responseFormat']) {
    if (!format) return undefined;
    if (format === 'json') return { type: 'json_object' as const };
    if (typeof format === 'object') {
      return { type: 'json_schema' as const, json_schema: format.schema };
    }
    return undefined;
  }

  private mapFinishReason(reason: string | null): CompletionResponse['finishReason'] {
    switch (reason) {
      case 'stop': return 'stop';
      case 'length': return 'length';
      case 'tool_calls': return 'tool_calls';
      case 'content_filter': return 'content_filter';
      default: return 'stop';
    }
  }
}
```
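
Since `embed()` is optional on `LLMProvider`, callers should feature-detect it before use. A minimal sketch; the `similarity` helper and its cosine math are ad hoc illustrations, not part of this system.

```typescript
// Feature-detect the optional embed() capability, then compare two texts.
async function similarity(provider: LLMProvider, a: string, b: string): Promise<number> {
  if (!provider.embed) throw new Error(`${provider.id} does not support embeddings`);
  const { embeddings: [va, vb] } = await provider.embed([a, b]);

  // Ad hoc cosine similarity over the returned vectors.
  let dot = 0, na = 0, nb = 0;
  for (let i = 0; i < va.length; i++) {
    dot += va[i] * vb[i];
    na += va[i] ** 2;
    nb += vb[i] ** 2;
  }
  return dot / (Math.sqrt(na) * Math.sqrt(nb));
}
```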

### GeminiProvider

```typescript
interface GeminiConfig extends ProviderConfig {
  apiKey: string;
  baseUrl?: string;
  defaultModel?: string;
}

class GeminiProvider implements LLMProvider<GeminiConfig> {
  readonly id = 'gemini';
  readonly name = 'Google Gemini';
  readonly version = '1.0.0';
  readonly config: GeminiConfig;

  private client: GoogleGenerativeAI;

  static readonly MODELS = {
    'gemini-2.0-flash': {
      id: 'gemini-2.0-flash',
      contextWindow: 1000000,
      maxOutput: 8192,
      inputPrice: 0.075,
      outputPrice: 0.30,
      capabilities: ['vision', 'tools', 'grounding']
    },
    'gemini-1.5-pro': {
      id: 'gemini-1.5-pro',
      contextWindow: 2000000,
      maxOutput: 8192,
      inputPrice: 1.25,
      outputPrice: 5.0,
      capabilities: ['vision', 'tools', 'grounding', 'code_execution']
    },
    'gemini-1.5-flash': {
      id: 'gemini-1.5-flash',
      contextWindow: 1000000,
      maxOutput: 8192,
      inputPrice: 0.075,
      outputPrice: 0.30,
      capabilities: ['vision', 'tools']
    }
  };

  constructor(config: GeminiConfig) {
    this.config = {
      defaultModel: 'gemini-2.0-flash',
      timeout: 120000,
      maxRetries: 3,
      ...config
    };
  }

  async initialize(): Promise<void> {
    this.client = new GoogleGenerativeAI(this.config.apiKey);
  }

  async complete(request: CompletionRequest): Promise<CompletionResponse> {
    const startTime = Date.now();
    const model = this.client.getGenerativeModel({ model: request.model });

    const chat = model.startChat({
      history: this.convertHistory(request.messages),
      generationConfig: {
        temperature: request.temperature,
        maxOutputTokens: request.maxTokens,
        topP: request.topP,
        stopSequences: request.stopSequences
      }
    });

    const lastMessage = request.messages[request.messages.length - 1];
    const result = await chat.sendMessage(
      typeof lastMessage.content === 'string'
        ? lastMessage.content
        : this.convertParts(lastMessage.content)
    );

    const response = result.response;
    const usage = response.usageMetadata;

    return {
      id: `gemini-${Date.now()}`,
      model: request.model,
      content: response.text(),
      toolCalls: this.extractToolCalls(response),
      usage: {
        promptTokens: usage?.promptTokenCount || 0,
        completionTokens: usage?.candidatesTokenCount || 0,
        totalTokens: usage?.totalTokenCount || 0
      },
      finishReason: this.mapFinishReason(response.candidates?.[0]?.finishReason),
      metadata: {
        provider: this.id,
        latencyMs: Date.now() - startTime
      }
    };
  }

  async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
    const model = this.client.getGenerativeModel({ model: request.model });

    const chat = model.startChat({
      history: this.convertHistory(request.messages),
      generationConfig: {
        temperature: request.temperature,
        maxOutputTokens: request.maxTokens
      }
    });

    const lastMessage = request.messages[request.messages.length - 1];
    const result = await chat.sendMessageStream(
      typeof lastMessage.content === 'string' ? lastMessage.content : ''
    );

    for await (const chunk of result.stream) {
      const text = chunk.text();
      if (text) {
        yield { type: 'content', content: text };
      }
    }

    yield { type: 'done' };
  }

  async embed(texts: string[]): Promise<EmbeddingResponse> {
    const model = this.client.getGenerativeModel({ model: 'text-embedding-004' });

    const embeddings: number[][] = [];
    for (const text of texts) {
      const result = await model.embedContent(text);
      embeddings.push(result.embedding.values);
    }

    return {
      embeddings,
      usage: { totalTokens: 0 } // Gemini doesn't report token usage for embeddings
    };
  }

  async countTokens(text: string, model?: string): Promise<number> {
    const genModel = this.client.getGenerativeModel({
      model: model || this.config.defaultModel!
    });
    const result = await genModel.countTokens(text);
    return result.totalTokens;
  }

  async healthCheck(): Promise<HealthStatus> {
    const startTime = Date.now();
    try {
      const model = this.client.getGenerativeModel({ model: 'gemini-1.5-flash' });
      await model.countTokens('test');
      return {
        healthy: true,
        latencyMs: Date.now() - startTime,
        lastCheck: new Date().toISOString()
      };
    } catch (error) {
      return {
        healthy: false,
        latencyMs: Date.now() - startTime,
        lastCheck: new Date().toISOString(),
        error: error instanceof Error ? error.message : 'Unknown error'
      };
    }
  }

  getStatus(): ProviderStatus {
    return {
      id: this.id,
      name: this.name,
      healthy: true,
      enabled: true,
      availableModels: Object.keys(GeminiProvider.MODELS)
    };
  }

  async shutdown(): Promise<void> {}

  private convertHistory(messages: Message[]): GeminiContent[] {
    return messages.slice(0, -1).map(m => ({
      role: m.role === 'assistant' ? 'model' : 'user',
      parts: typeof m.content === 'string'
        ? [{ text: m.content }]
        : this.convertParts(m.content)
    }));
  }

  private convertParts(content: ContentBlock[]): GeminiPart[] {
    return content.map(b => {
      if (b.type === 'text') return { text: b.text! };
      if (b.type === 'image') {
        return {
          inlineData: {
            mimeType: b.mimeType || 'image/jpeg',
            data: b.imageBase64!
          }
        };
      }
      return { text: '' };
    });
  }

  private extractToolCalls(response: GeminiResponse): ToolCall[] | undefined {
    const functionCalls = response.functionCalls();
    if (!functionCalls || functionCalls.length === 0) return undefined;

    return functionCalls.map((fc, i) => ({
      id: `call-${i}`,
      name: fc.name,
      arguments: JSON.stringify(fc.args)
    }));
  }

  private mapFinishReason(reason?: string): CompletionResponse['finishReason'] {
    switch (reason) {
      case 'STOP': return 'stop';
      case 'MAX_TOKENS': return 'length';
      case 'SAFETY': return 'content_filter';
      default: return 'stop';
    }
  }
}
```
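
The static `MODELS` tables carry enough pricing data to back the `estimateCost()` call shown in the Unified API diagram. A sketch using Gemini's table; the free-standing `estimateCost` helper is hypothetical, not an existing export.

```typescript
// Hypothetical helper: estimate request cost from a provider's MODELS table.
// Table prices are USD per 1M tokens.
function estimateCost(
  model: keyof typeof GeminiProvider.MODELS,
  promptTokens: number,
  completionTokens: number
): number {
  const info = GeminiProvider.MODELS[model];
  return (promptTokens * info.inputPrice + completionTokens * info.outputPrice) / 1_000_000;
}

// e.g. 10k prompt + 2k completion tokens on gemini-2.0-flash:
// (10000 * 0.075 + 2000 * 0.30) / 1e6 ≈ $0.00135
```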

### LocalProvider (Ollama)

```typescript
interface LocalConfig extends ProviderConfig {
  baseUrl?: string;
  defaultModel?: string;
}

class LocalProvider implements LLMProvider<LocalConfig> {
  readonly id = 'local';
  readonly name = 'Ollama Local';
  readonly version = '1.0.0';
  readonly config: LocalConfig;

  static readonly MODELS = {
    'llama3': {
      id: 'llama3:latest',
      contextWindow: 8192,
      maxOutput: 4096,
      inputPrice: 0,
      outputPrice: 0,
      capabilities: ['tools']
    },
    'llama3.2': {
      id: 'llama3.2:latest',
      contextWindow: 128000,
      maxOutput: 4096,
      inputPrice: 0,
      outputPrice: 0,
      capabilities: ['tools', 'vision']
    },
    'mistral': {
      id: 'mistral:latest',
      contextWindow: 32768,
      maxOutput: 4096,
      inputPrice: 0,
      outputPrice: 0,
      capabilities: ['tools']
    },
    'codellama': {
      id: 'codellama:latest',
      contextWindow: 16384,
      maxOutput: 4096,
      inputPrice: 0,
      outputPrice: 0,
      capabilities: ['code']
    },
    'deepseek-coder': {
      id: 'deepseek-coder:latest',
      contextWindow: 16384,
      maxOutput: 4096,
      inputPrice: 0,
      outputPrice: 0,
      capabilities: ['code']
    }
  };

  constructor(config: LocalConfig = {}) {
    this.config = {
      baseUrl: 'http://localhost:11434',
      defaultModel: 'llama3',
      timeout: 300000,
      maxRetries: 1,
      ...config
    };
  }

  async initialize(): Promise<void> {
    // Verify Ollama is running
    await this.healthCheck();
  }

  async complete(request: CompletionRequest): Promise<CompletionResponse> {
    const startTime = Date.now();

    const response = await fetch(`${this.config.baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: this.resolveModel(request.model),
        messages: this.convertMessages(request.messages),
        options: {
          temperature: request.temperature,
          num_predict: request.maxTokens,
          top_p: request.topP,
          stop: request.stopSequences
        },
        stream: false
      })
    });

    if (!response.ok) {
      throw new Error(`Ollama error: ${response.statusText}`);
    }

    const data = await response.json();

    return {
      id: `ollama-${Date.now()}`,
      model: request.model,
      content: data.message.content,
      usage: {
        promptTokens: data.prompt_eval_count || 0,
        completionTokens: data.eval_count || 0,
        totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
      },
      finishReason: 'stop',
      metadata: {
        provider: this.id,
        latencyMs: Date.now() - startTime
      }
    };
  }

  async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
    const response = await fetch(`${this.config.baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: this.resolveModel(request.model),
        messages: this.convertMessages(request.messages),
        options: {
          temperature: request.temperature,
          num_predict: request.maxTokens
        },
        stream: true
      })
    });

    const reader = response.body!.getReader();
    const decoder = new TextDecoder();
    let buffer = '';

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      // Buffer partial lines: a network chunk can end mid-JSON-object.
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      buffer = lines.pop() ?? '';

      for (const line of lines.filter(l => l.trim())) {
        const data = JSON.parse(line);
        if (data.message?.content) {
          yield { type: 'content', content: data.message.content };
        }
        if (data.done) {
          yield {
            type: 'usage',
            usage: {
              promptTokens: data.prompt_eval_count,
              completionTokens: data.eval_count
            }
          };
          yield { type: 'done' };
        }
      }
    }
  }

  async embed(texts: string[]): Promise<EmbeddingResponse> {
    const embeddings: number[][] = [];

    for (const text of texts) {
      const response = await fetch(`${this.config.baseUrl}/api/embeddings`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: 'nomic-embed-text',
          prompt: text
        })
      });

      const data = await response.json();
      embeddings.push(data.embedding);
    }

    return { embeddings, usage: { totalTokens: 0 } };
  }

  async countTokens(text: string): Promise<number> {
    // Approximate token count for local models
    return Math.ceil(text.length / 4);
  }

  async healthCheck(): Promise<HealthStatus> {
    const startTime = Date.now();
    try {
      const response = await fetch(`${this.config.baseUrl}/api/tags`);
      if (!response.ok) throw new Error('Ollama not responding');

      return {
        healthy: true,
        latencyMs: Date.now() - startTime,
        lastCheck: new Date().toISOString()
      };
    } catch (error) {
      return {
        healthy: false,
        latencyMs: Date.now() - startTime,
        lastCheck: new Date().toISOString(),
        error: error instanceof Error ? error.message : 'Ollama not available'
      };
    }
  }

  getStatus(): ProviderStatus {
    return {
      id: this.id,
      name: this.name,
      healthy: true,
      enabled: true,
      availableModels: Object.keys(LocalProvider.MODELS)
    };
  }

  async shutdown(): Promise<void> {}

  private resolveModel(model: string): string {
    const modelInfo = LocalProvider.MODELS[model as keyof typeof LocalProvider.MODELS];
    return modelInfo?.id || model;
  }

  private convertMessages(messages: Message[]): OllamaMessage[] {
    return messages.map(m => ({
      role: m.role === 'tool' ? 'assistant' : m.role,
      content: typeof m.content === 'string' ? m.content : m.content.map(b => b.text).join('')
    }));
  }
}
```
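
Because the local models are zero-cost (`inputPrice: 0`), one natural way to use this provider is to prefer it whenever Ollama is reachable and fall back to a hosted provider otherwise. A sketch of that check; the function name and usage are illustrative.

```typescript
// Sketch: prefer the zero-cost local provider when Ollama is reachable,
// otherwise fall back to any hosted LLMProvider.
async function pickProvider(hosted: LLMProvider): Promise<LLMProvider> {
  const local = new LocalProvider(); // defaults to http://localhost:11434
  const health = await local.healthCheck();
  return health.healthy ? local : hosted;
}
```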
|
|
1379
|
+
|
|
1380
|
+
### AzureProvider
|
|
1381
|
+
|
|
1382
|
+
```typescript
|
|
1383
|
+
interface AzureConfig extends ProviderConfig {
|
|
1384
|
+
apiKey: string;
|
|
1385
|
+
endpoint: string;
|
|
1386
|
+
apiVersion?: string;
|
|
1387
|
+
deployments: Record<string, string>; // model -> deployment name
|
|
1388
|
+
}
|
|
1389
|
+
|
|
1390
|
+
class AzureProvider implements LLMProvider<AzureConfig> {
|
|
1391
|
+
  readonly id = 'azure';
  readonly name = 'Azure OpenAI';
  readonly version = '1.0.0';
  readonly config: AzureConfig;

  private client: AzureOpenAI;

  constructor(config: AzureConfig) {
    this.config = {
      apiVersion: '2024-02-01',
      timeout: 120000,
      maxRetries: 3,
      ...config
    };
  }

  async initialize(): Promise<void> {
    this.client = new AzureOpenAI({
      apiKey: this.config.apiKey,
      endpoint: this.config.endpoint,
      apiVersion: this.config.apiVersion
    });
  }

  async complete(request: CompletionRequest): Promise<CompletionResponse> {
    const startTime = Date.now();
    const deployment = this.getDeployment(request.model);

    const response = await this.client.chat.completions.create({
      model: deployment,
      messages: this.convertMessages(request.messages),
      temperature: request.temperature,
      max_tokens: request.maxTokens,
      top_p: request.topP,
      stop: request.stopSequences
    });

    const choice = response.choices[0];

    return {
      id: response.id,
      model: request.model,
      content: choice.message.content || '',
      toolCalls: choice.message.tool_calls?.map(tc => ({
        id: tc.id,
        name: tc.function.name,
        arguments: tc.function.arguments
      })),
      usage: {
        promptTokens: response.usage?.prompt_tokens || 0,
        completionTokens: response.usage?.completion_tokens || 0,
        totalTokens: response.usage?.total_tokens || 0
      },
      finishReason: this.mapFinishReason(choice.finish_reason),
      metadata: {
        provider: this.id,
        latencyMs: Date.now() - startTime
      }
    };
  }

  async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
    const deployment = this.getDeployment(request.model);

    const stream = await this.client.chat.completions.create({
      model: deployment,
      messages: this.convertMessages(request.messages),
      temperature: request.temperature,
      max_tokens: request.maxTokens,
      stream: true
    });

    for await (const chunk of stream) {
      const choice = chunk.choices[0];
      if (choice?.delta?.content) {
        yield { type: 'content', content: choice.delta.content };
      }
      if (choice?.finish_reason) {
        yield { type: 'done', finishReason: choice.finish_reason };
      }
    }
  }

  async countTokens(text: string, model?: string): Promise<number> {
    const encoding = getEncoding('cl100k_base');
    return encoding.encode(text).length;
  }

  async healthCheck(): Promise<HealthStatus> {
    const startTime = Date.now();
    try {
      await this.client.chat.completions.create({
        model: Object.values(this.config.deployments)[0],
        messages: [{ role: 'user', content: 'Hi' }],
        max_tokens: 5
      });
      return {
        healthy: true,
        latencyMs: Date.now() - startTime,
        lastCheck: new Date().toISOString()
      };
    } catch (error) {
      return {
        healthy: false,
        latencyMs: Date.now() - startTime,
        lastCheck: new Date().toISOString(),
        error: error instanceof Error ? error.message : 'Unknown error'
      };
    }
  }

  getStatus(): ProviderStatus {
    return {
      id: this.id,
      name: this.name,
      healthy: true,
      enabled: true,
      availableModels: Object.keys(this.config.deployments)
    };
  }

  async shutdown(): Promise<void> {}

  private getDeployment(model: string): string {
    const deployment = this.config.deployments[model];
    if (!deployment) {
      throw new Error(`No Azure deployment configured for model: ${model}`);
    }
    return deployment;
  }

  private convertMessages(messages: Message[]): AzureMessage[] {
    return messages.map(m => ({
      role: m.role,
      content: typeof m.content === 'string' ? m.content : JSON.stringify(m.content)
    }));
  }

  private mapFinishReason(reason: string | null): CompletionResponse['finishReason'] {
    switch (reason) {
      case 'stop': return 'stop';
      case 'length': return 'length';
      case 'tool_calls': return 'tool_calls';
      case 'content_filter': return 'content_filter';
      default: return 'stop';
    }
  }
}
```
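
The `deployments` map is what makes this provider work: logical model names are translated to Azure deployment names via `getDeployment`. A minimal instantiation sketch, assuming the `AzureConfig` shape inferred from the fields the class reads above (the endpoint and deployment names are placeholders):

```typescript
const azure = new AzureProvider({
  apiKey: process.env.AZURE_OPENAI_API_KEY!,
  endpoint: 'https://my-resource.openai.azure.com', // placeholder resource
  deployments: {
    // logical model name -> Azure deployment name (illustrative)
    'gpt-4o': 'my-gpt4o-deployment',
    'gpt-4o-mini': 'my-gpt4o-mini-deployment'
  }
  // apiVersion, timeout, and maxRetries fall back to the constructor defaults
});

await azure.initialize();
```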

---

## ModelRouter

### Intelligent Routing

```typescript
interface RoutingRule {
  id: string;
  name: string;
  conditions: RoutingCondition[];
  target: RoutingTarget;
  priority: number;
  enabled: boolean;
}

interface RoutingCondition {
  type: 'task_type' | 'token_count' | 'cost_limit' | 'capability' | 'time_of_day' | 'custom';
  operator: 'eq' | 'ne' | 'gt' | 'lt' | 'gte' | 'lte' | 'contains' | 'matches';
  value: unknown;
  field?: string;
}

interface RoutingTarget {
  type: 'model' | 'provider' | 'alias' | 'fallback_chain';
  value: string | string[];
}

interface TaskContext {
  taskType: TaskType;
  estimatedTokens: number;
  maxCost?: number;
  requiredCapabilities?: string[];
  priority?: 'low' | 'normal' | 'high' | 'critical';
  metadata?: Record<string, unknown>;
}

type TaskType =
  | 'code_generation'
  | 'code_review'
  | 'documentation'
  | 'analysis'
  | 'conversation'
  | 'summarization'
  | 'translation'
  | 'creative'
  | 'reasoning'
  | 'quick_task';

class ModelRouter {
  private registry: ProviderRegistry;
  private rules: RoutingRule[] = [];
  private aliases: Map<string, string> = new Map();
  private rateLimiters: Map<string, RateLimiter> = new Map();
  private config: RouterConfig;

  constructor(registry: ProviderRegistry, config: RouterConfig) {
    this.registry = registry;
    this.config = config;
    this.loadDefaultRules();
    this.loadAliases();
  }

  // Route request to best model/provider
  async route(
    request: CompletionRequest,
    context: TaskContext
  ): Promise<{ provider: LLMProvider; model: string }> {
    // Resolve alias if present
    const resolvedModel = this.resolveAlias(request.model);

    // Find matching rules
    const matchingRules = this.rules
      .filter(r => r.enabled && this.evaluateConditions(r.conditions, context, request))
      .sort((a, b) => b.priority - a.priority);

    if (matchingRules.length > 0) {
      const rule = matchingRules[0];
      return this.resolveTarget(rule.target, context);
    }

    // Default routing: find provider for model
    const provider = this.registry.getProviderForModel(resolvedModel);
    if (!provider) {
      // Try fallback chain
      return this.tryFallbackChain(resolvedModel, context);
    }

    // Check rate limits
    await this.checkRateLimit(provider.id, resolvedModel);

    return { provider, model: resolvedModel };
  }

  // Cost-optimized routing
  async routeOptimized(
    request: CompletionRequest,
    context: TaskContext
  ): Promise<{ provider: LLMProvider; model: string }> {
    const candidates = this.getCandidateModels(context);

    // Sort by cost (ascending)
    const sortedCandidates = candidates.sort((a, b) => {
      const costA = this.estimateCost(a.model, context.estimatedTokens);
      const costB = this.estimateCost(b.model, context.estimatedTokens);
      return costA - costB;
    });

    // Find cheapest that meets requirements
    for (const candidate of sortedCandidates) {
      if (this.meetsRequirements(candidate.model, context)) {
        const provider = this.registry.getProviderForModel(candidate.model);
        if (provider) {
          await this.checkRateLimit(provider.id, candidate.model);
          return { provider, model: candidate.model };
        }
      }
    }

    // Fallback to default
    return this.route(request, context);
  }

  // Load balancing across providers
  async routeBalanced(
    request: CompletionRequest,
    context: TaskContext
  ): Promise<{ provider: LLMProvider; model: string }> {
    const resolvedModel = this.resolveAlias(request.model);
    const compatibleProviders = this.getProvidersForModel(resolvedModel);

    if (compatibleProviders.length === 0) {
      return this.tryFallbackChain(resolvedModel, context);
    }

    // Round-robin with health awareness
    const healthyProviders = compatibleProviders.filter(p => {
      const status = this.registry.get(p.id)?.getStatus();
      return status?.healthy;
    });

    if (healthyProviders.length === 0) {
      throw new Error(`No healthy providers available for model: ${resolvedModel}`);
    }

    // Select based on current load
    const selected = this.selectLeastLoaded(healthyProviders);
    await this.checkRateLimit(selected.id, resolvedModel);

    return { provider: selected, model: resolvedModel };
  }

  // Add routing rule
  addRule(rule: RoutingRule): void {
    this.rules.push(rule);
    this.rules.sort((a, b) => b.priority - a.priority);
  }

  // Remove routing rule
  removeRule(ruleId: string): boolean {
    const index = this.rules.findIndex(r => r.id === ruleId);
    if (index >= 0) {
      this.rules.splice(index, 1);
      return true;
    }
    return false;
  }

  // Set model alias
  setAlias(alias: string, model: string): void {
    this.aliases.set(alias, model);
  }

  // Resolve a model alias (returns the input unchanged when no alias matches)
  resolveAlias(modelOrAlias: string): string {
    return this.aliases.get(modelOrAlias) || modelOrAlias;
  }

  // Estimate cost for request
  estimateCost(model: string, tokens: number): number {
    const modelInfo = this.getModelInfo(model);
    if (!modelInfo) return 0;

    const inputTokens = Math.ceil(tokens * 0.7);
    const outputTokens = Math.ceil(tokens * 0.3);

    return (
      (inputTokens / 1_000_000) * modelInfo.inputPrice +
      (outputTokens / 1_000_000) * modelInfo.outputPrice
    );
  }

  private loadDefaultRules(): void {
    this.rules = [
      {
        id: 'quick-tasks',
        name: 'Route quick tasks to fast models',
        conditions: [
          { type: 'task_type', operator: 'eq', value: 'quick_task' }
        ],
        target: { type: 'alias', value: 'fast' },
        priority: 100,
        enabled: true
      },
      {
        id: 'code-generation',
        name: 'Route code generation to capable models',
        conditions: [
          { type: 'task_type', operator: 'eq', value: 'code_generation' },
          { type: 'token_count', operator: 'gt', value: 1000 }
        ],
        target: { type: 'alias', value: 'smart' },
        priority: 90,
        enabled: true
      },
      {
        id: 'reasoning',
        name: 'Route complex reasoning to best models',
        conditions: [
          { type: 'task_type', operator: 'eq', value: 'reasoning' }
        ],
        target: { type: 'model', value: 'claude-opus-4-5' },
        priority: 95,
        enabled: true
      },
      {
        id: 'cost-sensitive',
        name: 'Route cost-sensitive requests to cheap models',
        conditions: [
          { type: 'cost_limit', operator: 'lt', value: 0.01 }
        ],
        target: { type: 'alias', value: 'cheap' },
        priority: 80,
        enabled: true
      }
    ];
  }

  private loadAliases(): void {
    // Model aliases for convenience
    this.aliases.set('fast', 'claude-3-5-haiku');
    this.aliases.set('smart', 'claude-sonnet-4');
    this.aliases.set('best', 'claude-opus-4-5');
    this.aliases.set('cheap', 'gpt-4o-mini');
    this.aliases.set('local', 'llama3');
    this.aliases.set('code', 'claude-sonnet-4');
    this.aliases.set('vision', 'gpt-4o');
  }

  private evaluateConditions(
    conditions: RoutingCondition[],
    context: TaskContext,
    request: CompletionRequest
  ): boolean {
    return conditions.every(c => this.evaluateCondition(c, context, request));
  }

  private evaluateCondition(
    condition: RoutingCondition,
    context: TaskContext,
    request: CompletionRequest
  ): boolean {
    let value: unknown;

    switch (condition.type) {
      case 'task_type':
        value = context.taskType;
        break;
      case 'token_count':
        value = context.estimatedTokens;
        break;
      case 'cost_limit':
        value = context.maxCost;
        break;
      case 'capability':
        value = context.requiredCapabilities;
        break;
      case 'custom':
        value = condition.field ? context.metadata?.[condition.field] : undefined;
        break;
      default:
        return false;
    }

    return this.compareValues(value, condition.operator, condition.value);
  }

  private compareValues(actual: unknown, operator: string, expected: unknown): boolean {
    switch (operator) {
      case 'eq': return actual === expected;
      case 'ne': return actual !== expected;
      case 'gt': return Number(actual) > Number(expected);
      case 'lt': return Number(actual) < Number(expected);
      case 'gte': return Number(actual) >= Number(expected);
      case 'lte': return Number(actual) <= Number(expected);
      case 'contains':
        return Array.isArray(actual) && actual.includes(expected);
      case 'matches':
        return typeof actual === 'string' && new RegExp(String(expected)).test(actual);
      default:
        return false;
    }
  }

  private async resolveTarget(
    target: RoutingTarget,
    context: TaskContext
  ): Promise<{ provider: LLMProvider; model: string }> {
    switch (target.type) {
      case 'model': {
        const model = target.value as string;
        const provider = this.registry.getProviderForModel(model);
        if (!provider) throw new Error(`No provider for model: ${model}`);
        return { provider, model };
      }
      case 'alias': {
        const model = this.resolveAlias(target.value as string);
        const provider = this.registry.getProviderForModel(model);
        if (!provider) throw new Error(`No provider for alias: ${target.value}`);
        return { provider, model };
      }
      case 'fallback_chain': {
        const models = target.value as string[];
        return this.tryFallbackChainFromList(models, context);
      }
      default:
        throw new Error(`Unknown target type: ${target.type}`);
    }
  }

  private async tryFallbackChain(
    model: string,
    context: TaskContext
  ): Promise<{ provider: LLMProvider; model: string }> {
    const fallbackChain = this.config.fallbackChains[model] || this.config.defaultFallbackChain;
    return this.tryFallbackChainFromList(fallbackChain, context);
  }

  private async tryFallbackChainFromList(
    models: string[],
    context: TaskContext
  ): Promise<{ provider: LLMProvider; model: string }> {
    for (const model of models) {
      const provider = this.registry.getProviderForModel(model);
      if (provider) {
        const status = provider.getStatus();
        if (status.healthy) {
          return { provider, model };
        }
      }
    }
    throw new Error('All providers in fallback chain unavailable');
  }

  private getCandidateModels(context: TaskContext): { model: string; provider: string }[] {
    const candidates: { model: string; provider: string }[] = [];

    for (const status of this.registry.list()) {
      if (!status.healthy || !status.enabled) continue;

      for (const model of status.availableModels) {
        if (this.meetsCapabilities(model, context.requiredCapabilities)) {
          candidates.push({ model, provider: status.id });
        }
      }
    }

    return candidates;
  }

  private meetsCapabilities(model: string, required?: string[]): boolean {
    if (!required || required.length === 0) return true;

    const modelInfo = this.getModelInfo(model);
    if (!modelInfo) return false;

    return required.every(cap => modelInfo.capabilities.includes(cap));
  }

  private meetsRequirements(model: string, context: TaskContext): boolean {
    const modelInfo = this.getModelInfo(model);
    if (!modelInfo) return false;

    // Check context window
    if (context.estimatedTokens > modelInfo.contextWindow) return false;

    // Check cost limit
    if (context.maxCost) {
      const estimatedCost = this.estimateCost(model, context.estimatedTokens);
      if (estimatedCost > context.maxCost) return false;
    }

    // Check capabilities
    if (!this.meetsCapabilities(model, context.requiredCapabilities)) return false;

    return true;
  }

  private getModelInfo(model: string): ModelInfo | undefined {
    // Check all providers for model info
    const allModels = {
      ...ClaudeProvider.MODELS,
      ...OpenAIProvider.MODELS,
      ...GeminiProvider.MODELS,
      ...LocalProvider.MODELS
    };

    return allModels[model as keyof typeof allModels];
  }

  private getProvidersForModel(model: string): LLMProvider[] {
    return this.registry.list()
      .filter(s => s.availableModels.includes(model) && s.healthy && s.enabled)
      .map(s => this.registry.get(s.id)!)
      .filter(Boolean);
  }

  private selectLeastLoaded(providers: LLMProvider[]): LLMProvider {
    let minLoad = Infinity;
    let selected = providers[0];

    for (const provider of providers) {
      const metrics = this.registry.getMetrics(provider.id);
      const load = metrics?.totalRequests || 0;
      if (load < minLoad) {
        minLoad = load;
        selected = provider;
      }
    }

    return selected;
  }

  private async checkRateLimit(providerId: string, model: string): Promise<void> {
    const key = `${providerId}:${model}`;
    let limiter = this.rateLimiters.get(key);

    if (!limiter) {
      const modelInfo = this.getModelInfo(model);
      limiter = new RateLimiter({
        requestsPerMinute: modelInfo?.rateLimit?.requestsPerMinute || 60,
        tokensPerMinute: modelInfo?.rateLimit?.tokensPerMinute || 100000
      });
      this.rateLimiters.set(key, limiter);
    }

    await limiter.acquire();
  }
}
```
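
Rules registered at runtime participate in the same priority sort as the defaults. A sketch of adding a custom rule and alias on a `ModelRouter` instance (direct access to `router` is assumed here, since `UnifiedLLMAPI` keeps its router private; the rule id, threshold, and model name are illustrative):

```typescript
// Route long documentation jobs to a long-context model (illustrative rule)
router.setAlias('long-context', 'gemini-1.5-pro');

router.addRule({
  id: 'long-docs',
  name: 'Route large documentation jobs to a long-context model',
  conditions: [
    { type: 'task_type', operator: 'eq', value: 'documentation' },
    { type: 'token_count', operator: 'gt', value: 100_000 }
  ],
  target: { type: 'alias', value: 'long-context' },
  priority: 85, // between the defaults: above cost-sensitive (80), below code-generation (90)
  enabled: true
});
```

Because `route()` takes the highest-priority match, a rule's `priority` is the main tuning knob when several conditions overlap.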

---

## UnifiedAPI

### Consistent API

```typescript
class UnifiedLLMAPI {
  private registry: ProviderRegistry;
  private router: ModelRouter;
  private config: UnifiedAPIConfig;

  constructor(config: UnifiedAPIConfig) {
    this.config = config;
    this.registry = new ProviderRegistry(config.registry);
    this.router = new ModelRouter(this.registry, config.router);
  }

  // Initialize all providers
  async initialize(): Promise<void> {
    // Register configured providers
    if (this.config.providers.claude?.enabled) {
      await this.registry.register(
        new ClaudeProvider(this.config.providers.claude),
        { priority: 1 }
      );
    }

    if (this.config.providers.openai?.enabled) {
      await this.registry.register(
        new OpenAIProvider(this.config.providers.openai),
        { priority: 2 }
      );
    }

    if (this.config.providers.gemini?.enabled) {
      await this.registry.register(
        new GeminiProvider(this.config.providers.gemini),
        { priority: 3 }
      );
    }

    if (this.config.providers.local?.enabled) {
      await this.registry.register(
        new LocalProvider(this.config.providers.local),
        { priority: 10 }
      );
    }

    if (this.config.providers.azure?.enabled) {
      await this.registry.register(
        new AzureProvider(this.config.providers.azure),
        { priority: 2 }
      );
    }

    // Start health checks
    this.registry.startHealthChecks(this.config.healthCheckInterval);
  }

  // Main completion method
  async complete(
    request: CompletionRequest,
    options: CompletionOptions = {}
  ): Promise<CompletionResponse> {
    const context = this.buildContext(request, options);

    // Route to best provider/model
    const { provider, model } = options.costOptimized
      ? await this.router.routeOptimized(request, context)
      : await this.router.route(request, context);

    const startTime = Date.now();

    try {
      // Execute with retry and fallback
      const response = await this.executeWithFallback(
        provider,
        { ...request, model },
        context
      );

      // Record metrics
      const latencyMs = Date.now() - startTime;
      const cost = this.calculateCost(model, response.usage);

      this.registry.recordMetrics(
        provider.id,
        true,
        latencyMs,
        response.usage.totalTokens,
        cost
      );

      // Emit event
      EventBus.publish('llm.completion', {
        provider: provider.id,
        model,
        tokens: response.usage.totalTokens,
        latencyMs,
        cost
      });

      return response;

    } catch (error) {
      this.registry.recordMetrics(provider.id, false, Date.now() - startTime, 0, 0);
      throw error;
    }
  }

  // Streaming completion
  async *stream(
    request: CompletionRequest,
    options: CompletionOptions = {}
  ): AsyncIterable<StreamChunk> {
    const context = this.buildContext(request, options);
    const { provider, model } = await this.router.route(request, context);

    yield* provider.stream({ ...request, model });
  }

  // Chat convenience method
  async chat(
    messages: Message[],
    options: ChatOptions = {}
  ): Promise<CompletionResponse> {
    return this.complete({
      model: options.model || this.config.defaultModel,
      messages,
      temperature: options.temperature,
      maxTokens: options.maxTokens,
      tools: options.tools,
      toolChoice: options.toolChoice
    }, options);
  }

  // Embed texts
  async embed(
    texts: string[],
    options: EmbedOptions = {}
  ): Promise<EmbeddingResponse> {
    const providerId = options.provider || 'openai';
    const provider = this.registry.get(providerId);

    if (!provider?.embed) {
      throw new Error(`Provider ${providerId} does not support embeddings`);
    }

    return provider.embed(texts);
  }

  // Count tokens
  async countTokens(
    text: string,
    model?: string
  ): Promise<number> {
    const targetModel = model || this.config.defaultModel;
    const provider = this.registry.getProviderForModel(targetModel);

    if (!provider) {
      // Fallback to approximate count
      return Math.ceil(text.length / 4);
    }

    return provider.countTokens(text, targetModel);
  }

  // Estimate cost
  estimateCost(
    model: string,
    inputTokens: number,
    outputTokens: number
  ): number {
    return this.router.estimateCost(model, inputTokens + outputTokens);
  }

  // Get provider status
  getProviderStatus(): ProviderStatus[] {
    return this.registry.list();
  }

  // Get available models
  getAvailableModels(): ModelInfo[] {
    const models: ModelInfo[] = [];

    for (const status of this.registry.list()) {
      if (!status.enabled) continue;

      for (const model of status.availableModels) {
        models.push({
          id: model,
          provider: status.id,
          ...this.getModelDetails(model)
        });
      }
    }

    return models;
  }

  // Shutdown
  async shutdown(): Promise<void> {
    await this.registry.shutdown();
  }

  private buildContext(
    request: CompletionRequest,
    options: CompletionOptions
  ): TaskContext {
    return {
      taskType: options.taskType || 'conversation',
      estimatedTokens: options.estimatedTokens || this.estimateTokens(request),
      maxCost: options.maxCost,
      requiredCapabilities: options.requiredCapabilities,
      priority: options.priority,
      metadata: options.metadata
    };
  }

  private estimateTokens(request: CompletionRequest): number {
    let total = 0;
    for (const msg of request.messages) {
      const content = typeof msg.content === 'string'
        ? msg.content
        : msg.content.map(b => b.text || '').join('');
      total += Math.ceil(content.length / 4);
    }
    return total + (request.maxTokens || 1000);
  }

  private async executeWithFallback(
    provider: LLMProvider,
    request: CompletionRequest,
    context: TaskContext,
    attempt: number = 1
  ): Promise<CompletionResponse> {
    try {
      return await provider.complete(request);
    } catch (error) {
      if (attempt >= this.config.maxRetries) {
        // Try fallback provider
        if (this.config.enableFallback) {
          const fallbackResult = await this.router.routeBalanced(request, context);
          if (fallbackResult.provider.id !== provider.id) {
            return fallbackResult.provider.complete({
              ...request,
              model: fallbackResult.model
            });
          }
        }
        throw error;
      }

      // Retry with exponential backoff
      await this.sleep(Math.pow(2, attempt) * 1000);
      return this.executeWithFallback(provider, request, context, attempt + 1);
    }
  }

  private calculateCost(model: string, usage: TokenUsage): number {
    const modelInfo = this.getModelDetails(model);
    if (!modelInfo) return 0;

    return (
      (usage.promptTokens / 1_000_000) * modelInfo.inputPrice +
      (usage.completionTokens / 1_000_000) * modelInfo.outputPrice
    );
  }

  private getModelDetails(model: string): ModelDetails | undefined {
    const allModels = {
      ...ClaudeProvider.MODELS,
      ...OpenAIProvider.MODELS,
      ...GeminiProvider.MODELS,
      ...LocalProvider.MODELS
    };
    return allModels[model as keyof typeof allModels];
  }

  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
```
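
For reference, the configuration object the class consumes can be sketched from the fields it reads above. This shape is inferred from usage, not the package's published type, so treat field names, optionality, and the intersection types as assumptions:

```typescript
interface UnifiedAPIConfig {
  defaultModel: string;
  maxRetries: number;           // retry budget before the fallback chain kicks in
  enableFallback: boolean;      // allow re-routing to another provider on failure
  healthCheckInterval?: number; // passed to registry.startHealthChecks()
  registry: RegistryConfig;     // forwarded to ProviderRegistry
  router: RouterConfig;         // forwarded to ModelRouter
  providers: {
    claude?: ClaudeConfig & { enabled: boolean };
    openai?: OpenAIConfig & { enabled: boolean };
    gemini?: GeminiConfig & { enabled: boolean };
    local?: LocalConfig & { enabled: boolean };
    azure?: AzureConfig & { enabled: boolean };
  };
}
```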

---

## Capability Matrix

| Model | Provider | Context | Output | Vision | Tools | Streaming | Input $/1M | Output $/1M |
|--------|----------|---------|--------|--------|-------|-----------|------------|-------------|
| claude-opus-4-5 | Claude | 200K | 32K | Yes | Yes | Yes | $15.00 | $75.00 |
| claude-sonnet-4 | Claude | 200K | 64K | Yes | Yes | Yes | $3.00 | $15.00 |
| claude-3-5-haiku | Claude | 200K | 8K | Yes | Yes | Yes | $0.80 | $4.00 |
| gpt-4o | OpenAI | 128K | 16K | Yes | Yes | Yes | $2.50 | $10.00 |
| gpt-4-turbo | OpenAI | 128K | 4K | Yes | Yes | Yes | $10.00 | $30.00 |
| gpt-4o-mini | OpenAI | 128K | 16K | Yes | Yes | Yes | $0.15 | $0.60 |
| gpt-3.5-turbo | OpenAI | 16K | 4K | No | Yes | Yes | $0.50 | $1.50 |
| gemini-2.0-flash | Gemini | 1M | 8K | Yes | Yes | Yes | $0.075 | $0.30 |
| gemini-1.5-pro | Gemini | 2M | 8K | Yes | Yes | Yes | $1.25 | $5.00 |
| llama3 | Local | 8K | 4K | No | Yes | Yes | $0.00 | $0.00 |
| llama3.2 | Local | 128K | 4K | Yes | Yes | Yes | $0.00 | $0.00 |
| mistral | Local | 32K | 4K | No | Yes | Yes | $0.00 | $0.00 |
| codellama | Local | 16K | 4K | No | No | Yes | $0.00 | $0.00 |
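
As a worked example against the prices above: `ModelRouter.estimateCost` assumes a 70/30 input/output token split, so 10,000 tokens on claude-sonnet-4 price out as follows (`llm` stands for the `UnifiedLLMAPI` instance used in the examples below):

```typescript
// 10,000 total tokens on claude-sonnet-4 (the router re-splits the sum 70/30):
//   input:  7,000 / 1,000,000 * $3.00  = $0.021
//   output: 3,000 / 1,000,000 * $15.00 = $0.045
const approxCost = llm.estimateCost('claude-sonnet-4', 7_000, 3_000); // ≈ 0.066
```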

---

## Usage Examples

### Basic Initialization

```typescript
import { UnifiedLLMAPI } from '@elsabro/llm-providers';

const llm = new UnifiedLLMAPI({
  defaultModel: 'claude-sonnet-4',
  providers: {
    claude: {
      enabled: true,
      apiKey: process.env.ANTHROPIC_API_KEY
    },
    openai: {
      enabled: true,
      apiKey: process.env.OPENAI_API_KEY
    },
    local: {
      enabled: true,
      baseUrl: 'http://localhost:11434'
    }
  },
  router: {
    defaultFallbackChain: ['claude-sonnet-4', 'gpt-4o', 'llama3']
  }
});

await llm.initialize();
```

### Simple Completion

```typescript
const response = await llm.complete({
  model: 'claude-sonnet-4',
  messages: [
    { role: 'user', content: 'Explain quantum computing in simple terms' }
  ],
  maxTokens: 500
});

console.log(response.content);
console.log(`Tokens: ${response.usage.totalTokens}`);
```

### Routing by Task Type

```typescript
// Code generation - routes to smart model
const codeResponse = await llm.complete({
  model: 'auto',
  messages: [
    { role: 'user', content: 'Write a binary search function in TypeScript' }
  ]
}, {
  taskType: 'code_generation',
  estimatedTokens: 2000 // above the default rule's 1,000-token threshold
});

// Quick task - routes to fast model
const quickResponse = await llm.complete({
  model: 'auto',
  messages: [
    { role: 'user', content: 'What is 2+2?' }
  ]
}, {
  taskType: 'quick_task'
});
```

### Cost Optimization

```typescript
const response = await llm.complete({
  model: 'auto',
  messages: [
    { role: 'user', content: 'Summarize this document...' }
  ]
}, {
  costOptimized: true,
  maxCost: 0.01, // Max $0.01 per request
  taskType: 'summarization'
});
```

### Streaming

```typescript
const stream = llm.stream({
  model: 'claude-sonnet-4',
  messages: [
    { role: 'user', content: 'Write a short story about a robot' }
  ]
});

for await (const chunk of stream) {
  if (chunk.type === 'content') {
    process.stdout.write(chunk.content || '');
  }
}
```

### Using Model Aliases

```typescript
// Use predefined aliases
const fastResponse = await llm.chat([
  { role: 'user', content: 'Quick question: capital of France?' }
], { model: 'fast' }); // Routes to claude-3-5-haiku

const smartResponse = await llm.chat([
  { role: 'user', content: 'Complex reasoning task...' }
], { model: 'smart' }); // Routes to claude-sonnet-4

const bestResponse = await llm.chat([
  { role: 'user', content: 'Very complex task...' }
], { model: 'best' }); // Routes to claude-opus-4-5
```

### Provider Health Check

```typescript
const status = llm.getProviderStatus();

for (const provider of status) {
  console.log(`${provider.name}: ${provider.healthy ? 'OK' : 'DOWN'}`);
  console.log(`  Models: ${provider.availableModels.join(', ')}`);
}
```

### Tool Use

```typescript
const response = await llm.complete({
  model: 'claude-sonnet-4',
  messages: [
    { role: 'user', content: 'What is the weather in Paris?' }
  ],
  tools: [{
    name: 'get_weather',
    description: 'Get current weather for a location',
    parameters: {
      type: 'object',
      properties: {
        location: { type: 'string', description: 'City name' }
      },
      required: ['location']
    }
  }],
  toolChoice: 'auto'
});

if (response.toolCalls) {
  for (const call of response.toolCalls) {
    console.log(`Tool: ${call.name}`);
    console.log(`Args: ${call.arguments}`);
  }
}
```
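
The API also exposes `embed` and `countTokens`, which have no example above. A short sketch (the input strings are illustrative; `embed` requires a provider that implements embeddings and defaults to `openai`):

```typescript
// Embeddings go through a provider that implements embed() ('openai' by default)
const embeddings = await llm.embed(
  ['first document', 'second document'],
  { provider: 'openai' }
);

// Token counting delegates to the provider serving the target model,
// falling back to a rough length/4 estimate when no provider matches
const tokens = await llm.countTokens('How many tokens is this?', 'claude-sonnet-4');
```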

---

## Commands

```bash
/elsabro:llm status                    # Show provider status
/elsabro:llm models                    # List available models
/elsabro:llm test <provider>           # Test a provider
/elsabro:llm route <model>             # Show routing for a model
/elsabro:llm cost <model> <tokens>     # Estimate cost
/elsabro:llm alias set <name> <model>  # Create an alias
/elsabro:llm config                    # Show current configuration
```
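
For example, estimating the cost of a roughly 10,000-token request against a specific model (arguments follow the signature listed above; the output format is not shown here):

```bash
/elsabro:llm cost claude-sonnet-4 10000
```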

---

## Configuration

See `templates/multi-llm-config.json` for the full configuration.

---

## Changelog

- **v3.6.0**: Initial Multi-LLM Providers System
  - ProviderRegistry with health checks
  - 5 provider implementations (Claude, OpenAI, Gemini, Local, Azure)
  - ModelRouter with intelligent routing
  - UnifiedAPI with a consistent interface
  - Cost optimization and load balancing
  - Cross-provider streaming support