claude-code-router-config 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,543 @@
1
+ /**
2
+ * Enhanced Smart Intent Router
3
+ * Intelligent routing with cost, performance, and quality optimization
4
+ *
5
+ * Features:
6
+ * - Intent-based routing
7
+ * - Cost-aware provider selection
8
+ * - Performance-based routing
9
+ * - Health monitoring integration
10
+ * - Auto-fallback mechanisms
11
+ * - Learning from past performance
12
+ *
13
+ * Configuration by Halil Ertekin
14
+ */
15
+
16
+ const fs = require('fs');
17
+ const path = require('path');
18
+ const os = require('os');
19
+
20
// Location of the JSON file that stores routing metrics, inside the
// `.claude-code-router` directory of the current user's home directory.
const METRICS_PATH = path.join(
  os.homedir(),
  '.claude-code-router',
  'routing-metrics.json',
);
22
+
23
/**
 * Load the persisted routing metrics from METRICS_PATH.
 *
 * Read or parse failures are logged and treated as "no stored metrics".
 * Whatever was read is then normalized so the result always has a
 * `providers` object, an `intents` object and a `requests` array; the rest
 * of this module indexes into those sections without checking them.
 *
 * @returns {{providers: Object, intents: Object, requests: Array}} Metrics store.
 */
function loadMetrics() {
  let stored = null;

  try {
    if (fs.existsSync(METRICS_PATH)) {
      stored = JSON.parse(fs.readFileSync(METRICS_PATH, 'utf8'));
    }
  } catch (error) {
    console.error('Failed to load metrics:', error);
  }

  const isRecord = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  // A file containing e.g. `{}` or `null` parses fine but is not a usable store.
  const base = isRecord(stored) ? stored : {};

  return {
    ...base,
    providers: isRecord(base.providers) ? base.providers : {},
    intents: isRecord(base.intents) ? base.intents : {},
    requests: Array.isArray(base.requests) ? base.requests : [],
  };
}
38
+
39
/**
 * Persist the metrics store to METRICS_PATH as pretty-printed JSON.
 *
 * The request log is trimmed in place to its most recent 1000 entries before
 * writing. The parent directory is created when missing; without that the
 * write fails on every call until something else creates the directory.
 * Failures are logged and never thrown.
 *
 * @param {{requests: Array}} metrics - Metrics store; `requests` may be trimmed.
 * @returns {void}
 */
function saveMetrics(metrics) {
  const maxStoredRequests = 1000;

  try {
    if (Array.isArray(metrics.requests) && metrics.requests.length > maxStoredRequests) {
      metrics.requests = metrics.requests.slice(-maxStoredRequests);
    }

    fs.mkdirSync(path.dirname(METRICS_PATH), { recursive: true });
    fs.writeFileSync(METRICS_PATH, JSON.stringify(metrics, null, 2));
  } catch (error) {
    console.error('Failed to save metrics:', error);
  }
}
51
+
52
/**
 * Intent definitions used for routing.
 *
 * Each entry holds:
 *  - patterns:  regular expressions tested against the request text
 *  - route:     preferred "provider,model" route
 *  - strategy:  name of the routing strategy to apply for this intent
 *  - fallbacks: alternative "provider,model" routes
 *  - priority:  label that is turned into a score weight during detection
 *
 * Patterns containing non-ASCII words use Unicode-aware lookarounds instead
 * of `\b`: `\b` only knows ASCII word characters, so a word that starts or
 * ends with a letter such as "ç" or "ı" can never satisfy it on that side.
 * Every pattern keeps the same number of capture groups as before.
 */
const INTENTS = {
  // Coding tasks → OpenAI (GPT-4o, O1) with fallbacks
  CODING: {
    patterns: [
      /\b(implement|refactor|debug|fix|write|code|function|class|method|bug|error|compile|syntax)\b/i,
      /\b(typescript|javascript|python|rust|go|java|react|vue|angular|swift|kotlin)\b/i,
      /\b(api|endpoint|database|query|migration|schema|test|unit test)\b/i,
      /\b(codex|o1|reasoning)\b/i
    ],
    route: "openai,gpt-4o",
    strategy: "quality",
    fallbacks: ["anthropic,claude-sonnet-4-latest", "qwen,qwen3-coder-plus"],
    priority: "high"
  },

  // Deep reasoning → Anthropic Claude with cost consideration
  REASONING: {
    patterns: [
      /\b(architect|design|analyze|plan|strategy|structure|system|trade-?off)\b/i,
      /\b(why|explain|reason|understand|compare|evaluate|consider|review)\b/i,
      /\b(decision|approach|best practice|pattern|principle|philosophy)\b/i
    ],
    route: "anthropic,claude-sonnet-4-latest",
    strategy: "quality",
    fallbacks: ["openai,gpt-4o", "gemini,gemini-2.5-pro"],
    priority: "high"
  },

  // Fast responses → Gemini Flash
  FAST: {
    patterns: [
      // Unicode-aware boundaries so the Turkish "hızlı" (ends in "ı") can match.
      /(?<![\p{L}\p{N}_])(fast|quick|brief|short|summary|tldr|overview|hızlı)(?![\p{L}\p{N}_])/iu,
      /\b(scan|check|verify|validate)\b/i
    ],
    route: "gemini,gemini-2.5-flash",
    strategy: "performance",
    fallbacks: ["qwen,qwen-turbo", "glm,glm-4.5"],
    priority: "medium"
  },

  // Simple/cheap tasks → Qwen
  SIMPLE: {
    patterns: [
      /\b(list|show|what is|simple|basic|help|how to|format)\b/i,
      /\b(rename|move|delete|create file|mkdir|copy)\b/i,
      /\b(ucuz|basit|kolay)\b/i
    ],
    route: "qwen,qwen-plus",
    strategy: "cost",
    fallbacks: ["glm,glm-4.5", "gemini,gemini-2.5-flash"],
    priority: "low"
  },

  // Multilingual → GLM
  MULTILINGUAL: {
    patterns: [
      // Unicode-aware boundaries so "çevir" (starts with "ç") can match.
      /(?<![\p{L}\p{N}_])(translate|çevir|tercüme|chinese|türkçe|multilingual)(?![\p{L}\p{N}_])/iu,
      /[\u4e00-\u9fff]/, // Chinese characters
      /[\u0600-\u06FF]/, // Arabic
    ],
    route: "glm,glm-4.6",
    strategy: "quality",
    fallbacks: ["qwen,qwen-plus", "gemini,gemini-2.5-flash"],
    priority: "medium"
  },

  // Heavy reasoning → O1
  HEAVY_REASONING: {
    patterns: [
      /\b(complex algorithm|optimization|performance critical|system design)\b/i,
      /\b(prove|mathematical|theorem|formal verification)\b/i
    ],
    route: "openai,o1",
    strategy: "quality",
    fallbacks: ["anthropic,claude-sonnet-4-latest", "openai,gpt-4o"],
    priority: "highest"
  },

  // AgentSkills commands → Anthropic with specialized routing
  AGENT_SKILLS: {
    patterns: [
      // All /sc: commands. No `\b` before the slash: "/" is not a word
      // character, so `\b/` only matched when a word character preceded the
      // slash and a command at the start of the text was never detected.
      /\/sc:[\w-]+/i,
      /\b(business panel|expert analysis|strategic review)\b/i,
      /\b(mcp:|context7|magic|morphllm|playwright|serena)\b/i,
      /\b(skill:|capability:|expertise:)\b/i
    ],
    route: "anthropic,claude-sonnet-4-latest",
    strategy: "quality",
    fallbacks: ["openai,gpt-4o", "gemini,gemini-2.5-pro"],
    priority: "highest"
  }
};
145
+
146
/**
 * Static capability and cost profiles, keyed by provider name.
 *
 * Each provider carries coarse tier labels, a list of specialties, and a
 * `models` table whose entries hold the numeric `cost`, `speed` and
 * `quality` figures plus a `capability` label used by the routing
 * strategies.
 */
const PROVIDER_PROFILES = (() => {
  // Builds one model row; argument order mirrors the table columns below.
  const spec = (cost, speed, quality, capability) => ({ cost, speed, quality, capability });

  const openai = {
    costTier: "medium",
    performanceTier: "high",
    qualityTier: "high",
    speedTier: "medium",
    specialties: ["coding", "reasoning", "math"],
    models: {
      "gpt-4o": spec(1, 1, 1, "general"),
      "gpt-4o-mini": spec(0.1, 1.5, 0.8, "general"),
      "gpt-4-turbo": spec(2, 1, 1.1, "general"),
      "o1": spec(3, 0.3, 1.3, "reasoning"),
      "o1-mini": spec(0.6, 0.5, 1.1, "reasoning"),
    },
  };

  const anthropic = {
    costTier: "high",
    performanceTier: "high",
    qualityTier: "highest",
    speedTier: "medium",
    specialties: ["reasoning", "writing", "analysis", "safety"],
    models: {
      "claude-sonnet-4-latest": spec(3, 1, 1.3, "general"),
      "claude-3-5-sonnet-latest": spec(0.6, 1.2, 1, "general"),
      "claude-3-5-haiku-latest": spec(0.2, 2, 0.7, "general"),
    },
  };

  const gemini = {
    costTier: "low",
    performanceTier: "high",
    qualityTier: "medium",
    speedTier: "highest",
    specialties: ["speed", "context", "multilingual"],
    models: {
      "gemini-2.5-flash": spec(0.01, 3, 0.8, "general"),
      "gemini-2.5-pro": spec(0.25, 1.5, 1, "general"),
      "gemini-2.0-flash": spec(0.01, 3, 0.7, "general"),
    },
  };

  const qwen = {
    costTier: "low",
    performanceTier: "medium",
    qualityTier: "medium",
    speedTier: "high",
    specialties: ["cost-effective", "multilingual", "coding"],
    models: {
      "qwen-plus": spec(0.1, 2, 0.8, "general"),
      "qwen-max": spec(0.4, 1.5, 1, "general"),
      "qwen-turbo": spec(0.03, 2.5, 0.6, "general"),
      "qwen3-coder-plus": spec(0.4, 1.8, 1.1, "coding"),
    },
  };

  const glm = {
    costTier: "low",
    performanceTier: "medium",
    qualityTier: "medium",
    speedTier: "high",
    specialties: ["chinese", "multilingual", "translation"],
    models: {
      "glm-4.6": spec(0.1, 2, 0.8, "multilingual"),
      "glm-4.5": spec(0.1, 2, 0.8, "multilingual"),
      "glm-4-plus": spec(0.2, 1.5, 0.9, "general"),
    },
  };

  const openrouter = {
    costTier: "variable",
    performanceTier: "variable",
    qualityTier: "variable",
    speedTier: "variable",
    specialties: ["variety", "fallback", "specialized"],
    models: {
      "deepseek/deepseek-chat": spec(0.14, 1.5, 0.9, "coding"),
      "meta-llama/llama-3.2-3b-instruct": spec(0.1, 2.5, 0.7, "general"),
    },
  };

  return { openai, anthropic, gemini, qwen, glm, openrouter };
})();
223
+
224
/**
 * Read one numeric figure (`cost`, `speed` or `quality`) for a candidate from
 * PROVIDER_PROFILES.
 *
 * @param {{provider: string, model: string}} candidate - Routing candidate.
 * @param {string} field - Name of the profile field to read.
 * @param {number} fallback - Value used when the provider, model or field is
 *   not present in the profiles (or is not a finite number).
 * @returns {number} The profile figure, or `fallback`.
 */
function readModelMetric(candidate, field, fallback) {
  const value = PROVIDER_PROFILES[candidate.provider]?.models?.[candidate.model]?.[field];
  // Explicit number check: a legitimate 0 must not be replaced by the fallback.
  return Number.isFinite(value) ? value : fallback;
}

/**
 * Routing strategies, keyed by strategy name.
 *
 * Every strategy exposes `select(candidates, metrics, intent)` and returns a
 * NEW array ordered best-first; the array passed in is left untouched.
 * Candidates missing from PROVIDER_PROFILES sort last.
 */
const ROUTING_STRATEGIES = {
  cost: {
    name: "Cost-Optimized",
    // Cheapest first; unknown models are treated as very expensive (999).
    select: (candidates) =>
      [...candidates].sort(
        (a, b) => readModelMetric(a, 'cost', 999) - readModelMetric(b, 'cost', 999),
      ),
  },

  performance: {
    name: "Performance-Optimized",
    // Fastest first; unknown models count as speed 0.
    select: (candidates) =>
      [...candidates].sort(
        (a, b) => readModelMetric(b, 'speed', 0) - readModelMetric(a, 'speed', 0),
      ),
  },

  quality: {
    name: "Quality-Optimized",
    // Highest quality first; unknown models count as quality 0.
    select: (candidates) =>
      [...candidates].sort(
        (a, b) => readModelMetric(b, 'quality', 0) - readModelMetric(a, 'quality', 0),
      ),
  },

  adaptive: {
    name: "Adaptive",
    // Blends the static profile with the historical modifier into one score;
    // each returned candidate is a copy carrying that `score`.
    select: (candidates, metrics, intent) => {
      const scored = candidates.map((candidate) => {
        const profile = PROVIDER_PROFILES[candidate.provider]?.models?.[candidate.model];

        if (!profile) return { ...candidate, score: 0 };

        const historical = getHistoricalPerformance(
          candidate.provider,
          candidate.model,
          intent,
          metrics,
        );

        // Quality counts double, speed once, cost is a half-weight penalty.
        const score =
          profile.quality * 2 +
          profile.speed -
          profile.cost * 0.5 +
          historical.performanceModifier;

        return { ...candidate, score };
      });

      return scored.sort((a, b) => b.score - a.score);
    },
  },
};
288
+
289
/**
 * Summarize recorded performance for one provider/model pair and derive a
 * score adjustment (`performanceModifier`) from it.
 *
 * Starts from fixed defaults (3000 latency, 0.95 success rate) and applies
 * whatever is recorded under `metrics.providers["provider/model"]` and
 * `metrics.intents[intent]`. Missing metrics, sections or fields simply
 * leave the defaults in place.
 *
 * @param {string} provider - Provider name.
 * @param {string} model - Model name.
 * @param {string} intent - Intent key looked up in `metrics.intents`.
 * @param {{providers?: Object, intents?: Object}} [metrics] - Metrics store.
 * @returns {{avgLatency: number, successRate: number, costPerRequest: number,
 *   performanceModifier: number}} Performance summary.
 */
function getHistoricalPerformance(provider, model, intent, metrics) {
  const key = `${provider}/${model}`;
  const intentMetrics = metrics?.intents?.[intent] || {};
  const providerMetrics = metrics?.providers?.[key] || {};

  // Defaults used when nothing has been recorded.
  const performance = {
    avgLatency: 3000,
    successRate: 0.95,
    costPerRequest: 0.01,
    performanceModifier: 0
  };

  // Provider entries may only hold running totals (`totalLatency`, `requests`);
  // derive the average from them when no explicit `avgLatency` is stored.
  let providerAvgLatency = providerMetrics.avgLatency;
  if (
    !Number.isFinite(providerAvgLatency) &&
    providerMetrics.requests > 0 &&
    Number.isFinite(providerMetrics.totalLatency)
  ) {
    providerAvgLatency = providerMetrics.totalLatency / providerMetrics.requests;
  }

  // Number checks instead of truthiness: 0 is a meaningful recorded value.
  if (Number.isFinite(providerAvgLatency)) {
    performance.avgLatency = providerAvgLatency;
    performance.performanceModifier -= (performance.avgLatency - 3000) / 10000; // Penalty for slow
  }

  if (Number.isFinite(providerMetrics.successRate)) {
    performance.successRate = providerMetrics.successRate;
    performance.performanceModifier += (performance.successRate - 0.95) * 10; // Bonus for reliability
  }

  if (Number.isFinite(intentMetrics.avgLatency)) {
    performance.performanceModifier -= (intentMetrics.avgLatency - 3000) / 8000;
  }

  return performance;
}
320
+
321
/**
 * Collect the text of all user and system messages of a request.
 *
 * String contents are used as-is; any other content value is serialized with
 * JSON.stringify. The pieces are joined with single spaces and the result is
 * cut to the first 3000 characters to bound the cost of pattern matching.
 *
 * @param {{body?: {messages?: Array}}} [req] - Incoming request.
 * @returns {string} Concatenated message text; empty when `req`, its body or
 *   a `messages` array is missing.
 */
function extractContent(req) {
  const messages = req?.body?.messages;
  if (!Array.isArray(messages)) return '';

  return messages
    // Skip null/undefined entries instead of throwing on `.role`.
    .filter((m) => m != null && (m.role === 'user' || m.role === 'system'))
    .map((m) => (typeof m.content === 'string' ? m.content : JSON.stringify(m.content)))
    .join(' ')
    .slice(0, 3000); // Limit for performance
}
330
+
331
/**
 * Score every entry of INTENTS against the text and return the best one.
 *
 * An intent's raw score is the total number of occurrences of all its
 * patterns in `content`; that count is multiplied by a weight derived from
 * the intent's `priority` label (unknown labels weigh 1.0).
 *
 * Occurrences are counted with a global copy of each pattern: calling
 * `String.prototype.match` with a non-global regex returns the first match
 * plus its capture groups, so its `length` is not a match count.
 *
 * @param {string} content - Text to classify.
 * @returns {{intent: string, confidence: number, alternatives: Array}|null}
 *   Best intent with its weighted score and up to two runner-up
 *   `[intent, score]` pairs, or null when nothing matched or `content` is
 *   not a non-empty string.
 */
function detectIntent(content) {
  if (typeof content !== 'string' || content.length === 0) return null;

  const priorityWeight = {
    highest: 1.5,
    high: 1.3,
    medium: 1.0,
    low: 0.8
  };

  const countOccurrences = (pattern) => {
    const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
    const everyMatch = content.match(new RegExp(pattern.source, flags));
    return everyMatch === null ? 0 : everyMatch.length;
  };

  const ranked = Object.entries(INTENTS)
    .map(([intent, config]) => {
      const occurrences = config.patterns.reduce(
        (total, pattern) => total + countOccurrences(pattern),
        0,
      );
      return [intent, occurrences * (priorityWeight[config.priority] || 1.0)];
    })
    // Intents without any match are not candidates at all.
    .filter(([, score]) => score > 0)
    .sort((a, b) => b[1] - a[1]);

  if (ranked.length === 0) return null;

  const [bestIntent, bestScore] = ranked[0];
  return {
    intent: bestIntent,
    confidence: bestScore,
    alternatives: ranked.slice(1, 3)
  };
}
363
+
364
/**
 * Flatten `config.Providers` into one entry per provider/model pair.
 *
 * @param {{Providers?: Array<{name: string, models?: Array<string>}>}} [config]
 *   Router configuration.
 * @returns {Array<{provider: string, model: string, route: string}>} One
 *   entry per model, with `route` formatted as "provider,model". Empty when
 *   the config has no `Providers` array; providers without a `models` array
 *   contribute nothing.
 */
function getAvailableProviders(config) {
  const configured = config?.Providers;
  if (!Array.isArray(configured)) return [];

  return configured.flatMap((provider) => {
    const models = Array.isArray(provider?.models) ? provider.models : [];
    return models.map((model) => ({
      provider: provider.name,
      model,
      route: `${provider.name},${model}`,
    }));
  });
}
382
+
383
/**
 * Build the list of routing candidates for a detected intent.
 *
 * The intent's primary route comes first (when that provider/model is
 * available), followed by each available fallback route in the order listed
 * in INTENTS.
 *
 * The primary route is resolved from `INTENTS[detectedIntent.intent].route`.
 * The detection result built in this file only carries `intent`,
 * `confidence` and `alternatives`, so reading a `route` property from it
 * alone never finds the primary. An explicit `detectedIntent.route` is
 * still honored when a caller supplies one.
 *
 * @param {{intent: string, route?: string}|null} detectedIntent - Detection result.
 * @param {Array<{provider: string, model: string, route: string}>} availableProviders
 *   Configured provider/model entries.
 * @returns {Array<Object>} Candidates annotated with `source` and `reason`
 *   (fallbacks also get a 1-based `priority`); empty when there is no intent
 *   or no available provider.
 */
function generateCandidates(detectedIntent, availableProviders) {
  if (!detectedIntent || !Array.isArray(availableProviders) || availableProviders.length === 0) {
    return [];
  }

  const intentConfig = INTENTS[detectedIntent.intent];
  const findByRoute = (route) => availableProviders.find((p) => p.route === route);
  const candidates = [];

  // Add primary route
  const primary = findByRoute(detectedIntent.route ?? intentConfig?.route);
  if (primary) {
    candidates.push({
      ...primary,
      source: 'primary',
      reason: 'Intent match'
    });
  }

  // Add fallbacks; `priority` reflects the position in the configured list,
  // not the position among the fallbacks that happen to be available.
  (intentConfig?.fallbacks || []).forEach((fallbackRoute, index) => {
    const fallback = findByRoute(fallbackRoute);
    if (fallback) {
      candidates.push({
        ...fallback,
        source: 'fallback',
        priority: index + 1,
        reason: `Fallback ${index + 1}`
      });
    }
  });

  return candidates;
}
418
+
419
/**
 * Rank the candidates with the named strategy and return the winner.
 *
 * Only strategies defined directly on ROUTING_STRATEGIES are used; any other
 * name (including inherited object keys such as "toString") falls back to
 * the adaptive strategy.
 *
 * @param {Array<Object>} candidates - Routing candidates.
 * @param {string} strategy - Key of ROUTING_STRATEGIES.
 * @param {Object} metrics - Metrics store handed through to the strategy.
 * @param {{intent: string}|null} intent - Detection result; only its
 *   `intent` key is passed on to the strategy.
 * @returns {Object|null|undefined} First entry of the strategy's ranking, or
 *   null when there are no candidates.
 */
function applyRoutingStrategy(candidates, strategy, metrics, intent) {
  if (!Array.isArray(candidates) || candidates.length === 0) return null;

  const isKnown = Object.prototype.hasOwnProperty.call(ROUTING_STRATEGIES, strategy);
  const router = isKnown ? ROUTING_STRATEGIES[strategy] : ROUTING_STRATEGIES.adaptive;

  const ranked = router.select(candidates, metrics, intent?.intent);
  return ranked[0];
}
428
+
429
+ // Main router function
430
+ module.exports = async function smartRouter(req, config) {
431
+ const metrics = loadMetrics();
432
+ const startTime = Date.now();
433
+
434
+ try {
435
+ // Extract content and detect intent
436
+ const content = extractContent(req);
437
+ const detectedIntent = detectIntent(content);
438
+
439
+ // Get available providers
440
+ const availableProviders = getAvailableProviders(config);
441
+
442
+ // Generate routing candidates
443
+ const candidates = generateCandidates(detectedIntent, availableProviders);
444
+
445
+ // Determine routing strategy
446
+ let strategy = 'adaptive'; // Default
447
+
448
+ // Override strategy based on intent or config
449
+ if (detectedIntent && INTENTS[detectedIntent.intent]) {
450
+ strategy = INTENTS[detectedIntent.intent].strategy;
451
+ }
452
+
453
+ // Check for cost/quality optimization settings in config
454
+ if (config.CostOptimization?.enabled) {
455
+ strategy = 'cost';
456
+ } else if (config.PerformanceOptimization?.enabled) {
457
+ strategy = 'performance';
458
+ } else if (config.QualityOptimization?.enabled) {
459
+ strategy = 'quality';
460
+ }
461
+
462
+ // Apply routing strategy
463
+ const selected = applyRoutingStrategy(candidates, strategy, metrics, detectedIntent);
464
+
465
+ if (selected) {
466
+ const latency = Date.now() - startTime;
467
+
468
+ // Log routing decision
469
+ console.log(`[SmartRouter] ${detectedIntent?.intent || 'unknown'} → ${selected.route} (${strategy}, ${selected.reason})`);
470
+
471
+ // Update metrics
472
+ updateRoutingMetrics(metrics, detectedIntent, selected, latency);
473
+
474
+ return selected.route;
475
+ }
476
+
477
+ // Ultimate fallback - use config default or first available
478
+ const fallback = config.Router?.default || availableProviders[0]?.route;
479
+ console.log(`[SmartRouter] No match → ${fallback}`);
480
+
481
+ return fallback;
482
+
483
+ } catch (error) {
484
+ console.error('[SmartRouter] Error:', error);
485
+ return config.Router?.default || null;
486
+ } finally {
487
+ saveMetrics(metrics);
488
+ }
489
+ };
490
+
491
/**
 * Record one routing decision in the metrics store (mutates `metrics`).
 *
 * Appends an entry to `metrics.requests` and updates the running totals for
 * the selected provider/model and, when an intent was detected, for that
 * intent. Both kinds of entries keep an `avgLatency`, so code reading
 * provider metrics can use it the same way as for intents. Missing
 * top-level sections are created on the fly.
 *
 * @param {{providers?: Object, intents?: Object, requests?: Array}} metrics
 *   Metrics store to update.
 * @param {{intent: string}|null} intent - Detection result, if any.
 * @param {{provider: string, model: string, source: string}} selected
 *   Chosen candidate.
 * @param {number} latency - Time spent on the routing decision, in ms.
 * @returns {void}
 */
function updateRoutingMetrics(metrics, intent, selected, latency) {
  const timestamp = new Date().toISOString();
  const key = `${selected.provider}/${selected.model}`;
  const intentName = intent?.intent;

  if (!Array.isArray(metrics.requests)) metrics.requests = [];
  if (!metrics.providers) metrics.providers = {};
  if (!metrics.intents) metrics.intents = {};

  // Track request
  metrics.requests.push({
    timestamp,
    intent: intentName || 'unknown',
    provider: selected.provider,
    model: selected.model,
    latency,
    source: selected.source
  });

  // Update provider metrics
  if (!metrics.providers[key]) {
    metrics.providers[key] = {
      requests: 0,
      totalLatency: 0,
      minLatency: Infinity,
      maxLatency: 0,
      errors: 0
    };
  }

  const providerMetrics = metrics.providers[key];
  providerMetrics.requests++;
  providerMetrics.totalLatency += latency;
  providerMetrics.minLatency = Math.min(providerMetrics.minLatency, latency);
  providerMetrics.maxLatency = Math.max(providerMetrics.maxLatency, latency);
  providerMetrics.avgLatency = providerMetrics.totalLatency / providerMetrics.requests;

  // Update intent metrics
  if (intentName) {
    if (!metrics.intents[intentName]) {
      metrics.intents[intentName] = {
        requests: 0,
        totalLatency: 0,
        avgLatency: 0
      };
    }

    const intentMetrics = metrics.intents[intentName];
    intentMetrics.requests++;
    intentMetrics.totalLatency += latency;
    intentMetrics.avgLatency = intentMetrics.totalLatency / intentMetrics.requests;
  }
}
539
+
540
// Expose the lookup tables as properties of the exported router function so
// tests can reach them.
Object.assign(module.exports, {
  INTENTS,
  PROVIDER_PROFILES,
  ROUTING_STRATEGIES,
});