claude-code-router-config 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,472 @@
1
+ const { logger } = require('./enhanced-logger');
2
+ const { spawn } = require('child_process');
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const os = require('os');
6
+
7
/**
 * Tracks the health of a set of named providers.
 *
 * Each provider is probed with a tiny chat-style request sent through `curl`;
 * the outcome is folded into a per-provider record (status, failure streak,
 * latency, last error). The class can run those probes on a timer, summarize
 * them together with process/CPU metrics, and export or pretty-print the
 * summary.
 *
 * Provider status values written by this class: 'unknown' (never checked),
 * 'healthy', 'degraded' (failing, below the failure threshold) and 'down'
 * (failure streak reached the threshold).
 */
class HealthMonitor {
  /**
   * @param {object} [options]
   * @param {number} [options.checkInterval=30000] Milliseconds between periodic checks.
   * @param {number} [options.timeout=10000] Milliseconds allowed for one connectivity probe.
   * @param {number} [options.failureThreshold=3] Consecutive failures before a provider is 'down'.
   * @param {number} [options.recoveryTimeout=60000] Stored on the instance; not read anywhere in this class.
   * @param {boolean} [options.enabled=true] When false, start() does nothing.
   */
  constructor(options = {}) {
    this.providers = new Map();
    this.checkInterval = options.checkInterval || 30000; // 30 seconds
    this.timeout = options.timeout || 10000; // 10 seconds
    this.failureThreshold = options.failureThreshold || 3;
    this.recoveryTimeout = options.recoveryTimeout || 60000; // 1 minute
    this.enabled = options.enabled !== false;

    // Health status storage; refreshed by checkAllProviders().
    this.healthData = {
      timestamp: new Date().toISOString(),
      system: this.collectSystemMetrics(),
      providers: {}
    };

    this.intervalId = null;
  }

  /**
   * Snapshot of process uptime, process memory usage and CPU usage.
   * @returns {{uptime: number, memory: object, cpu: object}}
   */
  collectSystemMetrics() {
    return {
      uptime: process.uptime(),
      memory: process.memoryUsage(),
      cpu: this.getCPUUsage()
    };
  }

  /**
   * Register (or replace) a provider to monitor.
   *
   * @param {string} name Key used to look the provider up later.
   * @param {object} config Provider settings. This class reads `api_key`
   *   (an environment variable reference such as `$MY_KEY`), `models`
   *   (the first entry is used for the probe) and `api_base_url`.
   */
  addProvider(name, config) {
    this.providers.set(name, {
      name,
      ...config,
      // Tracking fields come last so a config object cannot pre-seed them.
      status: 'unknown',
      lastCheck: null,
      consecutiveFailures: 0,
      lastSuccess: null,
      lastFailure: null,
      responseTime: null,
      error: null
    });
  }

  /**
   * Stop monitoring a provider. Unknown names are ignored.
   * @param {string} name
   */
  removeProvider(name) {
    this.providers.delete(name);
  }

  /**
   * Simplified CPU usage derived from the per-core time counters returned by
   * os.cpus(): the share of non-idle time across all cores.
   *
   * @returns {{usage: number, cores: number, model: string}} `usage` is a
   *   percentage rounded to two decimals (0 when no counters are available).
   */
  getCPUUsage() {
    const cpus = os.cpus();
    let totalIdle = 0;
    let totalTick = 0;

    for (const { times } of cpus) {
      totalIdle += times.idle;
      for (const ticks of Object.values(times)) {
        totalTick += ticks;
      }
    }

    // Guard the division: os.cpus() can be empty, which would give NaN.
    const busyPercent = totalTick > 0 ? 100 - (totalIdle / totalTick) * 100 : 0;

    return {
      // Round the final figure (not just the idle share) and keep it numeric.
      usage: Number(busyPercent.toFixed(2)),
      cores: cpus.length,
      model: cpus[0]?.model || 'Unknown'
    };
  }

  /**
   * Look up the API key a provider refers to.
   *
   * `api_key` is treated as an environment variable reference: the first `$`
   * is stripped and the remainder is used as the variable name.
   *
   * @param {object} provider Provider record.
   * @returns {string|undefined} The key, or undefined when `api_key` is not a
   *   string or the variable is unset.
   */
  resolveApiKey(provider) {
    if (typeof provider.api_key !== 'string') {
      return undefined;
    }
    return process.env[provider.api_key.replace('$', '')];
  }

  /**
   * Run one health check against a provider and record the outcome.
   *
   * @param {string} providerName
   * @returns {Promise<null|{name: string, status: string, responseTime: number,
   *   error: (string|null), consecutiveFailures: number}>} null when the
   *   provider is not registered. `status` here is 'healthy' or 'unhealthy'
   *   for this single check; the stored provider status additionally
   *   distinguishes 'degraded' from 'down'.
   */
  async checkProvider(providerName) {
    const provider = this.providers.get(providerName);
    if (!provider) {
      logger.error(`Provider ${providerName} not found for health check`);
      return null;
    }

    const startTime = Date.now();
    let healthy = false;
    let error = null;

    try {
      // Check if API key is configured
      if (!this.resolveApiKey(provider)) {
        throw new Error('API key not configured');
      }

      // Simple connectivity check
      const testResult = await this.testProviderConnectivity(provider);
      healthy = Boolean(testResult.success);
      if (!healthy) {
        error = testResult.error;
      }
    } catch (err) {
      healthy = false;
      error = err.message;
    }

    const status = healthy ? 'healthy' : 'unhealthy';
    const responseTime = Date.now() - startTime;
    const checkedAt = new Date().toISOString();

    // Update provider status
    provider.lastCheck = checkedAt;
    provider.responseTime = responseTime;
    provider.error = error;

    if (healthy) {
      provider.consecutiveFailures = 0;
      provider.lastSuccess = checkedAt;
      provider.status = 'healthy';
    } else {
      provider.consecutiveFailures++;
      provider.lastFailure = checkedAt;
      provider.status =
        provider.consecutiveFailures >= this.failureThreshold ? 'down' : 'degraded';
    }

    logger.logHealthCheck(providerName, status, responseTime, error);

    return {
      name: providerName,
      status,
      responseTime,
      error,
      consecutiveFailures: provider.consecutiveFailures
    };
  }

  /**
   * Send a minimal request to the provider through `curl` and report whether
   * it answered with HTTP 200.
   *
   * The returned promise never rejects; failures are reported in the result.
   *
   * @param {object} provider Provider record (`models`, `api_key`, `api_base_url`).
   * @returns {Promise<{success: boolean, error?: string}>}
   */
  async testProviderConnectivity(provider) {
    return new Promise((resolve) => {
      let curl = null;
      let settled = false;
      let timer = null;

      // Resolve at most once and always release the watchdog timer.
      const settle = (result) => {
        if (settled) {
          return;
        }
        settled = true;
        clearTimeout(timer);
        resolve(result);
      };

      timer = setTimeout(() => {
        // Do not leave the child running after we have given up on it.
        if (curl) {
          curl.kill();
        }
        settle({ success: false, error: 'Timeout' });
      }, this.timeout);

      try {
        // Create a simple test request
        const testRequest = {
          model: provider.models[0],
          messages: [{ role: 'user', content: 'Test' }],
          max_tokens: 5
        };

        // curl treats `-m 0` as "no limit", so never round below one second.
        const maxSeconds = Math.max(1, Math.ceil(this.timeout / 1000));

        // Use curl for testing (more reliable than node HTTP for different APIs)
        // NOTE(review): the API key is passed on the command line and is
        // therefore visible in the process list while curl runs.
        curl = spawn('curl', [
          '-s', '-w', '%{http_code}',
          '-o', '/dev/null',
          '-m', String(maxSeconds),
          '-H', `Authorization: Bearer ${this.resolveApiKey(provider)}`,
          '-H', 'Content-Type: application/json',
          '-d', JSON.stringify(testRequest),
          provider.api_base_url
        ]);

        let output = '';
        curl.stdout.on('data', (data) => {
          output += data.toString();
        });

        curl.on('close', (code) => {
          // With `-o /dev/null -w %{http_code}` stdout is only the status code.
          const httpCode = output.trim();
          if (code === 0 && httpCode === '200') {
            settle({ success: true });
          } else {
            settle({
              success: false,
              error: `HTTP Error: ${httpCode || code}`
            });
          }
        });

        curl.on('error', (err) => {
          settle({ success: false, error: err.message });
        });
      } catch (err) {
        settle({ success: false, error: err.message });
      }
    });
  }

  /**
   * Check every registered provider concurrently and refresh `healthData`.
   *
   * @returns {Promise<Object<string, object>>} checkProvider() results keyed
   *   by provider name.
   */
  async checkAllProviders() {
    const names = [...this.providers.keys()];
    const outcomes = await Promise.all(names.map((name) => this.checkProvider(name)));

    const results = {};
    names.forEach((name, index) => {
      results[name] = outcomes[index];
    });

    // Update health data
    this.healthData.timestamp = new Date().toISOString();
    this.healthData.system = this.collectSystemMetrics();
    this.healthData.providers = results;

    return results;
  }

  /**
   * Stored health record for one provider.
   *
   * @param {string} providerName
   * @returns {object|null} null when the provider is not registered.
   *   `isAvailable` is true for every status except 'down'.
   */
  getProviderHealth(providerName) {
    const provider = this.providers.get(providerName);
    if (!provider) {
      return null;
    }

    return {
      name: provider.name,
      status: provider.status,
      lastCheck: provider.lastCheck,
      lastSuccess: provider.lastSuccess,
      lastFailure: provider.lastFailure,
      consecutiveFailures: provider.consecutiveFailures,
      responseTime: provider.responseTime,
      error: provider.error,
      isHealthy: provider.status === 'healthy',
      isAvailable: provider.status !== 'down'
    };
  }

  /**
   * Overall health summary.
   *
   * Status is 'critical' when every registered provider is down, 'degraded'
   * when some are down, 'warning' when some are degraded, otherwise 'healthy'.
   * A monitor with no providers is 'healthy' rather than 'critical'.
   *
   * @returns {{status: string, timestamp: string, system: object,
   *   providers: object, recommendations: object[]}}
   */
  getSystemHealth() {
    const providers = {};
    const counts = { healthy: 0, degraded: 0, down: 0 };

    for (const [name, provider] of this.providers) {
      providers[name] = this.getProviderHealth(name);
      if (Object.prototype.hasOwnProperty.call(counts, provider.status)) {
        counts[provider.status]++;
      }
    }

    const totalProviders = this.providers.size;
    let systemStatus = 'healthy';
    if (totalProviders > 0 && counts.down === totalProviders) {
      systemStatus = 'critical';
    } else if (counts.down > 0) {
      systemStatus = 'degraded';
    } else if (counts.degraded > 0) {
      systemStatus = 'warning';
    }

    return {
      status: systemStatus,
      timestamp: this.healthData.timestamp,
      system: this.healthData.system,
      providers: {
        total: totalProviders,
        healthy: counts.healthy,
        degraded: counts.degraded,
        down: counts.down,
        details: providers
      },
      recommendations: this.getRecommendations(providers)
    };
  }

  /**
   * Build actionable recommendations from the current provider records and
   * the process heap usage.
   *
   * @param {object} providers Accepted for interface compatibility; the
   *   recommendations are computed from the monitor's own provider records.
   * @returns {Array<{type: string, provider?: string, message: string, action: string}>}
   */
  getRecommendations(providers) {
    const recommendations = [];

    for (const [name, provider] of this.providers) {
      if (provider.status === 'down') {
        recommendations.push({
          type: 'critical',
          provider: name,
          message: `Provider ${name} is down. Check API key and service status.`,
          action: 'verify_api_key'
        });
      } else if (provider.status === 'degraded') {
        recommendations.push({
          type: 'warning',
          provider: name,
          message: `Provider ${name} is experiencing issues (${provider.consecutiveFailures} consecutive failures).`,
          action: 'monitor_closely'
        });
      } else if (provider.responseTime && provider.responseTime > 5000) {
        recommendations.push({
          type: 'performance',
          provider: name,
          message: `Provider ${name} has high response time (${provider.responseTime}ms).`,
          action: 'consider_alternative'
        });
      }
    }

    // System-level recommendations
    const memUsage = process.memoryUsage();
    const memUsagePercent = (memUsage.heapUsed / memUsage.heapTotal * 100).toFixed(2);

    if (parseFloat(memUsagePercent) > 90) {
      recommendations.push({
        type: 'system',
        message: `High memory usage: ${memUsagePercent}%. Consider restarting the service.`,
        action: 'restart_service'
      });
    }

    return recommendations;
  }

  /**
   * Start periodic monitoring: one immediate check, then one every
   * `checkInterval` milliseconds. No-op when disabled or already running.
   */
  start() {
    if (!this.enabled || this.intervalId) {
      return;
    }

    logger.info('Starting health monitoring', {
      interval: this.checkInterval,
      timeout: this.timeout,
      providers: this.providers.size
    });

    // Initial check
    this.checkAllProviders().catch((error) => {
      logger.error('Initial health check failed', { error: error.message });
    });

    // Set up periodic checks
    this.intervalId = setInterval(() => {
      this.checkAllProviders().catch((error) => {
        logger.error('Periodic health check failed', { error: error.message });
      });
    }, this.checkInterval);
  }

  /**
   * Stop periodic monitoring. No-op when not running.
   */
  stop() {
    if (this.intervalId) {
      clearInterval(this.intervalId);
      this.intervalId = null;
      logger.info('Health monitoring stopped');
    }
  }

  /**
   * Write the current system health to
   * `~/.claude-code-router/logs/health-report-<timestamp>.<format>`.
   *
   * @param {string} [format='json'] 'json' or 'csv'.
   * @returns {string|null} Path of the written file, or null when the format
   *   is unsupported or the write fails (the failure is logged).
   */
  exportHealthData(format = 'json') {
    // Reject unknown formats up front: the value also ends up in the file
    // name, and previously nothing was written while success was reported.
    if (format !== 'json' && format !== 'csv') {
      logger.error('Failed to export health data', {
        error: `Unsupported export format: ${format}`
      });
      return null;
    }

    const healthData = this.getSystemHealth();
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const filename = `health-report-${timestamp}.${format}`;
    const logDir = path.join(os.homedir(), '.claude-code-router', 'logs');
    const filepath = path.join(logDir, filename);

    try {
      // The logs directory may not exist yet on a fresh install.
      fs.mkdirSync(logDir, { recursive: true });

      const contents = format === 'json'
        ? JSON.stringify(healthData, null, 2)
        : providersToCsv(healthData.providers.details);
      fs.writeFileSync(filepath, contents);

      logger.info('Health data exported', { filepath, format });
      return filepath;
    } catch (error) {
      logger.error('Failed to export health data', { error: error.message });
      return null;
    }
  }

  /**
   * Render the current system health as a multi-line text report for the CLI.
   * @returns {string}
   */
  generateHealthReport() {
    const health = this.getSystemHealth();
    const report = [];

    // System summary
    report.push(`\n📊 System Health Status: ${health.status.toUpperCase()}`);
    report.push(`Timestamp: ${health.timestamp}`);
    report.push(`Providers: ${health.providers.healthy}/${health.providers.total} healthy`);

    if (health.recommendations.length > 0) {
      report.push(`\n⚠️ Recommendations:`);
      health.recommendations.forEach((rec) => {
        const icon = rec.type === 'critical' ? '🚨' :
          rec.type === 'warning' ? '⚠️' :
            rec.type === 'performance' ? '📈' : 'ℹ️';
        report.push(` ${icon} ${rec.message}`);
      });
    }

    // Provider details
    report.push(`\n🏭 Provider Status:`);
    Object.entries(health.providers.details).forEach(([name, provider]) => {
      const status = provider.isHealthy ? '🟢' :
        provider.status === 'degraded' ? '🟡' : '🔴';
      // A measured 0ms is still a measurement; only null/undefined is "N/A".
      const latency = provider.responseTime != null ? `${provider.responseTime}ms` : 'N/A';

      report.push(` ${status} ${name}: ${provider.status} (${latency})`);

      if (provider.error) {
        report.push(` Error: ${provider.error}`);
      }
    });

    // System metrics
    const mem = health.system.memory;
    const memUsage = ((mem.heapUsed / mem.heapTotal) * 100).toFixed(2);
    report.push(`\n💻 System Metrics:`);
    report.push(` Uptime: ${Math.floor(health.system.uptime / 3600)}h`);
    report.push(` Memory: ${memUsage}% (${(mem.heapUsed / 1024 / 1024).toFixed(2)}MB)`);
    report.push(` CPU: ${health.system.cpu.usage}%`);

    return report.join('\n');
  }
}

// Quote a CSV field when it contains a delimiter, quote or line break.
function csvField(value) {
  const text = value == null ? '' : String(value);
  return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
}

// Render per-provider health records as CSV text with a header row.
function providersToCsv(details) {
  const rows = [['Provider', 'Status', 'Last Check', 'Response Time', 'Errors']];

  for (const [name, provider] of Object.entries(details)) {
    rows.push([
      name,
      provider.status,
      provider.lastCheck ?? '',
      provider.responseTime ?? '',
      provider.error ?? ''
    ]);
  }

  return rows.map((row) => row.map(csvField).join(',')).join('\n');
}
465
+
466
// Shared monitor built with the default options; exported next to the class
// so callers can reuse this instance or construct their own.
const healthMonitor = new HealthMonitor();

module.exports = { HealthMonitor, healthMonitor };