aetherframework-cluster 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,330 @@
1
// packages/cluster/src/middleware/cluster-health.js
import { timingSafeEqual } from 'node:crypto';
import os from 'node:os';
2
/**
 * Cluster Health Middleware - provides a health check endpoint for cluster monitoring.
 *
 * Builds a middleware that answers requests whose `ctx.path` equals the
 * configured path with the report produced by `getClusterHealth`, and passes
 * every other request on to `next()`.
 *
 * The context object is used through `ctx.path`, `ctx.status`, `ctx.body` and
 * `ctx.set(name, value)` (presumably a Koa-style context - confirm against the
 * framework this package targets).
 *
 * @param {Object} clusterManager - Cluster manager instance handed to `getClusterHealth`
 * @param {Object} [options] - Middleware options
 * @param {string} [options.path='/cluster/health'] - Path the endpoint answers on
 * @param {boolean} [options.detailed=true] - Include detailed data unless explicitly `false`
 * @param {Object|Function|null} [options.auth=null] - Auth configuration passed to `checkAuth`
 * @returns {Function} Async middleware `(ctx, next)`
 */
export default function createClusterHealthMiddleware(clusterManager, options = {}) {
  // Caller options are spread first so that the normalized values below win.
  // Spreading last would let an explicit `path: undefined` or
  // `detailed: undefined` overwrite the defaults.
  const config = {
    ...options,
    path: options.path || '/cluster/health',
    detailed: options.detailed !== false,
    auth: options.auth || null
  };

  /**
   * Health check middleware function
   * @param {Object} ctx - Context object
   * @param {Function} next - Next middleware function
   */
  return async function clusterHealthMiddleware(ctx, next) {
    // Only handle the health check route
    if (ctx.path !== config.path) {
      return next();
    }

    // `checkAuth` hands back whatever a user-supplied auth function returns,
    // which may be a Promise. Await it so that an async auth callback cannot
    // bypass the check by returning an (always truthy) Promise.
    if (config.auth && !(await checkAuth(ctx, config.auth))) {
      ctx.status = 401;
      ctx.body = {
        status: 'error',
        message: 'Unauthorized',
        timestamp: new Date().toISOString()
      };
      return;
    }

    try {
      const health = await getClusterHealth(clusterManager, config.detailed);

      // 'healthy' and 'degraded' both answer 200; anything else is 503
      const isServing = health.status === 'healthy' || health.status === 'degraded';
      ctx.status = isServing ? 200 : 503;

      ctx.set('X-Cluster-Health', health.status);

      // The "cluster manager not available" report carries neither `workers`
      // nor `uptime`; only emit the headers that can actually be computed.
      if (health.workers) {
        ctx.set('X-Cluster-Workers', `${health.workers.active}/${health.workers.total}`);
      }
      if (health.uptime !== undefined) {
        ctx.set('X-Cluster-Uptime', `${health.uptime}s`);
      }
      ctx.set('X-Cluster-Timestamp', health.timestamp);

      ctx.body = health;
    } catch (error) {
      console.error('Health check error:', error);

      ctx.status = 503;
      ctx.body = {
        status: 'error',
        timestamp: new Date().toISOString(),
        message: 'Health check failed',
        error: error.message,
        // Stack traces are only exposed in development
        stack: process.env.NODE_ENV === 'development' ? error.stack : undefined
      };
    }
  };
}
67
+
68
/**
 * Get cluster health status.
 *
 * Reads `clusterManager.getStats()` and derives an overall status:
 *   - 'unknown'   when no cluster manager is supplied,
 *   - 'unhealthy' when there are no online workers or no workers at all,
 *   - 'degraded'  when fewer than 50% of workers are online or the error
 *                 rate exceeds 10%,
 *   - 'healthy'   otherwise.
 * A critical issue always takes precedence over a warning.
 *
 * The stats object is read through `workers` (array of objects with `state`,
 * `pid`, `id`, `startTime`, `requests`, `errors`, `lastRequestTime`),
 * `totalRequests`, `errors`, `workerRestarts` and `startTime`.
 *
 * @param {Object} clusterManager - Cluster manager instance exposing `getStats()`
 * @param {boolean} detailed - Whether to include per-worker and process information
 * @returns {Promise<Object>} Health status object
 */
async function getClusterHealth(clusterManager, detailed = false) {
  if (!clusterManager) {
    return {
      status: 'unknown',
      message: 'Cluster manager not available',
      timestamp: new Date().toISOString()
    };
  }

  const stats = clusterManager.getStats();
  const now = Date.now();

  // Calculate health metrics
  const activeWorkers = stats.workers.filter(w => w.state === 'online').length;
  const totalWorkers = stats.workers.length;
  const healthPercentage = totalWorkers > 0 ? (activeWorkers / totalWorkers) * 100 : 0;

  const issues = [];
  const warnings = [];
  // Tracks warnings that lower the status; the idle-worker warning does not.
  let degraded = false;

  // Critical issues
  if (activeWorkers === 0) {
    issues.push('No active workers available');
  }

  if (totalWorkers === 0) {
    issues.push('No workers registered in cluster');
  }

  // Warnings
  if (activeWorkers < totalWorkers * 0.5) {
    degraded = true;
    warnings.push(`Less than 50% of workers active: ${activeWorkers}/${totalWorkers}`);
  }

  // Check error rate (percentage of all requests)
  const errorRate = stats.totalRequests > 0 ? (stats.errors / stats.totalRequests) * 100 : 0;
  if (errorRate > 10) {
    degraded = true;
    warnings.push(`High error rate: ${errorRate.toFixed(2)}%`);
  }

  // Check for idle workers; a worker without a recorded request counts as idle
  const idleWorkers = stats.workers.filter(w => {
    const idleTime = w.lastRequestTime ? now - w.lastRequestTime : Infinity;
    return idleTime > 300000; // 5 minutes
  }).length;

  if (idleWorkers > totalWorkers * 0.7) {
    warnings.push(`High idle worker ratio: ${idleWorkers}/${totalWorkers}`);
  }

  // Resolve the status once, after all checks, so that a later warning can
  // never overwrite an earlier critical issue (a cluster with zero online
  // workers must not be reported as merely 'degraded').
  let status = 'healthy';
  if (issues.length > 0) {
    status = 'unhealthy';
  } else if (degraded) {
    status = 'degraded';
  }

  // Build health report
  const healthReport = {
    status,
    timestamp: new Date().toISOString(),
    uptime: Math.floor((now - stats.startTime) / 1000),
    workers: {
      total: totalWorkers,
      active: activeWorkers,
      idle: idleWorkers,
      dead: totalWorkers - activeWorkers,
      healthPercentage: healthPercentage.toFixed(2)
    },
    performance: {
      totalRequests: stats.totalRequests,
      totalErrors: stats.errors,
      errorRate: errorRate.toFixed(2),
      workerRestarts: stats.workerRestarts
    },
    issues: issues.length > 0 ? issues : null,
    warnings: warnings.length > 0 ? warnings : null,
    recommendations: getHealthRecommendations(stats, activeWorkers, totalWorkers, errorRate)
  };

  // Add detailed information if requested
  if (detailed) {
    healthReport.detailed = {
      workers: stats.workers.map(w => ({
        pid: w.pid,
        id: w.id,
        state: w.state,
        uptime: Math.floor((now - w.startTime) / 1000),
        requests: w.requests,
        errors: w.errors,
        errorRate: w.requests > 0 ? (w.errors / w.requests) * 100 : 0,
        lastRequestTime: w.lastRequestTime
      })),
      // Figures for the process that serves the request, not for each worker
      system: {
        memory: process.memoryUsage(),
        cpu: process.cpuUsage(),
        platform: process.platform,
        arch: process.arch,
        version: process.version
      }
    };
  }

  return healthReport;
}
177
+
178
/**
 * Get health recommendations based on cluster status.
 *
 * Produces human-readable suggestions from the worker counts, the error rate
 * and the `totalRequests` / `workerRestarts` fields of `stats`.
 *
 * @param {Object} stats - Cluster statistics (reads `totalRequests` and `workerRestarts`)
 * @param {number} activeWorkers - Number of active workers
 * @param {number} totalWorkers - Total number of workers
 * @param {number} errorRate - Error rate percentage
 * @returns {Array} Array of recommendations; `['All systems operational']` when none apply
 */
function getHealthRecommendations(stats, activeWorkers, totalWorkers, errorRate) {
  const recommendations = [];

  // Share of workers that are not active. Guarded so that an empty cluster
  // yields 0 instead of NaN from a 0/0 division.
  // NOTE(review): this ratio counts non-active workers, not idle ones, yet
  // the advice below is about over-provisioning - confirm the intent.
  const idleRatio = totalWorkers > 0 ? (totalWorkers - activeWorkers) / totalWorkers : 0;
  if (idleRatio > 0.7) {
    recommendations.push('Consider reducing the number of workers to save resources');
  }

  // Check if all workers are active and fewer than CPU cores.
  // `os` is imported statically: this file is an ES module, where a bare
  // `require` is not defined.
  const cpuCount = os.cpus().length;
  if (activeWorkers === totalWorkers && totalWorkers < cpuCount) {
    recommendations.push(`Consider increasing the number of workers to ${cpuCount} (CPU cores)`);
  }

  // Check error rate
  if (errorRate > 10) {
    recommendations.push('Investigate high error rate, check application logs and error handling');
  }

  // Check request distribution (average requests per registered worker)
  const averageRequests = stats.totalRequests / Math.max(totalWorkers, 1);
  if (averageRequests > 1000) {
    recommendations.push('High request load, consider scaling horizontally');
  }

  // Check worker restarts
  if (stats.workerRestarts > 0) {
    recommendations.push(`Monitor worker stability (${stats.workerRestarts} restarts detected)`);
  }

  return recommendations.length > 0 ? recommendations : ['All systems operational'];
}
219
+
220
/**
 * Check authentication for health endpoint.
 *
 * Supported forms of `auth`:
 *   - function: called with `ctx`; its return value is passed through
 *     unchanged, so an async function yields a Promise the caller must await;
 *   - object with `apiKey`: compared with the `x-api-key` request header or,
 *     failing that, the `apiKey` query parameter;
 *   - object with `username` and `password`: compared with the credentials of
 *     an HTTP Basic `Authorization` header.
 * Any other configuration falls through to `true` (no auth required).
 *
 * @param {Object} ctx - Context object (reads `ctx.headers` and `ctx.query`)
 * @param {Object|Function} auth - Authentication configuration or function
 * @returns {boolean} Authentication status (or the auth function's return value)
 */
function checkAuth(ctx, auth) {
  if (typeof auth === 'function') {
    return auth(ctx);
  }

  if (auth !== null && typeof auth === 'object') {
    const headers = ctx.headers || {};

    // Check API key
    if (auth.apiKey) {
      const query = ctx.query || {};
      const apiKey = headers['x-api-key'] || query.apiKey;
      return secretsMatch(apiKey, auth.apiKey);
    }

    // Check basic auth
    if (auth.username && auth.password) {
      const authHeader = headers.authorization;
      if (typeof authHeader !== 'string' || !authHeader.startsWith('Basic ')) {
        return false;
      }

      const credentials = Buffer.from(authHeader.slice(6), 'base64').toString();

      // Split on the FIRST colon only: the password part may itself contain
      // colons, so `split(':')` would truncate it.
      const separator = credentials.indexOf(':');
      if (separator === -1) {
        return false;
      }
      const username = credentials.slice(0, separator);
      const password = credentials.slice(separator + 1);

      // Evaluate both comparisons before combining them so the work done
      // does not depend on which field mismatched.
      const usernameOk = secretsMatch(username, auth.username);
      const passwordOk = secretsMatch(password, auth.password);
      return usernameOk && passwordOk;
    }
  }

  return true; // No auth required
}

/**
 * Compare a client-supplied secret with the expected one without an
 * early-exit string comparison. Non-string values never match.
 *
 * @param {*} candidate - Value supplied by the client
 * @param {*} expected - Configured secret
 * @returns {boolean} True when both are strings with identical content
 */
function secretsMatch(candidate, expected) {
  if (typeof candidate !== 'string' || typeof expected !== 'string') {
    return false;
  }

  const candidateBytes = Buffer.from(candidate);
  const expectedBytes = Buffer.from(expected);

  // timingSafeEqual throws on differing lengths, so check the length first
  return candidateBytes.length === expectedBytes.length &&
    timingSafeEqual(candidateBytes, expectedBytes);
}
253
+
254
/**
 * Create detailed health check endpoint.
 *
 * The returned middleware answers only on `/cluster/health/detailed` and
 * always requests the detailed report from `getClusterHealth`. When the
 * cluster manager exposes `loadBalancer` and/or `workerManager`, their stats
 * are attached to the report as `loadBalancer` and `workerManager`.
 *
 * @param {Object} clusterManager - Cluster manager instance
 * @param {Object} options - Middleware options
 * @param {Object|Function} [options.auth] - Auth configuration passed to `checkAuth`;
 *   when set, unauthenticated requests receive a 401 response
 * @returns {Function} Middleware function
 */
export function createDetailedHealthMiddleware(clusterManager, options = {}) {
  const auth = (options && options.auth) || null;

  return async function detailedHealthMiddleware(ctx, next) {
    if (ctx.path !== '/cluster/health/detailed') {
      return next();
    }

    // This endpoint exposes the most information (worker pids, process
    // memory, load balancer stats), so a configured `auth` must be enforced
    // here exactly as on the main health endpoint. The result is awaited
    // because a user-supplied auth function may be async.
    if (auth && !(await checkAuth(ctx, auth))) {
      ctx.status = 401;
      ctx.body = {
        status: 'error',
        message: 'Unauthorized',
        timestamp: new Date().toISOString()
      };
      return;
    }

    try {
      const health = await getClusterHealth(clusterManager, true);

      // Add load balancer stats if available. The manager itself may be
      // missing, in which case the report is returned as is.
      if (clusterManager && clusterManager.loadBalancer) {
        health.loadBalancer = clusterManager.loadBalancer.getStats();
      }

      // Add worker manager stats if available
      if (clusterManager && clusterManager.workerManager) {
        health.workerManager = clusterManager.workerManager.getClusterStats();
      }

      // 'healthy' and 'degraded' both answer 200; anything else is 503
      const isServing = health.status === 'healthy' || health.status === 'degraded';
      ctx.status = isServing ? 200 : 503;
      ctx.body = health;
    } catch (error) {
      // Log the failure instead of discarding it
      console.error('Detailed health check error:', error);

      ctx.status = 503;
      ctx.body = {
        status: 'error',
        timestamp: new Date().toISOString(),
        message: 'Detailed health check failed',
        error: error.message
      };
    }
  };
}
294
+
295
/**
 * Create health check endpoint with custom path.
 *
 * The returned middleware answers only when `ctx.path` equals `path` and
 * responds with the report from `getClusterHealth`. When `options.metrics`
 * is provided it is called with `ctx` and its (awaited) result is attached
 * to the report as `customMetrics`.
 *
 * @param {Object} clusterManager - Cluster manager instance
 * @param {string} path - Custom path for health endpoint
 * @param {Object} options - Additional options
 * @param {boolean} [options.detailed=false] - Include detailed information
 * @param {Function} [options.metrics] - Sync or async `(ctx) => any` metrics provider
 * @param {Object|Function} [options.auth] - Auth configuration passed to `checkAuth`;
 *   when set, unauthenticated requests receive a 401 response
 * @returns {Function} Middleware function
 */
export function createCustomHealthMiddleware(clusterManager, path, options = {}) {
  const settings = options || {};

  return async function customHealthMiddleware(ctx, next) {
    if (ctx.path !== path) {
      return next();
    }

    // Enforce a configured `auth` the same way the main health endpoint
    // does. The result is awaited because a user-supplied auth function may
    // be async.
    if (settings.auth && !(await checkAuth(ctx, settings.auth))) {
      ctx.status = 401;
      ctx.body = {
        status: 'error',
        message: 'Unauthorized',
        timestamp: new Date().toISOString()
      };
      return;
    }

    try {
      const health = await getClusterHealth(clusterManager, settings.detailed || false);

      // Add custom metrics if provided
      if (settings.metrics) {
        health.customMetrics = await settings.metrics(ctx);
      }

      // 'healthy' and 'degraded' both answer 200; anything else is 503
      const isServing = health.status === 'healthy' || health.status === 'degraded';
      ctx.status = isServing ? 200 : 503;
      ctx.body = health;
    } catch (error) {
      // Log the failure instead of discarding it
      console.error('Health check error:', error);

      ctx.status = 503;
      ctx.body = {
        status: 'error',
        timestamp: new Date().toISOString(),
        message: 'Health check failed',
        error: error.message
      };
    }
  };
}