aetherframework-cluster 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +90 -0
- package/README.md +1049 -0
- package/index.js +288 -0
- package/package.json +41 -0
- package/src/core/ClusterManager.js +109 -0
- package/src/core/HealthMonitor.js +571 -0
- package/src/core/LoadBalancer.js +531 -0
- package/src/core/WorkerManager.js +619 -0
- package/src/examples/advanced-cluster.js +150 -0
- package/src/examples/basic-cluster.js +107 -0
- package/src/examples/benchmark-cluster.js +112 -0
- package/src/examples/simple-app.js +52 -0
- package/src/middleware/cluster-health.js +330 -0
- package/src/middleware/graceful-shutdown.js +443 -0
- package/src/middleware/process-monitor.js +925 -0
- package/src/middleware/worker-stats.js +879 -0
- package/src/utils/cpu-detector.js +78 -0
- package/src/utils/env-loader.js +140 -0
- package/src/utils/signal-handler.js +90 -0
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
// packages/cluster/src/middleware/cluster-health.js
|
|
2
|
+
/**
|
|
3
|
+
* Cluster Health Middleware - Provides health check endpoints for cluster monitoring
|
|
4
|
+
* This middleware adds health check routes to monitor cluster status
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Build a middleware that answers cluster health checks on a single route.
 *
 * Requests whose `ctx.path` differs from the configured path are passed to
 * `next()`. Matching requests are optionally authenticated, then answered with
 * the report produced by `getClusterHealth`.
 *
 * @param {Object} clusterManager - Cluster manager instance handed to `getClusterHealth`
 * @param {Object} [options] - Middleware options
 * @param {string} [options.path='/cluster/health'] - Route served by this middleware
 * @param {boolean} [options.detailed=true] - Include detailed data unless explicitly `false`
 * @param {Object|Function} [options.auth=null] - Auth configuration passed to `checkAuth`
 * @returns {Function} Async middleware `(ctx, next)`
 */
export default function createClusterHealthMiddleware(clusterManager, options = {}) {
  // Spread the caller's options first so the normalised values below always
  // win. With the spread last, `{ path: undefined }` or `{ detailed: undefined }`
  // would overwrite the defaults that were just computed.
  const config = {
    ...options,
    path: options.path || '/cluster/health',
    detailed: options.detailed !== false,
    auth: options.auth || null
  };

  /**
   * Health check middleware function
   * @param {Object} ctx - Context object
   * @param {Function} next - Next middleware function
   */
  return async function clusterHealthMiddleware(ctx, next) {
    // Only handle health check route
    if (ctx.path !== config.path) {
      return next();
    }

    // Check authentication if required. The result is awaited because an auth
    // callback may be async; an un-awaited Promise is always truthy and would
    // let every request through.
    if (config.auth && !(await checkAuth(ctx, config.auth))) {
      ctx.status = 401;
      ctx.body = {
        status: 'error',
        message: 'Unauthorized',
        timestamp: new Date().toISOString()
      };
      return;
    }

    try {
      // Get cluster health status
      const health = await getClusterHealth(clusterManager, config.detailed);

      // 'healthy' and 'degraded' are both served as 200; anything else is 503.
      const isServing = health.status === 'healthy' || health.status === 'degraded';
      ctx.status = isServing ? 200 : 503;

      // Add health headers. The "unknown" report (no cluster manager) carries
      // neither `workers` nor `uptime`, so those headers are emitted only when
      // the data exists.
      ctx.set('X-Cluster-Health', health.status);
      if (health.workers) {
        ctx.set('X-Cluster-Workers', `${health.workers.active}/${health.workers.total}`);
      }
      if (health.uptime !== undefined) {
        ctx.set('X-Cluster-Uptime', `${health.uptime}s`);
      }
      ctx.set('X-Cluster-Timestamp', health.timestamp);

      // Return health response
      ctx.body = health;
    } catch (error) {
      console.error('Health check error:', error);

      ctx.status = 503;
      ctx.body = {
        status: 'error',
        timestamp: new Date().toISOString(),
        message: 'Health check failed',
        error: error.message,
        // The stack trace is only exposed in development.
        stack: process.env.NODE_ENV === 'development' ? error.stack : undefined
      };
    }
  };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Get cluster health status
 *
 * Reads `clusterManager.getStats()` and derives an overall status:
 *  - 'unknown'   when no cluster manager is supplied
 *  - 'unhealthy' when no worker is online or none is registered
 *  - 'degraded'  when fewer than 50% of workers are online or the error rate
 *                exceeds 10% (never overrides 'unhealthy')
 *  - 'healthy'   otherwise
 *
 * @param {Object} clusterManager - Cluster manager instance
 * @param {boolean} detailed - Whether to include detailed information
 * @returns {Promise<Object>} Health status object
 */
async function getClusterHealth(clusterManager, detailed = false) {
  if (!clusterManager) {
    return {
      status: 'unknown',
      message: 'Cluster manager not available',
      timestamp: new Date().toISOString()
    };
  }

  const stats = clusterManager.getStats();
  const now = Date.now();
  // Treat a missing worker list as an empty cluster instead of throwing.
  const workers = Array.isArray(stats.workers) ? stats.workers : [];

  // Calculate health metrics
  const activeWorkers = workers.filter(w => w.state === 'online').length;
  const totalWorkers = workers.length;
  const healthPercentage = totalWorkers > 0 ? (activeWorkers / totalWorkers) * 100 : 0;

  let status = 'healthy';
  const issues = [];
  const warnings = [];

  // Critical issues
  if (activeWorkers === 0) {
    status = 'unhealthy';
    issues.push('No active workers available');
  }

  if (totalWorkers === 0) {
    status = 'unhealthy';
    issues.push('No workers registered in cluster');
  }

  // Warnings. These may only lower a 'healthy' status: a cluster with zero
  // online workers also satisfies the "< 50%" test and must stay 'unhealthy'
  // rather than being reported as 'degraded'.
  if (activeWorkers < totalWorkers * 0.5) {
    if (status === 'healthy') {
      status = 'degraded';
    }
    warnings.push(`Less than 50% of workers active: ${activeWorkers}/${totalWorkers}`);
  }

  // Check error rate
  const errorRate = stats.totalRequests > 0 ? (stats.errors / stats.totalRequests) * 100 : 0;
  if (errorRate > 10) {
    if (status === 'healthy') {
      status = 'degraded';
    }
    warnings.push(`High error rate: ${errorRate.toFixed(2)}%`);
  }

  // Check for idle workers: no recorded request, or none within 5 minutes.
  const idleWorkers = workers.filter(w => {
    const idleTime = w.lastRequestTime ? now - w.lastRequestTime : Infinity;
    return idleTime > 300000; // 5 minutes
  }).length;

  if (idleWorkers > totalWorkers * 0.7) {
    warnings.push(`High idle worker ratio: ${idleWorkers}/${totalWorkers}`);
  }

  // Build health report
  const healthReport = {
    status,
    timestamp: new Date().toISOString(),
    uptime: Math.floor((now - stats.startTime) / 1000),
    workers: {
      total: totalWorkers,
      active: activeWorkers,
      idle: idleWorkers,
      dead: totalWorkers - activeWorkers,
      healthPercentage: healthPercentage.toFixed(2)
    },
    performance: {
      totalRequests: stats.totalRequests,
      totalErrors: stats.errors,
      errorRate: errorRate.toFixed(2),
      workerRestarts: stats.workerRestarts
    },
    issues: issues.length > 0 ? issues : null,
    warnings: warnings.length > 0 ? warnings : null,
    recommendations: getHealthRecommendations(stats, activeWorkers, totalWorkers, errorRate)
  };

  // Add detailed information if requested
  if (detailed) {
    healthReport.detailed = {
      workers: workers.map(w => ({
        pid: w.pid,
        id: w.id,
        state: w.state,
        uptime: Math.floor((now - w.startTime) / 1000),
        requests: w.requests,
        errors: w.errors,
        errorRate: w.requests > 0 ? (w.errors / w.requests) * 100 : 0,
        lastRequestTime: w.lastRequestTime
      })),
      // Figures for the process that serves the health check.
      system: {
        memory: process.memoryUsage(),
        cpu: process.cpuUsage(),
        platform: process.platform,
        arch: process.arch,
        version: process.version
      }
    };
  }

  return healthReport;
}
|
|
177
|
+
|
|
178
|
+
/**
 * Resolve the number of logical CPU cores without a static import.
 *
 * `require` does not exist in ES modules, so it is only used when present;
 * otherwise `process.getBuiltinModule` is used when the runtime provides it.
 *
 * @returns {number|null} CPU core count, or null when the `os` module cannot be reached
 */
function getCpuCount() {
  let os = null;
  if (typeof require === 'function') {
    os = require('os');
  } else if (typeof process !== 'undefined' && typeof process.getBuiltinModule === 'function') {
    os = process.getBuiltinModule('os');
  }
  return os ? os.cpus().length : null;
}

/**
 * Get health recommendations based on cluster status
 * @param {Object} stats - Cluster statistics (`totalRequests` and `workerRestarts` are read)
 * @param {number} activeWorkers - Number of active workers
 * @param {number} totalWorkers - Total number of workers
 * @param {number} errorRate - Error rate percentage
 * @returns {Array} Array of recommendations; `['All systems operational']` when none apply
 */
function getHealthRecommendations(stats, activeWorkers, totalWorkers, errorRate) {
  const recommendations = [];

  // Check idle worker ratio. Guard the division so an empty cluster yields 0
  // instead of NaN.
  // NOTE(review): this ratio counts workers that are not active, which is not
  // the same as "idle" (online but without recent requests) — confirm intent.
  const idleRatio = totalWorkers > 0 ? (totalWorkers - activeWorkers) / totalWorkers : 0;
  if (idleRatio > 0.7) {
    recommendations.push('Consider reducing the number of workers to save resources');
  }

  // Check if all workers are active and fewer than CPU cores. The CPU lookup
  // must never make the health check fail, so it is skipped when unavailable.
  if (activeWorkers === totalWorkers) {
    const cpuCount = getCpuCount();
    if (cpuCount !== null && totalWorkers < cpuCount) {
      recommendations.push(`Consider increasing the number of workers to ${cpuCount} (CPU cores)`);
    }
  }

  // Check error rate
  if (errorRate > 10) {
    recommendations.push('Investigate high error rate, check application logs and error handling');
  }

  // Check request distribution
  const averageRequests = stats.totalRequests / Math.max(totalWorkers, 1);
  if (averageRequests > 1000) {
    recommendations.push('High request load, consider scaling horizontally');
  }

  // Check worker restarts
  if (stats.workerRestarts > 0) {
    recommendations.push(`Monitor worker stability (${stats.workerRestarts} restarts detected)`);
  }

  return recommendations.length > 0 ? recommendations : ['All systems operational'];
}
|
|
219
|
+
|
|
220
|
+
/**
 * Compare two secrets without stopping at the first differing character.
 *
 * Yields the same result as `===` for two strings and `false` for anything
 * else. `crypto.timingSafeEqual` would be the preferred primitive if this
 * module imported `crypto`.
 *
 * @param {*} actual - Value supplied by the request
 * @param {*} expected - Configured secret
 * @returns {boolean} True when both are strings with identical content
 */
function safeCompare(actual, expected) {
  if (typeof actual !== 'string' || typeof expected !== 'string') {
    return false;
  }
  let mismatch = actual.length ^ expected.length;
  for (let i = 0; i < expected.length; i++) {
    // charCodeAt yields NaN past the end of `actual`; `|| 0` maps that to 0.
    mismatch |= (actual.charCodeAt(i) || 0) ^ expected.charCodeAt(i);
  }
  return mismatch === 0;
}

/**
 * Check authentication for health endpoint
 *
 * Supported `auth` forms:
 *  - function: called with `ctx`; its return value is returned unchanged
 *  - `{ apiKey }`: matched against the `x-api-key` header or `apiKey` query value
 *  - `{ username, password }`: matched against an HTTP Basic `Authorization` header
 *
 * @param {Object} ctx - Context object
 * @param {Object|Function} auth - Authentication configuration or function
 * @returns {boolean} Authentication status
 */
function checkAuth(ctx, auth) {
  if (typeof auth === 'function') {
    return auth(ctx);
  }

  // `typeof null` is 'object', so exclude it explicitly.
  if (auth !== null && typeof auth === 'object') {
    const headers = ctx.headers || {};

    // Check API key
    if (auth.apiKey) {
      const query = ctx.query || {};
      const apiKey = headers['x-api-key'] || query.apiKey;
      return safeCompare(apiKey, auth.apiKey);
    }

    // Check basic auth
    if (auth.username && auth.password) {
      const authHeader = headers.authorization;
      if (typeof authHeader !== 'string' || !authHeader.startsWith('Basic ')) {
        return false;
      }

      const credentials = Buffer.from(authHeader.slice(6), 'base64').toString();
      // Split on the FIRST colon only: the password part may itself contain
      // colons, and `split(':')` would silently truncate it.
      const separator = credentials.indexOf(':');
      if (separator === -1) {
        return false;
      }
      const username = credentials.slice(0, separator);
      const password = credentials.slice(separator + 1);

      // Evaluate both comparisons before combining them so a wrong username
      // does not skip the password comparison.
      const usernameMatches = safeCompare(username, auth.username);
      const passwordMatches = safeCompare(password, auth.password);
      return usernameMatches && passwordMatches;
    }
  }

  // NOTE(review): an `auth` value that is neither a function nor an object
  // carrying `apiKey` or `username`+`password` grants access here — confirm
  // that failing open on an unrecognised configuration is intended.
  return true; // No auth required
}
|
|
253
|
+
|
|
254
|
+
/**
 * Create detailed health check endpoint
 *
 * Serves `/cluster/health/detailed` with the detailed report from
 * `getClusterHealth`, extended with load balancer and worker manager
 * statistics when the cluster manager exposes them.
 *
 * @param {Object} clusterManager - Cluster manager instance
 * @param {Object} options - Middleware options
 * @param {Object|Function} [options.auth] - Auth configuration passed to `checkAuth`
 * @returns {Function} Middleware function
 */
export function createDetailedHealthMiddleware(clusterManager, options = {}) {
  // The detailed report exposes PIDs, memory usage and runtime version, so it
  // honours the same `auth` option as the basic health middleware.
  const auth = (options && options.auth) || null;

  return async function detailedHealthMiddleware(ctx, next) {
    if (ctx.path !== '/cluster/health/detailed') {
      return next();
    }

    // Awaited because an auth callback may be async.
    if (auth && !(await checkAuth(ctx, auth))) {
      ctx.status = 401;
      ctx.body = {
        status: 'error',
        message: 'Unauthorized',
        timestamp: new Date().toISOString()
      };
      return;
    }

    try {
      const health = await getClusterHealth(clusterManager, true);

      // Add load balancer stats if available. `clusterManager` itself may be
      // missing, in which case the report already says so.
      if (clusterManager && clusterManager.loadBalancer) {
        health.loadBalancer = clusterManager.loadBalancer.getStats();
      }

      // Add worker manager stats if available
      if (clusterManager && clusterManager.workerManager) {
        health.workerManager = clusterManager.workerManager.getClusterStats();
      }

      // 'healthy' and 'degraded' are both served as 200; anything else is 503.
      const isServing = health.status === 'healthy' || health.status === 'degraded';
      ctx.status = isServing ? 200 : 503;
      ctx.body = health;
    } catch (error) {
      ctx.status = 503;
      ctx.body = {
        status: 'error',
        timestamp: new Date().toISOString(),
        message: 'Detailed health check failed',
        error: error.message
      };
    }
  };
}
|
|
294
|
+
|
|
295
|
+
/**
 * Create health check endpoint with custom path
 *
 * @param {Object} clusterManager - Cluster manager instance
 * @param {string} path - Custom path for health endpoint
 * @param {Object} options - Additional options
 * @param {boolean} [options.detailed=false] - Include detailed data in the report
 * @param {Function} [options.metrics] - Called with `ctx`; its awaited result is
 *   attached to the report as `customMetrics`
 * @param {Object|Function} [options.auth] - Auth configuration passed to `checkAuth`
 * @returns {Function} Middleware function
 */
export function createCustomHealthMiddleware(clusterManager, path, options = {}) {
  // Honour the same `auth` option as the basic health middleware so a custom
  // endpoint is not left unprotected when auth is configured.
  const auth = (options && options.auth) || null;

  return async function customHealthMiddleware(ctx, next) {
    if (ctx.path !== path) {
      return next();
    }

    // Awaited because an auth callback may be async.
    if (auth && !(await checkAuth(ctx, auth))) {
      ctx.status = 401;
      ctx.body = {
        status: 'error',
        message: 'Unauthorized',
        timestamp: new Date().toISOString()
      };
      return;
    }

    try {
      const health = await getClusterHealth(clusterManager, options.detailed || false);

      // Add custom metrics if provided. Only a callable hook is invoked.
      if (typeof options.metrics === 'function') {
        health.customMetrics = await options.metrics(ctx);
      }

      // 'healthy' and 'degraded' are both served as 200; anything else is 503.
      const isServing = health.status === 'healthy' || health.status === 'degraded';
      ctx.status = isServing ? 200 : 503;
      ctx.body = health;
    } catch (error) {
      ctx.status = 503;
      ctx.body = {
        status: 'error',
        timestamp: new Date().toISOString(),
        message: 'Health check failed',
        error: error.message
      };
    }
  };
}
|