aetherframework-cluster 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,619 @@
1
+ // packages/cluster/src/core/WorkerManager.js
2
+ import { EventEmitter } from 'events';
3
+
4
+ /**
5
+ * Worker Manager - Manages worker processes and their lifecycle
6
+ * Tracks worker statistics, load balancing, and health status
7
+ */
8
class WorkerManager extends EventEmitter {
  /**
   * Create a manager with empty registries and zeroed cluster metrics.
   *
   * @param {Object} [options] - Configuration overrides
   * @param {number} [options.maxWorkers=10] - Stored on `this.options`; not read inside this class
   * @param {number} [options.minWorkers=1] - Stored on `this.options`; not read inside this class
   * @param {number} [options.workerTimeout=30000] - Heartbeat staleness limit and load-decay delay (ms)
   * @param {number} [options.maxRestarts=5] - Restart budget checked by `shouldRestartWorker`
   * @param {number} [options.restartDelay=1000] - Stored on `this.options`; not read inside this class
   */
  constructor(options = {}) {
    super();

    const defaults = {
      maxWorkers: 10,
      minWorkers: 1,
      workerTimeout: 30000,
      maxRestarts: 5,
      restartDelay: 1000
    };

    // Caller-supplied keys win (including an explicit 0), but a key that is
    // present with an undefined/null value must not erase its default:
    // an undefined workerTimeout would make every worker look dead.
    this.options = { ...defaults, ...options };
    for (const [key, fallback] of Object.entries(defaults)) {
      if (this.options[key] === undefined || this.options[key] === null) {
        this.options[key] = fallback;
      }
    }

    // All registries are keyed by worker PID.
    this.workers = new Map();
    this.workerStats = new Map();
    this.workerLoad = new Map();
    this.restartCounts = new Map();
    this.workerStartTime = new Map();

    // Performance metrics
    this.metrics = {
      totalRequests: 0,
      totalErrors: 0,
      totalRestarts: 0,
      startTime: Date.now()
    };
  }

  /**
   * Register a new worker and initialise its stats, load and restart counters.
   * Emits `worker:registered`.
   *
   * Only the own enumerable properties of `worker` are copied into the stored
   * record (object spread); prototype methods are not carried over.
   *
   * @param {number} pid - Process ID
   * @param {Object} worker - Worker object
   * @param {Object} metadata - Worker metadata
   * @returns {string} Generated worker ID
   */
  registerWorker(pid, worker, metadata = {}) {
    const workerId = this.generateWorkerId();
    const now = Date.now();

    this.workers.set(pid, {
      ...worker,
      id: workerId,
      pid,
      metadata,
      state: 'starting',
      startTime: now,
      lastHeartbeat: now
    });

    this.workerStats.set(pid, {
      pid,
      workerId,
      requests: 0,
      errors: 0,
      memoryUsage: null,
      cpuUsage: null,
      uptime: 0,
      lastRequestTime: null,
      lastError: null,
      state: 'starting'
    });

    this.workerLoad.set(pid, 0);
    this.restartCounts.set(pid, 0);
    this.workerStartTime.set(pid, now);

    console.log(`✅ Worker ${pid} (ID: ${workerId}) registered`);
    this.emit('worker:registered', { pid, workerId, metadata });

    return workerId;
  }

  /**
   * Remove a worker from every registry. Emits `worker:unregistered` with the
   * worker's final stats record.
   *
   * @param {number} pid - Process ID
   * @returns {boolean} `false` if the PID was not registered, `true` otherwise
   */
  unregisterWorker(pid) {
    if (!this.workers.has(pid)) {
      return false;
    }

    const worker = this.workers.get(pid);
    const stats = this.workerStats.get(pid);

    this.workers.delete(pid);
    this.workerStats.delete(pid);
    this.workerLoad.delete(pid);
    this.restartCounts.delete(pid);
    this.workerStartTime.delete(pid);

    console.log(`🗑️ Worker ${pid} (ID: ${worker.id}) unregistered`);
    this.emit('worker:unregistered', { pid, workerId: worker.id, stats });

    return true;
  }

  /**
   * Update a worker's state and optionally its resource figures.
   * Emits `worker:stateChanged`.
   *
   * @param {number} pid - Process ID
   * @param {string} state - New state
   * @param {Object} data - Optional `memoryUsage`, `cpuUsage` and `uptime` values
   * @returns {boolean} `false` if the PID was not registered, `true` otherwise
   */
  updateWorkerState(pid, state, data = {}) {
    if (!this.workers.has(pid)) {
      return false;
    }

    const worker = this.workers.get(pid);
    const oldState = worker.state;
    const now = Date.now();

    worker.state = state;
    worker.lastUpdate = now;

    if (state === 'ready') {
      worker.readyTime = now;
    }

    const stats = this.workerStats.get(pid);
    if (stats) {
      stats.state = state;
      stats.lastUpdate = now;

      // Null checks instead of truthiness so a reported 0 (idle CPU, zero
      // uptime) is stored rather than silently dropped.
      if (data.memoryUsage != null) {
        stats.memoryUsage = data.memoryUsage;
      }

      if (data.cpuUsage != null) {
        stats.cpuUsage = data.cpuUsage;
      }

      if (data.uptime != null) {
        stats.uptime = data.uptime;
      }
    }

    console.log(`🔄 Worker ${pid} state changed: ${oldState} -> ${state}`);
    this.emit('worker:stateChanged', { pid, oldState, newState: state, data });

    return true;
  }

  /**
   * Record a request for a worker: bumps its request counter and load, and
   * schedules the load to drop by one after `options.workerTimeout` ms.
   * Emits `worker:request`.
   *
   * Note: only the stats record is marked 'active'; the worker record's state
   * (the one reported by `getWorkerStats`) is left unchanged.
   *
   * @param {number} pid - Process ID
   * @param {Object} requestInfo - Request information
   * @returns {boolean} `false` if the PID was not registered, `true` otherwise
   */
  recordRequest(pid, requestInfo = {}) {
    if (!this.workers.has(pid)) {
      return false;
    }

    const stats = this.workerStats.get(pid);

    if (stats) {
      stats.requests++;
      stats.lastRequestTime = Date.now();
      stats.state = 'active';

      // Update load
      const currentLoad = this.workerLoad.get(pid) || 0;
      this.workerLoad.set(pid, currentLoad + 1);

      // Update global metrics
      this.metrics.totalRequests++;

      // Auto-decrease load after timeout. If the worker was unregistered in
      // the meantime the lookup yields undefined and nothing is written.
      const decayTimer = setTimeout(() => {
        const load = this.workerLoad.get(pid);
        if (load > 0) {
          this.workerLoad.set(pid, load - 1);
        }
      }, this.options.workerTimeout);

      // Bookkeeping timers must not keep the process alive on their own.
      if (typeof decayTimer.unref === 'function') {
        decayTimer.unref();
      }
    }

    this.emit('worker:request', { pid, requestInfo, stats });
    return true;
  }

  /**
   * Record an error for a worker. Emits `worker:error`.
   *
   * Accepts non-Error values too (string, null, ...): the recorded message is
   * then the stringified value and the stack is undefined.
   *
   * @param {number} pid - Process ID
   * @param {Error} error - Error object
   * @param {Object} context - Error context
   * @returns {boolean} `false` if the PID was not registered, `true` otherwise
   */
  recordError(pid, error, context = {}) {
    if (!this.workers.has(pid)) {
      return false;
    }

    const isPresent = error !== null && error !== undefined;
    const message = isPresent && error.message !== undefined ? error.message : String(error);
    const stack = isPresent ? error.stack : undefined;

    const stats = this.workerStats.get(pid);

    if (stats) {
      stats.errors++;
      stats.lastError = {
        message,
        stack,
        timestamp: Date.now(),
        context
      };

      // Update global metrics
      this.metrics.totalErrors++;
    }

    console.error(`❌ Worker ${pid} error:`, message);
    this.emit('worker:error', { pid, error, context, stats });

    return true;
  }

  /**
   * Stamp a worker's last heartbeat with the current time.
   *
   * @param {number} pid - Process ID
   * @returns {boolean} `false` if the PID was not registered, `true` otherwise
   */
  updateHeartbeat(pid) {
    const worker = this.workers.get(pid);
    if (!worker) {
      return false;
    }

    worker.lastHeartbeat = Date.now();
    return true;
  }

  /**
   * Check whether a worker's last heartbeat is younger than
   * `options.workerTimeout`.
   *
   * @param {number} pid - Process ID
   * @returns {boolean} Alive status; `false` for unknown PIDs
   */
  isWorkerAlive(pid) {
    const worker = this.workers.get(pid);
    if (!worker) {
      return false;
    }

    const lastHeartbeat = worker.lastHeartbeat || 0;
    return (Date.now() - lastHeartbeat) < this.options.workerTimeout;
  }

  /**
   * Find the worker with the lowest load among those whose state is 'ready'
   * and whose heartbeat is fresh. Ties go to the earliest-registered entry.
   *
   * @returns {number|null} PID of least loaded worker, or null if none qualifies
   */
  getLeastLoadedWorker() {
    let minLoad = Infinity;
    let leastLoadedWorker = null;

    for (const [pid, load] of this.workerLoad.entries()) {
      const worker = this.workers.get(pid);

      // Only consider ready and alive workers
      if (worker && worker.state === 'ready' && this.isWorkerAlive(pid) && load < minLoad) {
        minLoad = load;
        leastLoadedWorker = pid;
      }
    }

    return leastLoadedWorker;
  }

  /**
   * Build a statistics snapshot for one worker.
   *
   * `uptime` is in whole seconds since registration; `errorRate` is a
   * percentage formatted as a string with two decimals.
   *
   * @param {number} pid - Process ID
   * @returns {Object|null} Worker statistics, or null for unknown PIDs
   */
  getWorkerStats(pid) {
    const worker = this.workers.get(pid);
    const stats = this.workerStats.get(pid);
    if (!worker || !stats) {
      return null;
    }

    const load = this.workerLoad.get(pid) || 0;
    const restartCount = this.restartCounts.get(pid) || 0;
    const startTime = this.workerStartTime.get(pid) || Date.now();

    const uptime = Date.now() - startTime;
    const errorRate = stats.requests > 0 ? (stats.errors / stats.requests) * 100 : 0;

    return {
      pid,
      workerId: worker.id,
      state: worker.state,
      uptime: Math.floor(uptime / 1000), // seconds
      requests: stats.requests,
      errors: stats.errors,
      errorRate: errorRate.toFixed(2),
      load,
      memoryUsage: stats.memoryUsage,
      cpuUsage: stats.cpuUsage,
      lastRequestTime: stats.lastRequestTime,
      lastHeartbeat: worker.lastHeartbeat,
      isAlive: this.isWorkerAlive(pid),
      restartCount,
      metadata: worker.metadata,
      lastError: stats.lastError
    };
  }

  /**
   * Get statistics snapshots for every registered worker.
   *
   * @returns {Array} Array of worker statistics
   */
  getAllWorkerStats() {
    const allStats = [];

    for (const pid of this.workers.keys()) {
      const stats = this.getWorkerStats(pid);
      if (stats) {
        allStats.push(stats);
      }
    }

    return allStats;
  }

  /**
   * Aggregate per-worker snapshots into a cluster-wide report covering worker
   * counts, request/error totals, load distribution and health.
   *
   * @returns {Object} Cluster statistics
   */
  getClusterStats() {
    const allStats = this.getAllWorkerStats();
    const now = Date.now();

    const totalWorkers = allStats.length;
    const activeWorkers = allStats.filter(w => w.state === 'ready' || w.state === 'active').length;
    const idleWorkers = allStats.filter(w => w.state === 'idle').length;
    const deadWorkers = allStats.filter(w => !w.isAlive).length;

    const totalRequests = allStats.reduce((sum, w) => sum + w.requests, 0);
    const totalErrors = allStats.reduce((sum, w) => sum + w.errors, 0);
    const totalLoad = allStats.reduce((sum, w) => sum + w.load, 0);

    const averageRequestsPerWorker = totalWorkers > 0 ? totalRequests / totalWorkers : 0;
    const averageLoadPerWorker = totalWorkers > 0 ? totalLoad / totalWorkers : 0;
    const clusterErrorRate = totalRequests > 0 ? (totalErrors / totalRequests) * 100 : 0;

    const leastLoadedWorker = this.getLeastLoadedWorker();

    return {
      timestamp: new Date().toISOString(),
      uptime: Math.floor((now - this.metrics.startTime) / 1000),
      workers: {
        total: totalWorkers,
        active: activeWorkers,
        idle: idleWorkers,
        dead: deadWorkers,
        byState: this.groupWorkersByState(allStats)
      },
      performance: {
        totalRequests,
        totalErrors,
        totalRestarts: this.metrics.totalRestarts,
        errorRate: clusterErrorRate.toFixed(2),
        averageRequestsPerWorker: averageRequestsPerWorker.toFixed(2),
        averageLoadPerWorker: averageLoadPerWorker.toFixed(2)
      },
      loadBalancing: {
        leastLoadedWorker,
        totalLoad,
        averageLoad: averageLoadPerWorker.toFixed(2)
      },
      health: this.getClusterHealth(allStats)
    };
  }

  /**
   * Group worker PIDs by state. Workers whose state is not one of the six
   * known buckets are omitted from the result.
   *
   * @param {Array} workers - Array of worker stats
   * @returns {Object} PIDs grouped by state
   */
  groupWorkersByState(workers) {
    const groups = {
      starting: [],
      ready: [],
      active: [],
      idle: [],
      stopping: [],
      dead: []
    };

    workers.forEach(worker => {
      if (groups[worker.state]) {
        groups[worker.state].push(worker.pid);
      }
    });

    return groups;
  }

  /**
   * Derive cluster health from worker snapshots. A worker is healthy when it
   * is alive and its error rate is below 10%. Status is 'critical' below 50%
   * healthy workers, 'degraded' below 80%, otherwise 'healthy'. An empty
   * worker list yields 0% and therefore 'critical'.
   *
   * @param {Array} workers - Array of worker stats
   * @returns {Object} Health status
   */
  getClusterHealth(workers) {
    // errorRate arrives as a toFixed() string; convert explicitly instead of
    // relying on implicit string/number comparison.
    const hasHighErrorRate = w => Number(w.errorRate) >= 10;

    const totalWorkers = workers.length;
    const highErrorWorkers = workers.filter(hasHighErrorRate);
    const deadWorkers = workers.filter(w => !w.isAlive);
    const healthyWorkers = workers.filter(w => w.isAlive && !hasHighErrorRate(w)).length;
    const unhealthyWorkers = totalWorkers - healthyWorkers;

    const healthPercentage = totalWorkers > 0 ? (healthyWorkers / totalWorkers) * 100 : 0;

    let status = 'healthy';
    const issues = [];

    if (healthPercentage < 50) {
      status = 'critical';
      issues.push('Less than 50% of workers are healthy');
    } else if (healthPercentage < 80) {
      status = 'degraded';
      issues.push('Less than 80% of workers are healthy');
    }

    // Check for high error rates
    if (highErrorWorkers.length > 0) {
      issues.push(`${highErrorWorkers.length} workers have error rate >= 10%`);
    }

    // Check for dead workers
    if (deadWorkers.length > 0) {
      issues.push(`${deadWorkers.length} workers are not responding`);
    }

    return {
      status,
      healthPercentage: healthPercentage.toFixed(2),
      healthyWorkers,
      unhealthyWorkers,
      totalWorkers,
      issues: issues.length > 0 ? issues : ['All systems operational'],
      timestamp: new Date().toISOString()
    };
  }

  /**
   * Increment the restart count for a PID and the cluster-wide restart total.
   *
   * @param {number} pid - Process ID
   * @returns {number} New restart count
   */
  incrementRestartCount(pid) {
    const newCount = (this.restartCounts.get(pid) || 0) + 1;

    this.restartCounts.set(pid, newCount);
    this.metrics.totalRestarts++;

    return newCount;
  }

  /**
   * Check whether a worker is still under its restart budget
   * (`options.maxRestarts`).
   *
   * @param {number} pid - Process ID
   * @returns {boolean} Whether to restart
   */
  shouldRestartWorker(pid) {
    return this.getRestartCount(pid) < this.options.maxRestarts;
  }

  /**
   * Get restart count for a worker.
   *
   * @param {number} pid - Process ID
   * @returns {number} Restart count (0 for unknown PIDs)
   */
  getRestartCount(pid) {
    return this.restartCounts.get(pid) || 0;
  }

  /**
   * Reset restart count for a worker to zero.
   *
   * @param {number} pid - Process ID
   */
  resetRestartCount(pid) {
    this.restartCounts.set(pid, 0);
  }

  /**
   * Generate a worker ID of the form `worker_<epoch ms>_<random base-36>`.
   * Not cryptographically secure; intended only as a readable label.
   *
   * @returns {string} Worker ID
   */
  generateWorkerId() {
    // slice(2, 11) takes the same up-to-9 characters the deprecated
    // substr(2, 9) did, skipping the leading "0.".
    return `worker_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /**
   * Unregister every worker whose heartbeat has gone stale.
   *
   * @returns {Array} Array of cleaned up worker PIDs
   */
  cleanupDeadWorkers() {
    // Snapshot the keys first so the registry is not mutated mid-iteration.
    const deadWorkers = Array.from(this.workers.keys()).filter(pid => !this.isWorkerAlive(pid));

    for (const pid of deadWorkers) {
      this.unregisterWorker(pid);
    }

    return deadWorkers;
  }

  /**
   * Get worker load.
   *
   * @param {number} pid - Process ID
   * @returns {number} Current load (0 for unknown PIDs)
   */
  getWorkerLoad(pid) {
    return this.workerLoad.get(pid) || 0;
  }

  /**
   * Set worker load, clamped to a minimum of zero.
   *
   * @param {number} pid - Process ID
   * @param {number} load - New load value
   */
  setWorkerLoad(pid, load) {
    this.workerLoad.set(pid, Math.max(0, load));
  }

  /**
   * Get all worker loads.
   *
   * @returns {Map} Copy of the PID -> load map
   */
  getAllWorkerLoads() {
    return new Map(this.workerLoad);
  }

  /**
   * Look a worker up by its generated ID (linear scan).
   *
   * @param {string} workerId - Worker ID
   * @returns {Object|null} Shallow copy of the worker record, or null
   */
  getWorkerById(workerId) {
    for (const [pid, worker] of this.workers.entries()) {
      if (worker.id === workerId) {
        return { pid, ...worker };
      }
    }
    return null;
  }

  /**
   * Look a worker up by PID.
   *
   * @param {number} pid - Process ID
   * @returns {Object|null} Shallow copy of the worker record, or null
   */
  getWorkerByPid(pid) {
    const worker = this.workers.get(pid);
    if (!worker) {
      return null;
    }

    return { pid, ...worker };
  }

  /**
   * Get all workers.
   *
   * @returns {Array} Shallow copies of every worker record
   */
  getAllWorkers() {
    return Array.from(this.workers.entries(), ([pid, worker]) => ({ pid, ...worker }));
  }

  /**
   * Count workers whose record is currently in the given state.
   *
   * @param {string} state - Worker state
   * @returns {number} Count of workers in state
   */
  getWorkerCountByState(state) {
    let count = 0;

    for (const worker of this.workers.values()) {
      if (worker.state === state) {
        count++;
      }
    }

    return count;
  }

  /**
   * Get the global counters plus derived figures. `uptime` here is in
   * milliseconds (unlike the per-worker and cluster reports, which use seconds).
   *
   * @returns {Object} Metrics object
   */
  getMetrics() {
    return {
      ...this.metrics,
      uptime: Date.now() - this.metrics.startTime,
      workerCount: this.workers.size,
      activeWorkerCount: this.getWorkerCountByState('active'),
      readyWorkerCount: this.getWorkerCountByState('ready'),
      totalLoad: Array.from(this.workerLoad.values()).reduce((sum, load) => sum + load, 0)
    };
  }
}
618
+
619
+ export default WorkerManager;