xypriss 1.2.4 → 1.3.1

This diff shows the contents of publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (109)
  1. package/README.md +51 -3
  2. package/dist/cjs/mods/security/src/components/cache/index.js +1 -1
  3. package/dist/cjs/shared/logger/Logger.js +2 -0
  4. package/dist/cjs/shared/logger/Logger.js.map +1 -1
  5. package/dist/cjs/src/cluster/bun-cluster-manager.js +1567 -0
  6. package/dist/cjs/src/cluster/bun-cluster-manager.js.map +1 -0
  7. package/dist/cjs/src/cluster/cluster-manager.js +1 -1
  8. package/dist/cjs/src/cluster/cluster-manager.js.map +1 -1
  9. package/dist/cjs/src/cluster/index.js +25 -6
  10. package/dist/cjs/src/cluster/index.js.map +1 -1
  11. package/dist/cjs/src/cluster/memory-manager.js +486 -0
  12. package/dist/cjs/src/cluster/memory-manager.js.map +1 -0
  13. package/dist/cjs/src/cluster/modules/BunIPCManager.js +603 -0
  14. package/dist/cjs/src/cluster/modules/BunIPCManager.js.map +1 -0
  15. package/dist/cjs/src/cluster/modules/ClusterFactory.js +22 -1
  16. package/dist/cjs/src/cluster/modules/ClusterFactory.js.map +1 -1
  17. package/dist/cjs/src/cluster/modules/CpuMonitor.js +658 -0
  18. package/dist/cjs/src/cluster/modules/CpuMonitor.js.map +1 -0
  19. package/dist/cjs/src/cluster/modules/CrossPlatformMemory.js +257 -0
  20. package/dist/cjs/src/cluster/modules/CrossPlatformMemory.js.map +1 -0
  21. package/dist/cjs/src/cluster/modules/ProcessMonitor.js +513 -0
  22. package/dist/cjs/src/cluster/modules/ProcessMonitor.js.map +1 -0
  23. package/dist/cjs/src/plugins/server-maintenance-plugin.js +79 -14
  24. package/dist/cjs/src/plugins/server-maintenance-plugin.js.map +1 -1
  25. package/dist/cjs/src/server/FastServer.js +64 -43
  26. package/dist/cjs/src/server/FastServer.js.map +1 -1
  27. package/dist/cjs/src/server/components/fastapi/ClusterManagerComponent.js +226 -10
  28. package/dist/cjs/src/server/components/fastapi/ClusterManagerComponent.js.map +1 -1
  29. package/dist/cjs/src/server/const/Cluster.config.js +174 -31
  30. package/dist/cjs/src/server/const/Cluster.config.js.map +1 -1
  31. package/dist/cjs/src/server/const/default.js +11 -2
  32. package/dist/cjs/src/server/const/default.js.map +1 -1
  33. package/dist/cjs/src/server/utils/PortManager.js +26 -15
  34. package/dist/cjs/src/server/utils/PortManager.js.map +1 -1
  35. package/dist/esm/mods/security/src/components/cache/index.js +1 -1
  36. package/dist/esm/shared/logger/Logger.js +2 -0
  37. package/dist/esm/shared/logger/Logger.js.map +1 -1
  38. package/dist/esm/src/cluster/bun-cluster-manager.js +1565 -0
  39. package/dist/esm/src/cluster/bun-cluster-manager.js.map +1 -0
  40. package/dist/esm/src/cluster/cluster-manager.js +1 -1
  41. package/dist/esm/src/cluster/cluster-manager.js.map +1 -1
  42. package/dist/esm/src/cluster/index.js +25 -6
  43. package/dist/esm/src/cluster/index.js.map +1 -1
  44. package/dist/esm/src/cluster/memory-manager.js +484 -0
  45. package/dist/esm/src/cluster/memory-manager.js.map +1 -0
  46. package/dist/esm/src/cluster/modules/BunIPCManager.js +601 -0
  47. package/dist/esm/src/cluster/modules/BunIPCManager.js.map +1 -0
  48. package/dist/esm/src/cluster/modules/ClusterFactory.js +22 -1
  49. package/dist/esm/src/cluster/modules/ClusterFactory.js.map +1 -1
  50. package/dist/esm/src/cluster/modules/CpuMonitor.js +656 -0
  51. package/dist/esm/src/cluster/modules/CpuMonitor.js.map +1 -0
  52. package/dist/esm/src/cluster/modules/CrossPlatformMemory.js +255 -0
  53. package/dist/esm/src/cluster/modules/CrossPlatformMemory.js.map +1 -0
  54. package/dist/esm/src/cluster/modules/ProcessMonitor.js +511 -0
  55. package/dist/esm/src/cluster/modules/ProcessMonitor.js.map +1 -0
  56. package/dist/esm/src/plugins/server-maintenance-plugin.js +79 -14
  57. package/dist/esm/src/plugins/server-maintenance-plugin.js.map +1 -1
  58. package/dist/esm/src/server/FastServer.js +64 -43
  59. package/dist/esm/src/server/FastServer.js.map +1 -1
  60. package/dist/esm/src/server/components/fastapi/ClusterManagerComponent.js +226 -10
  61. package/dist/esm/src/server/components/fastapi/ClusterManagerComponent.js.map +1 -1
  62. package/dist/esm/src/server/const/Cluster.config.js +174 -31
  63. package/dist/esm/src/server/const/Cluster.config.js.map +1 -1
  64. package/dist/esm/src/server/const/default.js +11 -2
  65. package/dist/esm/src/server/const/default.js.map +1 -1
  66. package/dist/esm/src/server/utils/PortManager.js +26 -15
  67. package/dist/esm/src/server/utils/PortManager.js.map +1 -1
  68. package/dist/index.d.ts +90 -2
  69. package/package.json +6 -1
  70. package/dist/cjs/src/plugins/modules/network/index.js +0 -120
  71. package/dist/cjs/src/plugins/modules/network/index.js.map +0 -1
  72. package/dist/cjs/src/server/plugins/PluginEngine.js +0 -378
  73. package/dist/cjs/src/server/plugins/PluginEngine.js.map +0 -1
  74. package/dist/cjs/src/server/plugins/PluginRegistry.js +0 -339
  75. package/dist/cjs/src/server/plugins/PluginRegistry.js.map +0 -1
  76. package/dist/cjs/src/server/plugins/builtin/JWTAuthPlugin.js +0 -591
  77. package/dist/cjs/src/server/plugins/builtin/JWTAuthPlugin.js.map +0 -1
  78. package/dist/cjs/src/server/plugins/builtin/ResponseTimePlugin.js +0 -413
  79. package/dist/cjs/src/server/plugins/builtin/ResponseTimePlugin.js.map +0 -1
  80. package/dist/cjs/src/server/plugins/builtin/SmartCachePlugin.js +0 -843
  81. package/dist/cjs/src/server/plugins/builtin/SmartCachePlugin.js.map +0 -1
  82. package/dist/cjs/src/server/plugins/core/CachePlugin.js +0 -1975
  83. package/dist/cjs/src/server/plugins/core/CachePlugin.js.map +0 -1
  84. package/dist/cjs/src/server/plugins/core/PerformancePlugin.js +0 -894
  85. package/dist/cjs/src/server/plugins/core/PerformancePlugin.js.map +0 -1
  86. package/dist/cjs/src/server/plugins/core/SecurityPlugin.js +0 -799
  87. package/dist/cjs/src/server/plugins/core/SecurityPlugin.js.map +0 -1
  88. package/dist/cjs/src/server/plugins/types/PluginTypes.js +0 -47
  89. package/dist/cjs/src/server/plugins/types/PluginTypes.js.map +0 -1
  90. package/dist/esm/src/plugins/modules/network/index.js +0 -109
  91. package/dist/esm/src/plugins/modules/network/index.js.map +0 -1
  92. package/dist/esm/src/server/plugins/PluginEngine.js +0 -376
  93. package/dist/esm/src/server/plugins/PluginEngine.js.map +0 -1
  94. package/dist/esm/src/server/plugins/PluginRegistry.js +0 -337
  95. package/dist/esm/src/server/plugins/PluginRegistry.js.map +0 -1
  96. package/dist/esm/src/server/plugins/builtin/JWTAuthPlugin.js +0 -589
  97. package/dist/esm/src/server/plugins/builtin/JWTAuthPlugin.js.map +0 -1
  98. package/dist/esm/src/server/plugins/builtin/ResponseTimePlugin.js +0 -411
  99. package/dist/esm/src/server/plugins/builtin/ResponseTimePlugin.js.map +0 -1
  100. package/dist/esm/src/server/plugins/builtin/SmartCachePlugin.js +0 -841
  101. package/dist/esm/src/server/plugins/builtin/SmartCachePlugin.js.map +0 -1
  102. package/dist/esm/src/server/plugins/core/CachePlugin.js +0 -1973
  103. package/dist/esm/src/server/plugins/core/CachePlugin.js.map +0 -1
  104. package/dist/esm/src/server/plugins/core/PerformancePlugin.js +0 -872
  105. package/dist/esm/src/server/plugins/core/PerformancePlugin.js.map +0 -1
  106. package/dist/esm/src/server/plugins/core/SecurityPlugin.js +0 -797
  107. package/dist/esm/src/server/plugins/core/SecurityPlugin.js.map +0 -1
  108. package/dist/esm/src/server/plugins/types/PluginTypes.js +0 -47
  109. package/dist/esm/src/server/plugins/types/PluginTypes.js.map +0 -1
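The headline addition in 1.3.1 is a Bun-native cluster manager (src/cluster/bun-cluster-manager.js, diffed below) together with supporting modules for IPC, CPU, memory, and process monitoring, while the former server/plugins tree is removed from the dist output. As a rough orientation only, here is a minimal usage sketch based on the constructor, methods, and events visible in this diff; the export name, import path, and exact config shape are assumptions, not documented API:

    // Usage sketch -- the export name and import path are NOT shown in this diff (hypothetical)
    const { BunClusterManager } = require("./dist/cjs/src/cluster/bun-cluster-manager.js");

    async function main() {
        const manager = new BunClusterManager(
            {
                workers: "auto", // or an explicit count between 1 and 64
                processManagement: { respawn: true, maxRestarts: 3 },
                resources: {
                    maxMemoryPerWorker: "512MB",
                    memoryManagement: { enabled: true },
                },
            },
            8085 // base port that worker ports are allocated from
        );

        manager.on("cluster:started", (info) => {
            console.log(`started ${info.workerCount}/${info.requestedCount} workers`);
        });
        manager.on("metrics:collected", ({ metrics }) => {
            console.log(`error rate: ${metrics.errorRate.toFixed(2)}%`);
        });

        await manager.start();    // validates resources, spawns workers, starts monitoring
        await manager.scaleUp(2); // re-validates system memory before adding workers
        console.log(await manager.checkHealth());
        await manager.stop(true); // graceful shutdown, force-kills after the 30s limit
    }

    main().catch(console.error);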
@@ -0,0 +1,1567 @@
1
+ 'use strict';
2
+
3
+ var events = require('events');
4
+ var Logger = require('../../shared/logger/Logger.js');
5
+ var memoryManager = require('./memory-manager.js');
6
+ var crypto = require('crypto');
7
+ var perf_hooks = require('perf_hooks');
8
+ var CpuMonitor = require('./modules/CpuMonitor.js');
9
+
10
+ /**
+ * Robust Bun-compatible cluster manager
+ * Uses Bun's native process spawning and IPC capabilities with enhanced security and reliability
+ */
17
+ class BunClusterManager extends events.EventEmitter {
18
+ constructor(config, basePort = 8085) {
19
+ super();
20
+ this.workers = new Map();
21
+ this.isRunning = false;
22
+ this.startTime = 0;
23
+ this.maxShutdownTime = 30000; // 30 seconds
24
+ this.workerPorts = new Set();
25
+ this._validateConfig(config);
26
+ this.config = config;
27
+ this.basePort = basePort;
28
+ this.masterToken = this._generateSecureToken();
29
+ // Initialize security configuration
30
+ this.securityConfig = {
31
+ maxRestartAttempts: config.processManagement?.maxRestarts || 3,
32
+ restartWindow: 300000, // 5 minutes
33
+ maxMemoryPerWorker: this._parseMemoryString(config.resources?.maxMemoryPerWorker || "512MB"),
34
+ allowedSignals: ["SIGTERM", "SIGKILL", "SIGUSR1", "SIGUSR2"],
35
+ processTimeout: 30000, // 30 seconds
36
+ enableResourceLimits: true,
37
+ };
38
+ // Initialize memory manager with error handling
39
+ try {
40
+ this.memoryManager = new memoryManager.MemoryManager(config.resources);
41
+ this._setupMemoryManagement();
42
+ }
43
+ catch (error) {
44
+ Logger.logger.error("cluster", "Failed to initialize memory manager:", error);
45
+ throw new Error("Failed to initialize cluster manager");
46
+ }
47
+ // Initialize CPU monitor
48
+ this.cpuMonitor = new CpuMonitor.CpuMonitor({
49
+ enabled: true,
50
+ sampleInterval: 5000,
51
+ historySize: 100,
52
+ smoothingFactor: 0.3,
53
+ alertThresholds: {
54
+ warning: 70,
55
+ critical: 90,
56
+ },
57
+ });
58
+ // Setup graceful shutdown handlers
59
+ this._setupGracefulShutdown();
60
+ }
61
+ /**
62
+ * Parse memory string to bytes
63
+ */
64
+ _parseMemoryString(memoryStr) {
65
+ const units = {
66
+ B: 1,
67
+ KB: 1024,
68
+ MB: 1024 * 1024,
69
+ GB: 1024 * 1024 * 1024,
70
+ TB: 1024 * 1024 * 1024 * 1024,
71
+ };
72
+ const match = memoryStr.match(/^(\d+(?:\.\d+)?)\s*([KMGT]?B)$/i);
73
+ if (!match) {
74
+ throw new Error(`Invalid memory format: ${memoryStr}`);
75
+ }
76
+ const value = parseFloat(match[1]);
77
+ const unit = match[2].toUpperCase();
78
+ return value * (units[unit] || 1);
79
+ }
80
+ /**
81
+ * Validate cluster configuration
82
+ */
83
+ _validateConfig(config) {
84
+ if (!config) {
85
+ throw new Error("Cluster configuration is required");
86
+ }
87
+ if (typeof config.workers === "number" &&
88
+ (config.workers < 1 || config.workers > 64)) {
89
+ throw new Error("Worker count must be between 1 and 64");
90
+ }
91
+ if (config.resources?.maxMemoryPerWorker) {
92
+ const memoryLimit = this._parseMemoryString(config.resources.maxMemoryPerWorker);
93
+ if (memoryLimit < 64 * 1024 * 1024) {
94
+ throw new Error("Minimum memory limit is 64MB per worker");
95
+ }
96
+ }
97
+ }
98
+ /**
99
+ * Generate a secure token for worker authentication
100
+ */
101
+ _generateSecureToken() {
102
+ return crypto.randomBytes(32).toString("hex");
103
+ }
104
+ /**
105
+ * Create a worker-specific security token
106
+ */
107
+ _createWorkerToken(workerId) {
108
+ return crypto.createHash("sha256")
109
+ .update(this.masterToken)
110
+ .update(workerId)
111
+ .update(Date.now().toString())
112
+ .digest("hex");
113
+ }
114
+ /**
115
+ * Allocate a safe port for a worker
116
+ */
117
+ _allocatePort() {
118
+ let attempts = 0;
119
+ const maxAttempts = 100;
120
+ while (attempts < maxAttempts) {
121
+ const port = this.basePort + Math.floor(Math.random() * 1000) + 1;
122
+ if (!this.workerPorts.has(port) && port > 1024 && port < 65535) {
123
+ this.workerPorts.add(port);
124
+ return port;
125
+ }
126
+ attempts++;
127
+ }
128
+ throw new Error("Unable to allocate safe port for worker");
129
+ }
130
+ /**
131
+ * Release a port back to the pool
132
+ */
133
+ _releasePort(port) {
134
+ this.workerPorts.delete(port);
135
+ }
136
+ /**
137
+ * Setup enhanced memory management event handlers
138
+ */
139
+ _setupMemoryManagement() {
140
+ if (!this.config.resources?.memoryManagement?.enabled) {
141
+ return;
142
+ }
143
+ this.memoryManager.on("memory_alert", (alert) => {
144
+ Logger.logger.warn("cluster", `Memory Alert: ${alert.message}`);
145
+ this._handleMemoryAlert(alert);
146
+ });
147
+ this.memoryManager.on("low_memory_mode_enabled", () => {
148
+ Logger.logger.info("cluster", "Cluster entering low memory mode");
149
+ this.emit("low_memory_mode", {
150
+ enabled: true,
151
+ timestamp: Date.now(),
152
+ });
153
+ this._enableEmergencyMode();
154
+ });
155
+ this.memoryManager.on("low_memory_mode_disabled", () => {
156
+ Logger.logger.info("cluster", "Cluster exiting low memory mode");
157
+ this.emit("low_memory_mode", {
158
+ enabled: false,
159
+ timestamp: Date.now(),
160
+ });
161
+ this._disableEmergencyMode();
162
+ });
163
+ this.memoryManager.on("error", (error) => {
164
+ Logger.logger.error("cluster", "Memory manager error:", error);
165
+ this.emit("error", { type: "memory_manager", error });
166
+ });
167
+ }
168
+ /**
169
+ * Handle memory alerts with appropriate actions
170
+ */
171
+ async _handleMemoryAlert(alert) {
172
+ try {
173
+ switch (alert.action) {
174
+ case "scale_down":
175
+ await this._handleMemoryScaleDown(alert);
176
+ break;
177
+ case "restart_worker":
178
+ if (alert.workerId) {
179
+ await this._handleWorkerMemoryIssue(alert.workerId, alert);
180
+ }
181
+ break;
182
+ case "throttle":
183
+ this._handleMemoryThrottling(alert);
184
+ break;
185
+ default:
186
+ Logger.logger.warn("cluster", `Unknown memory alert action: ${alert.action}`);
187
+ }
188
+ }
189
+ catch (error) {
190
+ Logger.logger.error("cluster", "Error handling memory alert:", error);
191
+ }
192
+ }
193
+ /**
194
+ * Setup graceful shutdown handlers
195
+ */
196
+ _setupGracefulShutdown() {
197
+ const shutdownHandler = async (signal) => {
198
+ Logger.logger.info("cluster", `Received ${signal}, initiating graceful shutdown...`);
199
+ if (!this.shutdownPromise) {
200
+ this.shutdownPromise = this.stop(true);
201
+ }
202
+ await this.shutdownPromise;
203
+ process.exit(0);
204
+ };
205
+ process.on("SIGTERM", () => shutdownHandler("SIGTERM"));
206
+ process.on("SIGINT", () => shutdownHandler("SIGINT"));
207
+ // Handle uncaught exceptions
208
+ process.on("uncaughtException", (error) => {
209
+ Logger.logger.error("cluster", "Uncaught exception in cluster manager:", error);
210
+ this.stop(false).then(() => process.exit(1));
211
+ });
212
+ process.on("unhandledRejection", (reason, promise) => {
213
+ Logger.logger.error("cluster", "Unhandled rejection in cluster manager:", reason);
214
+ this.emit("error", {
215
+ type: "unhandled_rejection",
216
+ reason,
217
+ promise,
218
+ });
219
+ });
220
+ }
221
+ /**
222
+ * Start the Bun cluster with comprehensive error handling
223
+ */
224
+ async start() {
225
+ if (this.isRunning) {
226
+ Logger.logger.warn("cluster", "Bun cluster is already running");
227
+ return;
228
+ }
229
+ try {
230
+ Logger.logger.info("cluster", "Starting Bun cluster manager...");
231
+ this.startTime = perf_hooks.performance.now();
232
+ this.isRunning = true;
233
+ const workerCount = this._getOptimalWorkerCount();
234
+ Logger.logger.info("cluster", `Spawning ${workerCount} Bun workers`);
235
+ // Validate system resources before starting
236
+ await this._validateSystemResources(workerCount);
237
+ // Spawn workers with staggered startup
238
+ const spawnPromises = Array.from({ length: workerCount }, (_, i) => this._spawnWorkerWithRetry(i));
239
+ const workers = await Promise.allSettled(spawnPromises);
240
+ const successfulWorkers = workers.filter((result) => result.status === "fulfilled").length;
241
+ if (successfulWorkers === 0) {
242
+ throw new Error("Failed to start any workers");
243
+ }
244
+ if (successfulWorkers < workerCount) {
245
+ Logger.logger.warn("cluster", `Started ${successfulWorkers}/${workerCount} workers`);
246
+ }
247
+ // Start monitoring services
248
+ this._startHealthMonitoring();
249
+ this._startMetricsCollection();
250
+ this._startPerformanceMonitoring();
251
+ // Start memory monitoring if enabled
252
+ if (this.config.resources?.memoryManagement?.enabled !== false) {
253
+ this.memoryManager.startMonitoring();
254
+ }
255
+ // Start CPU monitoring
256
+ this.cpuMonitor.startMonitoring();
257
+ Logger.logger.info("cluster", `Bun cluster started with ${successfulWorkers} workers`);
258
+ this.emit("cluster:started", {
259
+ workerCount: successfulWorkers,
260
+ requestedCount: workerCount,
261
+ timestamp: Date.now(),
262
+ });
263
+ }
264
+ catch (error) {
265
+ this.isRunning = false;
266
+ Logger.logger.error("cluster", "Failed to start cluster:", error);
267
+ await this._cleanupPartialStart();
268
+ throw error;
269
+ }
270
+ }
271
+ /**
272
+ * Validate system resources before starting workers
273
+ */
274
+ async _validateSystemResources(workerCount) {
275
+ try {
276
+ // Get actual system memory information
277
+ const systemMemory = await this.memoryManager.getSystemMemoryStats();
278
+ const availableMemory = systemMemory.freeMemory;
279
+ const requiredMemory = workerCount * this.securityConfig.maxMemoryPerWorker;
280
+ // Check if we have enough memory (leave 20% buffer)
281
+ const memoryWithBuffer = availableMemory * 0.8;
282
+ if (requiredMemory > memoryWithBuffer) {
283
+ // Check if we can reduce to a single worker with minimal memory
284
+ const minMemoryPerWorker = 128 * 1024 * 1024; // 128MB minimum
285
+ if (workerCount === 1 &&
286
+ minMemoryPerWorker <= memoryWithBuffer) {
287
+ Logger.logger.warn("cluster", `Reducing memory limit to ${Math.round(minMemoryPerWorker / 1024 / 1024)}MB per worker due to low system memory`);
288
+ this.securityConfig.maxMemoryPerWorker = minMemoryPerWorker;
289
+ return; // Allow startup with reduced memory
290
+ }
291
+ throw new Error(`Insufficient memory for ${workerCount} workers. Required: ${Math.round(requiredMemory / 1024 / 1024)}MB, Available: ${Math.round(memoryWithBuffer / 1024 / 1024)}MB (${Math.round(availableMemory / 1024 / 1024)}MB total free). Consider disabling clustering or increasing system memory.`);
292
+ }
293
+ Logger.logger.debug("cluster", `Memory validation passed: Required ${Math.round(requiredMemory / 1024 / 1024)}MB, Available ${Math.round(memoryWithBuffer / 1024 / 1024)}MB`);
294
+ }
295
+ catch (error) {
296
+ if (error instanceof Error &&
297
+ error.message.includes("Insufficient memory")) {
298
+ throw error;
299
+ }
300
+ // If memory manager fails, fall back to basic validation
301
+ Logger.logger.warn("cluster", "Failed to get system memory stats, using fallback validation:", error);
302
+ const os = await import('os');
303
+ const freeMemory = os.freemem();
304
+ const requiredMemory = workerCount * this.securityConfig.maxMemoryPerWorker;
305
+ const availableMemoryFallback = freeMemory * 0.8;
306
+ if (requiredMemory > availableMemoryFallback) {
307
+ // Try with minimal memory for single worker
308
+ const minMemoryPerWorker = 128 * 1024 * 1024; // 128MB minimum
309
+ if (workerCount === 1 &&
310
+ minMemoryPerWorker <= availableMemoryFallback) {
311
+ Logger.logger.warn("cluster", `Fallback: Reducing memory limit to ${Math.round(minMemoryPerWorker / 1024 / 1024)}MB per worker`);
312
+ this.securityConfig.maxMemoryPerWorker = minMemoryPerWorker;
313
+ return;
314
+ }
315
+ throw new Error(`Insufficient memory for ${workerCount} workers. Required: ${Math.round(requiredMemory / 1024 / 1024)}MB, Available: ${Math.round(availableMemoryFallback / 1024 / 1024)}MB (fallback). Consider disabling clustering or increasing system memory.`);
316
+ }
317
+ }
318
+ // Validate port availability
319
+ if (this.basePort < 1024 || this.basePort > 65000) {
320
+ throw new Error(`Invalid base port: ${this.basePort}. Must be between 1024 and 65000`);
321
+ }
322
+ }
323
+ /**
324
+ * Cleanup after partial startup failure
325
+ */
326
+ async _cleanupPartialStart() {
327
+ try {
328
+ const stopPromises = Array.from(this.workers.values()).map((worker) => this._stopWorker(worker.id, false));
329
+ await Promise.allSettled(stopPromises);
330
+ this.workers.clear();
331
+ this.workerPorts.clear();
332
+ }
333
+ catch (error) {
334
+ Logger.logger.error("cluster", "Error during cleanup:", error);
335
+ }
336
+ }
337
+ /**
338
+ * Stop the Bun cluster with timeout protection
339
+ */
340
+ async stop(graceful = true) {
341
+ if (!this.isRunning) {
342
+ return;
343
+ }
344
+ Logger.logger.info("cluster", `Stopping Bun cluster (graceful: ${graceful})...`);
345
+ this.isRunning = false;
346
+ // Create shutdown timeout
347
+ const shutdownTimeout = new Promise((_, reject) => {
348
+ setTimeout(() => reject(new Error("Shutdown timeout exceeded")), this.maxShutdownTime);
349
+ });
350
+ try {
351
+ // Stop monitoring first
352
+ this._stopMonitoring();
353
+ // Stop memory manager
354
+ if (this.memoryManager) {
355
+ this.memoryManager.stopMonitoring?.();
356
+ }
357
+ // Stop CPU monitoring
358
+ this.cpuMonitor.stopMonitoring();
359
+ // Stop all workers
360
+ const stopPromises = Array.from(this.workers.values()).map((worker) => this._stopWorker(worker.id, graceful));
361
+ await Promise.race([Promise.all(stopPromises), shutdownTimeout]);
362
+ this.workers.clear();
363
+ this.workerPorts.clear();
364
+ Logger.logger.info("cluster", "Bun cluster stopped successfully");
365
+ this.emit("cluster:stopped", { timestamp: Date.now() });
366
+ }
367
+ catch (error) {
368
+ Logger.logger.error("cluster", "Error during cluster shutdown:", error);
369
+ // Force kill remaining workers
370
+ await this._forceKillAllWorkers();
371
+ throw error;
372
+ }
373
+ }
374
+ /**
375
+ * Force kill all workers in emergency situations
376
+ */
377
+ async _forceKillAllWorkers() {
378
+ const forceKillPromises = Array.from(this.workers.values()).map(async (worker) => {
379
+ try {
380
+ worker.subprocess.kill("SIGKILL");
381
+ await worker.subprocess.exited;
382
+ this._releasePort(worker.port);
383
+ }
384
+ catch (error) {
385
+ Logger.logger.error("cluster", `Error force killing worker ${worker.id}:`, error);
386
+ }
387
+ });
388
+ await Promise.allSettled(forceKillPromises);
389
+ this.workers.clear();
390
+ this.workerPorts.clear();
391
+ }
392
+ /**
393
+ * Stop all monitoring services
394
+ */
395
+ _stopMonitoring() {
396
+ if (this.healthCheckInterval) {
397
+ clearInterval(this.healthCheckInterval);
398
+ this.healthCheckInterval = undefined;
399
+ }
400
+ if (this.metricsInterval) {
401
+ clearInterval(this.metricsInterval);
402
+ this.metricsInterval = undefined;
403
+ }
404
+ if (this.performanceInterval) {
405
+ clearInterval(this.performanceInterval);
406
+ this.performanceInterval = undefined;
407
+ }
408
+ }
409
+ /**
410
+ * Spawn a worker with retry logic
411
+ */
412
+ async _spawnWorkerWithRetry(index, retries = 3) {
413
+ let lastError = null;
414
+ for (let attempt = 0; attempt < retries; attempt++) {
415
+ try {
416
+ // Add delay between retry attempts
417
+ if (attempt > 0) {
418
+ await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
419
+ }
420
+ return await this._spawnWorker(index);
421
+ }
422
+ catch (error) {
423
+ lastError = error;
424
+ Logger.logger.warn("cluster", `Worker spawn attempt ${attempt + 1} failed:`, error);
425
+ }
426
+ }
427
+ throw lastError || new Error("Failed to spawn worker after retries");
428
+ }
429
+ /**
430
+ * Spawn a new Bun worker process with enhanced security
431
+ */
432
+ async _spawnWorker(index) {
433
+ const workerId = `worker-${index}-${Date.now()}`;
434
+ const port = this._allocatePort();
435
+ const securityToken = this._createWorkerToken(workerId);
436
+ Logger.logger.debug("cluster", `Spawning Bun worker ${workerId} on port ${port}`);
437
+ try {
438
+ // Validate script path exists and is accessible
439
+ if (!process.argv[1]) {
440
+ throw new Error("Unable to determine script path for worker");
441
+ }
442
+ // Enhanced environment with security measures
443
+ const workerEnv = {
444
+ ...this._getSecureEnvironment(),
445
+ WORKER_ID: workerId,
446
+ WORKER_PORT: port.toString(),
447
+ WORKER_SECURITY_TOKEN: securityToken,
448
+ MASTER_TOKEN: this.masterToken,
449
+ NODE_ENV: "worker",
450
+ CLUSTER_MODE: "true",
451
+ WORKER_MEMORY_LIMIT: this.securityConfig.maxMemoryPerWorker.toString(),
452
+ WORKER_MAX_REQUESTS: "10000", // Prevent memory leaks
453
+ };
454
+ const subprocess = Bun.spawn({
455
+ cmd: ["bun", "run", process.argv[1]],
456
+ env: workerEnv,
457
+ stdio: ["pipe", "pipe", "pipe"],
458
+ });
459
+ const worker = {
460
+ id: workerId,
461
+ subprocess,
462
+ port,
463
+ status: "starting",
464
+ startTime: Date.now(),
465
+ restarts: 0,
466
+ lastPing: Date.now(),
467
+ health: {
468
+ status: "unknown",
469
+ consecutiveFailures: 0,
470
+ },
471
+ securityToken,
472
+ performance: {
473
+ requestCount: 0,
474
+ errorCount: 0,
475
+ averageResponseTime: 0,
476
+ lastRequestTime: 0,
477
+ cpuUsage: 0,
478
+ memoryUsage: 0,
479
+ },
480
+ resourceLimits: {
481
+ maxMemory: this.securityConfig.maxMemoryPerWorker,
482
+ maxCpu: 80, // 80% CPU usage limit
483
+ },
484
+ restartHistory: [],
485
+ };
486
+ this.workers.set(workerId, worker);
487
+ // Setup process event handlers
488
+ subprocess.exited
489
+ .then((exitCode) => {
490
+ this._handleWorkerExit(workerId, exitCode);
491
+ })
492
+ .catch((error) => {
493
+ Logger.logger.error("cluster", `Worker ${workerId} exit handler error:`, error);
494
+ this._handleWorkerExit(workerId, -1);
495
+ });
496
+ // Setup stdout/stderr handling for better debugging
497
+ this._setupWorkerLogging(worker);
498
+ // Wait for worker to be ready with timeout
499
+ await this._waitForWorkerReady(worker);
500
+ worker.status = "running";
501
+ worker.health.status = "healthy";
502
+ // Register worker with IPC manager if available
503
+ if (this.ipcManager) {
504
+ this.ipcManager.registerWorker(workerId, worker.subprocess);
505
+ Logger.logger.debug("cluster", `Worker ${workerId} registered with IPC manager`);
506
+ }
507
+ Logger.logger.info("cluster", `Bun worker ${workerId} started on port ${port}`);
508
+ this.emit("worker:started", {
509
+ workerId,
510
+ port,
511
+ timestamp: Date.now(),
512
+ });
513
+ return worker;
514
+ }
515
+ catch (error) {
516
+ this._releasePort(port);
517
+ Logger.logger.error("cluster", `Failed to spawn Bun worker ${workerId}:`, error);
518
+ throw error;
519
+ }
520
+ }
521
+ /**
522
+ * Get secure environment variables for workers
523
+ */
524
+ _getSecureEnvironment() {
525
+ // Only pass safe environment variables to workers
526
+ const safeEnvVars = [
527
+ "NODE_ENV",
528
+ "PATH",
529
+ "HOME",
530
+ "USER",
531
+ "PWD",
532
+ "LOG_LEVEL",
533
+ "DEBUG",
534
+ "TZ",
535
+ ];
536
+ const secureEnv = {};
537
+ for (const key of safeEnvVars) {
538
+ if (process.env[key]) {
539
+ secureEnv[key] = process.env[key];
540
+ }
541
+ }
542
+ return secureEnv;
543
+ }
544
+ /**
545
+ * Setup logging for worker process
546
+ */
547
+ _setupWorkerLogging(worker) {
548
+ if (worker.subprocess.stdout) {
549
+ worker.subprocess.stdout.pipeTo(new WritableStream({
550
+ write(chunk) {
551
+ const data = new TextDecoder().decode(chunk);
552
+ Logger.logger.debug("cluster", `[${worker.id}] stdout: ${data.trim()}`);
553
+ },
554
+ }));
555
+ }
556
+ if (worker.subprocess.stderr) {
557
+ worker.subprocess.stderr.pipeTo(new WritableStream({
558
+ write(chunk) {
559
+ const data = new TextDecoder().decode(chunk);
560
+ Logger.logger.warn("cluster", `[${worker.id}] stderr: ${data.trim()}`);
561
+ },
562
+ }));
563
+ }
564
+ }
565
+ /**
566
+ * Check if worker is ready and responding with multiple strategies
567
+ */
568
+ async _checkWorkerReadiness(worker) {
569
+ try {
570
+ // Check if process is still running
571
+ if (worker.subprocess.killed) {
572
+ Logger.logger.debug("cluster", `Worker ${worker.id} process is killed`);
573
+ return false;
574
+ }
575
+ // Strategy 1: Check if process is responsive (basic check)
576
+ if (!worker.subprocess.pid) {
577
+ Logger.logger.debug("cluster", `Worker ${worker.id} has no PID`);
578
+ return false;
579
+ }
580
+ // Strategy 2: Try IPC communication first (faster than port check)
581
+ if (this.ipcManager) {
582
+ try {
583
+ // Try to send a ping via IPC
584
+ const ipcReady = await this._checkWorkerIPCReadiness(worker);
585
+ if (ipcReady) {
586
+ Logger.logger.debug("cluster", `Worker ${worker.id} ready via IPC`);
587
+ return true;
588
+ }
589
+ }
590
+ catch (error) {
591
+ Logger.logger.debug("cluster", `Worker ${worker.id} IPC check failed:`, error);
592
+ }
593
+ }
594
+ // Strategy 3: Check port listening (fallback)
595
+ const isListening = await this._checkPortListening(worker.port);
596
+ if (isListening) {
597
+ Logger.logger.debug("cluster", `Worker ${worker.id} ready via port check`);
598
+ return true;
599
+ }
600
+ // Strategy 4: Check if worker has been running for a minimum time (more lenient)
601
+ const runningTime = Date.now() - worker.startTime;
602
+ if (runningTime > 3000) {
603
+ // 3 seconds minimum (reduced from 5)
604
+ Logger.logger.debug("cluster", `Worker ${worker.id} assumed ready after ${runningTime}ms (time-based)`);
605
+ return true;
606
+ }
607
+ // Strategy 5: If worker process is stable and not killed, assume it's working
608
+ if (runningTime > 1000 &&
609
+ !worker.subprocess.killed &&
610
+ worker.subprocess.pid) {
611
+ Logger.logger.debug("cluster", `Worker ${worker.id} process stable after ${runningTime}ms (process-based)`);
612
+ return true;
613
+ }
614
+ Logger.logger.debug("cluster", `Worker ${worker.id} not ready yet (running for ${runningTime}ms)`);
615
+ return false;
616
+ }
617
+ catch (error) {
618
+ Logger.logger.debug("cluster", `Worker ${worker.id} readiness check failed:`, error);
619
+ return false;
620
+ }
621
+ }
622
+ /**
623
+ * Check worker readiness via IPC
624
+ */
625
+ async _checkWorkerIPCReadiness(worker) {
626
+ try {
627
+ // Register worker with IPC manager temporarily for ping
628
+ if (!this.ipcManager)
629
+ return false;
630
+ this.ipcManager.registerWorker(worker.id, worker.subprocess);
631
+ // Try to ping the worker
632
+ const response = await Promise.race([
633
+ this.ipcManager.sendToWorker(worker.id, "ping", {}),
634
+ new Promise((_, reject) => setTimeout(() => reject(new Error("IPC timeout")), 2000)),
635
+ ]);
636
+ // Check if we got a real response or a mock response
637
+ if (response && typeof response === "object") {
638
+ if (response.status === "ok" &&
639
+ response.message === "IPC not fully supported in Bun mode") {
640
+ Logger.logger.debug("cluster", `Worker ${worker.id} IPC not fully supported, but worker is registered`);
641
+ return true; // Worker is registered, even if IPC isn't fully functional
642
+ }
643
+ if (response.status === "fallback") {
644
+ Logger.logger.debug("cluster", `Worker ${worker.id} IPC communication failed, but worker exists`);
645
+ return true; // Worker exists, even if IPC failed
646
+ }
647
+ }
648
+ return response !== undefined;
649
+ }
650
+ catch (error) {
651
+ return false;
652
+ }
653
+ }
654
+ /**
655
+ * Check if a port is listening
656
+ */
657
+ async _checkPortListening(port) {
658
+ return new Promise((resolve) => {
659
+ const net = require("net");
660
+ const socket = new net.Socket();
661
+ const timeout = setTimeout(() => {
662
+ socket.destroy();
663
+ resolve(false);
664
+ }, 2000); // Increased timeout to 2 seconds
665
+ socket.on("connect", () => {
666
+ clearTimeout(timeout);
667
+ socket.destroy();
668
+ resolve(true);
669
+ });
670
+ socket.on("error", (error) => {
671
+ clearTimeout(timeout);
672
+ // Log the specific error for debugging
673
+ Logger.logger.debug("cluster", `Port ${port} connection error:`, error.code);
674
+ resolve(false);
675
+ });
676
+ try {
677
+ socket.connect(port, "localhost");
678
+ }
679
+ catch (error) {
680
+ clearTimeout(timeout);
681
+ Logger.logger.debug("cluster", `Port ${port} connect attempt failed:`, error);
682
+ resolve(false);
683
+ }
684
+ });
685
+ }
686
+ /**
687
+ * Wait for worker to be ready with progressive timeout and better diagnostics
688
+ */
689
+ async _waitForWorkerReady(worker, timeout = 15000 // Further reduced to 15 seconds
690
+ ) {
691
+ return new Promise((resolve, reject) => {
692
+ const checkInterval = 500; // Increased interval to reduce CPU usage
693
+ let attempts = 0;
694
+ const maxAttempts = Math.floor(timeout / checkInterval);
695
+ Logger.logger.debug("cluster", `Waiting for worker ${worker.id} to be ready (timeout: ${timeout}ms)`);
696
+ const timeoutId = setTimeout(() => {
697
+ Logger.logger.error("cluster", `Worker ${worker.id} startup timeout after ${timeout}ms`);
698
+ Logger.logger.error("cluster", `Worker ${worker.id} diagnostics:`, {
699
+ pid: worker.subprocess.pid,
700
+ killed: worker.subprocess.killed,
701
+ port: worker.port,
702
+ startTime: worker.startTime,
703
+ runningTime: Date.now() - worker.startTime,
704
+ });
705
+ reject(new Error(`Worker ${worker.id} failed to start within ${timeout}ms`));
706
+ }, timeout);
707
+ const checkReady = async () => {
708
+ try {
709
+ attempts++;
710
+ // Check if process is still running
711
+ if (worker.subprocess.killed) {
712
+ clearTimeout(timeoutId);
713
+ Logger.logger.error("cluster", `Worker ${worker.id} process died during startup`);
714
+ reject(new Error(`Worker ${worker.id} process died during startup`));
715
+ return;
716
+ }
717
+ // Log progress every 10 attempts (5 seconds)
718
+ if (attempts % 10 === 0) {
719
+ const runningTime = Date.now() - worker.startTime;
720
+ Logger.logger.debug("cluster", `Worker ${worker.id} still starting... (${runningTime}ms, attempt ${attempts}/${maxAttempts})`);
721
+ }
722
+ // Real readiness check - verify worker is actually responding
723
+ const isReady = await this._checkWorkerReadiness(worker);
724
+ if (isReady) {
725
+ clearTimeout(timeoutId);
726
+ const startupTime = Date.now() - worker.startTime;
727
+ Logger.logger.info("cluster", `Worker ${worker.id} ready after ${startupTime}ms`);
728
+ resolve();
729
+ return;
730
+ }
731
+ // Continue checking
732
+ setTimeout(checkReady, checkInterval);
733
+ }
734
+ catch (error) {
735
+ clearTimeout(timeoutId);
736
+ Logger.logger.error("cluster", `Worker ${worker.id} readiness check error:`, error);
737
+ reject(error);
738
+ }
739
+ };
740
+ // Start checking immediately
741
+ checkReady();
742
+ });
743
+ }
744
+ /**
745
+ * Stop a specific worker with enhanced safety measures
746
+ */
747
+ async _stopWorker(workerId, graceful = true) {
748
+ const worker = this.workers.get(workerId);
749
+ if (!worker) {
750
+ return;
751
+ }
752
+ Logger.logger.debug("cluster", `Stopping Bun worker ${workerId} (graceful: ${graceful})`);
753
+ worker.status = "stopping";
754
+ try {
755
+ const stopPromise = this._executeWorkerStop(worker, graceful);
756
+ const timeoutPromise = new Promise((_, reject) => {
757
+ setTimeout(() => reject(new Error("Worker stop timeout")), this.securityConfig.processTimeout);
758
+ });
759
+ await Promise.race([stopPromise, timeoutPromise]);
760
+ worker.status = "stopped";
761
+ this._releasePort(worker.port);
762
+ // Unregister worker from IPC manager if available
763
+ if (this.ipcManager) {
764
+ this.ipcManager.unregisterWorker(workerId);
765
+ Logger.logger.debug("cluster", `Worker ${workerId} unregistered from IPC manager`);
766
+ }
767
+ Logger.logger.info("cluster", `Bun worker ${workerId} stopped`);
768
+ this.emit("worker:stopped", { workerId, timestamp: Date.now() });
769
+ }
770
+ catch (error) {
771
+ Logger.logger.error("cluster", `Error stopping Bun worker ${workerId}:`, error);
772
+ worker.status = "error";
773
+ // Force kill if graceful stop failed
774
+ try {
775
+ worker.subprocess.kill("SIGKILL");
776
+ await worker.subprocess.exited;
777
+ this._releasePort(worker.port);
778
+ }
779
+ catch (forceError) {
780
+ Logger.logger.error("cluster", `Error force killing worker ${workerId}:`, forceError);
781
+ }
782
+ }
783
+ }
784
+ /**
785
+ * Execute worker stop with proper signal handling
786
+ */
787
+ async _executeWorkerStop(worker, graceful) {
788
+ if (graceful) {
789
+ // Send SIGTERM for graceful shutdown
790
+ worker.subprocess.kill("SIGTERM");
791
+ // Wait for graceful shutdown with timeout
792
+ const gracefulTimeout = setTimeout(() => {
793
+ Logger.logger.warn("cluster", `Worker ${worker.id} graceful shutdown timeout, force killing`);
794
+ worker.subprocess.kill("SIGKILL");
795
+ }, 5000);
796
+ await worker.subprocess.exited;
797
+ clearTimeout(gracefulTimeout);
798
+ }
799
+ else {
800
+ worker.subprocess.kill("SIGKILL");
801
+ await worker.subprocess.exited;
802
+ }
803
+ }
804
+ /**
805
+ * Handle worker process exit with enhanced tracking
806
+ */
807
+ async _handleWorkerExit(workerId, exitCode) {
808
+ const worker = this.workers.get(workerId);
809
+ if (!worker) {
810
+ return;
811
+ }
812
+ const exitReason = this._determineExitReason(exitCode);
813
+ Logger.logger.warn("cluster", `Bun worker ${workerId} exited: ${exitReason}`);
814
+ // Update worker state
815
+ worker.status = "stopped";
816
+ worker.health.status = "unhealthy";
817
+ worker.health.consecutiveFailures++;
818
+ // Unregister worker from IPC manager if available
819
+ if (this.ipcManager) {
820
+ this.ipcManager.unregisterWorker(workerId);
821
+ Logger.logger.debug("cluster", `Worker ${workerId} unregistered from IPC manager`);
822
+ }
823
+ // Add to restart history
824
+ worker.restartHistory.push({
825
+ timestamp: Date.now(),
826
+ reason: exitReason,
827
+ exitCode: exitCode || undefined,
828
+ });
829
+ this.emit("worker:exit", {
830
+ workerId,
831
+ exitCode,
832
+ reason: exitReason,
833
+ timestamp: Date.now(),
834
+ });
835
+ // Check if restart is needed and allowed
836
+ if (this._shouldRestartWorker(worker)) {
837
+ await this._attemptWorkerRestart(worker);
838
+ }
839
+ else {
840
+ Logger.logger.warn("cluster", `Worker ${workerId} will not be restarted: ${this._getRestartBlockReason(worker)}`);
841
+ this.workers.delete(workerId);
842
+ this._releasePort(worker.port);
843
+ }
844
+ }
845
+ /**
846
+ * Determine the reason for worker exit
847
+ */
848
+ _determineExitReason(exitCode) {
849
+ if (exitCode === null)
850
+ return "killed";
851
+ if (exitCode === 0)
852
+ return "normal_exit";
853
+ if (exitCode === 1)
854
+ return "error_exit";
855
+ if (exitCode === 130)
856
+ return "sigint";
857
+ if (exitCode === 143)
858
+ return "sigterm";
859
+ if (exitCode === 137)
860
+ return "sigkill";
861
+ return `exit_code_${exitCode}`;
862
+ }
863
+ /**
864
+ * Check if worker should be restarted
865
+ */
866
+ _shouldRestartWorker(worker) {
867
+ if (!this.isRunning)
868
+ return false;
869
+ const autoRestart = this.config.processManagement?.respawn !== false;
870
+ if (!autoRestart)
871
+ return false;
872
+ // Check restart count limits
873
+ if (worker.restarts >= this.securityConfig.maxRestartAttempts)
874
+ return false;
875
+ // Check restart frequency (prevent restart loops)
876
+ const recentRestarts = worker.restartHistory.filter((r) => Date.now() - r.timestamp < this.securityConfig.restartWindow).length;
877
+ return recentRestarts < this.securityConfig.maxRestartAttempts;
878
+ }
879
+ /**
880
+ * Get reason why restart is blocked
881
+ */
882
+ _getRestartBlockReason(worker) {
883
+ if (!this.isRunning)
884
+ return "cluster_shutting_down";
885
+ if (!this.config.processManagement?.respawn)
886
+ return "auto_restart_disabled";
887
+ if (worker.restarts >= this.securityConfig.maxRestartAttempts)
888
+ return "max_restarts_exceeded";
889
+ const recentRestarts = worker.restartHistory.filter((r) => Date.now() - r.timestamp < this.securityConfig.restartWindow).length;
890
+ if (recentRestarts >= this.securityConfig.maxRestartAttempts)
891
+ return "restart_frequency_limit";
892
+ return "unknown";
893
+ }
894
+ /**
895
+ * Attempt to restart a worker with backoff
896
+ */
897
+ async _attemptWorkerRestart(worker) {
898
+ Logger.logger.info("cluster", `Restarting Bun worker ${worker.id}...`);
899
+ worker.restarts++;
900
+ // Calculate backoff delay
901
+ const backoffDelay = Math.min(1000 * Math.pow(2, worker.restarts - 1), 30000);
902
+ await new Promise((resolve) => setTimeout(resolve, backoffDelay));
903
+ try {
904
+ const index = parseInt(worker.id.split("-")[1]) || 0;
905
+ this.workers.delete(worker.id);
906
+ this._releasePort(worker.port);
907
+ await this._spawnWorkerWithRetry(index, 2);
908
+ Logger.logger.info("cluster", `Successfully restarted worker (was ${worker.id})`);
909
+ this.emit("worker:restarted", {
910
+ oldWorkerId: worker.id,
911
+ restartCount: worker.restarts,
912
+ timestamp: Date.now(),
913
+ });
914
+ }
915
+ catch (error) {
916
+ Logger.logger.error("cluster", `Failed to restart worker ${worker.id}:`, error);
917
+ this.workers.delete(worker.id);
918
+ this._releasePort(worker.port);
919
+ this.emit("worker:restart_failed", {
920
+ workerId: worker.id,
921
+ error: error instanceof Error ? error.message : String(error),
922
+ timestamp: Date.now(),
923
+ });
924
+ }
925
+ }
926
+ /**
927
+ * Get optimal worker count with system constraints
928
+ */
929
+ _getOptimalWorkerCount() {
930
+ if (typeof this.config.workers === "number") {
931
+ return Math.max(1, Math.min(this.config.workers, 32)); // Cap at 32 workers
932
+ }
933
+ if (this.config.workers === "auto") {
934
+ const cpuCount = navigator.hardwareConcurrency || 4;
935
+ // Use OS-level memory information for better accuracy
936
+ const os = require("os");
937
+ const totalSystemMemory = os.totalmem();
938
+ const freeSystemMemory = os.freemem();
939
+ // Calculate usable memory: at most 60% of total system memory and 80% of free memory
940
+ const usableMemory = Math.min(totalSystemMemory * 0.6, freeSystemMemory * 0.8);
941
+ const memoryBasedCount = Math.floor(usableMemory / this.securityConfig.maxMemoryPerWorker);
942
+ // Use the minimum of CPU-based and memory-based counts
943
+ const optimalCount = Math.max(1, Math.min(cpuCount - 1, memoryBasedCount, 16));
944
+ Logger.logger.debug("cluster", `Optimal worker calculation: CPU=${cpuCount - 1}, Memory=${memoryBasedCount}, Selected=${optimalCount}`);
945
+ return optimalCount;
946
+ }
947
+ return 2; // Safe default
948
+ }
949
+ /**
950
+ * Start comprehensive health monitoring
951
+ */
952
+ _startHealthMonitoring() {
953
+ this.healthCheckInterval = setInterval(async () => {
954
+ try {
955
+ await this._performHealthCheck();
956
+ }
957
+ catch (error) {
958
+ Logger.logger.error("cluster", "Health check error:", error);
959
+ }
960
+ }, 15000); // Check every 15 seconds
961
+ }
962
+ /**
963
+ * Perform comprehensive health check on all workers
964
+ */
965
+ async _performHealthCheck() {
966
+ const healthPromises = Array.from(this.workers.values()).map(async (worker) => {
967
+ try {
968
+ // Check process status
969
+ if (worker.subprocess.killed) {
970
+ worker.health.status = "unhealthy";
971
+ worker.health.consecutiveFailures++;
972
+ worker.health.lastError = "Process killed";
973
+ return;
974
+ }
975
+ // Check memory usage if available
976
+ const memoryUsage = await this._getWorkerMemoryUsage(worker);
977
+ if (memoryUsage > worker.resourceLimits.maxMemory) {
978
+ worker.health.status = "unhealthy";
979
+ worker.health.consecutiveFailures++;
980
+ worker.health.lastError = `Memory limit exceeded: ${Math.round(memoryUsage / 1024 / 1024)}MB`;
981
+ this.emit("worker:memory_exceeded", {
982
+ workerId: worker.id,
983
+ memoryUsage,
984
+ limit: worker.resourceLimits.maxMemory,
985
+ });
986
+ return;
987
+ }
988
+ // Update performance metrics
989
+ worker.performance.memoryUsage = memoryUsage;
990
+ // Health check passed
991
+ worker.health.status = "healthy";
992
+ worker.health.consecutiveFailures = 0;
993
+ worker.lastPing = Date.now();
994
+ }
995
+ catch (error) {
996
+ worker.health.status = "unhealthy";
997
+ worker.health.consecutiveFailures++;
998
+ worker.health.lastError =
999
+ error instanceof Error ? error.message : String(error);
1000
+ Logger.logger.warn("cluster", `Health check failed for worker ${worker.id}:`, error);
1001
+ }
1002
+ });
1003
+ await Promise.allSettled(healthPromises);
1004
+ // Check overall cluster health
1005
+ const unhealthyWorkers = this.getAllWorkers().filter((w) => w.health.status === "unhealthy");
1006
+ if (unhealthyWorkers.length > 0) {
1007
+ this.emit("cluster:health_degraded", {
1008
+ unhealthyCount: unhealthyWorkers.length,
1009
+ totalCount: this.workers.size,
1010
+ timestamp: Date.now(),
1011
+ });
1012
+ }
1013
+ }
1014
+ /**
1015
+ * Get worker memory usage using actual process monitoring
1016
+ */
1017
+ async _getWorkerMemoryUsage(worker) {
1018
+ try {
1019
+ // Use Bun's process monitoring if available
1020
+ if (worker.subprocess && !worker.subprocess.killed) {
1021
+ // For Bun processes, we need to use system-level monitoring
1022
+ // since Bun doesn't expose process.memoryUsage() for subprocesses
1023
+ const pid = worker.subprocess.pid;
1024
+ if (pid) {
1025
+ return await this._getProcessMemoryUsage(pid);
1026
+ }
1027
+ }
1028
+ // Fallback to estimated usage if process monitoring fails
1029
+ Logger.logger.warn("cluster", `Unable to get actual memory usage for worker ${worker.id}, using fallback`);
1030
+ return 64 * 1024 * 1024; // 64MB fallback
1031
+ }
1032
+ catch (error) {
1033
+ Logger.logger.error("cluster", `Error getting memory usage for worker ${worker.id}:`, error);
1034
+ return 64 * 1024 * 1024; // 64MB fallback
1035
+ }
1036
+ }
1037
+ /**
1038
+ * Get actual memory usage for a process by PID
1039
+ */
1040
+ async _getProcessMemoryUsage(pid) {
1041
+ try {
1042
+ if (process.platform === "linux") {
1043
+ const fs = await import('fs');
1044
+ const statm = await fs.promises.readFile(`/proc/${pid}/statm`, "utf8");
1045
+ const pages = parseInt(statm.split(" ")[1]); // RSS in pages
1046
+ const pageSize = 4096; // Typical Linux page size (4 KB); may differ on some systems
1047
+ return pages * pageSize;
1048
+ }
1049
+ else if (process.platform === "darwin") {
1050
+ // macOS implementation using ps command
1051
+ const { spawn } = await import('child_process');
1052
+ return new Promise((resolve, reject) => {
1053
+ const ps = spawn("ps", [
1054
+ "-o",
1055
+ "rss=",
1056
+ "-p",
1057
+ pid.toString(),
1058
+ ]);
1059
+ let output = "";
1060
+ ps.stdout.on("data", (data) => {
1061
+ output += data.toString();
1062
+ });
1063
+ ps.on("close", (code) => {
1064
+ if (code === 0) {
1065
+ const rssKB = parseInt(output.trim());
1066
+ resolve(rssKB * 1024); // Convert KB to bytes
1067
+ }
1068
+ else {
1069
+ reject(new Error(`ps command failed with code ${code}`));
1070
+ }
1071
+ });
1072
+ ps.on("error", reject);
1073
+ });
1074
+ }
1075
+ else {
1076
+ // Windows or other platforms - use fallback
1077
+ throw new Error(`Memory monitoring not implemented for platform: ${process.platform}`);
1078
+ }
1079
+ }
1080
+ catch (error) {
1081
+ throw new Error(`Failed to get process memory usage: ${error}`);
1082
+ }
1083
+ }
1084
+ /**
1085
+ * Start metrics collection with detailed tracking
1086
+ */
1087
+ _startMetricsCollection() {
1088
+ this.metricsInterval = setInterval(async () => {
1089
+ try {
1090
+ await this._collectMetrics();
1091
+ }
1092
+ catch (error) {
1093
+ Logger.logger.error("cluster", "Metrics collection error:", error);
1094
+ }
1095
+ }, 60000); // Collect every minute
1096
+ }
1097
+ /**
1098
+ * Collect comprehensive cluster metrics
1099
+ */
1100
+ async _collectMetrics() {
1101
+ const workers = Array.from(this.workers.values());
1102
+ const activeWorkers = workers.filter((w) => w.health.status === "healthy");
1103
+ const totalRequests = workers.reduce((sum, w) => sum + w.performance.requestCount, 0);
1104
+ const totalErrors = workers.reduce((sum, w) => sum + w.performance.errorCount, 0);
1105
+ const avgResponseTimes = workers
1106
+ .filter((w) => w.performance.averageResponseTime > 0)
1107
+ .map((w) => w.performance.averageResponseTime);
1108
+ const averageResponseTime = avgResponseTimes.length > 0
1109
+ ? avgResponseTimes.reduce((sum, time) => sum + time, 0) /
1110
+ avgResponseTimes.length
1111
+ : 0;
1112
+ const memoryUsage = process.memoryUsage();
1113
+ const workerMemoryUsage = workers.reduce((sum, w) => sum + w.performance.memoryUsage, 0);
1114
+ const metrics = {
1115
+ totalWorkers: workers.length,
1116
+ activeWorkers: activeWorkers.length,
1117
+ totalRequests,
1118
+ averageResponseTime: Math.round(averageResponseTime * 100) / 100, // Round to 2 decimal places
1119
+ memoryUsage: memoryUsage.heapUsed + workerMemoryUsage,
1120
+ cpuUsage: await this._calculateCpuUsage(),
1121
+ uptime: perf_hooks.performance.now() - this.startTime,
1122
+ errorRate: totalRequests > 0 ? (totalErrors / totalRequests) * 100 : 0,
1123
+ restartCount: workers.reduce((sum, w) => sum + w.restarts, 0),
1124
+ };
1125
+ this.emit("metrics:collected", { metrics, timestamp: Date.now() });
1126
+ return metrics;
1127
+ }
1128
+ /**
1129
+ * Calculate CPU usage for the cluster using sophisticated monitoring
1130
+ */
1131
+ async _calculateCpuUsage() {
1132
+ const workers = this.getAllWorkers();
1133
+ return await this.cpuMonitor.calculateClusterCpuUsage(workers);
1134
+ }
1135
+ /**
1136
+ * Start performance monitoring
1137
+ */
1138
+ _startPerformanceMonitoring() {
1139
+ this.performanceInterval = setInterval(() => {
1140
+ this._updateWorkerPerformanceMetrics();
1141
+ }, 30000); // Update every 30 seconds
1142
+ }
1143
+ /**
1144
+ * Update worker performance metrics with real data
1145
+ */
1146
+ _updateWorkerPerformanceMetrics() {
1147
+ for (const [, worker] of this.workers) {
1148
+ // Update memory usage with actual data
1149
+ this._getWorkerMemoryUsage(worker)
1150
+ .then((memoryUsage) => {
1151
+ worker.performance.memoryUsage = memoryUsage;
1152
+ })
1153
+ .catch((error) => {
1154
+ Logger.logger.debug("cluster", `Failed to update memory usage for worker ${worker.id}:`, error);
1155
+ });
1156
+ // Update CPU usage if available
1157
+ this._getWorkerCpuUsage(worker)
1158
+ .then((cpuUsage) => {
1159
+ worker.performance.cpuUsage = cpuUsage;
1160
+ })
1161
+ .catch((error) => {
1162
+ Logger.logger.debug("cluster", `Failed to update CPU usage for worker ${worker.id}:`, error);
1163
+ });
1164
+ // Decay old metrics to prevent infinite growth
1165
+ const timeSinceLastUpdate = Date.now() -
1166
+ (worker.performance.lastRequestTime || worker.startTime);
1167
+ if (timeSinceLastUpdate > 300000) {
1168
+ // 5 minutes - decay counters
1169
+ worker.performance.requestCount = Math.floor(worker.performance.requestCount * 0.9);
1170
+ worker.performance.errorCount = Math.floor(worker.performance.errorCount * 0.9);
1171
+ }
1172
+ }
1173
+ }
1174
+ /**
1175
+ * Get actual CPU usage for a worker
1176
+ */
1177
+ async _getWorkerCpuUsage(worker) {
1178
+ try {
1179
+ if (worker.subprocess && !worker.subprocess.killed) {
1180
+ const pid = worker.subprocess.pid;
1181
+ if (pid) {
1182
+ return await this._getProcessCpuUsage(pid);
1183
+ }
1184
+ }
1185
+ return 0;
1186
+ }
1187
+ catch (error) {
1188
+ Logger.logger.debug("cluster", `Error getting CPU usage for worker ${worker.id}:`, error);
1189
+ return 0;
1190
+ }
1191
+ }
1192
+ /**
1193
+ * Get actual CPU usage for a process by PID using sophisticated monitoring
1194
+ */
1195
+ async _getProcessCpuUsage(pid) {
1196
+ return await this.cpuMonitor.getProcessCpuUsage(pid);
1197
+ }
1198
+ /**
1199
+ * Enable emergency mode for resource conservation
1200
+ */
1201
+ _enableEmergencyMode() {
1202
+ Logger.logger.warn("cluster", "Enabling emergency mode - reducing resource usage");
1203
+ // Reduce monitoring frequency
1204
+ if (this.healthCheckInterval) {
1205
+ clearInterval(this.healthCheckInterval);
1206
+ this.healthCheckInterval = setInterval(() => this._performHealthCheck(), 60000); // 1 minute
1207
+ }
1208
+ if (this.metricsInterval) {
1209
+ clearInterval(this.metricsInterval);
1210
+ this.metricsInterval = setInterval(() => this._collectMetrics(), 300000); // 5 minutes
1211
+ }
1212
+ this.emit("emergency_mode", { enabled: true, timestamp: Date.now() });
1213
+ }
1214
+ /**
1215
+ * Disable emergency mode and restore normal operation
1216
+ */
1217
+ _disableEmergencyMode() {
1218
+ Logger.logger.info("cluster", "Disabling emergency mode - restoring normal operation");
1219
+ // Restore normal monitoring frequency
1220
+ if (this.healthCheckInterval) {
1221
+ clearInterval(this.healthCheckInterval);
1222
+ this._startHealthMonitoring();
1223
+ }
1224
+ if (this.metricsInterval) {
1225
+ clearInterval(this.metricsInterval);
1226
+ this._startMetricsCollection();
1227
+ }
1228
+ this.emit("emergency_mode", { enabled: false, timestamp: Date.now() });
1229
+ }
1230
+ // Public API methods (maintaining compatibility)
1231
+ /**
1232
+ * Get all workers
1233
+ */
1234
+ getAllWorkers() {
1235
+ return Array.from(this.workers.values());
1236
+ }
1237
+ /**
1238
+ * Get active workers
1239
+ */
1240
+ getActiveWorkers() {
1241
+ return this.getAllWorkers().filter((w) => w.health.status === "healthy");
1242
+ }
1243
+ /**
1244
+     * Set IPC manager for worker communication
+     */
+    setIPCManager(ipcManager) {
+        this.ipcManager = ipcManager;
+        Logger.logger.debug("cluster", "IPC Manager set for Bun cluster");
+        // Register existing workers with IPC manager
+        for (const [workerId, worker] of this.workers) {
+            if (worker.subprocess && worker.status === "running") {
+                this.ipcManager.registerWorker(workerId, worker.subprocess);
+            }
+        }
+    }
+    /**
+     * Get cluster metrics
+     */
+    async getMetrics() {
+        return this._collectMetrics();
+    }
+    /**
+     * Check cluster health with detailed information
+     */
+    async checkHealth() {
+        const workers = this.getAllWorkers();
+        const activeWorkers = this.getActiveWorkers();
+        const healthyPercentage = workers.length > 0
+            ? (activeWorkers.length / workers.length) * 100
+            : 0;
+        const uptime = perf_hooks.performance.now() - this.startTime;
+        const unhealthyWorkers = workers.filter((w) => w.health.status === "unhealthy");
+        const criticalIssues = unhealthyWorkers.filter((w) => w.health.consecutiveFailures >= 3);
+        return {
+            healthy: healthyPercentage >= 70 && criticalIssues.length === 0,
+            details: {
+                totalWorkers: workers.length,
+                activeWorkers: activeWorkers.length,
+                healthyPercentage: Math.round(healthyPercentage),
+                uptime: Math.round(uptime),
+                criticalIssues: criticalIssues.length,
+                memoryUsage: process.memoryUsage().heapUsed,
+                isEmergencyMode: false, // Will be implemented when MemoryManager is updated
+                lastHealthCheck: Date.now(),
+            },
+        };
+    }
+    /**
+     * Scale up workers with validation
+     */
+    async scaleUp(count = 1) {
+        if (!this.isRunning) {
+            throw new Error("Cannot scale up: cluster is not running");
+        }
+        if (count <= 0 || count > 16) {
+            throw new Error("Invalid scale up count: must be between 1 and 16");
+        }
+        Logger.logger.info("cluster", `Scaling up Bun cluster by ${count} workers`);
+        // Validate resources before scaling
+        const currentCount = this.workers.size;
+        const newCount = currentCount + count;
+        await this._validateSystemResources(newCount);
+        const spawnPromises = Array.from({ length: count }, (_, i) => this._spawnWorkerWithRetry(currentCount + i));
+        const results = await Promise.allSettled(spawnPromises);
+        const successful = results.filter((r) => r.status === "fulfilled").length;
+        Logger.logger.info("cluster", `Scale up completed: ${successful}/${count} workers started`);
+        this.emit("cluster:scaled_up", {
+            requested: count,
+            successful,
+            newTotal: this.workers.size,
+            timestamp: Date.now(),
+        });
+        if (successful === 0) {
+            throw new Error("Failed to start any new workers during scale up");
+        }
+    }
+    /**
+     * Scale down workers with safety checks
+     */
+    async scaleDown(count = 1) {
+        if (!this.isRunning) {
+            throw new Error("Cannot scale down: cluster is not running");
+        }
+        const activeWorkers = this.getActiveWorkers();
+        if (activeWorkers.length <= 1) {
+            throw new Error("Cannot scale down: must maintain at least one active worker");
+        }
+        const actualCount = Math.min(count, activeWorkers.length - 1);
+        Logger.logger.info("cluster", `Scaling down Bun cluster by ${actualCount} workers`);
+        // Select workers to stop (prefer oldest workers)
+        const workersToStop = activeWorkers
+            .sort((a, b) => a.startTime - b.startTime)
+            .slice(-actualCount);
+        const stopPromises = workersToStop.map(async (worker) => {
+            await this._stopWorker(worker.id, true);
+            this.workers.delete(worker.id);
+            return worker.id;
+        });
+        const results = await Promise.allSettled(stopPromises);
+        const successful = results.filter((r) => r.status === "fulfilled").length;
+        Logger.logger.info("cluster", `Scale down completed: ${successful}/${actualCount} workers stopped`);
+        this.emit("cluster:scaled_down", {
+            requested: actualCount,
+            successful,
+            newTotal: this.workers.size,
+            timestamp: Date.now(),
+        });
+    }
+    /**
+     * Handle memory-based scale down with safety measures
+     */
+    async _handleMemoryScaleDown(alert) {
+        const enhancedWorkers = Array.from(this.workers.values()).filter((w) => w.health.status === "healthy");
+        if (enhancedWorkers.length <= 1) {
+            Logger.logger.warn("cluster", "Cannot scale down further - only one worker remaining");
+            return;
+        }
+        // Find the worker using the most memory
+        const workerToStop = enhancedWorkers.reduce((prev, current) => prev.performance.memoryUsage > current.performance.memoryUsage
+            ? prev
+            : current);
+        Logger.logger.info("cluster", `Scaling down due to memory pressure - stopping worker ${workerToStop.id}`);
+        try {
+            await this._stopWorker(workerToStop.id, true);
+            this.workers.delete(workerToStop.id);
+            this.emit("worker:scaled_down_memory", {
+                workerId: workerToStop.id,
+                memoryUsage: workerToStop.performance.memoryUsage,
+                alert,
+                timestamp: Date.now(),
+            });
+        }
+        catch (error) {
+            Logger.logger.error("cluster", `Failed to scale down worker ${workerToStop.id}:`, error);
+        }
+    }
+    /**
+     * Handle worker memory issues with enhanced recovery
+     */
+    async _handleWorkerMemoryIssue(workerId, alert) {
+        const worker = this.workers.get(workerId);
+        if (!worker) {
+            return;
+        }
+        Logger.logger.warn("cluster", `Handling memory issue for worker ${workerId}: ${alert.message}`);
+        // Add to restart history
+        worker.restartHistory.push({
+            timestamp: Date.now(),
+            reason: `memory_issue: ${alert.message}`,
+        });
+        try {
+            // Force stop for memory issues (no graceful shutdown)
+            await this._stopWorker(workerId, false);
+            // Wait before restart to allow memory cleanup
+            await new Promise((resolve) => setTimeout(resolve, 5000));
+            // Restart with the same index
+            const index = parseInt(workerId.split("-")[1]) || 0;
+            await this._spawnWorkerWithRetry(index, 2);
+            this.emit("worker:memory_restart", {
+                oldWorkerId: workerId,
+                alert,
+                timestamp: Date.now(),
+            });
+        }
+        catch (error) {
+            Logger.logger.error("cluster", `Failed to restart worker ${workerId} after memory issue:`, error);
+            this.workers.delete(workerId);
+        }
+    }
+    /**
+     * Handle memory throttling with appropriate measures
+     */
+    _handleMemoryThrottling(alert) {
+        Logger.logger.info("cluster", `Implementing memory throttling: ${alert.message}`);
+        // Reduce monitoring frequency to save memory
+        this._enableEmergencyMode();
+        // Emit throttling event for application to handle
+        this.emit("memory_throttling", {
+            alert,
+            timestamp: Date.now(),
+            action: "reduce_concurrency",
+            recommendations: {
+                reduceWorkerCount: true,
+                enableCompression: true,
+                clearCaches: true,
+                deferNonCriticalTasks: true,
+            },
+        });
+    }
+    /**
+     * Get memory optimization recommendations
+     */
+    getMemoryRecommendations() {
+        const enhancedWorkers = Array.from(this.workers.values());
+        const totalMemory = enhancedWorkers.reduce((sum, w) => sum + w.performance.memoryUsage, 0);
+        const avgMemoryPerWorker = enhancedWorkers.length > 0
+            ? totalMemory / enhancedWorkers.length
+            : 0;
+        return {
+            currentWorkerCount: enhancedWorkers.length,
+            optimalWorkerCount: this.getOptimalWorkerCountForMemory(),
+            averageMemoryPerWorker: Math.round(avgMemoryPerWorker / 1024 / 1024), // MB
+            recommendations: this.memoryManager.getMemoryOptimizationRecommendations?.() || {
+                scaleDown: enhancedWorkers.length >
+                    this.getOptimalWorkerCountForMemory(),
+                enableCompression: true,
+                optimizeGarbageCollection: true,
+                monitorMemoryLeaks: totalMemory > 1024 * 1024 * 1024, // > 1GB
+            },
+            timestamp: Date.now(),
+        };
+    }
+    /**
+     * Get optimal worker count based on memory constraints
+     */
+    getOptimalWorkerCountForMemory() {
+        // Use OS-level memory information for accurate calculation
+        const os = require("os");
+        const freeMemory = os.freemem();
+        const totalMemory = os.totalmem();
+        // Use the smaller of free memory or 60% of total memory (conservative approach)
+        const available = Math.min(freeMemory * 0.8, totalMemory * 0.6);
+        const perWorker = this.securityConfig.maxMemoryPerWorker;
+        const memoryBasedCount = Math.floor(available / perWorker);
+        const result = Math.max(1, Math.min(memoryBasedCount, this._getOptimalWorkerCount()));
+        Logger.logger.debug("cluster", `Memory-based worker count: ${memoryBasedCount} (available: ${Math.round(available / 1024 / 1024)}MB, per worker: ${Math.round(perWorker / 1024 / 1024)}MB)`);
+        return result;
+    }
+    /**
+     * Enable low memory mode with comprehensive measures
+     */
+    enableLowMemoryMode() {
+        Logger.logger.info("cluster", "Manually enabling low memory mode");
+        this.memoryManager.enableLowMemoryMode?.();
+        this._enableEmergencyMode();
+    }
+    /**
+     * Disable low memory mode and restore normal operation
+     */
+    disableLowMemoryMode() {
+        Logger.logger.info("cluster", "Manually disabling low memory mode");
+        this.memoryManager.disableLowMemoryMode?.();
+        this._disableEmergencyMode();
+    }
+    /**
+     * Get detailed worker information for debugging
+     */
+    getWorkerDetails(workerId) {
+        if (workerId) {
+            const worker = this.workers.get(workerId);
+            if (!worker) {
+                return null;
+            }
+            return {
+                id: worker.id,
+                port: worker.port,
+                status: worker.status,
+                health: worker.health,
+                performance: worker.performance,
+                uptime: Date.now() - worker.startTime,
+                restarts: worker.restarts,
+                restartHistory: worker.restartHistory.slice(-5), // Last 5 restarts
+                resourceLimits: worker.resourceLimits,
+            };
+        }
+        return Array.from(this.workers.values()).map((worker) => ({
+            id: worker.id,
+            port: worker.port,
+            status: worker.status,
+            health: worker.health.status,
+            uptime: Date.now() - worker.startTime,
+            restarts: worker.restarts,
+            memoryUsage: Math.round(worker.performance.memoryUsage / 1024 / 1024), // MB
+            requestCount: worker.performance.requestCount,
+        }));
+    }
+    /**
+     * Force restart of a specific worker (for debugging/maintenance)
+     */
+    async forceRestartWorker(workerId) {
+        const worker = this.workers.get(workerId);
+        if (!worker) {
+            throw new Error(`Worker ${workerId} not found`);
+        }
+        Logger.logger.info("cluster", `Force restarting worker ${workerId}`);
+        worker.restartHistory.push({
+            timestamp: Date.now(),
+            reason: "manual_restart",
+        });
+        await this._attemptWorkerRestart(worker);
+    }
+    /**
+     * Get cluster status summary
+     */
+    getStatus() {
+        const workers = this.getAllWorkers();
+        const activeWorkers = this.getActiveWorkers();
+        const enhancedWorkers = Array.from(this.workers.values());
+        return {
+            isRunning: this.isRunning,
+            uptime: perf_hooks.performance.now() - this.startTime,
+            workers: {
+                total: workers.length,
+                active: activeWorkers.length,
+                starting: workers.filter((w) => w.status === "starting").length,
+                stopping: workers.filter((w) => w.status === "stopping").length,
+                unhealthy: workers.filter((w) => w.health.status === "unhealthy").length,
+            },
+            performance: {
+                totalRequests: enhancedWorkers.reduce((sum, w) => sum + w.performance.requestCount, 0),
+                totalErrors: enhancedWorkers.reduce((sum, w) => sum + w.performance.errorCount, 0),
+                totalRestarts: workers.reduce((sum, w) => sum + w.restarts, 0),
+            },
+            memory: {
+                masterUsage: Math.round(process.memoryUsage().heapUsed / 1024 / 1024), // MB
+                workerUsage: Math.round(enhancedWorkers.reduce((sum, w) => sum + w.performance.memoryUsage, 0) /
+                    1024 /
+                    1024), // MB
+                isLowMemoryMode: false, // Will be implemented in MemoryManager
+            },
+            timestamp: Date.now(),
+        };
+    }
+}
+
+exports.BunClusterManager = BunClusterManager;
+//# sourceMappingURL=bun-cluster-manager.js.map
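
The compiled output above only shows the new public surface of `BunClusterManager` (`checkHealth`, `scaleUp`, `scaleDown`, `getStatus`, `getMemoryRecommendations`, `forceRestartWorker`, and related helpers). The following is a minimal sketch of how an application might drive these methods; it is not part of the diff, and since the constructor and package exports are not shown here, it assumes the application already holds a started `BunClusterManager` instance.

```js
// Sketch only: how a BunClusterManager instance is created and exported is not
// shown in this diff, so `cluster` is assumed to be an already-started instance.
async function superviseBunCluster(cluster) {
    // checkHealth() reports healthy when >= 70% of workers are active and no
    // worker has accumulated 3+ consecutive health-check failures.
    const { healthy, details } = await cluster.checkHealth();
    console.log(`workers: ${details.activeWorkers}/${details.totalWorkers}, healthy: ${healthy}`);

    if (!healthy && details.activeWorkers < details.totalWorkers) {
        // scaleUp() validates the count (1-16) and system resources before spawning.
        await cluster.scaleUp(1);
    }

    // getMemoryRecommendations() compares the current worker count with a
    // memory-based optimum derived from os.freemem()/os.totalmem().
    const memo = cluster.getMemoryRecommendations();
    if (memo.currentWorkerCount > memo.optimalWorkerCount) {
        // scaleDown() always keeps at least one active worker.
        await cluster.scaleDown(1);
    }

    // getStatus() aggregates per-worker request, error, restart, and memory (MB) totals.
    return cluster.getStatus();
}
```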