@ynode/cluster 1.1.0

package/LICENSE ADDED
@@ -0,0 +1,20 @@
+ The MIT License (MIT)
+
+ Copyright (c) 2026 Michael Welter <me@mikinho.com>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
+ this software and associated documentation files (the "Software"), to deal in
+ the Software without restriction, including without limitation the rights to
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ the Software, and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,96 @@
+ # @ynode/cluster
+
+ Copyright (c) 2025 Michael Welter <me@mikinho.com>
+
+ [![npm version](https://img.shields.io/npm/v/@ynode/cluster.svg)](https://www.npmjs.com/package/@ynode/cluster)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
+ **Smart & Easy Node.js Clustering.**
+
+ `@ynode/cluster` removes the complexity of managing Node.js cluster processes. It provides out-of-the-box support for:
+ - **Smart Auto-Scaling**: Automatically spawns and kills workers based on event loop lag (a proxy for CPU load).
+ - **Resiliency**: Automatically restarts workers if they crash.
+ - **Zero-Config Defaults**: Works immediately with sensible defaults, but fully configurable.
+
+ ## Installation
+
+ ```bash
+ npm install @ynode/cluster
+ ```
+
+ ## Usage
+
+ Simply wrap your application startup logic in the `run()` function.
+
+ ```javascript
+ import { run } from "@ynode/cluster";
+ import Fastify from "fastify";
+
+ // Define your worker logic
+ const startServer = async () => {
+     const app = Fastify({ logger: true });
+
+     app.get("/", async () => "Hello from worker " + process.pid);
+
+     try {
+         await app.listen({ port: 3000 });
+     } catch (err) {
+         app.log.error(err);
+         process.exit(1);
+     }
+ };
+
+ // Start the cluster
+ run(startServer, {
+     mode: "smart", // Enable auto-scaling (default)
+     minWorkers: 2,
+     maxWorkers: 8 // Default is os.availableParallelism()
+ });
+ ```
+
+ ## Configuration
+
+ The `run(startWorker, options)` function accepts the following options (see the example after the table):
+
+ | Option | Type | Default | Description |
+ |--------|------|---------|-------------|
+ | `enabled` | `boolean` | `true` | Whether to enable clustering. If `false`, runs `startWorker` directly in the main process. |
+ | `mode` | `"smart" \| "max"` | `"smart"` | `"smart"` enables auto-scaling based on load. `"max"` spawns `maxWorkers` and keeps them running. |
+ | `minWorkers` | `number` | `2` | Minimum number of workers to keep alive in "smart" mode. |
+ | `maxWorkers` | `number` | `os.availableParallelism()` | Maximum number of workers to spawn. |
+ | `scaleUpThreshold` | `number` | `50` | Event loop lag (ms) threshold to trigger scaling up. |
+ | `scaleDownThreshold` | `number` | `10` | Event loop lag (ms) threshold to trigger scaling down. |
+ | `scalingCooldown` | `number` | `10000` | Minimum time (ms) between scaling actions. |
+ | `scaleDownGrace` | `number` | `30000` | Grace period (ms) after scaling up before scaling down is allowed. |
+
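+ For example, a service that clusters only in production and tolerates more event loop lag before adding workers could be configured like this (illustrative values only, using the options documented above):
+
+ ```javascript
+ import { run } from "@ynode/cluster";
+
+ // Same worker function as in the Usage example above.
+ const startServer = async () => { /* start your HTTP server here */ };
+
+ const isProd = process.env.NODE_ENV === "production";
+
+ run(startServer, {
+     enabled: isProd,        // false runs startServer directly in the main process
+     mode: "smart",
+     minWorkers: 1,
+     maxWorkers: 4,
+     scaleUpThreshold: 100,  // ms of average event loop lag before scaling up
+     scaleDownGrace: 60000   // wait a full minute after a scale-up before scaling down
+ });
+ ```
+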
+ ## Working with @ynode/autoshutdown
+
+ This package works seamlessly with **[@ynode/autoshutdown](https://www.npmjs.com/package/@ynode/autoshutdown)**.
+
+ While `@ynode/cluster` manages the **pool size** based on overall system load (scaling up when busy, down when quiet), `@ynode/autoshutdown` manages the **lifecycle of individual workers** based on their specific inactivity.
+
+ - **@ynode/cluster**: "We are overloaded, add more workers!" or "We are effectively idle, remove the extra workers."
+ - **@ynode/autoshutdown**: "I haven't received a request in 10 minutes, I should shut down to save memory."
+
+ Using them together ensures optimal resource usage: responsive scaling for traffic spikes and aggressive cleanup for idle periods.
+
+ ```javascript
+ import { run } from "@ynode/cluster";
+ import autoShutdown from "@ynode/autoshutdown";
+ import Fastify from "fastify";
+
+ run(async () => {
+     const app = Fastify();
+
+     // Register auto-shutdown to kill this specific worker if it's unused
+     await app.register(autoShutdown, {
+         sleep: 600, // 10 minutes
+     });
+
+     await app.listen({ port: 3000 });
+ });
+ ```
+
+ ## License
+
+ This project is licensed under the [MIT License](./LICENSE).
package/package.json ADDED
@@ -0,0 +1,71 @@
+ {
+   "name": "@ynode/cluster",
+   "version": "1.1.0",
+   "description": "Smart, auto-scaling Node.js cluster manager that monitors event loop lag to optimize performance and resource usage.",
+   "main": "src/cluster.js",
+   "exports": {
+     ".": "./src/cluster.js"
+   },
+   "types": "./index.d.ts",
+   "type": "module",
+   "license": "MIT",
+   "author": "Michael Welter <me@mikinho.com>",
+   "repository": {
+     "type": "git",
+     "url": "git+https://github.com/yammm/ynode-cluster.git"
+   },
+   "bugs": {
+     "url": "https://github.com/yammm/ynode-cluster/issues"
+   },
+   "homepage": "https://github.com/yammm/ynode-cluster#readme",
+   "keywords": [
+     "cluster",
+     "auto-scaling",
+     "load-balancing",
+     "performance",
+     "worker",
+     "process-manager",
+     "event-loop",
+     "lag",
+     "resiliency",
+     "node"
+   ],
+   "engines": {
+     "node": ">=20"
+   },
+   "devDependencies": {
+     "@eslint/js": "^9.39.2",
+     "@eslint/json": "^0.14.0",
+     "@eslint/markdown": "^7.5.1",
+     "@mikinho/autover": "^2.0.1",
+     "eslint": "^9.39.2",
+     "eslint-config-prettier": "^10.1.8",
+     "eslint-plugin-prettier": "^5.5.5",
+     "globals": "^17.1.0",
+     "prettier": "^3.8.1",
+     "rimraf": "^6.1.2",
+     "yuidocjs": "^0.10.2"
+   },
+   "scripts": {
+     "docs": "node scripts/gen-docs.mjs",
+     "docs:clean": "rimraf docs || rmdir /s /q docs 2> NUL || true",
+     "docs:open": "node -e \"import('node:child_process').then(m=>m.exec(process.platform==='win32'?'start docs/index.html':(process.platform==='darwin'?'open docs/index.html':'xdg-open docs/index.html')))\"",
+     "format": "prettier --write .",
+     "format:check": "prettier --check .",
+     "lint": "eslint .",
+     "lint:fix": "eslint . --fix",
+     "ver:preview": "npx autover --no-amend --dry-run --short",
+     "ver:apply": "npx autover --guard-unchanged --short",
+     "test": "node --test test/**/*.test.js",
+     "prepublishOnly": "npm test || true",
+     "postversion": "git push && git push --tags"
+   },
+   "publishConfig": {
+     "access": "public"
+   },
+   "files": [
+     "src",
+     "README.md",
+     "LICENSE"
+   ]
+ }
package/src/cluster.js ADDED
@@ -0,0 +1,241 @@
+ // ynode/cluster
+
+ /*
+ The MIT License (MIT)
+
+ Copyright (c) 2026 Michael Welter <me@mikinho.com>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
+ this software and associated documentation files (the "Software"), to deal in
+ the Software without restriction, including without limitation the rights to
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ the Software, and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+  * cluster.js: Process Manager or Cluster Orchestrator
+  *
+  * @module cluster
+  *
+  * Its sole responsibility is to handle the logic of creating, monitoring,
+  * and restarting worker processes.
+  */
+
+ import cluster from "node:cluster";
+ import os from "node:os";
+
+ /**
+  * Manages the application's clustering.
+  * @param {function} startWorker - The function to execute when a worker process starts.
+  * @param {object|boolean} [options=true] - Configuration object or boolean to enable/disable.
+  * @param {boolean} [options.enabled=true] - Whether clustering is enabled.
+  * @param {number} [options.minWorkers=2] - Minimum number of workers (smart mode).
+  * @param {number} [options.maxWorkers=os.availableParallelism()] - Maximum number of workers.
+  * @param {number} [options.scaleUpThreshold=50] - Event loop lag (ms) threshold to scale up.
+  * @param {number} [options.scaleDownThreshold=10] - Event loop lag (ms) threshold to scale down.
+  * @param {string} [options.mode="smart"] - "smart" (auto-scaling) or "max" (all cores).
+  * @param {number} [options.scalingCooldown=10000] - Ms to wait between scaling actions.
+  * @param {number} [options.scaleDownGrace=30000] - Ms to wait after scale-up before allowing scale-down.
+  * @param {object} [log=console] - The logger instance.
+  */
+ export function run(startWorker, options = true, log = console) {
+     // Treat a missing "enabled" flag as true so that passing an options object
+     // without it (as in the README examples) still enables clustering.
+     const isEnabled = typeof options === "object" ? (options.enabled ?? true) : options;
+
+     if (cluster.isWorker || !isEnabled) {
+         log.info(`Running worker process.`);
+         return startWorker();
+     }
+
+     let isShuttingDown = false;
+
+     const {
+         minWorkers = Math.min(2, os.availableParallelism()),
+         maxWorkers = os.availableParallelism(),
+         scaleUpThreshold = 50, // ms lag
+         scaleDownThreshold = 10, // ms lag
+         mode = "smart", // 'smart' or 'max'
+         scalingCooldown = 10000,
+         scaleDownGrace = 30000,
+     } = typeof options === "object" ? options : {};
+
+     if (minWorkers > maxWorkers) {
+         throw new Error(`Invalid configuration: minWorkers (${minWorkers}) cannot be greater than maxWorkers (${maxWorkers})`);
+     }
+
+     if (scaleUpThreshold <= scaleDownThreshold) {
+         throw new Error(`Invalid configuration: scaleUpThreshold (${scaleUpThreshold}) must be greater than scaleDownThreshold (${scaleDownThreshold})`);
+     }
+
+     const initialWorkers = mode === "max" ? maxWorkers : minWorkers;
+     log.info(`Shogun is the master! Starting ${initialWorkers} workers (Max: ${maxWorkers}).`);
+
+     let lastScaleUpTime = Date.now();
+
+     // Fork initial workers
+     for (let i = 0; i < initialWorkers; ++i) {
+         try {
+             cluster.fork();
+         } catch (err) {
+             log.error("Failed to fork initial worker:", err);
+         }
+         lastScaleUpTime = Date.now();
+     }
+
+     const workerLoads = new Map();
+     let lastScalingAction = Date.now();
+
+     function broadcastWorkerCount() {
+         const count = Object.keys(cluster.workers).length;
+         for (const worker of Object.values(cluster.workers)) {
+             if (worker && worker.isConnected()) {
+                 try {
+                     worker.send({ cmd: "cluster-count", count });
+                 } catch (err) {
+                     // Ignore channel closed errors
+                     log.debug(err);
+                 }
+             }
+         }
+     }
+
+     cluster.on("online", (worker) => {
+         log.info("Worker %o is online", worker.process.pid);
+         broadcastWorkerCount();
+
+         workerLoads.set(worker.id, { lag: 0, lastSeen: Date.now() });
+
+         worker.on("message", (msg) => {
+             if (msg.cmd === "heartbeat") {
+                 workerLoads.set(worker.id, {
+                     lag: msg.lag,
+                     lastSeen: Date.now(),
+                     memory: msg.memory
+                 });
+             }
+         });
+     });
+
+     cluster.on("exit", (worker, code, signal) => {
+         workerLoads.delete(worker.id);
+         const currentWorkers = Object.keys(cluster.workers).length;
+
+         if (worker.exitedAfterDisconnect) {
+             return log.info(`Worker [${worker.process.pid}: ${currentWorkers} of ${maxWorkers}] disconnected voluntarily.`);
+         }
+
+         if (isShuttingDown) {
+             return log.info(`Worker [${worker.process.pid}: ${currentWorkers} of ${maxWorkers}] died. Code: ${code}, Signal: ${signal}.`);
+         }
+
+         log.warn(`Worker [${worker.process.pid}: ${currentWorkers} of ${maxWorkers}] died. Code: ${code}, Signal: ${signal}. Restarting...`);
+         try {
+             cluster.fork();
+         } catch (err) {
+             log.error("Failed to restart worker:", err);
+         }
+         broadcastWorkerCount();
+     });
+
+     cluster.on("listening", (worker, address) => {
+         const currentWorkers = Object.keys(cluster.workers).length;
+         log.info(`A worker [${worker.process.pid}: ${currentWorkers} of ${maxWorkers}] is now connected to ${address.address}:${address.port}`);
+         broadcastWorkerCount();
+     });
+
+     // Auto-scaling logic
+     if (mode === "smart") {
+         setInterval(() => {
+             const now = Date.now();
+             if (now - lastScalingAction < scalingCooldown) {
+                 return;
+             }
+
+             // Calculate average lag across all workers
+             let totalLag = 0;
+             let count = 0;
+
+             for (const stats of workerLoads.values()) {
+                 totalLag += stats.lag;
+                 ++count;
+             }
+
+             // Avoid scaling decisions if we have no stats yet
+             if (count === 0) {
+                 return;
+             }
+
+             const avgLag = totalLag / count;
+             const currentWorkers = Object.keys(cluster.workers).length;
+
+             if (avgLag > scaleUpThreshold && currentWorkers < maxWorkers) {
+                 log.info(`High load detected (Avg Lag: ${avgLag.toFixed(2)}ms). Scaling up...`);
+                 try {
+                     cluster.fork();
+                 } catch (err) {
+                     log.error("Failed to scale up:", err);
+                 }
+                 lastScaleUpTime = Date.now();
+                 lastScalingAction = now;
+
+                 return;
+             }
+
+             if (avgLag < scaleDownThreshold && currentWorkers > minWorkers) {
+                 if (now - lastScaleUpTime < scaleDownGrace) {
+                     log.debug("Skipping scale down due to warm-up grace period.");
+                     return;
+                 }
+
+                 log.info(`Low load detected (Avg Lag: ${avgLag.toFixed(2)}ms). Scaling down...`);
+                 // Kill the last worker
+                 const workerIds = Object.keys(cluster.workers);
+                 const victimId = workerIds[workerIds.length - 1];
+                 if (victimId) {
+                     cluster.workers[victimId].disconnect();
+                     lastScalingAction = now;
+                 }
+
+                 return;
+             }
+         }, 5000).unref();
+     }
+
+     // Graceful shutdown handling for Master
+     const signals = ["SIGINT", "SIGTERM", "SIGQUIT"];
+
+     signals.forEach((signal) => {
+         process.on(signal, () => {
+             log.info(`Master received ${signal}, shutting down workers...`);
+             isShuttingDown = true;
+             for (const worker of Object.values(cluster.workers)) {
+                 if (worker && worker.isConnected()) {
+                     worker.send("shutdown");
+                 }
+             }
+
+             // Ideally we would wait for every worker to exit; once all of their
+             // handles close, the master exits on its own. As a safety net, force
+             // an exit after a timeout in case a worker hangs during shutdown.
+             setTimeout(() => {
+                 log.info("Master force exiting after timeout.");
+                 process.exit(0);
+             }, 10000).unref();
+         });
+     });
+ }
+
+
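Note that `run()` implements only the primary-process side of the worker IPC protocol: it consumes `{ cmd: "heartbeat", lag, memory }` messages to drive auto-scaling, broadcasts `{ cmd: "cluster-count", count }`, and sends a plain `"shutdown"` string when the master receives a termination signal. The worker-side reporter is not part of this file, so the following is only a hedged sketch of what one could look like (hypothetical; the package may ship its own mechanism elsewhere), using `perf_hooks.monitorEventLoopDelay`:

```javascript
// Hypothetical worker-side heartbeat reporter (not part of src/cluster.js).
import { monitorEventLoopDelay } from "node:perf_hooks";

// Sample event loop delay; histogram values are reported in nanoseconds.
const histogram = monitorEventLoopDelay({ resolution: 20 });
histogram.enable();

// process.send only exists when this process was forked with an IPC channel.
if (process.send) {
    setInterval(() => {
        process.send({
            cmd: "heartbeat",
            lag: histogram.mean / 1e6, // ns -> ms, matching the thresholds above
            memory: process.memoryUsage().rss,
        });
        histogram.reset();
    }, 1000).unref();

    process.on("message", (msg) => {
        if (msg === "shutdown") {
            // Close servers and flush pending work here; the orchestrator's
            // isShuttingDown flag prevents this exit from triggering a restart.
            process.exit(0);
        }
    });
}
```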