@ynode/cluster 1.1.0

package/LICENSE ADDED
@@ -0,0 +1,20 @@
+ The MIT License (MIT)
+
+ Copyright (c) 2026 Michael Welter <me@mikinho.com>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
+ this software and associated documentation files (the "Software"), to deal in
+ the Software without restriction, including without limitation the rights to
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ the Software, and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,96 @@
+ # @ynode/cluster
+
+ Copyright (c) 2025 Michael Welter <me@mikinho.com>
+
+ [![npm version](https://img.shields.io/npm/v/@ynode/cluster.svg)](https://www.npmjs.com/package/@ynode/cluster)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
+ **Smart & Easy Node.js Clustering.**
+
+ `@ynode/cluster` removes the complexity of managing Node.js cluster processes. It provides out-of-the-box support for:
+ - **Smart Auto-Scaling**: Automatically spawns and kills workers based on event loop lag (a proxy for CPU load).
+ - **Resiliency**: Automatically restarts workers if they crash.
+ - **Zero-Config Defaults**: Works immediately with sensible defaults, but fully configurable.
+
+ ## Installation
+
+ ```bash
+ npm install @ynode/cluster
+ ```
+
+ ## Usage
+
+ Simply wrap your application startup logic in the `run()` function.
+
+ ```javascript
+ import { run } from "@ynode/cluster";
+ import Fastify from "fastify";
+
+ // Define your worker logic
+ const startServer = async () => {
+     const app = Fastify({ logger: true });
+
+     app.get("/", async () => "Hello from worker " + process.pid);
+
+     try {
+         await app.listen({ port: 3000 });
+     } catch (err) {
+         app.log.error(err);
+         process.exit(1);
+     }
+ };
+
+ // Start the cluster
+ run(startServer, {
+     mode: "smart", // Enable auto-scaling (default)
+     minWorkers: 2,
+     maxWorkers: 8 // Default is os.availableParallelism()
+ });
+ ```
+
+ ## Configuration
+
+ The `run(startWorker, options)` function accepts the following options (see the example after the table):
+
+ | Option | Type | Default | Description |
+ |--------|------|---------|-------------|
+ | `enabled` | `boolean` | `true` | Whether to enable clustering. If `false`, runs `startWorker` directly in the main process. |
+ | `mode` | `"smart" \| "max"` | `"smart"` | `"smart"` enables auto-scaling based on load. `"max"` spawns `maxWorkers` and keeps them running. |
+ | `minWorkers` | `number` | `2` | Minimum number of workers to keep alive in "smart" mode. |
+ | `maxWorkers` | `number` | `os.availableParallelism()` | Maximum number of workers to spawn. |
+ | `scaleUpThreshold` | `number` | `50` | Event loop lag (ms) threshold to trigger scaling up. |
+ | `scaleDownThreshold` | `number` | `10` | Event loop lag (ms) threshold to trigger scaling down. |
+ | `scalingCooldown` | `number` | `10000` | Minimum time (ms) between scaling actions. |
+ | `scaleDownGrace` | `number` | `30000` | Grace period (ms) after scaling up before scaling down is allowed. |
+
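+ For example, a service that clusters only in production and tolerates more event loop lag before adding workers could be configured like this (illustrative values only, using the options documented above):
+
+ ```javascript
+ import { run } from "@ynode/cluster";
+
+ // Same worker function as in the Usage example above.
+ const startServer = async () => { /* start your HTTP server here */ };
+
+ const isProd = process.env.NODE_ENV === "production";
+
+ run(startServer, {
+     enabled: isProd,        // false runs startServer directly in the main process
+     mode: "smart",
+     minWorkers: 1,
+     maxWorkers: 4,
+     scaleUpThreshold: 100,  // ms of average event loop lag before scaling up
+     scaleDownGrace: 60000   // wait a full minute after a scale-up before scaling down
+ });
+ ```
+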
+ ## Working with @ynode/autoshutdown
+
+ This package works seamlessly with **[@ynode/autoshutdown](https://www.npmjs.com/package/@ynode/autoshutdown)**.
+
+ While `@ynode/cluster` manages the **pool size** based on overall system load (scaling up when busy, down when quiet), `@ynode/autoshutdown` manages the **lifecycle of individual workers** based on their specific inactivity.
+
+ - **@ynode/cluster**: "We are overloaded, add more workers!" or "We are effectively idle, remove the extra workers."
+ - **@ynode/autoshutdown**: "I haven't received a request in 10 minutes, I should shut down to save memory."
+
+ Using them together ensures optimal resource usage: responsive scaling for traffic spikes and aggressive cleanup for idle periods.
+
+ ```javascript
+ import { run } from "@ynode/cluster";
+ import autoShutdown from "@ynode/autoshutdown";
+ import Fastify from "fastify";
+
+ run(async () => {
+     const app = Fastify();
+
+     // Register auto-shutdown to kill this specific worker if it's unused
+     await app.register(autoShutdown, {
+         sleep: 600, // 10 minutes
+     });
+
+     await app.listen({ port: 3000 });
+ });
+ ```
+
+ ## License
+
+ This project is licensed under the [MIT License](./LICENSE).
package/package.json ADDED
@@ -0,0 +1,71 @@
+ {
+   "name": "@ynode/cluster",
+   "version": "1.1.0",
+   "description": "Smart, auto-scaling Node.js cluster manager that monitors event loop lag to optimize performance and resource usage.",
+   "main": "src/cluster.js",
+   "exports": {
+     ".": "./src/cluster.js"
+   },
+   "types": "./index.d.ts",
+   "type": "module",
+   "license": "MIT",
+   "author": "Michael Welter <me@mikinho.com>",
+   "repository": {
+     "type": "git",
+     "url": "git+https://github.com/yammm/ynode-cluster.git"
+   },
+   "bugs": {
+     "url": "https://github.com/yammm/ynode-cluster/issues"
+   },
+   "homepage": "https://github.com/yammm/ynode-cluster#readme",
+   "keywords": [
+     "cluster",
+     "auto-scaling",
+     "load-balancing",
+     "performance",
+     "worker",
+     "process-manager",
+     "event-loop",
+     "lag",
+     "resiliency",
+     "node"
+   ],
+   "engines": {
+     "node": ">=20"
+   },
+   "devDependencies": {
+     "@eslint/js": "^9.39.2",
+     "@eslint/json": "^0.14.0",
+     "@eslint/markdown": "^7.5.1",
+     "@mikinho/autover": "^2.0.1",
+     "eslint": "^9.39.2",
+     "eslint-config-prettier": "^10.1.8",
+     "eslint-plugin-prettier": "^5.5.5",
+     "globals": "^17.1.0",
+     "prettier": "^3.8.1",
+     "rimraf": "^6.1.2",
+     "yuidocjs": "^0.10.2"
+   },
+   "scripts": {
+     "docs": "node scripts/gen-docs.mjs",
+     "docs:clean": "rimraf docs || rmdir /s /q docs 2> NUL || true",
+     "docs:open": "node -e \"import('node:child_process').then(m=>m.exec(process.platform==='win32'?'start docs/index.html':(process.platform==='darwin'?'open docs/index.html':'xdg-open docs/index.html')))\"",
+     "format": "prettier --write .",
+     "format:check": "prettier --check .",
+     "lint": "eslint .",
+     "lint:fix": "eslint . --fix",
+     "ver:preview": "npx autover --no-amend --dry-run --short",
+     "ver:apply": "npx autover --guard-unchanged --short",
+     "test": "node --test test/**/*.test.js",
+     "prepublishOnly": "npm test || true",
+     "postversion": "git push && git push --tags"
+   },
+   "publishConfig": {
+     "access": "public"
+   },
+   "files": [
+     "src",
+     "README.md",
+     "LICENSE"
+   ]
+ }
package/src/cluster.js ADDED
@@ -0,0 +1,241 @@
+ // ynode/cluster
+
+ /*
+ The MIT License (MIT)
+
+ Copyright (c) 2026 Michael Welter <me@mikinho.com>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
+ this software and associated documentation files (the "Software"), to deal in
+ the Software without restriction, including without limitation the rights to
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ the Software, and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+  * cluster.js: Process Manager or Cluster Orchestrator
+  *
+  * @module cluster
+  *
+  * Its sole responsibility is to handle the logic of creating, monitoring,
+  * and restarting worker processes.
+  */
+
+ import cluster from "node:cluster";
+ import os from "node:os";
+
+ /**
+  * Manages the application's clustering.
+  * @param {function} startWorker - The function to execute when a worker process starts.
+  * @param {object|boolean} [options=true] - Configuration object or boolean to enable/disable.
+  * @param {boolean} [options.enabled=true] - Whether clustering is enabled.
+  * @param {number} [options.minWorkers=2] - Minimum number of workers (smart mode).
+  * @param {number} [options.maxWorkers=os.availableParallelism()] - Maximum number of workers.
+  * @param {number} [options.scaleUpThreshold=50] - Event loop lag (ms) threshold to scale up.
+  * @param {number} [options.scaleDownThreshold=10] - Event loop lag (ms) threshold to scale down.
+  * @param {string} [options.mode="smart"] - "smart" (auto-scaling) or "max" (all cores).
+  * @param {number} [options.scalingCooldown=10000] - Ms to wait between scaling actions.
+  * @param {number} [options.scaleDownGrace=30000] - Ms to wait after scale-up before allowing scale-down.
+  * @param {object} [log=console] - The logger instance.
+  */
+ export function run(startWorker, options = true, log = console) {
+     // Treat a missing "enabled" flag as true so that passing an options object
+     // without it (as in the README examples) still enables clustering.
+     const isEnabled = typeof options === "object" ? (options.enabled ?? true) : options;
+
+     if (cluster.isWorker || !isEnabled) {
+         log.info(`Running worker process.`);
+         return startWorker();
+     }
+
+     let isShuttingDown = false;
+
+     const {
+         minWorkers = Math.min(2, os.availableParallelism()),
+         maxWorkers = os.availableParallelism(),
+         scaleUpThreshold = 50, // ms lag
+         scaleDownThreshold = 10, // ms lag
+         mode = "smart", // 'smart' or 'max'
+         scalingCooldown = 10000,
+         scaleDownGrace = 30000,
+     } = typeof options === "object" ? options : {};
+
+     if (minWorkers > maxWorkers) {
+         throw new Error(`Invalid configuration: minWorkers (${minWorkers}) cannot be greater than maxWorkers (${maxWorkers})`);
+     }
+
+     if (scaleUpThreshold <= scaleDownThreshold) {
+         throw new Error(`Invalid configuration: scaleUpThreshold (${scaleUpThreshold}) must be greater than scaleDownThreshold (${scaleDownThreshold})`);
+     }
+
+     const initialWorkers = mode === "max" ? maxWorkers : minWorkers;
+     log.info(`Shogun is the master! Starting ${initialWorkers} workers (Max: ${maxWorkers}).`);
+
+     let lastScaleUpTime = Date.now();
+
+     // Fork initial workers
+     for (let i = 0; i < initialWorkers; ++i) {
+         try {
+             cluster.fork();
+         } catch (err) {
+             log.error("Failed to fork initial worker:", err);
+         }
+         lastScaleUpTime = Date.now();
+     }
+
+     const workerLoads = new Map();
+     let lastScalingAction = Date.now();
+
+     function broadcastWorkerCount() {
+         const count = Object.keys(cluster.workers).length;
+         for (const worker of Object.values(cluster.workers)) {
+             if (worker && worker.isConnected()) {
+                 try {
+                     worker.send({ cmd: "cluster-count", count });
+                 } catch (err) {
+                     // Ignore channel closed errors
+                     log.debug(err);
+                 }
+             }
+         }
+     }
+
+     cluster.on("online", (worker) => {
+         log.info("Worker %o is online", worker.process.pid);
+         broadcastWorkerCount();
+
+         workerLoads.set(worker.id, { lag: 0, lastSeen: Date.now() });
+
+         worker.on("message", (msg) => {
+             if (msg.cmd === "heartbeat") {
+                 workerLoads.set(worker.id, {
+                     lag: msg.lag,
+                     lastSeen: Date.now(),
+                     memory: msg.memory
+                 });
+             }
+         });
+     });
+
+     cluster.on("exit", (worker, code, signal) => {
+         workerLoads.delete(worker.id);
+         const currentWorkers = Object.keys(cluster.workers).length;
+
+         if (worker.exitedAfterDisconnect) {
+             return log.info(`Worker [${worker.process.pid}: ${currentWorkers} of ${maxWorkers}] disconnected voluntarily.`);
+         }
+
+         if (isShuttingDown) {
+             return log.info(`Worker [${worker.process.pid}: ${currentWorkers} of ${maxWorkers}] died. Code: ${code}, Signal: ${signal}.`);
+         }
+
+         log.warn(`Worker [${worker.process.pid}: ${currentWorkers} of ${maxWorkers}] died. Code: ${code}, Signal: ${signal}. Restarting...`);
+         try {
+             cluster.fork();
+         } catch (err) {
+             log.error("Failed to restart worker:", err);
+         }
+         broadcastWorkerCount();
+     });
+
+     cluster.on("listening", (worker, address) => {
+         const currentWorkers = Object.keys(cluster.workers).length;
+         log.info(`A worker [${worker.process.pid}: ${currentWorkers} of ${maxWorkers}] is now connected to ${address.address}:${address.port}`);
+         broadcastWorkerCount();
+     });
+
+     // Auto-scaling logic
+     if (mode === "smart") {
+         setInterval(() => {
+             const now = Date.now();
+             if (now - lastScalingAction < scalingCooldown) {
+                 return;
+             }
+
+             // Calculate average lag across all workers
+             let totalLag = 0;
+             let count = 0;
+
+             for (const stats of workerLoads.values()) {
+                 totalLag += stats.lag;
+                 ++count;
+             }
+
+             // Avoid scaling decisions if we have no stats yet
+             if (count === 0) {
+                 return;
+             }
+
+             const avgLag = totalLag / count;
+             const currentWorkers = Object.keys(cluster.workers).length;
+
+             if (avgLag > scaleUpThreshold && currentWorkers < maxWorkers) {
+                 log.info(`High load detected (Avg Lag: ${avgLag.toFixed(2)}ms). Scaling up...`);
+                 try {
+                     cluster.fork();
+                 } catch (err) {
+                     log.error("Failed to scale up:", err);
+                 }
+                 lastScaleUpTime = Date.now();
+                 lastScalingAction = now;
+
+                 return;
+             }
+
+             if (avgLag < scaleDownThreshold && currentWorkers > minWorkers) {
+                 if (now - lastScaleUpTime < scaleDownGrace) {
+                     log.debug("Skipping scale down due to warm-up grace period.");
+                     return;
+                 }
+
+                 log.info(`Low load detected (Avg Lag: ${avgLag.toFixed(2)}ms). Scaling down...`);
+                 // Kill the last worker
+                 const workerIds = Object.keys(cluster.workers);
+                 const victimId = workerIds[workerIds.length - 1];
+                 if (victimId) {
+                     cluster.workers[victimId].disconnect();
+                     lastScalingAction = now;
+                 }
+
+                 return;
+             }
+         }, 5000).unref();
+     }
+
+     // Graceful shutdown handling for Master
+     const signals = ["SIGINT", "SIGTERM", "SIGQUIT"];
+
+     signals.forEach((signal) => {
+         process.on(signal, () => {
+             log.info(`Master received ${signal}, shutting down workers...`);
+             isShuttingDown = true;
+             for (const worker of Object.values(cluster.workers)) {
+                 if (worker && worker.isConnected()) {
+                     worker.send("shutdown");
+                 }
+             }
+
+             // Ideally we would wait for every worker to exit; once all of their
+             // handles close, the master exits on its own. As a safety net, force
+             // an exit after a timeout in case a worker hangs during shutdown.
+             setTimeout(() => {
+                 log.info("Master force exiting after timeout.");
+                 process.exit(0);
+             }, 10000).unref();
+         });
+     });
+ }
+
+
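Note that `run()` implements only the primary-process side of the worker IPC protocol: it consumes `{ cmd: "heartbeat", lag, memory }` messages to drive auto-scaling, broadcasts `{ cmd: "cluster-count", count }`, and sends a plain `"shutdown"` string when the master receives a termination signal. The worker-side reporter is not part of this file, so the following is only a hedged sketch of what one could look like (hypothetical; the package may ship its own mechanism elsewhere), using `perf_hooks.monitorEventLoopDelay`:

```javascript
// Hypothetical worker-side heartbeat reporter (not part of src/cluster.js).
import { monitorEventLoopDelay } from "node:perf_hooks";

// Sample event loop delay; histogram values are reported in nanoseconds.
const histogram = monitorEventLoopDelay({ resolution: 20 });
histogram.enable();

// process.send only exists when this process was forked with an IPC channel.
if (process.send) {
    setInterval(() => {
        process.send({
            cmd: "heartbeat",
            lag: histogram.mean / 1e6, // ns -> ms, matching the thresholds above
            memory: process.memoryUsage().rss,
        });
        histogram.reset();
    }, 1000).unref();

    process.on("message", (msg) => {
        if (msg === "shutdown") {
            // Close servers and flush pending work here; the orchestrator's
            // isShuttingDown flag prevents this exit from triggering a restart.
            process.exit(0);
        }
    });
}
```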