@orkify/cli 1.0.0-beta.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +191 -0
- package/README.md +1701 -0
- package/bin/orkify +3 -0
- package/boot/systemd/orkify@.service +30 -0
- package/dist/agent-name.d.ts +4 -0
- package/dist/agent-name.js +42 -0
- package/dist/alerts/AlertEvaluator.d.ts +14 -0
- package/dist/alerts/AlertEvaluator.js +135 -0
- package/dist/cli/commands/autostart.d.ts +3 -0
- package/dist/cli/commands/autostart.js +11 -0
- package/dist/cli/commands/crash-test.d.ts +3 -0
- package/dist/cli/commands/crash-test.js +17 -0
- package/dist/cli/commands/daemon-reload.d.ts +3 -0
- package/dist/cli/commands/daemon-reload.js +72 -0
- package/dist/cli/commands/delete.d.ts +3 -0
- package/dist/cli/commands/delete.js +37 -0
- package/dist/cli/commands/deploy.d.ts +6 -0
- package/dist/cli/commands/deploy.js +266 -0
- package/dist/cli/commands/down.d.ts +3 -0
- package/dist/cli/commands/down.js +36 -0
- package/dist/cli/commands/flush.d.ts +3 -0
- package/dist/cli/commands/flush.js +28 -0
- package/dist/cli/commands/kill.d.ts +3 -0
- package/dist/cli/commands/kill.js +35 -0
- package/dist/cli/commands/list.d.ts +14 -0
- package/dist/cli/commands/list.js +361 -0
- package/dist/cli/commands/logs.d.ts +3 -0
- package/dist/cli/commands/logs.js +107 -0
- package/dist/cli/commands/mcp.d.ts +3 -0
- package/dist/cli/commands/mcp.js +151 -0
- package/dist/cli/commands/reload.d.ts +3 -0
- package/dist/cli/commands/reload.js +54 -0
- package/dist/cli/commands/restart.d.ts +3 -0
- package/dist/cli/commands/restart.js +43 -0
- package/dist/cli/commands/restore.d.ts +3 -0
- package/dist/cli/commands/restore.js +88 -0
- package/dist/cli/commands/run.d.ts +8 -0
- package/dist/cli/commands/run.js +212 -0
- package/dist/cli/commands/snap.d.ts +3 -0
- package/dist/cli/commands/snap.js +30 -0
- package/dist/cli/commands/up.d.ts +3 -0
- package/dist/cli/commands/up.js +125 -0
- package/dist/cli/crash-recovery.d.ts +2 -0
- package/dist/cli/crash-recovery.js +67 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.js +46 -0
- package/dist/cli/parse.d.ts +28 -0
- package/dist/cli/parse.js +97 -0
- package/dist/cluster/ClusterWrapper.d.ts +18 -0
- package/dist/cluster/ClusterWrapper.js +602 -0
- package/dist/config/ConfigStore.d.ts +11 -0
- package/dist/config/ConfigStore.js +21 -0
- package/dist/config/schema.d.ts +103 -0
- package/dist/config/schema.js +49 -0
- package/dist/constants.d.ts +83 -0
- package/dist/constants.js +289 -0
- package/dist/cron/CronScheduler.d.ts +25 -0
- package/dist/cron/CronScheduler.js +149 -0
- package/dist/daemon/GracefulManager.d.ts +8 -0
- package/dist/daemon/GracefulManager.js +29 -0
- package/dist/daemon/ManagedProcess.d.ts +71 -0
- package/dist/daemon/ManagedProcess.js +1020 -0
- package/dist/daemon/Orchestrator.d.ts +51 -0
- package/dist/daemon/Orchestrator.js +416 -0
- package/dist/daemon/RotatingWriter.d.ts +27 -0
- package/dist/daemon/RotatingWriter.js +264 -0
- package/dist/daemon/index.d.ts +2 -0
- package/dist/daemon/index.js +106 -0
- package/dist/daemon/startDaemon.d.ts +30 -0
- package/dist/daemon/startDaemon.js +693 -0
- package/dist/deploy/CommandPoller.d.ts +13 -0
- package/dist/deploy/CommandPoller.js +53 -0
- package/dist/deploy/DeployExecutor.d.ts +33 -0
- package/dist/deploy/DeployExecutor.js +340 -0
- package/dist/deploy/config.d.ts +20 -0
- package/dist/deploy/config.js +161 -0
- package/dist/deploy/env.d.ts +2 -0
- package/dist/deploy/env.js +17 -0
- package/dist/deploy/tarball.d.ts +32 -0
- package/dist/deploy/tarball.js +243 -0
- package/dist/detect/framework.d.ts +2 -0
- package/dist/detect/framework.js +24 -0
- package/dist/ipc/DaemonClient.d.ts +31 -0
- package/dist/ipc/DaemonClient.js +248 -0
- package/dist/ipc/DaemonServer.d.ts +28 -0
- package/dist/ipc/DaemonServer.js +166 -0
- package/dist/ipc/MultiUserClient.d.ts +27 -0
- package/dist/ipc/MultiUserClient.js +203 -0
- package/dist/ipc/protocol.d.ts +7 -0
- package/dist/ipc/protocol.js +53 -0
- package/dist/ipc/restoreDaemon.d.ts +8 -0
- package/dist/ipc/restoreDaemon.js +19 -0
- package/dist/machine-id.d.ts +11 -0
- package/dist/machine-id.js +51 -0
- package/dist/mcp/auth.d.ts +118 -0
- package/dist/mcp/auth.js +245 -0
- package/dist/mcp/http.d.ts +20 -0
- package/dist/mcp/http.js +229 -0
- package/dist/mcp/index.d.ts +3 -0
- package/dist/mcp/index.js +8 -0
- package/dist/mcp/server.d.ts +37 -0
- package/dist/mcp/server.js +413 -0
- package/dist/probe/compute-fingerprint.d.ts +27 -0
- package/dist/probe/compute-fingerprint.js +65 -0
- package/dist/probe/parse-frames.d.ts +21 -0
- package/dist/probe/parse-frames.js +57 -0
- package/dist/probe/resolve-sourcemaps.d.ts +25 -0
- package/dist/probe/resolve-sourcemaps.js +281 -0
- package/dist/state/StateStore.d.ts +11 -0
- package/dist/state/StateStore.js +78 -0
- package/dist/telemetry/TelemetryReporter.d.ts +49 -0
- package/dist/telemetry/TelemetryReporter.js +451 -0
- package/dist/types/index.d.ts +373 -0
- package/dist/types/index.js +2 -0
- package/package.json +148 -0
- package/packages/cache/README.md +114 -0
- package/packages/cache/dist/CacheClient.d.ts +26 -0
- package/packages/cache/dist/CacheClient.d.ts.map +1 -0
- package/packages/cache/dist/CacheClient.js +174 -0
- package/packages/cache/dist/CacheClient.js.map +1 -0
- package/packages/cache/dist/CacheFileStore.d.ts +45 -0
- package/packages/cache/dist/CacheFileStore.d.ts.map +1 -0
- package/packages/cache/dist/CacheFileStore.js +446 -0
- package/packages/cache/dist/CacheFileStore.js.map +1 -0
- package/packages/cache/dist/CachePersistence.d.ts +9 -0
- package/packages/cache/dist/CachePersistence.d.ts.map +1 -0
- package/packages/cache/dist/CachePersistence.js +67 -0
- package/packages/cache/dist/CachePersistence.js.map +1 -0
- package/packages/cache/dist/CachePrimary.d.ts +25 -0
- package/packages/cache/dist/CachePrimary.d.ts.map +1 -0
- package/packages/cache/dist/CachePrimary.js +155 -0
- package/packages/cache/dist/CachePrimary.js.map +1 -0
- package/packages/cache/dist/CacheStore.d.ts +50 -0
- package/packages/cache/dist/CacheStore.d.ts.map +1 -0
- package/packages/cache/dist/CacheStore.js +271 -0
- package/packages/cache/dist/CacheStore.js.map +1 -0
- package/packages/cache/dist/constants.d.ts +6 -0
- package/packages/cache/dist/constants.d.ts.map +1 -0
- package/packages/cache/dist/constants.js +9 -0
- package/packages/cache/dist/constants.js.map +1 -0
- package/packages/cache/dist/index.d.ts +16 -0
- package/packages/cache/dist/index.d.ts.map +1 -0
- package/packages/cache/dist/index.js +86 -0
- package/packages/cache/dist/index.js.map +1 -0
- package/packages/cache/dist/serialize.d.ts +9 -0
- package/packages/cache/dist/serialize.d.ts.map +1 -0
- package/packages/cache/dist/serialize.js +40 -0
- package/packages/cache/dist/serialize.js.map +1 -0
- package/packages/cache/dist/types.d.ts +123 -0
- package/packages/cache/dist/types.d.ts.map +1 -0
- package/packages/cache/dist/types.js +2 -0
- package/packages/cache/dist/types.js.map +1 -0
- package/packages/cache/package.json +27 -0
- package/packages/cache/src/CacheClient.ts +227 -0
- package/packages/cache/src/CacheFileStore.ts +528 -0
- package/packages/cache/src/CachePersistence.ts +89 -0
- package/packages/cache/src/CachePrimary.ts +172 -0
- package/packages/cache/src/CacheStore.ts +308 -0
- package/packages/cache/src/constants.ts +10 -0
- package/packages/cache/src/index.ts +100 -0
- package/packages/cache/src/serialize.ts +49 -0
- package/packages/cache/src/types.ts +156 -0
- package/packages/cache/tsconfig.json +18 -0
- package/packages/cache/tsconfig.tsbuildinfo +1 -0
- package/packages/next/README.md +166 -0
- package/packages/next/dist/error-capture.d.ts +34 -0
- package/packages/next/dist/error-capture.d.ts.map +1 -0
- package/packages/next/dist/error-capture.js +130 -0
- package/packages/next/dist/error-capture.js.map +1 -0
- package/packages/next/dist/error-handler.d.ts +10 -0
- package/packages/next/dist/error-handler.d.ts.map +1 -0
- package/packages/next/dist/error-handler.js +186 -0
- package/packages/next/dist/error-handler.js.map +1 -0
- package/packages/next/dist/isr-cache.d.ts +9 -0
- package/packages/next/dist/isr-cache.d.ts.map +1 -0
- package/packages/next/dist/isr-cache.js +86 -0
- package/packages/next/dist/isr-cache.js.map +1 -0
- package/packages/next/dist/stream.d.ts +5 -0
- package/packages/next/dist/stream.d.ts.map +1 -0
- package/packages/next/dist/stream.js +22 -0
- package/packages/next/dist/stream.js.map +1 -0
- package/packages/next/dist/types.d.ts +33 -0
- package/packages/next/dist/types.d.ts.map +1 -0
- package/packages/next/dist/types.js +6 -0
- package/packages/next/dist/types.js.map +1 -0
- package/packages/next/dist/use-cache.d.ts +4 -0
- package/packages/next/dist/use-cache.d.ts.map +1 -0
- package/packages/next/dist/use-cache.js +86 -0
- package/packages/next/dist/use-cache.js.map +1 -0
- package/packages/next/dist/utils.d.ts +32 -0
- package/packages/next/dist/utils.d.ts.map +1 -0
- package/packages/next/dist/utils.js +88 -0
- package/packages/next/dist/utils.js.map +1 -0
- package/packages/next/package.json +52 -0
- package/packages/next/src/error-capture.ts +177 -0
- package/packages/next/src/error-handler.ts +221 -0
- package/packages/next/src/isr-cache.ts +100 -0
- package/packages/next/src/stream.ts +23 -0
- package/packages/next/src/types.ts +33 -0
- package/packages/next/src/use-cache.ts +99 -0
- package/packages/next/src/utils.ts +102 -0
- package/packages/next/tsconfig.json +19 -0
- package/packages/next/tsconfig.tsbuildinfo +1 -0
|
@@ -0,0 +1,1020 @@
|
|
|
1
|
+
import { watch } from 'chokidar';
|
|
2
|
+
import { fork } from 'node:child_process';
|
|
3
|
+
import { randomBytes } from 'node:crypto';
|
|
4
|
+
import { EventEmitter } from 'node:events';
|
|
5
|
+
import { existsSync, mkdirSync } from 'node:fs';
|
|
6
|
+
import { dirname, join } from 'node:path';
|
|
7
|
+
import { fileURLToPath } from 'node:url';
|
|
8
|
+
import pidusage from 'pidusage';
|
|
9
|
+
import { ExecMode, LAUNCH_TIMEOUT, LOGS_DIR, MEMORY_RESTART_COOLDOWN, METRICS_PROBE_IMPORT, ProcessStatus, } from '../constants.js';
|
|
10
|
+
import { RotatingWriter } from './RotatingWriter.js';
|
|
11
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
12
|
+
const __dirname = dirname(__filename);
|
|
13
|
+
export class ManagedProcess extends EventEmitter {
|
|
14
|
+
id;
|
|
15
|
+
config;
|
|
16
|
+
// For fork mode: single child process
|
|
17
|
+
forkProcess = null;
|
|
18
|
+
// For cluster mode: primary process that manages workers
|
|
19
|
+
clusterPrimary = null;
|
|
20
|
+
clusterWorkers = new Map();
|
|
21
|
+
slotRestarts = new Map();
|
|
22
|
+
slotCrashes = new Map();
|
|
23
|
+
launchTimers = new Map();
|
|
24
|
+
outWriter = null;
|
|
25
|
+
errWriter = null;
|
|
26
|
+
watcher = null;
|
|
27
|
+
isShuttingDown = false;
|
|
28
|
+
isReloading = false;
|
|
29
|
+
statsInterval = null;
|
|
30
|
+
forkRestarts = 0;
|
|
31
|
+
forkCrashes = 0;
|
|
32
|
+
forkCreatedAt = 0;
|
|
33
|
+
forkReady = false;
|
|
34
|
+
forkLaunchTimer = null;
|
|
35
|
+
detectedPort;
|
|
36
|
+
cronSecret;
|
|
37
|
+
primaryRestarts = 0;
|
|
38
|
+
lastMemoryRestart = 0;
|
|
39
|
+
workerMemoryCooldowns = new Map();
|
|
40
|
+
memoryRestartingWorkers = new Set();
|
|
41
|
+
// Ring buffer of recent stderr lines for crash diagnostics
|
|
42
|
+
recentStderr = [];
|
|
43
|
+
static STDERR_BUFFER_SIZE = 10;
|
|
44
|
+
forkStats = {
|
|
45
|
+
memory: 0,
|
|
46
|
+
cpu: 0,
|
|
47
|
+
heapUsed: 0,
|
|
48
|
+
heapTotal: 0,
|
|
49
|
+
external: 0,
|
|
50
|
+
arrayBuffers: 0,
|
|
51
|
+
eventLoopLag: 0,
|
|
52
|
+
eventLoopLagP95: 0,
|
|
53
|
+
activeHandles: 0,
|
|
54
|
+
};
|
|
55
|
+
constructor(id, config) {
|
|
56
|
+
super();
|
|
57
|
+
this.id = id;
|
|
58
|
+
this.config = config;
|
|
59
|
+
if (config.cron?.length) {
|
|
60
|
+
this.cronSecret = randomBytes(32).toString('hex');
|
|
61
|
+
}
|
|
62
|
+
this.setupLogStreams();
|
|
63
|
+
}
|
|
64
|
+
setupLogStreams() {
|
|
65
|
+
if (!existsSync(LOGS_DIR)) {
|
|
66
|
+
mkdirSync(LOGS_DIR, { recursive: true });
|
|
67
|
+
}
|
|
68
|
+
const outPath = join(LOGS_DIR, `${this.config.name}.stdout.log`);
|
|
69
|
+
const errPath = join(LOGS_DIR, `${this.config.name}.stderr.log`);
|
|
70
|
+
this.outWriter = new RotatingWriter(outPath, this.config.logMaxSize, this.config.logMaxFiles, this.config.logMaxAge);
|
|
71
|
+
this.errWriter = new RotatingWriter(errPath, this.config.logMaxSize, this.config.logMaxFiles, this.config.logMaxAge);
|
|
72
|
+
}
|
|
73
|
+
async start() {
|
|
74
|
+
if (this.isShuttingDown) {
|
|
75
|
+
return;
|
|
76
|
+
}
|
|
77
|
+
if (this.config.execMode === ExecMode.CLUSTER) {
|
|
78
|
+
await this.startCluster();
|
|
79
|
+
}
|
|
80
|
+
else {
|
|
81
|
+
await this.startFork();
|
|
82
|
+
}
|
|
83
|
+
if (this.config.watch) {
|
|
84
|
+
this.setupWatcher();
|
|
85
|
+
}
|
|
86
|
+
this.startStatsCollection();
|
|
87
|
+
}
|
|
88
|
+
async startFork() {
|
|
89
|
+
const env = {
|
|
90
|
+
...process.env,
|
|
91
|
+
...this.config.env,
|
|
92
|
+
ORKIFY_PROCESS_ID: String(this.id),
|
|
93
|
+
ORKIFY_WORKER_ID: '0',
|
|
94
|
+
ORKIFY_PROCESS_NAME: this.config.name,
|
|
95
|
+
ORKIFY_EXEC_MODE: 'fork',
|
|
96
|
+
};
|
|
97
|
+
if (this.config.healthCheck) {
|
|
98
|
+
env.ORKIFY_HEALTH_CHECK = this.config.healthCheck;
|
|
99
|
+
}
|
|
100
|
+
if (this.config.port !== undefined) {
|
|
101
|
+
env.ORKIFY_PORT = String(this.config.port);
|
|
102
|
+
}
|
|
103
|
+
if (this.cronSecret) {
|
|
104
|
+
env.ORKIFY_CRON_SECRET = this.cronSecret;
|
|
105
|
+
}
|
|
106
|
+
// Prepend --import for the metrics probe so it runs inside the child
|
|
107
|
+
const execArgv = [METRICS_PROBE_IMPORT, ...this.config.nodeArgs];
|
|
108
|
+
// windowsHide is supported by fork() but not in TypeScript types
|
|
109
|
+
// See: https://github.com/nodejs/node/issues/17370
|
|
110
|
+
this.forkProcess = fork(this.config.script, this.config.args, {
|
|
111
|
+
cwd: this.config.cwd,
|
|
112
|
+
env,
|
|
113
|
+
execArgv,
|
|
114
|
+
stdio: ['pipe', 'pipe', 'pipe', 'ipc'],
|
|
115
|
+
detached: false,
|
|
116
|
+
windowsHide: true, // Hide subprocess console window on Windows
|
|
117
|
+
});
|
|
118
|
+
this.forkCreatedAt = Date.now();
|
|
119
|
+
this.forkReady = false;
|
|
120
|
+
this.recentStderr = [];
|
|
121
|
+
this.setupForkHandlers(this.forkProcess);
|
|
122
|
+
this.startForkLaunchTimer();
|
|
123
|
+
}
|
|
124
|
+
startForkLaunchTimer() {
|
|
125
|
+
this.clearForkLaunchTimer();
|
|
126
|
+
this.forkLaunchTimer = setTimeout(() => {
|
|
127
|
+
this.forkLaunchTimer = null;
|
|
128
|
+
if (!this.forkReady && this.forkProcess && !this.isShuttingDown) {
|
|
129
|
+
console.error(`[ERROR] ${this.config.name}: process failed to start — not listening and no ready signal after ${LAUNCH_TIMEOUT / 1000}s.\n` +
|
|
130
|
+
` Common causes:\n` +
|
|
131
|
+
` - Application crashed or hung during startup\n` +
|
|
132
|
+
` - Missing process.send('ready') for apps that don't bind a port`);
|
|
133
|
+
this.emit('worker:error', {
|
|
134
|
+
workerId: 0,
|
|
135
|
+
error: new Error(`Fork process launch timeout after ${LAUNCH_TIMEOUT / 1000}s`),
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
}, LAUNCH_TIMEOUT);
|
|
139
|
+
}
|
|
140
|
+
clearForkLaunchTimer() {
|
|
141
|
+
if (this.forkLaunchTimer) {
|
|
142
|
+
clearTimeout(this.forkLaunchTimer);
|
|
143
|
+
this.forkLaunchTimer = null;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
setupForkHandlers(child) {
|
|
147
|
+
child.stdout?.on('data', (data) => {
|
|
148
|
+
this.handleLog('out', 0, data);
|
|
149
|
+
});
|
|
150
|
+
child.stderr?.on('data', (data) => {
|
|
151
|
+
this.handleLog('err', 0, data);
|
|
152
|
+
});
|
|
153
|
+
child.on('message', (message) => {
|
|
154
|
+
// Handle metrics probe messages from the injected MetricsProbe preload
|
|
155
|
+
const msg = message;
|
|
156
|
+
if (msg?.__orkify && msg.type === 'metrics' && msg.data) {
|
|
157
|
+
const d = msg.data;
|
|
158
|
+
this.forkStats.heapUsed = d.heapUsed ?? 0;
|
|
159
|
+
this.forkStats.heapTotal = d.heapTotal ?? 0;
|
|
160
|
+
this.forkStats.external = d.external ?? 0;
|
|
161
|
+
this.forkStats.arrayBuffers = d.arrayBuffers ?? 0;
|
|
162
|
+
this.forkStats.eventLoopLag = d.eventLoopLag ?? 0;
|
|
163
|
+
this.forkStats.eventLoopLagP95 = d.eventLoopLagP95 ?? 0;
|
|
164
|
+
this.forkStats.activeHandles = d.activeHandles ?? 0;
|
|
165
|
+
if (d.cacheSize !== undefined) {
|
|
166
|
+
this.forkStats.cacheSize = d.cacheSize;
|
|
167
|
+
this.forkStats.cacheTotalBytes = d.cacheTotalBytes;
|
|
168
|
+
this.forkStats.cacheHits = d.cacheHits;
|
|
169
|
+
this.forkStats.cacheMisses = d.cacheMisses;
|
|
170
|
+
this.forkStats.cacheHitRate = d.cacheHitRate;
|
|
171
|
+
}
|
|
172
|
+
return;
|
|
173
|
+
}
|
|
174
|
+
if (msg?.__orkify && msg.type === 'error' && msg.data) {
|
|
175
|
+
this.emit('worker:error:captured', {
|
|
176
|
+
workerId: 0,
|
|
177
|
+
error: msg.data,
|
|
178
|
+
});
|
|
179
|
+
return;
|
|
180
|
+
}
|
|
181
|
+
// Port auto-detection from the metrics probe's net.Server.listen hook
|
|
182
|
+
if (msg?.__orkify && msg.type === 'listening' && msg.data) {
|
|
183
|
+
const port = msg.data.port;
|
|
184
|
+
if (port && !this.detectedPort) {
|
|
185
|
+
this.detectedPort = port;
|
|
186
|
+
}
|
|
187
|
+
if (!this.forkReady) {
|
|
188
|
+
this.forkReady = true;
|
|
189
|
+
this.clearForkLaunchTimer();
|
|
190
|
+
const effectivePort = this.config.port ?? this.detectedPort;
|
|
191
|
+
if (this.config.healthCheck && effectivePort) {
|
|
192
|
+
this.checkHealth(effectivePort, this.config.healthCheck)
|
|
193
|
+
.then(() => this.emit('worker:ready', 0))
|
|
194
|
+
.catch((err) => this.emit('worker:error', { workerId: 0, error: err }));
|
|
195
|
+
}
|
|
196
|
+
else {
|
|
197
|
+
this.emit('worker:ready', 0);
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
return;
|
|
201
|
+
}
|
|
202
|
+
if (message === 'ready') {
|
|
203
|
+
this.forkReady = true;
|
|
204
|
+
this.clearForkLaunchTimer();
|
|
205
|
+
if (this.config.healthCheck && this.config.port) {
|
|
206
|
+
this.checkHealth(this.config.port, this.config.healthCheck)
|
|
207
|
+
.then(() => this.emit('worker:ready', 0))
|
|
208
|
+
.catch((err) => this.emit('worker:error', { workerId: 0, error: err }));
|
|
209
|
+
}
|
|
210
|
+
else {
|
|
211
|
+
this.emit('worker:ready', 0);
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
else {
|
|
215
|
+
this.emit('message', { workerId: 0, message });
|
|
216
|
+
}
|
|
217
|
+
});
|
|
218
|
+
child.on('exit', (code, signal) => {
|
|
219
|
+
this.clearForkLaunchTimer();
|
|
220
|
+
this.emit('worker:exit', { workerId: 0, code, signal });
|
|
221
|
+
if (!this.isShuttingDown) {
|
|
222
|
+
// Clean exit (code 0, no signal) is not a crash — don't restart or count
|
|
223
|
+
if (code === 0 && !signal) {
|
|
224
|
+
this.forkProcess = null;
|
|
225
|
+
this.emit('process:finished', { code, signal });
|
|
226
|
+
return;
|
|
227
|
+
}
|
|
228
|
+
const uptime = Date.now() - this.forkCreatedAt;
|
|
229
|
+
this.forkCrashes++;
|
|
230
|
+
if (this.forkRestarts < this.config.maxRestarts) {
|
|
231
|
+
if (uptime < this.config.minUptime) {
|
|
232
|
+
const stderrContext = this.recentStderr.length
|
|
233
|
+
? `\n Last stderr:\n ${this.recentStderr.join('\n ')}`
|
|
234
|
+
: '';
|
|
235
|
+
console.error(`[${this.config.name}] Process crashed after ${uptime}ms (exit code ${code ?? 'null'}, signal ${signal ?? 'none'})${stderrContext}`);
|
|
236
|
+
}
|
|
237
|
+
this.forkRestarts++;
|
|
238
|
+
// Exponential backoff: delay * 2^(restarts-1), capped at 15s
|
|
239
|
+
const backoffDelay = Math.min(this.config.restartDelay * Math.pow(2, this.forkRestarts - 1), 15000);
|
|
240
|
+
setTimeout(() => {
|
|
241
|
+
if (!this.isShuttingDown) {
|
|
242
|
+
this.startFork();
|
|
243
|
+
}
|
|
244
|
+
}, backoffDelay);
|
|
245
|
+
}
|
|
246
|
+
else {
|
|
247
|
+
const stderrContext = this.recentStderr.length
|
|
248
|
+
? `\n Last stderr:\n ${this.recentStderr.join('\n ')}`
|
|
249
|
+
: '';
|
|
250
|
+
console.error(`[${this.config.name}] Max restarts exceeded${stderrContext}`);
|
|
251
|
+
this.forkProcess = null;
|
|
252
|
+
this.emit('worker:maxRestarts', 0);
|
|
253
|
+
this.emit('process:finished', { code, signal });
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
else {
|
|
257
|
+
// Process exited during shutdown
|
|
258
|
+
this.forkProcess = null;
|
|
259
|
+
}
|
|
260
|
+
});
|
|
261
|
+
child.on('error', (err) => {
|
|
262
|
+
console.error(`[${this.config.name}] Process error:`, err.message);
|
|
263
|
+
this.emit('worker:error', { workerId: 0, error: err });
|
|
264
|
+
});
|
|
265
|
+
}
|
|
266
|
+
async startCluster() {
|
|
267
|
+
const clusterWrapperPath = join(__dirname, '..', 'cluster', 'ClusterWrapper.js');
|
|
268
|
+
const env = {
|
|
269
|
+
...process.env,
|
|
270
|
+
...this.config.env,
|
|
271
|
+
ORKIFY_SCRIPT: this.config.script,
|
|
272
|
+
ORKIFY_WORKERS: String(this.config.workerCount),
|
|
273
|
+
ORKIFY_PROCESS_NAME: this.config.name,
|
|
274
|
+
ORKIFY_PROCESS_ID: String(this.id),
|
|
275
|
+
ORKIFY_KILL_TIMEOUT: String(this.config.killTimeout),
|
|
276
|
+
ORKIFY_STICKY: String(this.config.sticky),
|
|
277
|
+
ORKIFY_RELOAD_RETRIES: String(this.config.reloadRetries ?? 3),
|
|
278
|
+
};
|
|
279
|
+
// Set sticky port for TCP-level session routing
|
|
280
|
+
if (this.config.sticky && this.config.port) {
|
|
281
|
+
env.ORKIFY_STICKY_PORT = String(this.config.port);
|
|
282
|
+
}
|
|
283
|
+
// Pass health check config to ClusterWrapper
|
|
284
|
+
if (this.config.healthCheck) {
|
|
285
|
+
env.ORKIFY_HEALTH_CHECK = this.config.healthCheck;
|
|
286
|
+
}
|
|
287
|
+
if (this.config.port !== undefined) {
|
|
288
|
+
env.ORKIFY_PORT = String(this.config.port);
|
|
289
|
+
}
|
|
290
|
+
if (this.cronSecret) {
|
|
291
|
+
env.ORKIFY_CRON_SECRET = this.cronSecret;
|
|
292
|
+
}
|
|
293
|
+
if (this.config.args.length > 0) {
|
|
294
|
+
env.ORKIFY_ARGS = JSON.stringify(this.config.args);
|
|
295
|
+
}
|
|
296
|
+
// Spawn the cluster wrapper as the primary
|
|
297
|
+
// windowsHide is supported by fork() but not in TypeScript types
|
|
298
|
+
// See: https://github.com/nodejs/node/issues/17370
|
|
299
|
+
this.clusterPrimary = fork(clusterWrapperPath, [], {
|
|
300
|
+
cwd: this.config.cwd,
|
|
301
|
+
env,
|
|
302
|
+
execArgv: this.config.nodeArgs,
|
|
303
|
+
stdio: ['pipe', 'pipe', 'pipe', 'ipc'],
|
|
304
|
+
detached: false,
|
|
305
|
+
windowsHide: true, // Hide subprocess console window on Windows
|
|
306
|
+
});
|
|
307
|
+
this.recentStderr = [];
|
|
308
|
+
this.setupClusterHandlers(this.clusterPrimary);
|
|
309
|
+
// Wait for primary to be ready
|
|
310
|
+
await this.waitForPrimaryReady();
|
|
311
|
+
}
|
|
312
|
+
setupClusterHandlers(primary) {
|
|
313
|
+
// With silent: true, per-worker output arrives via IPC (worker:output).
|
|
314
|
+
// Primary stdout still carries the ClusterWrapper's own log() lines — capture as primary (-1).
|
|
315
|
+
primary.stdout?.on('data', (data) => {
|
|
316
|
+
this.handleLog('out', -1, data);
|
|
317
|
+
});
|
|
318
|
+
primary.stderr?.on('data', (data) => {
|
|
319
|
+
this.handleLog('err', -1, data);
|
|
320
|
+
});
|
|
321
|
+
primary.on('message', (message) => {
|
|
322
|
+
const msg = message;
|
|
323
|
+
switch (msg.type) {
|
|
324
|
+
case 'primary:ready':
|
|
325
|
+
this.emit('primary:ready');
|
|
326
|
+
break;
|
|
327
|
+
case 'worker:ready': {
|
|
328
|
+
const readyWorkerId = msg.workerId;
|
|
329
|
+
this.clearLaunchTimer(readyWorkerId);
|
|
330
|
+
this.updateWorkerState(readyWorkerId, { ready: true, status: ProcessStatus.ONLINE });
|
|
331
|
+
this.emit('worker:ready', readyWorkerId);
|
|
332
|
+
break;
|
|
333
|
+
}
|
|
334
|
+
case 'worker:listening': {
|
|
335
|
+
const listeningWorkerId = msg.workerId;
|
|
336
|
+
const addr = msg.address;
|
|
337
|
+
if (addr?.port && !this.detectedPort) {
|
|
338
|
+
this.detectedPort = addr.port;
|
|
339
|
+
}
|
|
340
|
+
this.clearLaunchTimer(listeningWorkerId);
|
|
341
|
+
this.updateWorkerState(listeningWorkerId, { ready: true, status: ProcessStatus.ONLINE });
|
|
342
|
+
this.emit('worker:ready', listeningWorkerId);
|
|
343
|
+
break;
|
|
344
|
+
}
|
|
345
|
+
case 'worker:online': {
|
|
346
|
+
const onlineWorkerId = msg.workerId;
|
|
347
|
+
const onlinePid = msg.pid;
|
|
348
|
+
this.updateWorkerState(onlineWorkerId, {
|
|
349
|
+
pid: onlinePid,
|
|
350
|
+
status: ProcessStatus.LAUNCHING,
|
|
351
|
+
ready: false,
|
|
352
|
+
createdAt: Date.now(),
|
|
353
|
+
});
|
|
354
|
+
this.startLaunchTimer(onlineWorkerId);
|
|
355
|
+
break;
|
|
356
|
+
}
|
|
357
|
+
case 'worker:exit': {
|
|
358
|
+
const exitedWorkerId = msg.workerId;
|
|
359
|
+
const exitedPid = msg.pid;
|
|
360
|
+
this.clearLaunchTimer(exitedWorkerId);
|
|
361
|
+
const existing = this.clusterWorkers.get(exitedWorkerId);
|
|
362
|
+
if (existing) {
|
|
363
|
+
const newRestarts = (existing.restarts ?? 0) + 1;
|
|
364
|
+
this.slotRestarts.set(exitedWorkerId, newRestarts);
|
|
365
|
+
// Only delete if PID matches — during reload, a new worker already holds this slot
|
|
366
|
+
if (!exitedPid || existing.pid === exitedPid) {
|
|
367
|
+
// Tracked worker exited — count as error if not a deliberate shutdown, reload, or memory restart
|
|
368
|
+
if (!this.isShuttingDown &&
|
|
369
|
+
!this.isReloading &&
|
|
370
|
+
!this.memoryRestartingWorkers.has(exitedWorkerId)) {
|
|
371
|
+
const newCrashes = (existing.crashes ?? 0) + 1;
|
|
372
|
+
this.slotCrashes.set(exitedWorkerId, newCrashes);
|
|
373
|
+
}
|
|
374
|
+
this.memoryRestartingWorkers.delete(exitedWorkerId);
|
|
375
|
+
this.clusterWorkers.delete(exitedWorkerId);
|
|
376
|
+
}
|
|
377
|
+
else {
|
|
378
|
+
// PID mismatch: new worker already holds slot — carry the counter forward
|
|
379
|
+
existing.restarts = newRestarts;
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
this.emit('worker:exit', {
|
|
383
|
+
workerId: exitedWorkerId,
|
|
384
|
+
code: msg.code,
|
|
385
|
+
signal: msg.signal,
|
|
386
|
+
});
|
|
387
|
+
break;
|
|
388
|
+
}
|
|
389
|
+
case 'worker:metrics': {
|
|
390
|
+
const metricsWorkerId = msg.workerId;
|
|
391
|
+
const metricsData = msg.data;
|
|
392
|
+
const worker = this.clusterWorkers.get(metricsWorkerId);
|
|
393
|
+
if (worker && metricsData) {
|
|
394
|
+
worker.heapUsed = metricsData.heapUsed ?? 0;
|
|
395
|
+
worker.heapTotal = metricsData.heapTotal ?? 0;
|
|
396
|
+
worker.external = metricsData.external ?? 0;
|
|
397
|
+
worker.arrayBuffers = metricsData.arrayBuffers ?? 0;
|
|
398
|
+
worker.eventLoopLag = metricsData.eventLoopLag ?? 0;
|
|
399
|
+
worker.eventLoopLagP95 = metricsData.eventLoopLagP95 ?? 0;
|
|
400
|
+
worker.activeHandles = metricsData.activeHandles ?? 0;
|
|
401
|
+
if (metricsData.cacheSize !== undefined) {
|
|
402
|
+
worker.cacheSize = metricsData.cacheSize;
|
|
403
|
+
worker.cacheTotalBytes = metricsData.cacheTotalBytes;
|
|
404
|
+
worker.cacheHits = metricsData.cacheHits;
|
|
405
|
+
worker.cacheMisses = metricsData.cacheMisses;
|
|
406
|
+
worker.cacheHitRate = metricsData.cacheHitRate;
|
|
407
|
+
}
|
|
408
|
+
// Recover from launch timeout: if a worker is sending metrics,
|
|
409
|
+
// it's alive and its event loop is responsive. The 30s launch
|
|
410
|
+
// timeout already fired but the process didn't crash — it just
|
|
411
|
+
// took longer than expected to start (e.g. Next.js compilation).
|
|
412
|
+
if (worker.status === ProcessStatus.ERRORED) {
|
|
413
|
+
worker.status = ProcessStatus.ONLINE;
|
|
414
|
+
worker.ready = true;
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
break;
|
|
418
|
+
}
|
|
419
|
+
case 'worker:output': {
|
|
420
|
+
const outputWorkerId = msg.workerId;
|
|
421
|
+
const outputStream = msg.stream;
|
|
422
|
+
const outputData = msg.data;
|
|
423
|
+
this.handleLog(outputStream, outputWorkerId, Buffer.from(outputData));
|
|
424
|
+
break;
|
|
425
|
+
}
|
|
426
|
+
case 'worker:error:captured': {
|
|
427
|
+
this.emit('worker:error:captured', {
|
|
428
|
+
workerId: msg.workerId,
|
|
429
|
+
error: msg.data,
|
|
430
|
+
});
|
|
431
|
+
break;
|
|
432
|
+
}
|
|
433
|
+
case 'reload:complete': {
|
|
434
|
+
// Update stale flags from per-slot results
|
|
435
|
+
const results = (msg.results ?? []);
|
|
436
|
+
for (const result of results) {
|
|
437
|
+
if (result.status === 'stale') {
|
|
438
|
+
const worker = this.clusterWorkers.get(result.slotId);
|
|
439
|
+
if (worker) {
|
|
440
|
+
worker.stale = true;
|
|
441
|
+
}
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
// Clear stale flags if all slots succeeded
|
|
445
|
+
if (results.length > 0 && results.every((r) => r.status === 'success')) {
|
|
446
|
+
for (const worker of this.clusterWorkers.values()) {
|
|
447
|
+
worker.stale = false;
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
this.emit('reload:complete', { results });
|
|
451
|
+
break;
|
|
452
|
+
}
|
|
453
|
+
case 'restart-worker-failed': {
|
|
454
|
+
this.memoryRestartingWorkers.delete(msg.workerId);
|
|
455
|
+
break;
|
|
456
|
+
}
|
|
457
|
+
default:
|
|
458
|
+
this.emit('message', { workerId: -1, message: msg });
|
|
459
|
+
}
|
|
460
|
+
});
|
|
461
|
+
primary.on('exit', (code, signal) => {
|
|
462
|
+
this.clusterWorkers.clear();
|
|
463
|
+
this.emit('primary:exit', { code, signal });
|
|
464
|
+
if (!this.isShuttingDown) {
|
|
465
|
+
this.primaryRestarts++;
|
|
466
|
+
if (this.primaryRestarts <= this.config.maxRestarts) {
|
|
467
|
+
const stderrContext = this.recentStderr.length
|
|
468
|
+
? `\n Last stderr:\n ${this.recentStderr.join('\n ')}`
|
|
469
|
+
: '';
|
|
470
|
+
console.error(`[${this.config.name}] Cluster primary exited unexpectedly (exit code ${code ?? 'null'}, signal ${signal ?? 'none'}), restarting... (${this.primaryRestarts}/${this.config.maxRestarts})${stderrContext}`);
|
|
471
|
+
const backoffDelay = Math.min(this.config.restartDelay * Math.pow(2, this.primaryRestarts - 1), 15000);
|
|
472
|
+
setTimeout(() => {
|
|
473
|
+
if (!this.isShuttingDown) {
|
|
474
|
+
this.startCluster();
|
|
475
|
+
}
|
|
476
|
+
}, backoffDelay);
|
|
477
|
+
}
|
|
478
|
+
else {
|
|
479
|
+
const stderrContext = this.recentStderr.length
|
|
480
|
+
? `\n Last stderr:\n ${this.recentStderr.join('\n ')}`
|
|
481
|
+
: '';
|
|
482
|
+
console.error(`[${this.config.name}] Cluster primary max restarts exceeded${stderrContext}`);
|
|
483
|
+
this.clusterPrimary = null;
|
|
484
|
+
this.emit('worker:maxRestarts', -1);
|
|
485
|
+
this.emit('process:finished', { code, signal });
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
});
|
|
489
|
+
primary.on('error', (err) => {
|
|
490
|
+
console.error(`[${this.config.name}] Cluster primary error:`, err.message);
|
|
491
|
+
});
|
|
492
|
+
}
|
|
493
|
+
waitForPrimaryReady() {
|
|
494
|
+
return new Promise((resolve, reject) => {
|
|
495
|
+
const timeout = setTimeout(() => {
|
|
496
|
+
this.off('primary:ready', onReady);
|
|
497
|
+
reject(new Error(`Cluster primary failed to start within 10s`));
|
|
498
|
+
}, 10000);
|
|
499
|
+
const onReady = () => {
|
|
500
|
+
clearTimeout(timeout);
|
|
501
|
+
this.off('primary:ready', onReady);
|
|
502
|
+
resolve();
|
|
503
|
+
};
|
|
504
|
+
this.on('primary:ready', onReady);
|
|
505
|
+
});
|
|
506
|
+
}
|
|
507
|
+
updateWorkerState(workerId, updates) {
|
|
508
|
+
let state = this.clusterWorkers.get(workerId);
|
|
509
|
+
if (!state) {
|
|
510
|
+
state = {
|
|
511
|
+
id: workerId,
|
|
512
|
+
pid: 0,
|
|
513
|
+
status: ProcessStatus.LAUNCHING,
|
|
514
|
+
ready: false,
|
|
515
|
+
stale: false,
|
|
516
|
+
restarts: this.slotRestarts.get(workerId) ?? 0,
|
|
517
|
+
crashes: this.slotCrashes.get(workerId) ?? 0,
|
|
518
|
+
createdAt: Date.now(),
|
|
519
|
+
memory: 0,
|
|
520
|
+
cpu: 0,
|
|
521
|
+
heapUsed: 0,
|
|
522
|
+
heapTotal: 0,
|
|
523
|
+
external: 0,
|
|
524
|
+
arrayBuffers: 0,
|
|
525
|
+
eventLoopLag: 0,
|
|
526
|
+
eventLoopLagP95: 0,
|
|
527
|
+
activeHandles: 0,
|
|
528
|
+
};
|
|
529
|
+
this.clusterWorkers.set(workerId, state);
|
|
530
|
+
}
|
|
531
|
+
Object.assign(state, updates);
|
|
532
|
+
}
|
|
533
|
+
startLaunchTimer(workerId) {
|
|
534
|
+
this.clearLaunchTimer(workerId);
|
|
535
|
+
const timer = setTimeout(() => {
|
|
536
|
+
this.launchTimers.delete(workerId);
|
|
537
|
+
const worker = this.clusterWorkers.get(workerId);
|
|
538
|
+
if (worker && !worker.ready) {
|
|
539
|
+
worker.status = ProcessStatus.ERRORED;
|
|
540
|
+
console.error(`[ERROR] ${this.config.name}: worker ${workerId} failed to start — not listening and no ready signal after ${LAUNCH_TIMEOUT / 1000}s.\n` +
|
|
541
|
+
` Common causes:\n` +
|
|
542
|
+
` - Application crashed or hung during startup\n` +
|
|
543
|
+
` - Running a dev server in cluster mode (e.g., Next.js dev with -w 0)\n` +
|
|
544
|
+
` - Missing process.send('ready') for apps that don't bind a port`);
|
|
545
|
+
this.emit('worker:error', {
|
|
546
|
+
workerId,
|
|
547
|
+
error: new Error(`Worker ${workerId} launch timeout after ${LAUNCH_TIMEOUT / 1000}s`),
|
|
548
|
+
});
|
|
549
|
+
}
|
|
550
|
+
}, LAUNCH_TIMEOUT);
|
|
551
|
+
this.launchTimers.set(workerId, timer);
|
|
552
|
+
}
|
|
553
|
+
clearLaunchTimer(workerId) {
|
|
554
|
+
const timer = this.launchTimers.get(workerId);
|
|
555
|
+
if (timer) {
|
|
556
|
+
clearTimeout(timer);
|
|
557
|
+
this.launchTimers.delete(workerId);
|
|
558
|
+
}
|
|
559
|
+
}
|
|
560
|
+
clearAllLaunchTimers() {
|
|
561
|
+
for (const timer of this.launchTimers.values()) {
|
|
562
|
+
clearTimeout(timer);
|
|
563
|
+
}
|
|
564
|
+
this.launchTimers.clear();
|
|
565
|
+
}
|
|
566
|
+
async checkHealth(port, path) {
|
|
567
|
+
const url = `http://localhost:${port}${path}`;
|
|
568
|
+
for (let attempt = 0; attempt < 3; attempt++) {
|
|
569
|
+
try {
|
|
570
|
+
const resp = await fetch(url, { signal: AbortSignal.timeout(5000) });
|
|
571
|
+
if (resp.status >= 200 && resp.status < 300)
|
|
572
|
+
return;
|
|
573
|
+
}
|
|
574
|
+
catch {
|
|
575
|
+
// Retry — app may still be booting
|
|
576
|
+
}
|
|
577
|
+
if (attempt < 2)
|
|
578
|
+
await new Promise((r) => setTimeout(r, 1000));
|
|
579
|
+
}
|
|
580
|
+
throw new Error(`Health check failed: ${url}`);
|
|
581
|
+
}
|
|
582
|
+
handleLog(type, workerId, data) {
|
|
583
|
+
const line = data.toString();
|
|
584
|
+
const timestamp = new Date().toISOString();
|
|
585
|
+
const workerLabel = workerId === -1 ? 'primary' : workerId;
|
|
586
|
+
const logLine = `[${timestamp}] [${this.config.name}:${workerLabel}] ${line}`;
|
|
587
|
+
if (type === 'out') {
|
|
588
|
+
this.outWriter?.write(logLine);
|
|
589
|
+
}
|
|
590
|
+
else {
|
|
591
|
+
this.errWriter?.write(logLine);
|
|
592
|
+
// Buffer recent stderr for crash diagnostics
|
|
593
|
+
const trimmed = line.trimEnd();
|
|
594
|
+
if (trimmed) {
|
|
595
|
+
this.recentStderr.push(trimmed);
|
|
596
|
+
if (this.recentStderr.length > ManagedProcess.STDERR_BUFFER_SIZE) {
|
|
597
|
+
this.recentStderr.shift();
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
this.emit('log', { type, workerId, data: line });
|
|
602
|
+
}
|
|
603
|
+
setupWatcher() {
|
|
604
|
+
const paths = this.config.watchPaths || [this.config.cwd];
|
|
605
|
+
this.watcher = watch(paths, {
|
|
606
|
+
ignored: /(^|[/\\])\.|node_modules/,
|
|
607
|
+
persistent: true,
|
|
608
|
+
ignoreInitial: true,
|
|
609
|
+
});
|
|
610
|
+
let reloadTimeout = null;
|
|
611
|
+
this.watcher.on('change', (path) => {
|
|
612
|
+
this.emit('watch:change', path);
|
|
613
|
+
if (reloadTimeout) {
|
|
614
|
+
clearTimeout(reloadTimeout);
|
|
615
|
+
}
|
|
616
|
+
reloadTimeout = setTimeout(() => {
|
|
617
|
+
this.emit('watch:reload');
|
|
618
|
+
}, 300);
|
|
619
|
+
});
|
|
620
|
+
}
|
|
621
|
+
startStatsCollection() {
|
|
622
|
+
this.statsInterval = setInterval(async () => {
|
|
623
|
+
await this.collectStats();
|
|
624
|
+
}, 1000);
|
|
625
|
+
}
|
|
626
|
+
async collectStats() {
|
|
627
|
+
if (this.config.execMode === ExecMode.FORK && this.forkProcess?.pid) {
|
|
628
|
+
try {
|
|
629
|
+
const stats = await pidusage(this.forkProcess.pid);
|
|
630
|
+
// Store stats for getInfo() — preserve probe metrics
|
|
631
|
+
this.forkStats.memory = stats.memory;
|
|
632
|
+
this.forkStats.cpu = stats.cpu;
|
|
633
|
+
}
|
|
634
|
+
catch {
|
|
635
|
+
// Process might have exited
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
else if (this.config.execMode === ExecMode.CLUSTER) {
|
|
639
|
+
// Collect stats for each worker
|
|
640
|
+
for (const [_workerId, state] of this.clusterWorkers) {
|
|
641
|
+
if (state.pid) {
|
|
642
|
+
try {
|
|
643
|
+
const stats = await pidusage(state.pid);
|
|
644
|
+
state.memory = stats.memory;
|
|
645
|
+
state.cpu = stats.cpu;
|
|
646
|
+
}
|
|
647
|
+
catch {
|
|
648
|
+
// Worker might have exited
|
|
649
|
+
}
|
|
650
|
+
}
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
this.checkMemoryThreshold();
|
|
654
|
+
}
|
|
655
|
+
checkMemoryThreshold() {
|
|
656
|
+
const limit = this.config.restartOnMemory;
|
|
657
|
+
if (!limit)
|
|
658
|
+
return;
|
|
659
|
+
if (this.isShuttingDown)
|
|
660
|
+
return;
|
|
661
|
+
if (this.config.execMode === ExecMode.FORK) {
|
|
662
|
+
// Cooldown: skip checks for 30s after a memory-triggered restart
|
|
663
|
+
if (Date.now() - this.lastMemoryRestart < MEMORY_RESTART_COOLDOWN)
|
|
664
|
+
return;
|
|
665
|
+
if (this.forkStats.memory > limit) {
|
|
666
|
+
console.log(`[${this.config.name}] RSS ${formatBytes(this.forkStats.memory)} exceeds ${formatBytes(limit)}, restarting`);
|
|
667
|
+
this.lastMemoryRestart = Date.now();
|
|
668
|
+
this.forkRestarts++;
|
|
669
|
+
this.emit('worker:memoryRestart', {
|
|
670
|
+
workerId: 0,
|
|
671
|
+
memory: this.forkStats.memory,
|
|
672
|
+
limit,
|
|
673
|
+
});
|
|
674
|
+
this.memoryRestartFork().catch((err) => {
|
|
675
|
+
console.error(`[${this.config.name}] Memory-triggered restart failed:`, err.message);
|
|
676
|
+
});
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
else {
|
|
680
|
+
// Cluster: check each worker individually
|
|
681
|
+
for (const [workerId, state] of this.clusterWorkers) {
|
|
682
|
+
const lastRestart = this.workerMemoryCooldowns.get(workerId) ?? 0;
|
|
683
|
+
if (Date.now() - lastRestart < MEMORY_RESTART_COOLDOWN)
|
|
684
|
+
continue;
|
|
685
|
+
if (state.memory > limit) {
|
|
686
|
+
console.log(`[${this.config.name}] Worker ${workerId} RSS ${formatBytes(state.memory)} exceeds ${formatBytes(limit)}, restarting worker`);
|
|
687
|
+
this.workerMemoryCooldowns.set(workerId, Date.now());
|
|
688
|
+
this.emit('worker:memoryRestart', {
|
|
689
|
+
workerId,
|
|
690
|
+
memory: state.memory,
|
|
691
|
+
limit,
|
|
692
|
+
});
|
|
693
|
+
this.memoryRestartWorker(workerId);
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
}
|
|
698
|
+
async memoryRestartFork() {
|
|
699
|
+
await this.stop();
|
|
700
|
+
this.isShuttingDown = false;
|
|
701
|
+
this.forkReady = false;
|
|
702
|
+
this.setupLogStreams();
|
|
703
|
+
await this.start();
|
|
704
|
+
}
|
|
705
|
+
memoryRestartWorker(workerId) {
|
|
706
|
+
if (this.isReloading || !this.clusterPrimary?.connected)
|
|
707
|
+
return;
|
|
708
|
+
this.memoryRestartingWorkers.add(workerId);
|
|
709
|
+
this.clusterPrimary.send({ type: 'restart-worker', workerId });
|
|
710
|
+
}
|
|
711
|
+
async stop(opts) {
|
|
712
|
+
this.isShuttingDown = true;
|
|
713
|
+
this.clearForkLaunchTimer();
|
|
714
|
+
this.clearAllLaunchTimers();
|
|
715
|
+
if (this.statsInterval) {
|
|
716
|
+
clearInterval(this.statsInterval);
|
|
717
|
+
this.statsInterval = null;
|
|
718
|
+
}
|
|
719
|
+
if (this.watcher) {
|
|
720
|
+
await this.watcher.close();
|
|
721
|
+
this.watcher = null;
|
|
722
|
+
}
|
|
723
|
+
if (this.config.execMode === ExecMode.FORK) {
|
|
724
|
+
await this.stopFork(opts);
|
|
725
|
+
}
|
|
726
|
+
else {
|
|
727
|
+
await this.stopCluster(opts);
|
|
728
|
+
}
|
|
729
|
+
this.outWriter?.end();
|
|
730
|
+
this.errWriter?.end();
|
|
731
|
+
}
|
|
732
|
+
async flushLogs() {
|
|
733
|
+
await Promise.all([this.outWriter?.flush(), this.errWriter?.flush()]);
|
|
734
|
+
}
|
|
735
|
+
/**
|
|
736
|
+
* Immediately SIGKILL all child processes without waiting for graceful shutdown.
|
|
737
|
+
*/
|
|
738
|
+
forceKill() {
|
|
739
|
+
this.isShuttingDown = true;
|
|
740
|
+
this.clearForkLaunchTimer();
|
|
741
|
+
this.clearAllLaunchTimers();
|
|
742
|
+
if (this.statsInterval) {
|
|
743
|
+
clearInterval(this.statsInterval);
|
|
744
|
+
this.statsInterval = null;
|
|
745
|
+
}
|
|
746
|
+
if (this.config.execMode === ExecMode.FORK) {
|
|
747
|
+
if (this.forkProcess) {
|
|
748
|
+
this.forkProcess.kill('SIGKILL');
|
|
749
|
+
this.forkProcess = null;
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
else {
|
|
753
|
+
if (this.clusterPrimary) {
|
|
754
|
+
this.clusterPrimary.kill('SIGKILL');
|
|
755
|
+
this.clusterPrimary = null;
|
|
756
|
+
this.clusterWorkers.clear();
|
|
757
|
+
}
|
|
758
|
+
}
|
|
759
|
+
this.outWriter?.end();
|
|
760
|
+
this.errWriter?.end();
|
|
761
|
+
}
|
|
762
|
+
async stopFork(opts) {
|
|
763
|
+
const child = this.forkProcess;
|
|
764
|
+
if (!child)
|
|
765
|
+
return;
|
|
766
|
+
return new Promise((resolve) => {
|
|
767
|
+
const timeout = setTimeout(() => {
|
|
768
|
+
child.kill('SIGKILL');
|
|
769
|
+
resolve();
|
|
770
|
+
}, this.config.killTimeout);
|
|
771
|
+
child.once('exit', () => {
|
|
772
|
+
clearTimeout(timeout);
|
|
773
|
+
this.forkProcess = null;
|
|
774
|
+
resolve();
|
|
775
|
+
});
|
|
776
|
+
if (opts?.persistCache && child.connected) {
|
|
777
|
+
const flushTimer = setTimeout(() => child.kill('SIGTERM'), 2000);
|
|
778
|
+
child.once('message', (msg) => {
|
|
779
|
+
const m = msg;
|
|
780
|
+
if (m?.__orkify && m.type === 'cache:flushed') {
|
|
781
|
+
clearTimeout(flushTimer);
|
|
782
|
+
}
|
|
783
|
+
});
|
|
784
|
+
try {
|
|
785
|
+
child.send({ __orkify: true, type: 'cache:flush' });
|
|
786
|
+
}
|
|
787
|
+
catch {
|
|
788
|
+
clearTimeout(flushTimer);
|
|
789
|
+
child.kill('SIGTERM');
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
else {
|
|
793
|
+
child.kill('SIGTERM');
|
|
794
|
+
}
|
|
795
|
+
});
|
|
796
|
+
}
|
|
797
|
+
async stopCluster(opts) {
|
|
798
|
+
const primary = this.clusterPrimary;
|
|
799
|
+
if (!primary)
|
|
800
|
+
return;
|
|
801
|
+
return new Promise((resolve) => {
|
|
802
|
+
const timeout = setTimeout(() => {
|
|
803
|
+
primary.kill('SIGKILL');
|
|
804
|
+
resolve();
|
|
805
|
+
}, this.config.killTimeout + 5000); // Extra time for workers
|
|
806
|
+
primary.once('exit', () => {
|
|
807
|
+
clearTimeout(timeout);
|
|
808
|
+
this.clusterPrimary = null;
|
|
809
|
+
this.clusterWorkers.clear();
|
|
810
|
+
resolve();
|
|
811
|
+
});
|
|
812
|
+
// Send shutdown command to cluster primary
|
|
813
|
+
if (primary.connected) {
|
|
814
|
+
primary.send({ type: 'shutdown', persistCache: opts?.persistCache });
|
|
815
|
+
}
|
|
816
|
+
else {
|
|
817
|
+
primary.kill('SIGTERM');
|
|
818
|
+
}
|
|
819
|
+
});
|
|
820
|
+
}
|
|
821
|
+
async restart() {
|
|
822
|
+
await this.stop();
|
|
823
|
+
this.isShuttingDown = false;
|
|
824
|
+
this.forkRestarts = 0;
|
|
825
|
+
this.forkCrashes = 0;
|
|
826
|
+
this.forkReady = false;
|
|
827
|
+
this.primaryRestarts = 0;
|
|
828
|
+
this.lastMemoryRestart = 0;
|
|
829
|
+
this.workerMemoryCooldowns.clear();
|
|
830
|
+
this.memoryRestartingWorkers.clear();
|
|
831
|
+
this.clusterWorkers.clear();
|
|
832
|
+
this.slotRestarts.clear();
|
|
833
|
+
this.slotCrashes.clear();
|
|
834
|
+
// Re-create log writers since stop() closed them
|
|
835
|
+
this.setupLogStreams();
|
|
836
|
+
await this.start();
|
|
837
|
+
}
|
|
838
|
+
async reload() {
|
|
839
|
+
if (this.config.execMode === ExecMode.FORK) {
|
|
840
|
+
// For fork mode, just restart
|
|
841
|
+
await this.restart();
|
|
842
|
+
return;
|
|
843
|
+
}
|
|
844
|
+
// For cluster mode, send reload command to primary
|
|
845
|
+
const primary = this.clusterPrimary;
|
|
846
|
+
if (!primary?.connected) {
|
|
847
|
+
throw new Error('Cluster primary not connected');
|
|
848
|
+
}
|
|
849
|
+
this.isReloading = true;
|
|
850
|
+
// Compute a timeout that accommodates the worst-case reload duration:
|
|
851
|
+
// - Success path: each worker waits up to LAUNCH_TIMEOUT to become ready + KILL_TIMEOUT to stop old
|
|
852
|
+
// - Failure path: one slot retries (reloadRetries+1) × LAUNCH_TIMEOUT before aborting
|
|
853
|
+
const retries = this.config.reloadRetries ?? 3;
|
|
854
|
+
const perSlotSuccess = LAUNCH_TIMEOUT + this.config.killTimeout;
|
|
855
|
+
const failurePath = (retries + 1) * LAUNCH_TIMEOUT + this.config.killTimeout;
|
|
856
|
+
const reloadTimeout = Math.max(this.config.workerCount * perSlotSuccess, failurePath) + 5000;
|
|
857
|
+
return new Promise((resolve, reject) => {
|
|
858
|
+
const timeout = setTimeout(() => {
|
|
859
|
+
this.isReloading = false;
|
|
860
|
+
reject(new Error('Reload timeout'));
|
|
861
|
+
}, reloadTimeout);
|
|
862
|
+
const onComplete = () => {
|
|
863
|
+
clearTimeout(timeout);
|
|
864
|
+
this.off('reload:complete', onComplete);
|
|
865
|
+
this.isReloading = false;
|
|
866
|
+
resolve();
|
|
867
|
+
};
|
|
868
|
+
this.on('reload:complete', onComplete);
|
|
869
|
+
primary.send({ type: 'reload' });
|
|
870
|
+
});
|
|
871
|
+
}
|
|
872
|
+
getInfo() {
|
|
873
|
+
const workers = [];
|
|
874
|
+
if (this.config.execMode === ExecMode.FORK) {
|
|
875
|
+
const stats = this.forkStats;
|
|
876
|
+
if (this.forkProcess) {
|
|
877
|
+
workers.push({
|
|
878
|
+
id: 0,
|
|
879
|
+
pid: this.forkProcess.pid || 0,
|
|
880
|
+
status: this.isShuttingDown ? ProcessStatus.STOPPING : ProcessStatus.ONLINE,
|
|
881
|
+
restarts: this.forkRestarts,
|
|
882
|
+
crashes: this.forkCrashes,
|
|
883
|
+
uptime: Date.now() - this.forkCreatedAt,
|
|
884
|
+
memory: stats.memory,
|
|
885
|
+
cpu: stats.cpu,
|
|
886
|
+
createdAt: this.forkCreatedAt,
|
|
887
|
+
heapUsed: stats.heapUsed,
|
|
888
|
+
heapTotal: stats.heapTotal,
|
|
889
|
+
external: stats.external,
|
|
890
|
+
arrayBuffers: stats.arrayBuffers,
|
|
891
|
+
eventLoopLag: stats.eventLoopLag,
|
|
892
|
+
eventLoopLagP95: stats.eventLoopLagP95,
|
|
893
|
+
activeHandles: stats.activeHandles,
|
|
894
|
+
...(stats.cacheSize !== undefined && {
|
|
895
|
+
cacheSize: stats.cacheSize,
|
|
896
|
+
cacheTotalBytes: stats.cacheTotalBytes,
|
|
897
|
+
cacheHits: stats.cacheHits,
|
|
898
|
+
cacheMisses: stats.cacheMisses,
|
|
899
|
+
cacheHitRate: stats.cacheHitRate,
|
|
900
|
+
}),
|
|
901
|
+
});
|
|
902
|
+
}
|
|
903
|
+
else {
|
|
904
|
+
// Process has stopped - still show worker entry for restart count
|
|
905
|
+
workers.push({
|
|
906
|
+
id: 0,
|
|
907
|
+
pid: 0,
|
|
908
|
+
status: ProcessStatus.STOPPED,
|
|
909
|
+
restarts: this.forkRestarts,
|
|
910
|
+
crashes: this.forkCrashes,
|
|
911
|
+
uptime: 0,
|
|
912
|
+
memory: 0,
|
|
913
|
+
cpu: 0,
|
|
914
|
+
createdAt: this.forkCreatedAt,
|
|
915
|
+
});
|
|
916
|
+
}
|
|
917
|
+
}
|
|
918
|
+
else {
|
|
919
|
+
for (const state of this.clusterWorkers.values()) {
|
|
920
|
+
workers.push({
|
|
921
|
+
id: state.id,
|
|
922
|
+
pid: state.pid,
|
|
923
|
+
status: state.status,
|
|
924
|
+
restarts: state.restarts,
|
|
925
|
+
crashes: state.crashes,
|
|
926
|
+
uptime: Date.now() - state.createdAt,
|
|
927
|
+
memory: state.memory,
|
|
928
|
+
cpu: state.cpu,
|
|
929
|
+
createdAt: state.createdAt,
|
|
930
|
+
stale: state.stale || undefined,
|
|
931
|
+
heapUsed: state.heapUsed,
|
|
932
|
+
heapTotal: state.heapTotal,
|
|
933
|
+
external: state.external,
|
|
934
|
+
arrayBuffers: state.arrayBuffers,
|
|
935
|
+
eventLoopLag: state.eventLoopLag,
|
|
936
|
+
eventLoopLagP95: state.eventLoopLagP95,
|
|
937
|
+
activeHandles: state.activeHandles,
|
|
938
|
+
...(state.cacheSize !== undefined && {
|
|
939
|
+
cacheSize: state.cacheSize,
|
|
940
|
+
cacheTotalBytes: state.cacheTotalBytes,
|
|
941
|
+
cacheHits: state.cacheHits,
|
|
942
|
+
cacheMisses: state.cacheMisses,
|
|
943
|
+
cacheHitRate: state.cacheHitRate,
|
|
944
|
+
}),
|
|
945
|
+
});
|
|
946
|
+
}
|
|
947
|
+
workers.sort((a, b) => a.id - b.id);
|
|
948
|
+
}
|
|
949
|
+
return {
|
|
950
|
+
id: this.id,
|
|
951
|
+
name: this.config.name,
|
|
952
|
+
script: this.config.script,
|
|
953
|
+
cwd: this.config.cwd,
|
|
954
|
+
execMode: this.config.execMode,
|
|
955
|
+
workerCount: this.config.workerCount,
|
|
956
|
+
status: this.getStatus(),
|
|
957
|
+
workers,
|
|
958
|
+
pid: this.forkProcess?.pid || this.clusterPrimary?.pid,
|
|
959
|
+
createdAt: workers[0]?.createdAt || Date.now(),
|
|
960
|
+
watch: this.config.watch,
|
|
961
|
+
sticky: this.config.sticky,
|
|
962
|
+
port: this.config.port ?? this.detectedPort,
|
|
963
|
+
};
|
|
964
|
+
}
|
|
965
|
+
getStatus() {
|
|
966
|
+
if (this.config.execMode === ExecMode.FORK) {
|
|
967
|
+
// Check if process exists first - if null, it's stopped regardless of flags
|
|
968
|
+
if (!this.forkProcess) {
|
|
969
|
+
return ProcessStatus.STOPPED;
|
|
970
|
+
}
|
|
971
|
+
if (this.isShuttingDown) {
|
|
972
|
+
return ProcessStatus.STOPPING;
|
|
973
|
+
}
|
|
974
|
+
return ProcessStatus.ONLINE;
|
|
975
|
+
}
|
|
976
|
+
// Cluster mode - check if primary exists first
|
|
977
|
+
if (!this.clusterPrimary) {
|
|
978
|
+
return ProcessStatus.STOPPED;
|
|
979
|
+
}
|
|
980
|
+
if (this.isShuttingDown) {
|
|
981
|
+
return ProcessStatus.STOPPING;
|
|
982
|
+
}
|
|
983
|
+
if (this.clusterWorkers.size === 0) {
|
|
984
|
+
return ProcessStatus.LAUNCHING;
|
|
985
|
+
}
|
|
986
|
+
const statuses = Array.from(this.clusterWorkers.values()).map((w) => w.status);
|
|
987
|
+
if (statuses.every((s) => s === ProcessStatus.ONLINE)) {
|
|
988
|
+
return ProcessStatus.ONLINE;
|
|
989
|
+
}
|
|
990
|
+
if (statuses.some((s) => s === ProcessStatus.ERRORED)) {
|
|
991
|
+
return ProcessStatus.ERRORED;
|
|
992
|
+
}
|
|
993
|
+
return ProcessStatus.LAUNCHING;
|
|
994
|
+
}
|
|
995
|
+
getWorkerCount() {
|
|
996
|
+
if (this.config.execMode === ExecMode.FORK) {
|
|
997
|
+
return this.forkProcess ? 1 : 0;
|
|
998
|
+
}
|
|
999
|
+
return this.clusterWorkers.size;
|
|
1000
|
+
}
|
|
1001
|
+
isRunning() {
|
|
1002
|
+
if (this.config.execMode === ExecMode.FORK) {
|
|
1003
|
+
return !!this.forkProcess && !this.isShuttingDown;
|
|
1004
|
+
}
|
|
1005
|
+
return !!this.clusterPrimary && !this.isShuttingDown;
|
|
1006
|
+
}
|
|
1007
|
+
}
|
|
1008
|
+
/**
 * Formats a byte count for humans using 1024-based units.
 *
 * Values below 1024 are printed as-is with a "B" suffix; larger values use
 * one decimal place in KB, MB or GB (GB is the largest unit). The unit is
 * chosen from the rounded value, so 1048575 renders as "1.0 MB" rather than
 * "1024.0 KB".
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} For example "512 B", "1.5 KB", "20.0 MB" or "3.2 GB".
 */
function formatBytes(bytes) {
    const STEP = 1024;
    const UNITS = ['KB', 'MB', 'GB'];
    // Written as a negated comparison so NaN falls through to the plain output.
    if (!(bytes >= STEP)) {
        return `${bytes} B`;
    }
    let value = bytes / STEP;
    let unit = 0;
    // Promote while the value, as it would be displayed, reaches 1024.
    while (unit < UNITS.length - 1 && Number(value.toFixed(1)) >= STEP) {
        value /= STEP;
        unit++;
    }
    return `${value.toFixed(1)} ${UNITS[unit]}`;
}
|
|
1020
|
+
//# sourceMappingURL=ManagedProcess.js.map
|