@yemi33/minions 0.1.2092 → 0.1.2094

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/minions.js CHANGED
@@ -246,6 +246,69 @@ function spawnDashboard() {
246
246
  return proc;
247
247
  }
248
248
 
249
/**
 * Launch the external supervisor (engine/supervisor.js) as a detached
 * background process.
 *
 * The supervisor polls the engine and the dashboard and respawns whichever
 * one is down, unless `engine/stop-intent.json` exists. Spawning it twice in
 * quick succession is harmless: supervisor.js exits on startup when
 * supervisor.pid already names a live process.
 *
 * stdin is ignored; stdout and stderr each get their own handle on
 * `supervisor-stdio.log`. The child is unref'd so this CLI can exit without
 * waiting for it.
 *
 * @returns {import('child_process').ChildProcess} handle of the spawned supervisor
 */
function spawnSupervisor() {
  const logName = 'supervisor-stdio.log';
  const stdoutTarget = _openStdioLog(logName);
  const stderrTarget = _openStdioLog(logName);
  const scriptPath = path.join(MINIONS_HOME, 'engine', 'supervisor.js');
  const spawnOptions = {
    cwd: MINIONS_HOME,
    stdio: ['ignore', stdoutTarget, stderrTarget],
    detached: true,
    windowsHide: true,
  };
  const child = spawn(process.execPath, [scriptPath], spawnOptions);
  child.unref();
  return child;
}
264
+
265
// Location of the supervisor's PID file. Computed on every call instead of
// once at definition time, because MINIONS_HOME is only initialized later
// during module load.
const _supervisorPidPath = () => {
  const pidFile = path.join(MINIONS_HOME, 'engine', 'supervisor.pid');
  return pidFile;
};
269
+
270
/**
 * Remove the stop-intent flag so the supervisor goes back to guarding the
 * engine and dashboard. Start/restart paths call this after the old
 * processes have been killed and before the new ones are spawned.
 *
 * Prefers engine/shared.js so the CLI, engine, and dashboard share one
 * implementation. If that call throws (for example because `shared` is not
 * usable yet), the flag file is unlinked directly. Every failure is
 * swallowed: this is best effort.
 */
function clearStopIntent() {
  try {
    shared.clearStopIntent();
    return;
  } catch {
    // shared.js unavailable — fall back to removing the file ourselves.
  }
  try {
    const flagPath = path.join(MINIONS_HOME, 'engine', 'stop-intent.json');
    fs.unlinkSync(flagPath);
  } catch {
    // Flag already absent or not removable; nothing more to do.
  }
}
281
+
282
/**
 * Write the stop-intent flag so the supervisor stands down instead of
 * respawning the engine/dashboard that is about to be killed. Called at the
 * top of every stop/upgrade/restart/uninstall path.
 *
 * Delegates to engine/shared.js first. That helper reports failure by
 * returning `false` rather than throwing, so the direct-write fallback runs
 * both when the call throws and when it returns `false`.
 *
 * @param {string} [source] Informational label stored in the flag body;
 *   defaults to 'minions cli'.
 * @returns {boolean} true when the flag was written by either path, false
 *   when both attempts failed (callers may ignore this — best effort).
 */
function writeStopIntent(source) {
  const label = source || 'minions cli';
  try {
    // `!== false` keeps a helper that returns nothing counted as success.
    if (shared.writeStopIntent(label) !== false) return true;
  } catch {
    // shared.js unavailable — fall through to the direct write.
  }
  try {
    const flagPath = path.join(MINIONS_HOME, 'engine', 'stop-intent.json');
    fs.writeFileSync(flagPath, JSON.stringify({
      stoppedAt: new Date().toISOString(),
      source: label,
    }, null, 2));
    return true;
  } catch {
    return false; // best effort
  }
}
296
+
297
/**
 * Terminate the supervisor named by its PID file, then remove the PID file.
 * Used by stop/restart/upgrade/uninstall paths after the stop-intent flag
 * has been written.
 *
 * The PID must be a positive integer other than this process. Without that
 * check a corrupt file holding `-1` or `-123` would reach `process.kill`,
 * which treats negative values as "signal a whole process group" (or every
 * signalable process for -1). Invalid content leaves the file untouched,
 * as before.
 *
 * All failures are swallowed: a missing file means no supervisor is
 * running, and a failed kill means it is already gone.
 */
function killSupervisor() {
  let pid;
  try {
    pid = Number(fs.readFileSync(_supervisorPidPath(), 'utf8').trim());
  } catch {
    return; // no PID file — supervisor not running
  }
  if (!Number.isInteger(pid) || pid <= 0 || pid === process.pid) return;

  if (process.platform === 'win32') {
    try {
      execSync(`taskkill /F /PID ${pid}`, { stdio: 'ignore', timeout: 5000, windowsHide: true });
    } catch {
      // process already gone or taskkill unavailable
    }
  } else {
    try {
      process.kill(pid, 'SIGTERM');
    } catch {
      // process already gone or not ours to signal
    }
  }
  try {
    fs.unlinkSync(_supervisorPidPath());
  } catch {
    // PID file already removed
  }
}
311
+
249
312
  const LEGACY_DEFAULT_SQUAD_HOME = path.join(os.homedir(), '.squad');
250
313
  const LEGACY_ROOT_POINTER_PATH = path.join(os.homedir(), '.squad-root');
251
314
 
@@ -504,6 +567,10 @@ function init() {
504
567
  const dashWasUp = isPortListening(DASH_PORT);
505
568
  const restartStartMs = Date.now();
506
569
  if (isUpgrade) {
570
+ // Pre-write stop-intent so the supervisor doesn't race-respawn the
571
+ // engine/dashboard we're about to kill for upgrade.
572
+ writeStopIntent('minions upgrade');
573
+ killSupervisor();
507
574
  try { execSync(`node "${path.join(MINIONS_HOME, 'engine.js')}" stop`, { stdio: 'ignore', cwd: MINIONS_HOME, timeout: 10000, windowsHide: true }); } catch {}
508
575
  // Free the dashboard port too — without this the new dashboard EADDRINUSE-dies
509
576
  // silently and the user keeps running stale code from the old dashboard process.
@@ -511,6 +578,9 @@ function init() {
511
578
  // Clear AFTER kill so the old dashboard can't repopulate during shutdown.
512
579
  _clearDashboardBrowserState(MINIONS_HOME);
513
580
  }
581
+ // Clear stop-intent so the supervisor we're about to spawn (and any
582
+ // already-running one whose PID file survived) resumes guarding.
583
+ clearStopIntent();
514
584
  console.log(isUpgrade
515
585
  ? `\n Upgrade complete (${pkgVersion}). Restarting engine and dashboard...\n`
516
586
  : '\n Starting engine and dashboard...\n');
@@ -524,6 +594,9 @@ function init() {
524
594
  console.log(` Dashboard started (PID: ${dashProc.pid})`);
525
595
  console.log(` Dashboard: http://localhost:${DASH_PORT}`);
526
596
 
597
+ const supProc = spawnSupervisor();
598
+ console.log(` Supervisor started (PID: ${supProc.pid})`);
599
+
527
600
  void (async () => {
528
601
  const shouldOpen = forceOpen || !dashWasUp ||
529
602
  !(await _waitForBrowserReconnect(MINIONS_HOME, { afterMs: restartStartMs, timeoutMs: 5000 }));
@@ -822,6 +895,11 @@ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
822
895
  ensureInstalled();
823
896
  const dashWasUp = isPortListening(DASH_PORT);
824
897
  const restartStartMs = Date.now();
898
+ // Pre-write stop-intent so the supervisor doesn't race-respawn the
899
+ // engine/dashboard we're about to kill. Killed before the engine/dashboard
900
+ // so its 30s tick doesn't fire mid-restart against a half-dead engine.
901
+ writeStopIntent('minions restart');
902
+ killSupervisor();
825
903
  // Layered kill — each step is best-effort, so the next still runs if one
826
904
  // fails. Goal: the old engine is gone before we spawn a new one, even if
827
905
  // PowerShell is unavailable, the engine is hung, or its cmdline doesn't match.
@@ -831,7 +909,7 @@ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
831
909
  // survive so the new engine can re-attach them via PID files).
832
910
  killPidOnly(oldEnginePid);
833
911
  killByPort(DASH_PORT);
834
- killMinionsProcesses(['engine.js', 'dashboard.js']);
912
+ killMinionsProcesses(['engine.js', 'dashboard.js', 'supervisor.js']);
835
913
  // Confirm the OS finished the asynchronous termination before we spawn new
836
914
  // processes. Without this, `taskkill /F` returns immediately while the
837
915
  // kernel is still releasing the dashboard's port; the new dashboard spawned
@@ -857,6 +935,8 @@ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
857
935
  // Clear stale beacons AFTER the kill so the old dashboard's last writes
858
936
  // can't repopulate the file in the gap between clear and shutdown.
859
937
  _clearDashboardBrowserState(MINIONS_HOME);
938
+ // Clear stop-intent so the freshly-spawned supervisor resumes guarding.
939
+ clearStopIntent();
860
940
  const engineOut = _openStdioLog('engine-stdio.log');
861
941
  const engineErr = _openStdioLog('engine-stdio.log');
862
942
  const engineProc = spawn(process.execPath, [path.join(MINIONS_HOME, 'engine.js'), 'start', ...rest], {
@@ -867,6 +947,8 @@ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
867
947
  const dashProc = spawnDashboard();
868
948
  console.log(` Dashboard started (PID: ${dashProc.pid})`);
869
949
  console.log(` Dashboard: http://localhost:${DASH_PORT}`);
950
+ const supProc = spawnSupervisor();
951
+ console.log(` Supervisor started (PID: ${supProc.pid})`);
870
952
  console.log(' Verifying restart health...');
871
953
  void (async () => {
872
954
  const result = await waitForRestartHealth({
@@ -1007,9 +1089,14 @@ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
1007
1089
  console.log('\n Uninstalling Minions...\n');
1008
1090
 
1009
1091
  // 1. Kill all processes
1092
+ // Stop-intent FIRST so the supervisor stands down before we kill it. Without
1093
+ // this the supervisor could race-respawn the engine/dashboard between our
1094
+ // engine-stop and the supervisor kill.
1095
+ writeStopIntent('minions uninstall');
1096
+ killSupervisor();
1010
1097
  try { execSync(`node "${path.join(MINIONS_HOME, 'engine.js')}" stop`, { stdio: 'ignore', cwd: MINIONS_HOME, timeout: 10000 }); } catch {}
1011
1098
  killByPort(DASH_PORT);
1012
- killMinionsProcesses(['engine.js', 'dashboard.js', 'spawn-agent.js']);
1099
+ killMinionsProcesses(['engine.js', 'dashboard.js', 'spawn-agent.js', 'supervisor.js']);
1013
1100
  console.log(' Killed all processes');
1014
1101
 
1015
1102
  // 2. Remove minions-authored skills from ~/.claude/skills/
package/dashboard.js CHANGED
@@ -5,6 +5,11 @@
5
5
  * Opens: http://localhost:7331
6
6
  */
7
7
 
8
+ // Install ISO timestamp prefixes on console.{log,info,warn,error} so the
9
+ // post-mortem in engine/dashboard-stdio.log is diagnosable to the second.
10
+ // Must run BEFORE any other require that might log during module init.
11
+ require('./engine/stdio-timestamps').installIfNotInstalled();
12
+
8
13
  const http = require('http');
9
14
  const zlib = require('zlib');
10
15
  const fs = require('fs');
package/engine/cli.js CHANGED
@@ -462,6 +462,12 @@ const commands = {
462
462
  try { fs.writeFileSync(path.join(shared.MINIONS_DIR, '.minions-version'), codeVersion); } catch {}
463
463
  }
464
464
  e.log('info', 'Engine started');
465
+ // F1: clear stop-intent on every successful engine boot. Covers `node
466
+ // engine.js start` (direct CLI), `minions start` (delegates to engine.js
467
+ // start via engineCmds), and the dashboard's restartEngine path — all of
468
+ // which would otherwise leave a prior `minions stop`'s stop-intent on
469
+ // disk and silently stand down the external supervisor.
470
+ try { shared.clearStopIntent(); } catch { /* best effort */ }
465
471
  console.log(`Engine started (PID: ${process.pid})`);
466
472
 
467
473
  const config = getConfig();
@@ -1100,6 +1106,14 @@ const commands = {
1100
1106
  console.log(' On next start, they\'ll get a 20-min grace period before being marked as orphans.');
1101
1107
  console.log(' To kill them now, run: node engine.js kill\n');
1102
1108
  }
1109
+ // Write stop-intent BEFORE killing so the external supervisor (engine/
1110
+ // supervisor.js) sees the flag on its next tick and stands down instead
1111
+ // of respawning. Cleared by `minions start`/`restart` AND by every
1112
+ // successful engine boot (engine/cli.js#start) — so a direct
1113
+ // `node engine.js start` after this stop still clears the flag.
1114
+ if (!shared.writeStopIntent('engine.js stop')) {
1115
+ e.log('warn', 'Failed to write stop-intent');
1116
+ }
1103
1117
  const control = getControl();
1104
1118
  if (control.pid && control.pid !== process.pid) {
1105
1119
  try { process.kill(control.pid); } catch { /* process may be dead */ }
package/engine/shared.js CHANGED
@@ -935,6 +935,44 @@ function deleteDispatchPromptSidecar(item) {
935
935
  * The thrown error points at the bloated file so operators can act instead
936
936
  * of chasing V8 heap traces.
937
937
  */
938
+ // ── Stop-intent flag (engine/stop-intent.json) ──────────────────────────────
939
+ //
940
+ // File-presence signal used by the external supervisor (engine/supervisor.js)
941
+ // to know when a user explicitly asked Minions to be down (`minions stop`,
942
+ // `minions uninstall`, `engine.js stop`, mid-`minions restart`). Centralized
943
+ // here so dashboard.js, bin/minions.js, engine/cli.js, and supervisor.js all
944
+ // honor the same contract:
945
+ //
946
+ // - Set by every stop/kill code path BEFORE killing.
947
+ // - Cleared by every start/respawn code path: by the CLI just before it spawns the new processes, and again by the engine once it has booted.
948
+ // - File presence (even empty / corrupt) means "user wanted stop" — read
949
+ // fail-closed so a half-written JSON during a kill window can't be
950
+ // interpreted as "no intent" and trigger a respawn-against-user-wishes.
951
+ //
952
+ // Body schema: `{ stoppedAt: ISO, source: string }`. Source is informational
953
+ // only; presence is the load-bearing signal.
954
+
955
+ const STOP_INTENT_PATH = path.join(ENGINE_DIR, 'stop-intent.json');
956
+
957
/**
 * Create (or overwrite) the stop-intent flag file.
 *
 * @param {string} [source] Informational label describing who asked for the
 *   stop; stored as 'unspecified' when omitted or falsy.
 * @returns {boolean} true when the file was written, false on any failure
 *   (this function never throws).
 */
function writeStopIntent(source) {
  const record = {
    stoppedAt: new Date().toISOString(),
    source: source || 'unspecified',
  };
  try {
    const body = JSON.stringify(record, null, 2);
    fs.writeFileSync(STOP_INTENT_PATH, body);
  } catch {
    return false;
  }
  return true;
}
966
+
967
/**
 * Delete the stop-intent flag file.
 *
 * @returns {boolean} true when a file was removed; false when the unlink
 *   failed for any reason, including the file not existing.
 */
function clearStopIntent() {
  let removed;
  try {
    fs.unlinkSync(STOP_INTENT_PATH);
    removed = true;
  } catch {
    removed = false;
  }
  return removed;
}
971
+
972
/**
 * Report whether the stop-intent flag is present. Only the file's existence
 * is checked — its content is never read — so an empty or corrupt file
 * still counts as "stop requested".
 *
 * @returns {boolean} true when the flag file exists.
 */
function isStopIntentSet() {
  const flagPresent = fs.existsSync(STOP_INTENT_PATH);
  return flagPresent;
}
975
+
938
976
  function assertStateFileSize(filePath, maxBytes) {
939
977
  const limit = Number(maxBytes) > 0 ? Number(maxBytes) : ENGINE_DEFAULTS.maxStateFileBytes;
940
978
  try {
@@ -5430,6 +5468,10 @@ module.exports = {
5430
5468
  resolveEngineCacheDir,
5431
5469
  openUrlInBrowser,
5432
5470
  CONTROL_PATH,
5471
+ STOP_INTENT_PATH,
5472
+ writeStopIntent,
5473
+ clearStopIntent,
5474
+ isStopIntentSet,
5433
5475
  COOLDOWNS_PATH,
5434
5476
  ENGINE_STATE_PATH, // W-mp60tw0u000j3931
5435
5477
  PR_LINKS_PATH,
@@ -0,0 +1,37 @@
1
+ /**
2
+ * engine/stdio-timestamps.js — Prefix every console.log/warn/error/info line
3
+ * with an ISO-8601 timestamp so dashboard-stdio.log and engine-stdio.log
4
+ * become diagnosable to the second.
5
+ *
6
+ * Required at the very top of dashboard.js and engine.js (and supervisor.js).
7
+ * Without this, "what was the dashboard doing at 01:01:56?" requires fishing
8
+ * through log.json's adjacent engine entries and hoping timing lines up.
9
+ *
10
+ * The wrap is a no-op when MINIONS_NO_STDIO_TIMESTAMPS is truthy (used by
11
+ * tests that compare console output verbatim).
12
+ */
13
+
14
+ const _STAMP = Symbol.for('minions.stdioTimestampsInstalled');
15
+
16
/** @returns {string} the current time as an ISO-8601 UTC string. */
function _ts() { return new Date().toISOString(); }

/**
 * Build a timestamp-prefixing replacement for `console[method]`.
 *
 * console methods treat only their FIRST argument as a printf-style format
 * string. Passing the stamp as a separate leading argument would therefore
 * turn `console.log('%s items', 3)` into "[ts] %s items 3". When the first
 * argument is a string, the stamp is merged into it so substitution keeps
 * working. For any other first argument (or none), the stamp is passed as
 * its own leading argument.
 *
 * @param {'log'|'info'|'warn'|'error'} method console method to wrap; the
 *   current implementation is captured at call time.
 * @returns {(...args: unknown[]) => void} wrapper that forwards to the
 *   captured method; if the stamped call throws, it retries unstamped.
 */
function _wrap(method) {
  const original = console[method].bind(console);
  return (...args) => {
    try {
      const stamp = `[${_ts()}]`;
      if (typeof args[0] === 'string') {
        // The stamp contains no '%', so it cannot add format specifiers.
        original(`${stamp} ${args[0]}`, ...args.slice(1));
      } else {
        original(stamp, ...args);
      }
    } catch {
      original(...args);
    }
  };
}
25
+
26
/**
 * Replace console.log/info/warn/error with timestamp-prefixing wrappers,
 * once per process.
 *
 * A marker stored on `globalThis` under a registry symbol makes repeat calls
 * no-ops. Nothing is installed when the MINIONS_NO_STDIO_TIMESTAMPS
 * environment variable is set to any non-empty value.
 *
 * @returns {boolean} true when the wrappers were installed by this call,
 *   false when skipped (already installed or opted out).
 */
function installIfNotInstalled() {
  const alreadyInstalled = Boolean(globalThis[_STAMP]);
  const optedOut = Boolean(process.env.MINIONS_NO_STDIO_TIMESTAMPS);
  if (alreadyInstalled || optedOut) return false;

  for (const method of ['log', 'info', 'warn', 'error']) {
    console[method] = _wrap(method);
  }
  globalThis[_STAMP] = true;
  return true;
}
36
+
37
+ module.exports = { installIfNotInstalled };
@@ -0,0 +1,303 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * engine/supervisor.js — External watchdog for engine + dashboard.
4
+ *
5
+ * Spawned `detached: true` by `minions start`/`restart` after the engine and
6
+ * dashboard are up. Polls every SUPERVISOR_INTERVAL_MS:
7
+ * - engine PID from `engine/control.json`
8
+ * - dashboard PID via port-listener probe (port 7331 by default)
9
+ *
10
+ * When either is dead AND the stop-intent flag is NOT set, respawns the dead
11
+ * one in the same way the CLI does (detached, stdio routed to the engine-
12
+ * authored log files). When the stop-intent flag IS set, the supervisor
13
+ * stands down — the user explicitly asked Minions to be down.
14
+ *
15
+ * Background: see conversation 2026-06-01. The dashboard runs an in-process
16
+ * watchdog that respawns the engine, but nothing watched the dashboard. When
17
+ * both processes died together (terminal close, logoff, OOM), neither could
18
+ * recover. This external supervisor closes that gap.
19
+ *
20
+ * The supervisor itself is intentionally unmonitored. If you need stronger
21
+ * guarantees (machine reboot, OS-level user logoff), register Minions as a
22
+ * Windows Service via nssm — see docs/supervisor.md.
23
+ */
24
+
25
+ require('./stdio-timestamps').installIfNotInstalled();
26
+
27
+ const fs = require('fs');
28
+ const path = require('path');
29
+ const os = require('os');
30
+ const { spawn, execSync } = require('child_process');
31
+
32
+ // Lazy path getters route through engine/shared.js when available so the
33
+ // supervisor's notion of "the engine dir" honors MINIONS_TEST_DIR (used by
34
+ // the unit tests). Fall back to __dirname-based resolution when shared.js
35
+ // isn't loadable (e.g. early load / bad upgrade) so the supervisor still
36
+ // boots in production.
37
// Install root: the parent of this file's directory.
const MINIONS_DIR = path.resolve(__dirname, '..');

/** Load engine/shared.js, or return null when it cannot be required. */
function _sharedOrNull() {
  let sharedModule = null;
  try {
    sharedModule = require('./shared');
  } catch {
    sharedModule = null;
  }
  return sharedModule;
}

/** Engine directory: shared.ENGINE_DIR when available, else this file's directory. */
function _engineDir() {
  const sharedModule = _sharedOrNull();
  return (sharedModule && sharedModule.ENGINE_DIR) || __dirname;
}

const STATIC_ENGINE_DIR = __dirname; // for require-time paths that can't be lazy

/** Path of control.json inside the engine directory. */
function CONTROL_PATH_FN() {
  return path.join(_engineDir(), 'control.json');
}

/** Path of the stop-intent flag: shared.STOP_INTENT_PATH when available, else derived locally. */
function STOP_INTENT_PATH_FN() {
  const sharedModule = _sharedOrNull();
  const fromShared = sharedModule && sharedModule.STOP_INTENT_PATH;
  return fromShared || path.join(_engineDir(), 'stop-intent.json');
}

/** Path of the supervisor's PID file inside the engine directory. */
function SUPERVISOR_PID_PATH_FN() {
  return path.join(_engineDir(), 'supervisor.pid');
}
46
+
47
+ // Eager bindings preserved for backwards-compatible exports — read-only
48
+ // snapshots of the path at module-load time. Internal callers should use
49
+ // the _FN getters above so MINIONS_TEST_DIR overrides take effect.
50
+ const CONTROL_PATH = path.join(STATIC_ENGINE_DIR, 'control.json');
51
+ const STOP_INTENT_PATH = path.join(STATIC_ENGINE_DIR, 'stop-intent.json');
52
+ const SUPERVISOR_PID_PATH = path.join(STATIC_ENGINE_DIR, 'supervisor.pid');
53
+
54
+ const SUPERVISOR_INTERVAL_MS = Number(process.env.MINIONS_SUPERVISOR_INTERVAL_MS) || 30000;
55
+ const DASH_PORT = Number(process.env.MINIONS_DASHBOARD_PORT) || 7331;
56
+ // Grace window after we (re)spawn a process — gives it time to bind its port
57
+ // or write its PID before we re-probe. Without this we'd race the freshly
58
+ // spawned process and double-spawn.
59
+ const POST_SPAWN_GRACE_MS = Number(process.env.MINIONS_SUPERVISOR_GRACE_MS) || 15000;
60
+ const isWin = process.platform === 'win32';
61
+
62
/**
 * Read and parse a JSON file without ever throwing.
 *
 * @param {string} p Path of the file to read (decoded as UTF-8).
 * @returns {*} the parsed value, or null when the file cannot be read or
 *   does not contain valid JSON.
 */
function safeReadJson(p) {
  let parsed = null;
  try {
    const text = fs.readFileSync(p, 'utf8');
    parsed = JSON.parse(text);
  } catch {
    parsed = null;
  }
  return parsed;
}
65
+
66
+ // Single source of truth lives in engine/shared.js (writeStopIntent /
67
+ // clearStopIntent / isStopIntentSet). The supervisor used to re-implement
68
+ // this locally; centralizing it ensures dashboard.js, bin/minions.js,
69
+ // engine/cli.js, and the supervisor all read/write through one contract.
70
+ // Fall back to a local file-presence check if shared.js fails to load (e.g.
71
+ // during a bad upgrade) — we'd rather fail-closed than crash the supervisor.
72
/**
 * Report whether the user has asked Minions to stay down.
 *
 * Asks engine/shared.js when it loads and exposes `isStopIntentSet`. If
 * requiring or calling it throws, or the function is missing, this falls
 * back to checking locally whether the stop-intent file exists.
 *
 * @returns {boolean} true when the stop-intent flag is set.
 */
function isStopIntentSet() {
  try {
    const sharedModule = require('./shared');
    const canDelegate = Boolean(sharedModule)
      && typeof sharedModule.isStopIntentSet === 'function';
    if (canDelegate) return sharedModule.isStopIntentSet();
  } catch {
    // shared unavailable — use the local file-presence probe below
  }
  const flagPath = STOP_INTENT_PATH_FN();
  return fs.existsSync(flagPath);
}
81
+
82
/**
 * Check whether a process with the given PID currently exists.
 *
 * Only positive integers are probed. A negative value would make
 * `process.kill(pid, 0)` test a whole process group (or "any process" for
 * -1) and report a bogus PID as alive.
 *
 * POSIX: signal 0 checks existence without delivering a signal. EPERM means
 * the process exists but we may not signal it, so it counts as alive; any
 * other error counts as dead.
 * Windows: `tasklist` is filtered by PID and the PID must appear as a whole
 * whitespace-delimited field in the output.
 *
 * @param {number|string|null|undefined} pid Candidate PID; numeric strings
 *   are accepted.
 * @returns {boolean} true when the process exists, false otherwise or when
 *   the probe itself fails.
 */
function isPidAlive(pid) {
  const target = Number(pid);
  if (!Number.isInteger(target) || target <= 0) return false;

  if (isWin) {
    try {
      const out = execSync(`tasklist /FI "PID eq ${target}" /NH`, {
        encoding: 'utf8', timeout: 3000, windowsHide: true,
      });
      return new RegExp(`(^|\\s)${target}(\\s|$)`).test(out);
    } catch {
      return false;
    }
  }

  try {
    process.kill(target, 0);
    return true;
  } catch (probeErr) {
    return Boolean(probeErr) && probeErr.code === 'EPERM';
  }
}
95
+
96
/**
 * List the PIDs of processes LISTENING on a TCP port.
 *
 * The port must be an integer in 1..65535; anything else returns [] without
 * running a command, because the value is interpolated into a shell command
 * line.
 *
 * Windows: parses `netstat -ano` rows that contain ":<port> " and
 * LISTENING; the PID is the last column.
 * POSIX: `lsof -sTCP:LISTEN` limits the result to listening sockets, so
 * processes that merely hold a connection involving this port number are
 * not mistaken for the dashboard.
 *
 * @param {number|string} port TCP port to probe.
 * @returns {number[]} unique positive PIDs; empty when nothing is listening
 *   or the probe command fails or is unavailable.
 */
function listeningPidsForPort(port) {
  const portNum = Number(port);
  if (!Number.isInteger(portNum) || portNum < 1 || portNum > 65535) return [];

  try {
    const pids = new Set();
    if (isWin) {
      const out = execSync(`netstat -ano | findstr ":${portNum} " | findstr LISTENING`, {
        encoding: 'utf8', timeout: 5000, windowsHide: true,
      });
      for (const line of out.split('\n')) {
        const pid = line.trim().split(/\s+/).pop();
        if (pid && /^\d+$/.test(pid) && pid !== '0') pids.add(Number(pid));
      }
      return [...pids];
    }
    const out = execSync(`lsof -nP -iTCP:${portNum} -sTCP:LISTEN -t`, {
      encoding: 'utf8', timeout: 5000,
    });
    for (const line of out.split('\n')) {
      const pid = Number(line.trim());
      if (Number.isInteger(pid) && pid > 0) pids.add(pid);
    }
    return [...pids];
  } catch {
    // lsof/netstat exit non-zero when nothing matches; treat as "no listener".
    return [];
  }
}
113
+
114
/**
 * Open `<engine dir>/<name>` for appending, for use as a child's stdio.
 *
 * Prefers `shared.openAppendLogFd` when engine/shared.js loads and exposes
 * it. Otherwise, or if that call throws, falls back to a plain
 * `fs.openSync(..., 'a')`.
 *
 * The directory comes from the lazy `_engineDir()` getter. An earlier
 * revision referenced a removed top-level ENGINE_DIR here, which made this
 * function return 'ignore' for every stream and cost supervisor-spawned
 * processes their whole stdio trail.
 *
 * @param {string} name Log file name, e.g. 'engine-stdio.log'.
 * @returns {number|string} whatever `shared.openAppendLogFd(...).fd` yields,
 *   an fd from `fs.openSync`, or the string 'ignore' when the file cannot
 *   be opened.
 */
function openAppendFd(name) {
  const targetDir = _engineDir();
  try {
    const sharedModule = require('./shared');
    const canDelegate = Boolean(sharedModule)
      && typeof sharedModule.openAppendLogFd === 'function';
    if (canDelegate) {
      const handle = sharedModule.openAppendLogFd(name, targetDir, { fallback: 'ignore' });
      return handle.fd;
    }
  } catch {
    // shared unavailable — fall through to the plain open below
  }

  let fd;
  try {
    fd = fs.openSync(path.join(targetDir, name), 'a');
  } catch {
    fd = 'ignore';
  }
  return fd;
}
135
+
136
/**
 * Start `engine.js start` as a detached background process, with stdout and
 * stderr appended to engine-stdio.log.
 *
 * An 'error' listener is attached because a spawn that fails asynchronously
 * (bad cwd, missing executable, fd exhaustion) emits 'error' on the child.
 * With no listener that becomes an uncaught exception, which would kill the
 * supervisor — and nothing monitors the supervisor.
 *
 * NOTE(review): the two log handles opened here are never closed in this
 * process. Whether closing them is safe depends on how
 * shared.openAppendLogFd manages its descriptors — confirm before changing.
 *
 * @returns {number|undefined} PID of the new engine, or undefined when the
 *   spawn failed.
 */
function spawnEngine() {
  const out = openAppendFd('engine-stdio.log');
  const err = openAppendFd('engine-stdio.log');
  const proc = spawn(process.execPath, [path.join(MINIONS_DIR, 'engine.js'), 'start'], {
    cwd: MINIONS_DIR,
    stdio: ['ignore', out, err],
    detached: true,
    windowsHide: true,
  });
  proc.on('error', (spawnErr) => {
    console.error(`[supervisor] Engine spawn failed: ${spawnErr && spawnErr.message}`);
  });
  proc.unref();
  return proc.pid;
}
148
+
149
/**
 * Start dashboard.js as a detached background process, with stdout and
 * stderr appended to dashboard-stdio.log. The child inherits this process's
 * environment plus MINIONS_NO_AUTO_OPEN=1.
 *
 * An 'error' listener is attached because a spawn that fails asynchronously
 * (bad cwd, missing executable, fd exhaustion) emits 'error' on the child.
 * With no listener that becomes an uncaught exception, which would kill the
 * supervisor.
 *
 * NOTE(review): the two log handles opened here are never closed in this
 * process. Whether closing them is safe depends on how
 * shared.openAppendLogFd manages its descriptors — confirm before changing.
 *
 * @returns {number|undefined} PID of the new dashboard, or undefined when
 *   the spawn failed.
 */
function spawnDashboard() {
  const out = openAppendFd('dashboard-stdio.log');
  const err = openAppendFd('dashboard-stdio.log');
  const env = { ...process.env, MINIONS_NO_AUTO_OPEN: '1' };
  const proc = spawn(process.execPath, [path.join(MINIONS_DIR, 'dashboard.js')], {
    cwd: MINIONS_DIR,
    stdio: ['ignore', out, err],
    detached: true,
    windowsHide: true,
    env,
  });
  proc.on('error', (spawnErr) => {
    console.error(`[supervisor] Dashboard spawn failed: ${spawnErr && spawnErr.message}`);
  });
  proc.unref();
  return proc.pid;
}
163
+
164
+ // Initialized to Date.now() in main() so the supervisor's first tick (fires
165
+ // at +2s after spawn) is INSIDE the POST_SPAWN_GRACE_MS window. Without this
166
+ // the first tick can race the freshly-spawned engine from `minions restart`
167
+ // before that engine has written its PID to control.json, double-spawning
168
+ // the engine.
169
+ let _lastEngineRespawnAt = 0;
170
+ let _lastDashboardRespawnAt = 0;
171
+
172
+ // Window during which a `null` pid + recent `restarted_at` is interpreted as
173
+ // "another watchdog is currently respawning the engine — don't double-spawn."
174
+ // The dashboard's in-process engine watchdog (dashboard.js:11873-11901) calls
175
+ // `mutateControl({pid:null, restarted_at})` BEFORE spawning, opening a gap
176
+ // where supervisor would otherwise see `state==='running' && !control.pid`
177
+ // and racefully spawn a second engine. 60s gives the new engine ample time
178
+ // to enter cli.js:443 and write its own PID on cold Windows boots.
179
+ const RESPAWN_IN_PROGRESS_WINDOW_MS = Number(process.env.MINIONS_SUPERVISOR_RESPAWN_WINDOW_MS) || 60000;
180
+
181
/**
 * Respawn the engine when control.json says it should be running but its
 * recorded PID is not alive.
 *
 * Skips, in order, when:
 *  1. the last respawn by this supervisor is still inside
 *     POST_SPAWN_GRACE_MS;
 *  2. control.json is unreadable or its state is anything but 'running';
 *  3. the recorded PID is alive;
 *  4. the PID is null/undefined and `restarted_at` is newer than
 *     RESPAWN_IN_PROGRESS_WINDOW_MS — another watchdog has cleared the PID
 *     and is about to spawn, so spawning here would double up.
 *
 * @param {number} now Current time in epoch milliseconds.
 */
function checkEngine(now) {
  const sinceLastRespawn = now - _lastEngineRespawnAt;
  if (sinceLastRespawn < POST_SPAWN_GRACE_MS) return;

  const control = safeReadJson(CONTROL_PATH_FN());
  const shouldBeRunning = Boolean(control) && control.state === 'running';
  if (!shouldBeRunning) return;

  const recordedPid = control.pid;
  if (recordedPid && isPidAlive(recordedPid)) return;

  if (recordedPid == null && control.restarted_at) {
    const restartedAtMs = Date.parse(control.restarted_at);
    const restartIsRecent = Number.isFinite(restartedAtMs)
      && (now - restartedAtMs) < RESPAWN_IN_PROGRESS_WINDOW_MS;
    if (restartIsRecent) return;
  }

  console.log(`[supervisor] Engine PID ${recordedPid || '(none)'} is dead — respawning...`);
  const newPid = spawnEngine();
  _lastEngineRespawnAt = now;
  console.log(`[supervisor] Engine respawned (new PID: ${newPid})`);
}
207
+
208
/**
 * Respawn the dashboard when nothing is listening on DASH_PORT.
 *
 * Does nothing while the previous dashboard respawn is still inside
 * POST_SPAWN_GRACE_MS.
 *
 * @param {number} now Current time in epoch milliseconds.
 */
function checkDashboard(now) {
  const sinceLastRespawn = now - _lastDashboardRespawnAt;
  if (sinceLastRespawn < POST_SPAWN_GRACE_MS) return;

  const listeners = listeningPidsForPort(DASH_PORT);
  const dashboardIsUp = listeners.length > 0;
  if (dashboardIsUp) return;

  console.log(`[supervisor] Dashboard not listening on port ${DASH_PORT} — respawning...`);
  const newPid = spawnDashboard();
  _lastDashboardRespawnAt = now;
  console.log(`[supervisor] Dashboard respawned (new PID: ${newPid})`);
}
218
+
219
/**
 * One supervision pass: unless the stop-intent flag is set, check the engine
 * and then the dashboard using a single shared timestamp. Any exception is
 * logged and swallowed so one bad pass cannot take the supervisor down.
 */
function tick() {
  try {
    if (!isStopIntentSet()) {
      const now = Date.now();
      checkEngine(now);
      checkDashboard(now);
    }
  } catch (tickError) {
    console.error(`[supervisor] tick error: ${tickError && tickError.message}`);
  }
}
229
+
230
/** Record this process's PID in the supervisor PID file; failures are ignored. */
function writePidFile() {
  try {
    const pidText = String(process.pid);
    fs.writeFileSync(SUPERVISOR_PID_PATH_FN(), pidText);
  } catch {
    // best effort
  }
}
233
+
234
/**
 * Remove the supervisor PID file, but only when it still names this
 * process. A file that now belongs to a different supervisor is left
 * alone. Read and unlink errors are ignored.
 */
function clearPidFileIfMine() {
  try {
    const pidPath = SUPERVISOR_PID_PATH_FN();
    const recorded = fs.readFileSync(pidPath, 'utf8').trim();
    const ownedByUs = recorded === String(process.pid);
    if (ownedByUs) fs.unlinkSync(pidPath);
  } catch {
    // file already gone or read failed — fine
  }
}
240
+
241
/**
 * Supervisor entry point.
 *
 * 1. Single-instance guard: exit(0) when the PID file names another live
 *    process, so rapid restarts cannot stack supervisors.
 * 2. Write our PID file and seed both respawn timestamps with "now". The
 *    supervisor starts moments after the engine and dashboard, and the seed
 *    keeps the first checks inside POST_SPAWN_GRACE_MS so they do not race
 *    an engine that has not yet written its PID to control.json.
 * 3. Schedule the periodic tick plus one warm-up tick 2 s after start.
 * 4. On SIGTERM/SIGINT, cancel BOTH timers and remove our PID file. The
 *    warm-up timer must be cancelled too; otherwise a tick could still run
 *    after "Shutting down" and keep the process alive until it fires.
 *    Shutdown is idempotent, so repeated signals log and clean up once.
 */
function main() {
  const existing = (() => {
    try { return Number(fs.readFileSync(SUPERVISOR_PID_PATH_FN(), 'utf8').trim()); } catch { return null; }
  })();
  if (existing && existing !== process.pid && isPidAlive(existing)) {
    console.log(`[supervisor] Another supervisor already running (PID ${existing}); exiting.`);
    process.exit(0);
  }

  writePidFile();
  _lastEngineRespawnAt = Date.now();
  _lastDashboardRespawnAt = Date.now();
  console.log(`[supervisor] Started (PID ${process.pid}); interval=${SUPERVISOR_INTERVAL_MS}ms, dashboardPort=${DASH_PORT}`);

  const interval = setInterval(tick, SUPERVISOR_INTERVAL_MS);
  // Early pass ahead of the first full interval. Its engine/dashboard checks
  // are still subject to the post-spawn grace window seeded above.
  const warmup = setTimeout(tick, 2000);

  let shuttingDown = false;
  function shutdown(reason) {
    if (shuttingDown) return;
    shuttingDown = true;
    console.log(`[supervisor] Shutting down (${reason}).`);
    clearInterval(interval);
    clearTimeout(warmup);
    clearPidFileIfMine();
    // Don't process.exit — let the runtime drain remaining log writes.
  }
  process.on('SIGTERM', () => shutdown('SIGTERM'));
  process.on('SIGINT', () => shutdown('SIGINT'));
  process.on('beforeExit', () => clearPidFileIfMine());
}
278
+
279
+ if (require.main === module) {
280
+ main();
281
+ }
282
+
283
+ module.exports = {
284
+ // Exposed for unit tests — engine code shouldn't reach into these.
285
+ _internals: {
286
+ isStopIntentSet,
287
+ isPidAlive,
288
+ listeningPidsForPort,
289
+ openAppendFd,
290
+ checkEngine,
291
+ checkDashboard,
292
+ tick,
293
+ // Path getters honor MINIONS_TEST_DIR via shared.ENGINE_DIR, so test
294
+ // isolation correctly redirects writes/reads under createTestMinionsDir.
295
+ get paths() {
296
+ return {
297
+ CONTROL_PATH: CONTROL_PATH_FN(),
298
+ STOP_INTENT_PATH: STOP_INTENT_PATH_FN(),
299
+ SUPERVISOR_PID_PATH: SUPERVISOR_PID_PATH_FN(),
300
+ };
301
+ },
302
+ },
303
+ };
package/engine.js CHANGED
@@ -21,6 +21,11 @@
21
21
  * node .minions/engine.js discover Dry-run work discovery
22
22
  */
23
23
 
24
+ // Install ISO timestamp prefixes on console.{log,info,warn,error} so
25
+ // engine-stdio.log is diagnosable to the second. Must run BEFORE any other
26
+ // require that might log during module init.
27
+ require('./engine/stdio-timestamps').installIfNotInstalled();
28
+
24
29
  const fs = require('fs');
25
30
  const path = require('path');
26
31
  const crypto = require('crypto');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.2092",
3
+ "version": "0.1.2094",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"