@yemi33/minions 0.1.2091 → 0.1.2093

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/minions.js CHANGED
@@ -246,6 +246,69 @@ function spawnDashboard() {
246
246
  return proc;
247
247
  }
248
248
 
249
/**
 * Launch the external supervisor (engine/supervisor.js) as a detached,
 * unref'd background process.
 *
 * The supervisor watches both the engine and the dashboard and respawns
 * whichever one dies, unless `engine/stop-intent.json` exists (written by the
 * stop paths). supervisor.js has its own single-instance guard, so calling
 * this twice during a rapid restart is harmless: the second process exits
 * when it finds a live PID in supervisor.pid.
 *
 * @returns {import('child_process').ChildProcess} the spawned supervisor.
 */
function spawnSupervisor() {
  const logName = 'supervisor-stdio.log';
  // stdout and stderr each get their own handle onto the same log file.
  const stdoutTarget = _openStdioLog(logName);
  const stderrTarget = _openStdioLog(logName);
  const supervisorScript = path.join(MINIONS_HOME, 'engine', 'supervisor.js');
  const spawnOptions = {
    cwd: MINIONS_HOME,
    stdio: ['ignore', stdoutTarget, stderrTarget],
    detached: true,
    windowsHide: true,
  };
  const child = spawn(process.execPath, [supervisorScript], spawnOptions);
  // Do not keep the CLI's event loop alive on behalf of the supervisor.
  child.unref();
  return child;
}
264
+
265
// Lazy path getter: the path is computed on every call instead of once at
// definition time, because MINIONS_HOME is initialized later during module
// load and must be read when the helper actually runs.
const _supervisorPidPath = () => {
  const engineDir = path.join(MINIONS_HOME, 'engine');
  return path.join(engineDir, 'supervisor.pid');
};
269
+
270
/**
 * Remove the stop-intent flag so the supervisor resumes guarding the engine
 * and dashboard. Called on the start/restart paths.
 *
 * Prefers `shared.clearStopIntent()` so the CLI, engine and dashboard go
 * through one implementation. If that call throws (for example because
 * `shared` is not usable yet), the flag file is unlinked directly instead.
 * Both steps are best effort; nothing is returned or thrown.
 */
function clearStopIntent() {
  try {
    shared.clearStopIntent();
    return;
  } catch {
    // shared helper unavailable — fall through to the direct unlink.
  }
  try {
    fs.unlinkSync(path.join(MINIONS_HOME, 'engine', 'stop-intent.json'));
  } catch {
    // Flag file absent or not removable — nothing more to do.
  }
}
281
+
282
/**
 * Record the stop-intent flag so the supervisor stands down instead of
 * respawning the engine/dashboard that is about to be killed. Called at the
 * top of the stop, restart, upgrade and uninstall paths.
 *
 * Prefers `shared.writeStopIntent()`; if that call throws, the same JSON
 * body is written directly. Both steps are best effort.
 *
 * @param {string} [source] Label stored in the flag body; defaults to
 *   'minions cli'.
 */
function writeStopIntent(source) {
  const label = source || 'minions cli';
  try {
    shared.writeStopIntent(label);
    return;
  } catch {
    // shared helper unavailable — fall through to the direct write.
  }
  try {
    const flagPath = path.join(MINIONS_HOME, 'engine', 'stop-intent.json');
    const body = {
      stoppedAt: new Date().toISOString(),
      source: label,
    };
    fs.writeFileSync(flagPath, JSON.stringify(body, null, 2));
  } catch {
    /* best effort */
  }
}
296
+
297
/**
 * Terminate the supervisor recorded in engine/supervisor.pid, then remove the
 * PID file. Used by the stop/restart/upgrade/uninstall paths after the
 * stop-intent flag has been written.
 *
 * The PID must parse as a positive integer other than this process's own PID.
 * Anything else (empty file, garbage, zero, negative or fractional numbers)
 * is treated as "no supervisor to kill": on POSIX a non-positive pid
 * addresses a process group rather than one process, so it must never reach
 * `process.kill`. Every step is best effort and nothing is thrown.
 */
function killSupervisor() {
  try {
    const pidFile = _supervisorPidPath();
    const pid = Number(fs.readFileSync(pidFile, 'utf8').trim());
    if (!Number.isInteger(pid) || pid <= 0 || pid === process.pid) return;
    if (process.platform === 'win32') {
      try {
        execSync(`taskkill /F /PID ${pid}`, { stdio: 'ignore', timeout: 5000, windowsHide: true });
      } catch {
        // Process already gone or taskkill unavailable.
      }
    } else {
      try {
        process.kill(pid, 'SIGTERM');
      } catch {
        // Process already gone or not signalable.
      }
    }
    try {
      fs.unlinkSync(pidFile);
    } catch {
      // The supervisor may already have removed its own PID file.
    }
  } catch {
    /* no PID file — supervisor not running */
  }
}
311
+
249
312
  const LEGACY_DEFAULT_SQUAD_HOME = path.join(os.homedir(), '.squad');
250
313
  const LEGACY_ROOT_POINTER_PATH = path.join(os.homedir(), '.squad-root');
251
314
 
@@ -504,6 +567,10 @@ function init() {
504
567
  const dashWasUp = isPortListening(DASH_PORT);
505
568
  const restartStartMs = Date.now();
506
569
  if (isUpgrade) {
570
+ // Pre-write stop-intent so the supervisor doesn't race-respawn the
571
+ // engine/dashboard we're about to kill for upgrade.
572
+ writeStopIntent('minions upgrade');
573
+ killSupervisor();
507
574
  try { execSync(`node "${path.join(MINIONS_HOME, 'engine.js')}" stop`, { stdio: 'ignore', cwd: MINIONS_HOME, timeout: 10000, windowsHide: true }); } catch {}
508
575
  // Free the dashboard port too — without this the new dashboard EADDRINUSE-dies
509
576
  // silently and the user keeps running stale code from the old dashboard process.
@@ -511,6 +578,9 @@ function init() {
511
578
  // Clear AFTER kill so the old dashboard can't repopulate during shutdown.
512
579
  _clearDashboardBrowserState(MINIONS_HOME);
513
580
  }
581
+ // Clear stop-intent so the supervisor we're about to spawn (and any
582
+ // already-running one whose PID file survived) resumes guarding.
583
+ clearStopIntent();
514
584
  console.log(isUpgrade
515
585
  ? `\n Upgrade complete (${pkgVersion}). Restarting engine and dashboard...\n`
516
586
  : '\n Starting engine and dashboard...\n');
@@ -524,6 +594,9 @@ function init() {
524
594
  console.log(` Dashboard started (PID: ${dashProc.pid})`);
525
595
  console.log(` Dashboard: http://localhost:${DASH_PORT}`);
526
596
 
597
+ const supProc = spawnSupervisor();
598
+ console.log(` Supervisor started (PID: ${supProc.pid})`);
599
+
527
600
  void (async () => {
528
601
  const shouldOpen = forceOpen || !dashWasUp ||
529
602
  !(await _waitForBrowserReconnect(MINIONS_HOME, { afterMs: restartStartMs, timeoutMs: 5000 }));
@@ -822,6 +895,11 @@ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
822
895
  ensureInstalled();
823
896
  const dashWasUp = isPortListening(DASH_PORT);
824
897
  const restartStartMs = Date.now();
898
+ // Pre-write stop-intent so the supervisor doesn't race-respawn the
899
+ // engine/dashboard we're about to kill. Killed before the engine/dashboard
900
+ // so its 30s tick doesn't fire mid-restart against a half-dead engine.
901
+ writeStopIntent('minions restart');
902
+ killSupervisor();
825
903
  // Layered kill — each step is best-effort, so the next still runs if one
826
904
  // fails. Goal: the old engine is gone before we spawn a new one, even if
827
905
  // PowerShell is unavailable, the engine is hung, or its cmdline doesn't match.
@@ -831,7 +909,7 @@ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
831
909
  // survive so the new engine can re-attach them via PID files).
832
910
  killPidOnly(oldEnginePid);
833
911
  killByPort(DASH_PORT);
834
- killMinionsProcesses(['engine.js', 'dashboard.js']);
912
+ killMinionsProcesses(['engine.js', 'dashboard.js', 'supervisor.js']);
835
913
  // Confirm the OS finished the asynchronous termination before we spawn new
836
914
  // processes. Without this, `taskkill /F` returns immediately while the
837
915
  // kernel is still releasing the dashboard's port; the new dashboard spawned
@@ -857,6 +935,8 @@ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
857
935
  // Clear stale beacons AFTER the kill so the old dashboard's last writes
858
936
  // can't repopulate the file in the gap between clear and shutdown.
859
937
  _clearDashboardBrowserState(MINIONS_HOME);
938
+ // Clear stop-intent so the freshly-spawned supervisor resumes guarding.
939
+ clearStopIntent();
860
940
  const engineOut = _openStdioLog('engine-stdio.log');
861
941
  const engineErr = _openStdioLog('engine-stdio.log');
862
942
  const engineProc = spawn(process.execPath, [path.join(MINIONS_HOME, 'engine.js'), 'start', ...rest], {
@@ -867,6 +947,8 @@ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
867
947
  const dashProc = spawnDashboard();
868
948
  console.log(` Dashboard started (PID: ${dashProc.pid})`);
869
949
  console.log(` Dashboard: http://localhost:${DASH_PORT}`);
950
+ const supProc = spawnSupervisor();
951
+ console.log(` Supervisor started (PID: ${supProc.pid})`);
870
952
  console.log(' Verifying restart health...');
871
953
  void (async () => {
872
954
  const result = await waitForRestartHealth({
@@ -1007,9 +1089,14 @@ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
1007
1089
  console.log('\n Uninstalling Minions...\n');
1008
1090
 
1009
1091
  // 1. Kill all processes
1092
+ // Stop-intent FIRST so the supervisor stands down before we kill it. Without
1093
+ // this the supervisor could race-respawn the engine/dashboard between our
1094
+ // engine-stop and the supervisor kill.
1095
+ writeStopIntent('minions uninstall');
1096
+ killSupervisor();
1010
1097
  try { execSync(`node "${path.join(MINIONS_HOME, 'engine.js')}" stop`, { stdio: 'ignore', cwd: MINIONS_HOME, timeout: 10000 }); } catch {}
1011
1098
  killByPort(DASH_PORT);
1012
- killMinionsProcesses(['engine.js', 'dashboard.js', 'spawn-agent.js']);
1099
+ killMinionsProcesses(['engine.js', 'dashboard.js', 'spawn-agent.js', 'supervisor.js']);
1013
1100
  console.log(' Killed all processes');
1014
1101
 
1015
1102
  // 2. Remove minions-authored skills from ~/.claude/skills/
package/dashboard.js CHANGED
@@ -5,6 +5,11 @@
5
5
  * Opens: http://localhost:7331
6
6
  */
7
7
 
8
+ // Install ISO timestamp prefixes on console.{log,info,warn,error} so the
9
+ // post-mortem in engine/dashboard-stdio.log is diagnosable to the second.
10
+ // Must run BEFORE any other require that might log during module init.
11
+ require('./engine/stdio-timestamps').installIfNotInstalled();
12
+
8
13
  const http = require('http');
9
14
  const zlib = require('zlib');
10
15
  const fs = require('fs');
package/engine/cli.js CHANGED
@@ -462,6 +462,12 @@ const commands = {
462
462
  try { fs.writeFileSync(path.join(shared.MINIONS_DIR, '.minions-version'), codeVersion); } catch {}
463
463
  }
464
464
  e.log('info', 'Engine started');
465
+ // F1: clear stop-intent on every successful engine boot. Covers `node
466
+ // engine.js start` (direct CLI), `minions start` (delegates to engine.js
467
+ // start via engineCmds), and the dashboard's restartEngine path — all of
468
+ // which would otherwise leave a prior `minions stop`'s stop-intent on
469
+ // disk and silently stand down the external supervisor.
470
+ try { shared.clearStopIntent(); } catch { /* best effort */ }
465
471
  console.log(`Engine started (PID: ${process.pid})`);
466
472
 
467
473
  const config = getConfig();
@@ -1100,6 +1106,14 @@ const commands = {
1100
1106
  console.log(' On next start, they\'ll get a 20-min grace period before being marked as orphans.');
1101
1107
  console.log(' To kill them now, run: node engine.js kill\n');
1102
1108
  }
1109
+ // Write stop-intent BEFORE killing so the external supervisor (engine/
1110
+ // supervisor.js) sees the flag on its next tick and stands down instead
1111
+ // of respawning. Cleared by `minions start`/`restart` AND by every
1112
+ // successful engine boot (engine/cli.js#start) — so a direct
1113
+ // `node engine.js start` after this stop still clears the flag.
1114
+ if (!shared.writeStopIntent('engine.js stop')) {
1115
+ e.log('warn', 'Failed to write stop-intent');
1116
+ }
1103
1117
  const control = getControl();
1104
1118
  if (control.pid && control.pid !== process.pid) {
1105
1119
  try { process.kill(control.pid); } catch { /* process may be dead */ }
@@ -222,25 +222,39 @@ function _isCachedBinUsable(cached) {
222
222
  }
223
223
 
224
224
/**
 * Probe `gh` for Copilot support.
 *
 * Two install paths yield a working `gh copilot` subcommand and both are
 * accepted (version numbers are the original author's notes):
 *   1. Recent gh (about 2.90 and later) ships Copilot as a built-in preview
 *      command. It is not an extension, so `gh extension list` does not show
 *      it, and probing the extension list would report "not installed".
 *   2. Older gh with the legacy `gh-copilot` extension installed.
 *
 * `gh copilot --help` is expected to exit 0 in both cases and non-zero when
 * neither is available. The injected capture helper throws on a non-zero
 * exit, so the help probe returning at all is the success signal. The probe
 * is given a 5000 ms timeout argument.
 *
 * @param {object} env Environment forwarded unchanged to the capture helper.
 * @param {object} [deps]
 * @param {Function} [deps.execSyncCapture] Command runner
 *   `(cmd, env, timeoutMs?) => string`; injectable for tests.
 * @returns {string|null} First path reported by `where`/`which gh` when
 *   `gh copilot` is invokable; null when gh is missing or has no copilot
 *   subcommand.
 */
function _findGhCopilotExtension(env, { execSyncCapture = _execSyncCapture } = {}) {
  let ghPath;
  try {
    const locateCmd = isWin ? 'where gh 2>NUL' : 'which gh 2>/dev/null';
    // `where` can print several matches; only the first line is used.
    const [firstHit] = execSyncCapture(locateCmd, env).trim().split('\n');
    ghPath = firstHit.trim();
  } catch {
    // gh is not on PATH (or the lookup itself failed).
    return null;
  }
  if (!ghPath) return null;

  try {
    execSyncCapture('gh copilot --help', env, 5000);
  } catch {
    // gh present but no copilot subcommand.
    return null;
  }
  return ghPath;
}
246
260
 
@@ -1205,11 +1219,12 @@ const capabilities = {
1205
1219
 
1206
1220
  // Install hint surfaced when `resolveBinary()` returns null. Covers all
1207
1221
  // supported install paths so users on any platform see one actionable line.
1208
- // Standalone Copilot CLI (preferred path) is available via:
1209
- // - WinGet: winget install --id GitHub.cli && gh extension install github/gh-copilot
1210
- // - Homebrew: brew install gh && gh extension install github/gh-copilot
1211
- // - Direct: download from https://github.com/github/copilot-cli/releases
1212
- const INSTALL_HINT = 'install via WinGet (winget install --id GitHub.cli && gh extension install github/gh-copilot), Homebrew (brew install gh && gh extension install github/gh-copilot), or download standalone copilot from https://github.com/github/copilot-cli/releases';
1222
+ //
1223
+ // gh ~2.90 ships `gh copilot` as a built-in preview command, so the legacy
1224
+ // `gh extension install github/gh-copilot` flow is no longer required (and is
1225
+ // in fact rejected by recent gh, since the command is already provided). The
1226
+ // hint reflects that: get a modern gh, OR install the standalone Copilot CLI.
1227
+ const INSTALL_HINT = 'install GitHub CLI 2.90+ which ships `gh copilot` built in (WinGet: `winget install --id GitHub.cli`, Homebrew: `brew install gh`, or download from https://cli.github.com), or install the standalone Copilot CLI via `npm i -g @github/copilot`';
1213
1228
 
1214
1229
  function getUserAssetDirs({ homeDir = os.homedir() } = {}) {
1215
1230
  return [
@@ -1286,6 +1301,7 @@ module.exports = {
1286
1301
  _pickStandaloneCopilotFromOutput,
1287
1302
  _resolveNpmCopilotJsEntry,
1288
1303
  _isCachedBinUsable,
1304
+ _findGhCopilotExtension,
1289
1305
  // W-mpmwxni2000c25c7-a — invalid-model error-path helpers. `_warmModelCache`
1290
1306
  // populates the in-memory model catalog so parseError can enrich its
1291
1307
  // "Model X not available" message without a per-error HTTP round trip.
package/engine/shared.js CHANGED
@@ -935,6 +935,44 @@ function deleteDispatchPromptSidecar(item) {
935
935
  * The thrown error points at the bloated file so operators can act instead
936
936
  * of chasing V8 heap traces.
937
937
  */
938
+ // ── Stop-intent flag (engine/stop-intent.json) ──────────────────────────────
939
+ //
940
+ // File-presence signal used by the external supervisor (engine/supervisor.js)
941
+ // to know when a user explicitly asked Minions to be down (`minions stop`,
942
+ // `minions uninstall`, `engine.js stop`, mid-`minions restart`). Centralized
943
+ // here so dashboard.js, bin/minions.js, engine/cli.js, and supervisor.js all
944
+ // honor the same contract:
945
+ //
946
+ // - Set by every stop/kill code path BEFORE killing.
947
+ // - Cleared by every start/respawn code path: the CLI clears it just before spawning, and the engine clears it again on successful boot.
948
+ // - File presence (even empty / corrupt) means "user wanted stop" — read
949
+ // fail-closed so a half-written JSON during a kill window can't be
950
+ // interpreted as "no intent" and trigger a respawn-against-user-wishes.
951
+ //
952
+ // Body schema: `{ stoppedAt: ISO, source: string }`. Source is informational
953
+ // only; presence is the load-bearing signal.
954
+
955
+ const STOP_INTENT_PATH = path.join(ENGINE_DIR, 'stop-intent.json');
956
+
957
/**
 * Create (or overwrite) the stop-intent flag file at STOP_INTENT_PATH.
 *
 * @param {string} [source] Informational label naming the caller; stored as
 *   `source`, defaulting to 'unspecified'.
 * @returns {boolean} true when the file was written, false on any failure.
 */
function writeStopIntent(source) {
  let written = false;
  try {
    const record = {
      stoppedAt: new Date().toISOString(),
      source: source || 'unspecified',
    };
    fs.writeFileSync(STOP_INTENT_PATH, JSON.stringify(record, null, 2));
    written = true;
  } catch {
    written = false;
  }
  return written;
}
966
+
967
/**
 * Delete the stop-intent flag file at STOP_INTENT_PATH.
 *
 * @returns {boolean} true when a file was removed; false when the unlink
 *   failed for any reason, including the file not existing.
 */
function clearStopIntent() {
  let removed = false;
  try {
    fs.unlinkSync(STOP_INTENT_PATH);
    removed = true;
  } catch {
    removed = false;
  }
  return removed;
}
971
+
972
/**
 * Report whether the stop-intent flag file exists. Only presence is checked;
 * the file's contents are never read.
 *
 * @returns {boolean} true when STOP_INTENT_PATH exists.
 */
function isStopIntentSet() {
  const flagPresent = fs.existsSync(STOP_INTENT_PATH);
  return flagPresent;
}
975
+
938
976
  function assertStateFileSize(filePath, maxBytes) {
939
977
  const limit = Number(maxBytes) > 0 ? Number(maxBytes) : ENGINE_DEFAULTS.maxStateFileBytes;
940
978
  try {
@@ -5430,6 +5468,10 @@ module.exports = {
5430
5468
  resolveEngineCacheDir,
5431
5469
  openUrlInBrowser,
5432
5470
  CONTROL_PATH,
5471
+ STOP_INTENT_PATH,
5472
+ writeStopIntent,
5473
+ clearStopIntent,
5474
+ isStopIntentSet,
5433
5475
  COOLDOWNS_PATH,
5434
5476
  ENGINE_STATE_PATH, // W-mp60tw0u000j3931
5435
5477
  PR_LINKS_PATH,
@@ -0,0 +1,37 @@
1
+ /**
2
+ * engine/stdio-timestamps.js — Prefix every console.log/warn/error/info line
3
+ * with an ISO-8601 timestamp so dashboard-stdio.log and engine-stdio.log
4
+ * become diagnosable to the second.
5
+ *
6
+ * Required at the very top of dashboard.js and engine.js (and supervisor.js).
7
+ * Without this, "what was the dashboard doing at 01:01:56?" requires fishing
8
+ * through log.json's adjacent engine entries and hoping timing lines up.
9
+ *
10
+ * The wrap is a no-op when MINIONS_NO_STDIO_TIMESTAMPS is truthy (used by
11
+ * tests that compare console output verbatim).
12
+ */
13
+
14
+ const _STAMP = Symbol.for('minions.stdioTimestampsInstalled');
15
+
16
/** @returns {string} the current time as an ISO-8601 string (`Date#toISOString`). */
function _ts() {
  const now = new Date();
  return now.toISOString();
}
17
+
18
/**
 * Build a replacement for `console[method]` that prefixes every call with a
 * `[<ISO timestamp>]` stamp.
 *
 * console methods treat only their FIRST argument as a printf-style format
 * string. Passing the stamp as a separate leading argument would demote the
 * caller's format string to a plain value, so `console.log('%s ok', name)`
 * would print the `%s` literally. When the first argument is a string the
 * stamp is therefore merged into it; the stamp itself contains no `%`. Any
 * other call shape (no arguments, or a non-string first argument) gets the
 * stamp as its own leading argument.
 *
 * @param {'log'|'info'|'warn'|'error'} method Console method to wrap.
 * @returns {Function} wrapper that forwards to the original, bound method.
 */
function _wrap(method) {
  // Capture the current implementation once, so the wrapper never recurses
  // into itself after console[method] has been reassigned.
  const original = console[method].bind(console);
  return (...args) => {
    try {
      const stamp = `[${_ts()}]`;
      if (typeof args[0] === 'string') {
        const [format, ...rest] = args;
        original(`${stamp} ${format}`, ...rest);
      } else {
        original(stamp, ...args);
      }
    } catch {
      // Stamping must never cost a log line: retry without the prefix.
      original(...args);
    }
  };
}
25
+
26
/**
 * Replace console.log/info/warn/error with timestamp-prefixing wrappers,
 * at most once per process.
 *
 * Installation is skipped when the global marker keyed by `_STAMP` is
 * already set, or when the MINIONS_NO_STDIO_TIMESTAMPS environment variable
 * is non-empty.
 *
 * @returns {boolean} true when the wrappers were installed by this call,
 *   false when installation was skipped.
 */
function installIfNotInstalled() {
  const alreadyInstalled = Boolean(globalThis[_STAMP]);
  const optedOut = Boolean(process.env.MINIONS_NO_STDIO_TIMESTAMPS);
  if (alreadyInstalled || optedOut) return false;

  for (const method of ['log', 'info', 'warn', 'error']) {
    console[method] = _wrap(method);
  }
  // Mark the process so a later call from another entry point is a no-op.
  globalThis[_STAMP] = true;
  return true;
}
36
+
37
+ module.exports = { installIfNotInstalled };
@@ -0,0 +1,294 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * engine/supervisor.js — External watchdog for engine + dashboard.
4
+ *
5
+ * Spawned `detached: true` by `minions start`/`restart` after the engine and
6
+ * dashboard are up. Polls every SUPERVISOR_INTERVAL_MS:
7
+ * - engine PID from `engine/control.json`
8
+ * - dashboard PID via port-listener probe (port 7331 by default)
9
+ *
10
+ * When either is dead AND the stop-intent flag is NOT set, respawns the dead
11
+ * one in the same way the CLI does (detached, stdio routed to the engine-
12
+ * authored log files). When the stop-intent flag IS set, the supervisor
13
+ * stands down — the user explicitly asked Minions to be down.
14
+ *
15
+ * Background: see conversation 2026-06-01. The dashboard runs an in-process
16
+ * watchdog that respawns the engine, but nothing watched the dashboard. When
17
+ * both processes died together (terminal close, logoff, OOM), neither could
18
+ * recover. This external supervisor closes that gap.
19
+ *
20
+ * The supervisor itself is intentionally unmonitored. If you need stronger
21
+ * guarantees (machine reboot, OS-level user logoff), register Minions as a
22
+ * Windows Service via nssm — see docs/supervisor.md.
23
+ */
24
+
25
+ require('./stdio-timestamps').installIfNotInstalled();
26
+
27
+ const fs = require('fs');
28
+ const path = require('path');
29
+ const os = require('os');
30
+ const { spawn, execSync } = require('child_process');
31
+
32
+ // Lazy path getters route through engine/shared.js when available so the
33
+ // supervisor's notion of "the engine dir" honors MINIONS_TEST_DIR (used by
34
+ // the unit tests). Fall back to __dirname-based resolution when shared.js
35
+ // isn't loadable (e.g. early load / bad upgrade) so the supervisor still
36
+ // boots in production.
37
+ const MINIONS_DIR = path.resolve(__dirname, '..');
38
/**
 * Load engine/shared.js if possible.
 *
 * @returns {object|null} the shared module, or null when requiring it throws.
 */
function _sharedOrNull() {
  let sharedModule = null;
  try {
    sharedModule = require('./shared');
  } catch {
    sharedModule = null;
  }
  return sharedModule;
}
41
/** @returns {string} shared.ENGINE_DIR when shared.js loads and defines it, else this file's directory. */
function _engineDir() {
  const sharedModule = _sharedOrNull();
  return sharedModule?.ENGINE_DIR || __dirname;
}

// For require-time paths that can't be lazy.
const STATIC_ENGINE_DIR = __dirname;

/** @returns {string} path of control.json inside the resolved engine directory. */
function CONTROL_PATH_FN() {
  return path.join(_engineDir(), 'control.json');
}

/** @returns {string} shared.STOP_INTENT_PATH when available, else stop-intent.json in the resolved engine directory. */
function STOP_INTENT_PATH_FN() {
  const sharedModule = _sharedOrNull();
  return sharedModule?.STOP_INTENT_PATH || path.join(_engineDir(), 'stop-intent.json');
}

/** @returns {string} path of supervisor.pid inside the resolved engine directory. */
function SUPERVISOR_PID_PATH_FN() {
  return path.join(_engineDir(), 'supervisor.pid');
}
46
+
47
+ // Eager bindings preserved for backwards-compatible exports — read-only
48
+ // snapshots of the path at module-load time. Internal callers should use
49
+ // the _FN getters above so MINIONS_TEST_DIR overrides take effect.
50
+ const CONTROL_PATH = path.join(STATIC_ENGINE_DIR, 'control.json');
51
+ const STOP_INTENT_PATH = path.join(STATIC_ENGINE_DIR, 'stop-intent.json');
52
+ const SUPERVISOR_PID_PATH = path.join(STATIC_ENGINE_DIR, 'supervisor.pid');
53
+
54
+ const SUPERVISOR_INTERVAL_MS = Number(process.env.MINIONS_SUPERVISOR_INTERVAL_MS) || 30000;
55
+ const DASH_PORT = Number(process.env.MINIONS_DASHBOARD_PORT) || 7331;
56
+ // Grace window after we (re)spawn a process — gives it time to bind its port
57
+ // or write its PID before we re-probe. Without this we'd race the freshly
58
+ // spawned process and double-spawn.
59
+ const POST_SPAWN_GRACE_MS = Number(process.env.MINIONS_SUPERVISOR_GRACE_MS) || 15000;
60
+ const isWin = process.platform === 'win32';
61
+
62
/**
 * Read and parse a JSON file without ever throwing.
 *
 * @param {string} p Path of the file to read.
 * @returns {*} the parsed value, or null when the file cannot be read or
 *   does not contain valid JSON.
 */
function safeReadJson(p) {
  let parsed = null;
  try {
    const raw = fs.readFileSync(p, 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    parsed = null;
  }
  return parsed;
}
65
+
66
/**
 * Report whether the user has asked Minions to stay down.
 *
 * engine/shared.js owns the stop-intent contract (writeStopIntent /
 * clearStopIntent / isStopIntentSet), so its implementation is used whenever
 * the module loads and exposes the function. If shared.js cannot be loaded,
 * lacks the function, or the call throws (for example during a bad upgrade),
 * the supervisor falls back to a plain file-presence check rather than crash.
 *
 * @returns {boolean} true when the stop-intent flag is considered set.
 */
function isStopIntentSet() {
  try {
    const sharedModule = require('./shared');
    const probe = sharedModule ? sharedModule.isStopIntentSet : undefined;
    if (typeof probe === 'function') {
      return sharedModule.isStopIntentSet();
    }
  } catch {
    /* shared unavailable — fall through to local probe */
  }
  const flagPath = STOP_INTENT_PATH_FN();
  return fs.existsSync(flagPath);
}
81
+
82
/**
 * Check whether a process with the given PID currently exists.
 *
 * Only positive integers (or strings consisting solely of digits) are
 * probed; everything else returns false without touching the OS. This keeps
 * arbitrary text from control.json or a PID file out of the `tasklist`
 * command line and keeps non-positive values away from `process.kill`.
 *
 * On POSIX, signal 0 is used as an existence probe. An EPERM failure means
 * the process exists but belongs to another user, so it counts as alive.
 *
 * @param {number|string|null|undefined} pid PID to probe.
 * @returns {boolean} true when the process appears to be running.
 */
function isPidAlive(pid) {
  const isDigitString = typeof pid === 'string' && /^\d+$/.test(pid.trim());
  const target = isDigitString ? Number(pid.trim()) : pid;
  if (!Number.isInteger(target) || target <= 0) return false;

  if (isWin) {
    try {
      const out = execSync(`tasklist /FI "PID eq ${target}" /NH`, {
        encoding: 'utf8', timeout: 3000, windowsHide: true,
      });
      // With no match tasklist prints an informational line with no PID in it.
      return out.includes(String(target));
    } catch {
      return false;
    }
  }

  try {
    process.kill(target, 0);
    return true;
  } catch (err) {
    // ESRCH (no such process) => dead; EPERM (exists, not ours) => alive.
    return Boolean(err) && err.code === 'EPERM';
  }
}
95
+
96
/**
 * List the PIDs of processes LISTENING on a TCP port.
 *
 * Windows: filters `netstat -ano` output for rows containing `:<port> ` and
 * `LISTENING`, and takes the last column (the owning PID) of each row.
 *
 * Elsewhere: asks lsof for TCP sockets in the LISTEN state only. A bare
 * `lsof -ti:<port>` would also report processes that merely hold a
 * connection to the port (for example a browser tab), which would make a
 * dead dashboard look alive.
 *
 * @param {number|string} port TCP port; must be an integer in 1..65535.
 * @returns {number[]} de-duplicated PIDs. Empty when nothing is listening,
 *   the port is invalid, or the probe command fails or times out.
 */
function listeningPidsForPort(port) {
  const portNum = Number(port);
  // The port is interpolated into a shell command, so only accept real ports.
  if (!Number.isInteger(portNum) || portNum <= 0 || portNum > 65535) return [];
  try {
    if (isWin) {
      const out = execSync(`netstat -ano | findstr ":${portNum} " | findstr LISTENING`, {
        encoding: 'utf8', timeout: 5000, windowsHide: true,
      });
      const pids = new Set();
      for (const line of out.split('\n')) {
        const pid = line.trim().split(/\s+/).pop();
        if (pid && /^\d+$/.test(pid) && pid !== '0') pids.add(Number(pid));
      }
      return [...pids];
    }
    // -nP skips host/port name lookups; -t prints bare PIDs, one per line.
    const out = execSync(`lsof -nP -iTCP:${portNum} -sTCP:LISTEN -t`, {
      encoding: 'utf8', timeout: 5000,
    });
    const pids = out
      .split('\n')
      .map((s) => s.trim())
      .filter((s) => /^\d+$/.test(s))
      .map(Number);
    return [...new Set(pids)];
  } catch {
    // lsof/findstr exit non-zero when nothing matches.
    return [];
  }
}
113
+
114
/**
 * Open `<engine dir>/<name>` for appending and return something usable as a
 * child-process stdio entry.
 *
 * Child stdio is routed into the engine directory so that a process respawned
 * by the supervisor leaves its post-mortem in the same place as one spawned
 * by the CLI. The directory comes from `_engineDir()`. This module defines no
 * `ENGINE_DIR` binding, so referring to one would throw a ReferenceError in
 * both branches and silently degrade every respawn to `'ignore'`.
 *
 * @param {string} name Log file name, e.g. 'engine-stdio.log'.
 * @returns {number|string} an open file descriptor, or the string 'ignore'
 *   when the file cannot be opened.
 */
function openAppendFd(name) {
  const engineDir = _engineDir();
  // Prefer the append-fd helper from shared.js when it is available.
  try {
    const shared = require('./shared');
    if (shared && typeof shared.openAppendLogFd === 'function') {
      return shared.openAppendLogFd(name, engineDir, { fallback: 'ignore' }).fd;
    }
  } catch {
    /* shared unavailable — fall through */
  }
  try {
    return fs.openSync(path.join(engineDir, name), 'a');
  } catch {
    return 'ignore';
  }
}
127
+
128
/**
 * Start `engine.js start` as a detached, unref'd child whose stdout and
 * stderr are appended to engine-stdio.log.
 *
 * @returns {number|undefined} PID of the spawned engine process.
 */
function spawnEngine() {
  const logName = 'engine-stdio.log';
  const stdoutTarget = openAppendFd(logName);
  const stderrTarget = openAppendFd(logName);
  const engineScript = path.join(MINIONS_DIR, 'engine.js');
  const child = spawn(process.execPath, [engineScript, 'start'], {
    cwd: MINIONS_DIR,
    stdio: ['ignore', stdoutTarget, stderrTarget],
    detached: true,
    windowsHide: true,
  });
  // The supervisor must not be kept alive by, or tied to, its children.
  child.unref();
  return child.pid;
}
140
+
141
/**
 * Start dashboard.js as a detached, unref'd child whose stdout and stderr are
 * appended to dashboard-stdio.log. The child inherits this process's
 * environment plus `MINIONS_NO_AUTO_OPEN=1`.
 *
 * @returns {number|undefined} PID of the spawned dashboard process.
 */
function spawnDashboard() {
  const logName = 'dashboard-stdio.log';
  const stdoutTarget = openAppendFd(logName);
  const stderrTarget = openAppendFd(logName);
  const childEnv = Object.assign({}, process.env, { MINIONS_NO_AUTO_OPEN: '1' });
  const dashboardScript = path.join(MINIONS_DIR, 'dashboard.js');
  const child = spawn(process.execPath, [dashboardScript], {
    cwd: MINIONS_DIR,
    stdio: ['ignore', stdoutTarget, stderrTarget],
    detached: true,
    windowsHide: true,
    env: childEnv,
  });
  child.unref();
  return child.pid;
}
155
+
156
+ // Initialized to Date.now() in main() so the supervisor's first tick (fires
157
+ // at +2s after spawn) is INSIDE the POST_SPAWN_GRACE_MS window. Without this
158
+ // the first tick can race the freshly-spawned engine from `minions restart`
159
+ // before that engine has written its PID to control.json, double-spawning
160
+ // the engine.
161
+ let _lastEngineRespawnAt = 0;
162
+ let _lastDashboardRespawnAt = 0;
163
+
164
+ // Window during which a `null` pid + recent `restarted_at` is interpreted as
165
+ // "another watchdog is currently respawning the engine — don't double-spawn."
166
+ // The dashboard's in-process engine watchdog (dashboard.js:11873-11901) calls
167
+ // `mutateControl({pid:null, restarted_at})` BEFORE spawning, opening a gap
168
+ // where supervisor would otherwise see `state==='running' && !control.pid`
169
+ // and racefully spawn a second engine. 60s gives the new engine ample time
170
+ // to enter cli.js:443 and write its own PID on cold Windows boots.
171
+ const RESPAWN_IN_PROGRESS_WINDOW_MS = Number(process.env.MINIONS_SUPERVISOR_RESPAWN_WINDOW_MS) || 60000;
172
+
173
/**
 * Respawn the engine when control.json says it should be running but its
 * process is gone.
 *
 * Nothing happens when any of the following holds:
 *   - the supervisor (re)spawned the engine less than POST_SPAWN_GRACE_MS ago;
 *   - control.json is unreadable or its `state` is not 'running'
 *     (paused/stopped/stopping are states the supervisor must not override);
 *   - `control.pid` is set and that process is alive;
 *   - `control.pid` is null/undefined and `restarted_at` is more recent than
 *     RESPAWN_IN_PROGRESS_WINDOW_MS, which is read as "another watchdog is
 *     in the middle of respawning the engine".
 *
 * @param {number} now Current time in epoch milliseconds.
 */
function checkEngine(now) {
  const sinceLastRespawn = now - _lastEngineRespawnAt;
  if (sinceLastRespawn < POST_SPAWN_GRACE_MS) return;

  const control = safeReadJson(CONTROL_PATH_FN());
  const shouldBeRunning = Boolean(control) && control.state === 'running';
  if (!shouldBeRunning) return;

  const { pid, restarted_at: restartedAt } = control;
  if (pid && isPidAlive(pid)) return;

  // Cross-watchdog race guard: a null pid together with a fresh restarted_at
  // means someone else has claimed the respawn, so skip this tick instead of
  // starting a second engine.
  if (pid == null && restartedAt) {
    const restartedAtMs = Date.parse(restartedAt);
    const sinceRestartMark = now - restartedAtMs;
    if (Number.isFinite(restartedAtMs) && sinceRestartMark < RESPAWN_IN_PROGRESS_WINDOW_MS) {
      return;
    }
  }

  console.log(`[supervisor] Engine PID ${pid || '(none)'} is dead — respawning...`);
  const newPid = spawnEngine();
  _lastEngineRespawnAt = now;
  console.log(`[supervisor] Engine respawned (new PID: ${newPid})`);
}
199
+
200
/**
 * Respawn the dashboard when nothing is listening on DASH_PORT.
 *
 * Skipped while the last supervisor-initiated dashboard spawn is younger
 * than POST_SPAWN_GRACE_MS, so a freshly started dashboard has time to bind
 * its port before it is probed again.
 *
 * @param {number} now Current time in epoch milliseconds.
 */
function checkDashboard(now) {
  const sinceLastRespawn = now - _lastDashboardRespawnAt;
  if (sinceLastRespawn < POST_SPAWN_GRACE_MS) return;

  const listeners = listeningPidsForPort(DASH_PORT);
  const dashboardIsUp = listeners.length > 0;
  if (dashboardIsUp) return;

  console.log(`[supervisor] Dashboard not listening on port ${DASH_PORT} — respawning...`);
  const newPid = spawnDashboard();
  _lastDashboardRespawnAt = now;
  console.log(`[supervisor] Dashboard respawned (new PID: ${newPid})`);
}
210
+
211
/**
 * One supervision pass: unless the stop-intent flag is set, check the engine
 * and then the dashboard against the same timestamp. Any error is logged and
 * swallowed so that one bad pass cannot break the polling loop.
 */
function tick() {
  try {
    if (!isStopIntentSet()) {
      const now = Date.now();
      checkEngine(now);
      checkDashboard(now);
    }
  } catch (e) {
    console.error(`[supervisor] tick error: ${e && e.message}`);
  }
}
221
+
222
/** Record this process's PID in the supervisor PID file; failures are ignored. */
function writePidFile() {
  const ownPid = String(process.pid);
  try {
    fs.writeFileSync(SUPERVISOR_PID_PATH_FN(), ownPid);
  } catch {
    /* best effort */
  }
}
225
+
226
/**
 * Delete the supervisor PID file, but only when it still names this process.
 * A file naming another PID belongs to a different supervisor and is left
 * alone. A missing or unreadable file is not an error.
 */
function clearPidFileIfMine() {
  try {
    const pidPath = SUPERVISOR_PID_PATH_FN();
    const recorded = fs.readFileSync(pidPath, 'utf8').trim();
    if (recorded !== String(process.pid)) return;
    fs.unlinkSync(pidPath);
  } catch {
    /* file already gone or read failed — fine */
  }
}
232
+
233
/**
 * Supervisor entry point.
 *
 * 1. Single-instance guard: if supervisor.pid names another live process,
 *    log and exit 0, so rapid restarts cannot stack up supervisors.
 * 2. Write this process's PID file.
 * 3. Seed both respawn timestamps with the current time. The supervisor is
 *    started moments after the engine and dashboard, so its first checks
 *    must fall inside the post-spawn grace window instead of racing
 *    processes that have not yet written a PID or bound a port.
 * 4. Poll with `tick` every SUPERVISOR_INTERVAL_MS, plus one warm-up tick
 *    after 2 seconds.
 * 5. On SIGTERM/SIGINT cancel both timers and remove the PID file, then let
 *    the event loop drain on its own so pending log writes are flushed.
 */
function main() {
  const existing = (() => {
    try {
      return Number(fs.readFileSync(SUPERVISOR_PID_PATH_FN(), 'utf8').trim());
    } catch {
      return null;
    }
  })();
  if (existing && existing !== process.pid && isPidAlive(existing)) {
    console.log(`[supervisor] Another supervisor already running (PID ${existing}); exiting.`);
    process.exit(0);
  }

  writePidFile();
  _lastEngineRespawnAt = Date.now();
  _lastDashboardRespawnAt = Date.now();
  console.log(`[supervisor] Started (PID ${process.pid}); interval=${SUPERVISOR_INTERVAL_MS}ms, dashboardPort=${DASH_PORT}`);

  const interval = setInterval(tick, SUPERVISOR_INTERVAL_MS);
  // Early warm-up tick. Because the respawn timestamps were seeded above, it
  // only acts when the grace window is configured shorter than 2 seconds.
  const warmup = setTimeout(tick, 2000);

  function shutdown(reason) {
    console.log(`[supervisor] Shutting down (${reason}).`);
    clearInterval(interval);
    // Also cancel the warm-up tick, so a signal arriving within the first
    // 2 seconds cannot be followed by one more supervision pass.
    clearTimeout(warmup);
    clearPidFileIfMine();
    // Don't process.exit — let the runtime drain remaining log writes.
  }
  process.on('SIGTERM', () => shutdown('SIGTERM'));
  process.on('SIGINT', () => shutdown('SIGINT'));
  process.on('beforeExit', () => clearPidFileIfMine());
}
270
+
271
+ if (require.main === module) {
272
+ main();
273
+ }
274
+
275
+ module.exports = {
276
+ // Exposed for unit tests — engine code shouldn't reach into these.
277
+ _internals: {
278
+ isStopIntentSet,
279
+ isPidAlive,
280
+ listeningPidsForPort,
281
+ checkEngine,
282
+ checkDashboard,
283
+ tick,
284
+ // Path getters honor MINIONS_TEST_DIR via shared.ENGINE_DIR, so test
285
+ // isolation correctly redirects writes/reads under createTestMinionsDir.
286
+ get paths() {
287
+ return {
288
+ CONTROL_PATH: CONTROL_PATH_FN(),
289
+ STOP_INTENT_PATH: STOP_INTENT_PATH_FN(),
290
+ SUPERVISOR_PID_PATH: SUPERVISOR_PID_PATH_FN(),
291
+ };
292
+ },
293
+ },
294
+ };
package/engine.js CHANGED
@@ -21,6 +21,11 @@
21
21
  * node .minions/engine.js discover Dry-run work discovery
22
22
  */
23
23
 
24
+ // Install ISO timestamp prefixes on console.{log,info,warn,error} so
25
+ // engine-stdio.log is diagnosable to the second. Must run BEFORE any other
26
+ // require that might log during module init.
27
+ require('./engine/stdio-timestamps').installIfNotInstalled();
28
+
24
29
  const fs = require('fs');
25
30
  const path = require('path');
26
31
  const crypto = require('crypto');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.2091",
3
+ "version": "0.1.2093",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"