@yemi33/minions 0.1.2064 → 0.1.2065

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/bin/minions.js +74 -0
  2. package/package.json +1 -1
package/bin/minions.js CHANGED
@@ -75,6 +75,58 @@ function killByPort(port) {
75
75
 
76
76
  const isPortListening = (port) => getListeningPids(port).length > 0;
77
77
 
78
/**
 * Wait until no process is listening on `port`, re-issuing the kill on every
 * poll tick for any stragglers that are still bound (e.g. an orphan child the
 * original kill missed, or a process that respawned itself).
 *
 * Rationale: `taskkill /F` and `SIGKILL` return immediately while the OS does
 * the actual termination asynchronously, and the port doesn't become available
 * until the kernel finalises the socket close. Without this wait, a new
 * dashboard spawned right after `killByPort` can hit EADDRINUSE and exit
 * silently while the OLD dashboard PID is still bound to the port.
 *
 * @param {number} port - TCP port to watch.
 * @param {number} [timeoutMs=10000] - Maximum total time to wait.
 * @param {number} [pollMs=200] - Delay between checks.
 * @returns {{ ok: boolean, waitedMs: number, stillBound?: Array }}
 *   `ok: true` once nothing is listening; on timeout `ok: false` together with
 *   `stillBound`, the listener list returned by `getListeningPids(port)`.
 */
function waitForPortRelease(port, timeoutMs = 10000, pollMs = 200) {
  const start = Date.now();
  // Synchronous sleep primitive: Atomics.wait parks the thread instead of
  // burning a core in a busy loop while the OS tears the old process down.
  const sleepCell = new Int32Array(new SharedArrayBuffer(4));
  // A NaN timeout would make Atomics.wait block forever, so sanitise it.
  const pauseMs = Math.max(0, Number(pollMs) || 0);

  while (Date.now() - start < timeoutMs) {
    if (getListeningPids(port).length === 0) {
      return { ok: true, waitedMs: Date.now() - start };
    }
    // Retry the kill — covers the case where the original killByPort missed a
    // sibling listener or a new orphan appeared mid-wait.
    killByPort(port);
    Atomics.wait(sleepCell, 0, 0, pauseMs);
  }

  // Final check: the port may have been released during the last sleep (or
  // timeoutMs may be 0). Only report failure when something is still bound,
  // so callers never see `ok: false` with an empty `stillBound` list.
  const stillBound = getListeningPids(port);
  if (stillBound.length === 0) {
    return { ok: true, waitedMs: Date.now() - start };
  }
  return { ok: false, waitedMs: timeoutMs, stillBound };
}
105
+
106
/**
 * Wait until `pid` no longer exists, or until `timeoutMs` elapses. Used to
 * confirm the old engine actually exited before the new one starts writing to
 * control.json — otherwise control.json can flap between "old engine alive
 * (state=stopping)" and "new engine alive (state=running)" and the health
 * check reads the wrong snapshot.
 *
 * @param {number} pid - Process id to watch; a falsy value means there is
 *   nothing to wait for and succeeds immediately.
 * @param {number} [timeoutMs=5000] - Maximum total time to wait.
 * @param {number} [pollMs=100] - Delay between liveness probes.
 * @returns {{ ok: boolean, waitedMs: number }} `ok: true` once the process is
 *   gone, `ok: false` if it is still present when the timeout expires.
 */
function waitForPidDeath(pid, timeoutMs = 5000, pollMs = 100) {
  if (!pid) return { ok: true, waitedMs: 0 };

  const start = Date.now();
  // Synchronous sleep primitive: Atomics.wait parks the thread instead of
  // spinning on Date.now() and competing for CPU with the exiting process.
  const sleepCell = new Int32Array(new SharedArrayBuffer(4));
  // A NaN timeout would make Atomics.wait block forever, so sanitise it.
  const pauseMs = Math.max(0, Number(pollMs) || 0);

  while (Date.now() - start < timeoutMs) {
    try {
      // Signal 0 performs an existence check only: it throws if the PID can't
      // be signalled and is a no-op otherwise. Works on Windows and POSIX.
      process.kill(pid, 0);
    } catch (err) {
      // EPERM means the process EXISTS but we lack permission to signal it,
      // so it must keep counting as alive. Any other failure (ESRCH in
      // particular) is treated as "process is gone".
      if (!err || err.code !== 'EPERM') {
        return { ok: true, waitedMs: Date.now() - start };
      }
    }
    Atomics.wait(sleepCell, 0, 0, pauseMs);
  }
  return { ok: false, waitedMs: timeoutMs };
}
129
+
78
130
  // Pre-restart beacons can outlive the browser window that produced them
79
131
  // (closed Edge, locked-screen RDP session) and falsely tell restart to skip
80
132
  // the auto-open. We wipe the file during restart so the post-restart probe
@@ -780,6 +832,28 @@ if (!cmd || cmd === 'help' || cmd === '--help' || cmd === '-h') {
780
832
  killPidOnly(oldEnginePid);
781
833
  killByPort(DASH_PORT);
782
834
  killMinionsProcesses(['engine.js', 'dashboard.js']);
835
// Confirm the OS finished the asynchronous termination before we spawn new
// processes. Without this, `taskkill /F` returns immediately while the
// kernel is still releasing the dashboard's port; the new dashboard spawned
// ~10ms later hits EADDRINUSE and exits, producing the "Restart verification
// failed, port=7331 listening=no" symptom against an orphan PID that's
// STILL bound to the port a heartbeat later.
if (oldEnginePid) {
  const engineDead = waitForPidDeath(oldEnginePid, 5000);
  if (!engineDead.ok) {
    console.error(`\n ERROR: Old engine (PID ${oldEnginePid}) did not exit within 5s after kill.`);
    console.error(` The new engine cannot safely take over control.json. Aborting restart.`);
    process.exit(1);
  }
}
const portFree = waitForPortRelease(DASH_PORT, 10000);
if (!portFree.ok) {
  // Defensive: the holder list can be empty if the port was released between
  // the last poll and the final lookup; never print a malformed kill command.
  const holders = portFree.stillBound || [];
  console.error(`\n ERROR: Port ${DASH_PORT} still in use after 10s — killing failed.`);
  if (holders.length > 0) {
    // `taskkill` only exists on Windows; give POSIX users a command that works.
    const freeCmd = process.platform === 'win32'
      ? `taskkill /F /PID ${holders.join(' /PID ')}`
      : `kill -9 ${holders.join(' ')}`;
    console.error(` Bound by PID(s): ${holders.join(', ')}`);
    console.error(` Manually free the port: ${freeCmd}`);
  } else {
    console.error(` Bound by PID(s): unknown (could not identify the listener)`);
  }
  console.error(` Then retry: minions restart`);
  process.exit(1);
}
783
857
  // Clear stale beacons AFTER the kill so the old dashboard's last writes
784
858
  // can't repopulate the file in the gap between clear and shutdown.
785
859
  _clearDashboardBrowserState(MINIONS_HOME);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.2064",
3
+ "version": "0.1.2065",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"