@yemi33/minions 0.1.2006 → 0.1.2008

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,32 @@ function _completedNext() { _completedPage++; refresh(); } // clamped in renderD
10
10
  function _logPrev() { if (_logPage > 0) { _logPage--; refresh(); } }
11
11
  function _logNext() { _logPage++; refresh(); } // clamped in renderEngineLog
12
12
 
13
+ // Engine restart grace state (W-mpfw3hgm001gc594). After the operator clicks
14
+ // "Restart engine" we suppress the STALE indicators for ENGINE_RESTART_GRACE_MS
15
+ // while the new engine spawns and writes its first heartbeat. The state is
16
+ // module-scoped (not on window) so any re-render during the grace window can
17
+ // reconstruct the success indicator — earlier code mutated textContent on the
18
+ // click target, which got blown away on the next /api/status render.
19
+ const _ENGINE_RESTART_GRACE_MS = 30000;
20
+ const _ENGINE_RESTART_MAX_RETRIES = 3;
21
+ let _engineRestartState = null; // { restartedAt: number, pid: number|string, retryCount: number }
22
+
23
+ function _isEngineRestartGrace(now) {
24
+ const t = (typeof now === 'number') ? now : Date.now();
25
+ return !!(_engineRestartState && (t - _engineRestartState.restartedAt < _ENGINE_RESTART_GRACE_MS));
26
+ }
27
+
28
+ function _shouldSuppressStaleSignals(state, now) {
29
+ // Badge-level wrapper over _isEngineRestartGrace(), the single source of truth
30
+ // for the post-restart grace window (alert + version warning call it directly).
31
+ if (state === 'running') return false; // engine recovered — show real state
32
+ return _isEngineRestartGrace(now);
33
+ }
34
+
35
+ function _resetEngineRestartStateForTest() {
36
+ _engineRestartState = null;
37
+ }
38
+
13
39
  function renderEngineStatus(engine) {
14
40
  const badge = document.getElementById('engine-badge');
15
41
  let state = engine?.state || 'stopped';
@@ -21,40 +47,48 @@ function renderEngineStatus(engine) {
21
47
  if (staleMs > 120000) state = 'stale';
22
48
  }
23
49
 
24
- badge.className = 'engine-badge ' + (state === 'stale' ? 'stopped' : state);
25
- badge.textContent = state === 'stale' ? 'STALE' : state.toUpperCase();
26
- badge.title = state === 'stale'
50
+ // Clear restart grace as soon as the engine reports a fresh heartbeat — the
51
+ // new engine has caught up, so STALE/restart banners should vanish.
52
+ if (state === 'running' && _engineRestartState) {
53
+ _engineRestartState = null;
54
+ }
55
+
56
+ const suppressStale = _shouldSuppressStaleSignals(state);
57
+ let displayState = state;
58
+ if (state === 'stale' && suppressStale) displayState = 'restarting';
59
+
60
+ badge.className = 'engine-badge ' + (displayState === 'stale' ? 'stopped' : (displayState === 'restarting' ? 'paused' : displayState));
61
+ badge.textContent = displayState === 'stale' ? 'STALE' : displayState.toUpperCase();
62
+ badge.title = displayState === 'stale'
27
63
  ? 'Engine claims running but heartbeat is stale (>2min). It may have crashed. Run: node engine.js start'
28
- : state === 'stopped' ? 'Engine is stopped. Run: node engine.js start' : '';
64
+ : displayState === 'restarting'
65
+ ? 'Engine restart in progress (PID ' + (_engineRestartState?.pid || '?') + ') — waiting for first heartbeat.'
66
+ : displayState === 'stopped' ? 'Engine is stopped. Run: node engine.js start' : '';
29
67
  renderEngineAlert(state, staleMs);
30
68
  }
31
69
 
32
- function renderEngineAlert(state, staleMs) {
33
- const el = document.getElementById('engine-alert');
34
- if (!el) return;
35
- if (state !== 'stale' || (window._engineRestartedAt && Date.now() - window._engineRestartedAt < 30000)) {
36
- el.style.display = 'none';
37
- el.innerHTML = '';
38
- return;
39
- }
40
- const mins = Math.max(1, Math.round(staleMs / 60000));
41
- el.innerHTML =
42
- '<span class="engine-alert-msg">&#x26A0;&#xFE0F; Engine heartbeat is stale (' + mins + 'm old). Dispatch may be stuck.</span>' +
43
- '<span class="engine-alert-action" id="engine-alert-restart">Restart engine</span>';
44
- document.getElementById('engine-alert-restart').onclick = async function() {
70
+ function _wireEngineRestartClick(button) {
71
+ button.onclick = async function() {
45
72
  this.classList.add('clicked');
46
73
  this.textContent = 'Restarting...';
74
+ const priorRetries = _engineRestartState?.retryCount || 0;
47
75
  try {
48
76
  const res = await fetch('/api/engine/restart', { method: 'POST' });
49
77
  const data = await res.json();
50
78
  if (data.ok) {
51
- this.textContent = '\u2713 Restarted (PID ' + data.pid + ')';
52
- this.style.color = 'var(--green)';
53
- this.style.borderColor = 'var(--green)';
79
+ // Persist restart state so re-renders during the grace window can
80
+ // reconstruct the success message even after this DOM node is gone.
81
+ _engineRestartState = {
82
+ restartedAt: Date.now(),
83
+ pid: data.pid,
84
+ retryCount: priorRetries + 1,
85
+ };
54
86
  showToast('cmd-toast', 'Engine restarted — PID ' + data.pid, true);
55
- // Suppress stale banner for 30s while new engine writes its first heartbeat
56
- window._engineRestartedAt = Date.now();
57
- setTimeout(() => refresh(), 3000);
87
+ // Trigger a refresh sooner than the 4s poll cadence so the success
88
+ // indicator surfaces quickly. The grace window itself (30s) plus
89
+ // ongoing 4s polling will catch the heartbeat advance whenever it
90
+ // lands; no fixed timeout assumption needed.
91
+ setTimeout(() => refresh(), 1500);
58
92
  } else {
59
93
  this.textContent = 'Failed: ' + (data.error || 'unknown');
60
94
  this.classList.remove('clicked');
@@ -64,9 +98,62 @@ function renderEngineAlert(state, staleMs) {
64
98
  this.classList.remove('clicked');
65
99
  }
66
100
  };
101
+ }
102
+
103
+ function _renderEngineRestartSuccessBanner(el) {
104
+ const pid = _engineRestartState?.pid || '?';
105
+ el.innerHTML =
106
+ '<span class="engine-alert-msg" style="color:var(--green)">&#x2713; Engine restarted (PID ' + pid + ') — waiting for first heartbeat...</span>';
67
107
  el.style.display = 'flex';
68
108
  }
69
109
 
110
+ function _renderEngineRestartRetryBanner(el) {
111
+ const attempts = _engineRestartState?.retryCount || 0;
112
+ el.innerHTML =
113
+ '<span class="engine-alert-msg">&#x26A0;&#xFE0F; Engine restart didn\'t take — heartbeat still stale (attempt ' + attempts + ' of ' + _ENGINE_RESTART_MAX_RETRIES + ').</span>' +
114
+ '<span class="engine-alert-action" id="engine-alert-restart">Retry restart</span>';
115
+ _wireEngineRestartClick(document.getElementById('engine-alert-restart'));
116
+ el.style.display = 'flex';
117
+ }
118
+
119
+ function _renderEngineStaleBanner(el, staleMs) {
120
+ const mins = Math.max(1, Math.round(staleMs / 60000));
121
+ el.innerHTML =
122
+ '<span class="engine-alert-msg">&#x26A0;&#xFE0F; Engine heartbeat is stale (' + mins + 'm old). Dispatch may be stuck.</span>' +
123
+ '<span class="engine-alert-action" id="engine-alert-restart">Restart engine</span>';
124
+ _wireEngineRestartClick(document.getElementById('engine-alert-restart'));
125
+ el.style.display = 'flex';
126
+ }
127
+
128
+ function renderEngineAlert(state, staleMs) {
129
+ const el = document.getElementById('engine-alert');
130
+ if (!el) return;
131
+
132
+ // Not stale (running, stopped, …) — hide the alert. Restart state is nulled
133
+ // in renderEngineStatus only when state flips to 'running'.
134
+ if (state !== 'stale') {
135
+ el.style.display = 'none';
136
+ el.innerHTML = '';
137
+ return;
138
+ }
139
+
140
+ // Stale + within grace window → persistent success indicator.
141
+ if (_isEngineRestartGrace()) {
142
+ _renderEngineRestartSuccessBanner(el);
143
+ return;
144
+ }
145
+
146
+ // Stale + past grace + we previously restarted but didn't recover.
147
+ // Offer a retry banner instead of snapping back to the original alert,
148
+ // up to MAX_RETRIES. After that, fall back to the original alert.
149
+ if (_engineRestartState && _engineRestartState.retryCount < _ENGINE_RESTART_MAX_RETRIES) {
150
+ _renderEngineRestartRetryBanner(el);
151
+ return;
152
+ }
153
+
154
+ _renderEngineStaleBanner(el, staleMs);
155
+ }
156
+
70
157
  function renderAdoThrottleAlert(adoThrottle) {
71
158
  const el = document.getElementById('ado-throttle-alert');
72
159
  if (!el) return;
@@ -272,11 +359,18 @@ function renderVersionBanner(version) {
272
359
  const commitLabel = version.dashboardRunningCommit ? ' (' + version.dashboardRunningCommit + ')' : '';
273
360
  const warnStyle = 'font-size:9px;padding:2px 8px;background:rgba(210,153,34,0.15);border:1px solid rgba(210,153,34,0.3);border-radius:4px;color:var(--yellow);cursor:help';
274
361
 
275
- if (version.engineStale && version.dashboardStale) {
362
+ // During the post-restart grace window the old engine's reported codeVersion
363
+ // can still be in the cached payload — silently swallow the engineStale flag
364
+ // so the user doesn't see "Engine running v… disk has v…" right after a
365
+ // successful restart. The new engine clears the flag once it writes its
366
+ // first heartbeat. dashboardStale is unrelated and must still surface.
367
+ const engineStale = version.engineStale && !_isEngineRestartGrace();
368
+
369
+ if (engineStale && version.dashboardStale) {
276
370
  el.style.cssText = warnStyle;
277
371
  el.textContent = '\u26A0 Engine + Dashboard running old code. Run: minions restart';
278
372
  el.title = 'Both processes are running v' + (version.running || '?') + ' but disk has v' + (version.disk || '?');
279
- } else if (version.engineStale) {
373
+ } else if (engineStale) {
280
374
  el.style.cssText = warnStyle;
281
375
  el.textContent = '\u26A0 Engine running v' + (version.running || '?') + ' — disk has v' + (version.disk || '?') + '. Restart engine.';
282
376
  el.title = 'The engine process is running older code. Run: minions restart';
@@ -295,4 +389,4 @@ function renderVersionBanner(version) {
295
389
  }
296
390
  }
297
391
 
298
- window.MinionsDispatch = { renderEngineStatus, renderEngineAlert, renderAdoThrottleAlert, renderGhThrottleAlert, renderVersionBanner, renderDispatch, renderEngineLog, shortTime, showErrorDetails };
392
+ window.MinionsDispatch = { renderEngineStatus, renderEngineAlert, renderAdoThrottleAlert, renderGhThrottleAlert, renderVersionBanner, renderDispatch, renderEngineLog, shortTime, showErrorDetails, _isEngineRestartGrace, _shouldSuppressStaleSignals, _resetEngineRestartStateForTest };
package/dashboard.js CHANGED
@@ -1525,22 +1525,24 @@ function _ifNoneMatchHasEtag(headerValue, currentEtag) {
1525
1525
  return false;
1526
1526
  }
1527
1527
 
1528
- // mtime-based cache invalidation — skip full rebuild if no tracked files changed
1529
- const _mtimeTrackedFiles = () => {
1530
- const files = [
1531
- path.join(ENGINE_DIR, 'dispatch.json'),
1532
- path.join(ENGINE_DIR, 'control.json'),
1533
- path.join(ENGINE_DIR, 'log.json'),
1534
- path.join(ENGINE_DIR, 'metrics.json'),
1535
- ];
1536
- // Add per-project work-items.json
1537
- for (const p of PROJECTS) {
1538
- files.push(shared.projectWorkItemsPath(p));
1539
- }
1540
- // Central work-items.json
1541
- files.push(path.join(MINIONS_DIR, 'work-items.json'));
1542
- return files;
1543
- };
1528
+ // mtime-based cache invalidation (W-mpftp7na000td0f4).
1529
+ //
1530
+ // Engine and dashboard are independent processes; `invalidateStatusCache()`
1531
+ // lives in dashboard.js memory and is unreachable from engine code. The
1532
+ // fast-state TTL is 10 s, but we want sub-second visibility for engine-side
1533
+ // state flips (work-item pending→done, PR status changes, dispatch.json
1534
+ // mutations). The fix: every `getStatus()` call statSyncs a small set of
1535
+ // tracked files; if any mtime advanced since the last rebuild, fast-state
1536
+ // is rebuilt and `_statusCacheVersion` bumps (which busts the ETag, so the
1537
+ // next /api/status poll sees a 200 + fresh body instead of a 304).
1538
+ //
1539
+ // The tracked list lives in `engine/queries.js → getStatusFastStateMtimePaths()`
1540
+ // as a single source of truth — colocated with the read-side aggregation
1541
+ // layer that owns the rest of the fast-state assembly. Add a new tracked
1542
+ // file there (one line), NOT here. The dashboard side stays a thin
1543
+ // delegate so any module that contributes to `_buildStatusFastState()` can
1544
+ // register its mtime inputs in one place.
1545
+ const _mtimeTrackedFiles = () => queries.getStatusFastStateMtimePaths(CONFIG);
1544
1546
  let _lastMtimes = {}; // { filePath: mtimeMs }
1545
1547
 
1546
1548
  function _getMtimes() {
package/engine/cli.js CHANGED
@@ -448,6 +448,15 @@ const commands = {
448
448
  codeVersion,
449
449
  codeCommit
450
450
  }));
451
+ // W-mpg3bcp800075d4f — Prime control.heartbeat the moment this process
452
+ // owns control.json, BEFORE the dispatch-recovery / startupReconcile*
453
+ // chain. Those reconciles are synchronous and on a slow Windows host with
454
+ // many projects/worktrees can run for 15-40s; without an early prime the
455
+ // dashboard's 30s restart-grace window (dashboard/js/render-dispatch.js
456
+ // → renderEngineAlert) expires against the *previous* engine's heartbeat
457
+ // and the 'Engine heartbeat is stale' banner snaps back even though the
458
+ // new engine is healthy. The 15s setInterval below keeps it fresh.
459
+ writeHeartbeatNow();
451
460
  // Keep .minions-version in sync so `minions version` stays accurate after git pulls
452
461
  if (codeVersion) {
453
462
  try { fs.writeFileSync(path.join(shared.MINIONS_DIR, '.minions-version'), codeVersion); } catch {}
@@ -924,8 +933,11 @@ const commands = {
924
933
  // surfaced a healthy engine as crashed. We now write every 15s on a
925
934
  // dedicated interval — 8× headroom vs the 120s threshold even under
926
935
  // event-loop pressure, and orders of magnitude under TICK_TIMEOUT_MS so
927
- // a hung tick still looks distinct from a wedged event loop.
928
- writeHeartbeatNow(); // prime control.heartbeat immediately
936
+ // a hung tick still looks distinct from a wedged event loop. The first
937
+ // (priming) heartbeat write lives near the top of this handler — see the
938
+ // W-mpg3bcp800075d4f comment above mutateControl — so that the dashboard's
939
+ // 30s restart grace window sees a fresh heartbeat regardless of how long
940
+ // the boot reconcile chain takes.
929
941
  const heartbeatTimer = setInterval(writeHeartbeatNow, HEARTBEAT_INTERVAL_MS);
930
942
 
931
943
  // Fast poll: check steering every 1s (lightweight — just fs.stat per agent)
package/engine/queries.js CHANGED
@@ -1749,6 +1749,69 @@ function resetProjectGitStatusCache() {
1749
1749
  _projectGitStatusCache.clear();
1750
1750
  }
1751
1751
 
1752
+ /**
1753
+ * Files whose mtime must trigger a dashboard `_fastState` rebuild
1754
+ * (W-mpftp7na000td0f4). Single source of truth for the dashboard's
1755
+ * cache-invalidation tracker — adding a new fast-state-surfaced JSON file
1756
+ * is a **one-line change here**, next to the related fast-state getters.
1757
+ *
1758
+ * Contract for additions:
1759
+ * 1. The file must be read (directly or transitively) by something in
1760
+ * `dashboard._buildStatusFastState` (e.g. `getPullRequests`,
1761
+ * `getDispatchQueue`, `watchesMod.getWatches`). Files surfaced only
1762
+ * in `_buildStatusSlowState` (60 s TTL) should NOT be added — fast-
1763
+ * state mtime invalidation has no effect on slow state, and they
1764
+ * will silently no-op.
1765
+ * 2. The file must be mutated through a `mutate*` helper (so writes
1766
+ * actually advance mtime via `safeWrite`'s rename). Append-only logs
1767
+ * can also be added, but expect rebuilds on every line write.
1768
+ * 3. Per-project files must use `shared.projectPrPath` / `shared.
1769
+ * projectWorkItemsPath` so newly-added projects are picked up
1770
+ * automatically.
1771
+ *
1772
+ * Files intentionally NOT tracked:
1773
+ * - `engine/state.json` — surfaced via `getEngineState()` but changes
1774
+ * only on engine startup / reconcile. Negligible benefit.
1775
+ * - `engine/cooldowns.json`, `engine/pr-links.json`, `engine/pending-
1776
+ * rebases.json`, `agents/<id>/managed-spawn.json` — not in the
1777
+ * `/api/status` payload.
1778
+ * - `pinned.md`, `schedules`, `pipeline-runs.json`, `schedule-runs.json`,
1779
+ * PRD JSON — slow-state only.
1780
+ * - `meetings/` directory — dir mtime semantics are flaky on Windows
1781
+ * for file-content changes inside the dir; meeting transitions also
1782
+ * mutate work items, which already invalidate fast state.
1783
+ *
1784
+ * Performance: `_getMtimes()` in dashboard.js does `fs.statSync` per path
1785
+ * per `getStatus()` call. Roughly N=6 fixed paths + 2 per project today,
1786
+ * so a 5-project fleet runs 16 statSync calls per cache miss — bounded
1787
+ * and unmeasurable in benchmarks.
1788
+ */
1789
+ function getStatusFastStateMtimePaths(config) {
1790
+ const projects = getProjects(config || getConfig());
1791
+ const files = [
1792
+ // Engine-level state surfaced by getDispatchQueue / inline engine block /
1793
+ // getEngineLog / getMetrics.
1794
+ DISPATCH_PATH,
1795
+ CONTROL_PATH,
1796
+ LOG_PATH,
1797
+ path.join(ENGINE_DIR, 'metrics.json'),
1798
+ // Watches surfaced by watchesMod.getWatches() (W-mpftp7na000td0f4 fix).
1799
+ path.join(ENGINE_DIR, 'watches.json'),
1800
+ // Central work-items.json surfaced by getWorkItems().
1801
+ path.join(MINIONS_DIR, 'work-items.json'),
1802
+ ];
1803
+ // Per-project work-items (surfaced by getWorkItems) and pull-requests
1804
+ // (surfaced by getPullRequests). The PR file was the biggest miss in the
1805
+ // original tracked list — PR status flips (running → passing, waiting →
1806
+ // approved) were waiting on the 10 s SSE backstop instead of the next
1807
+ // 4 s SPA poll.
1808
+ for (const p of projects) {
1809
+ files.push(shared.projectWorkItemsPath(p));
1810
+ files.push(shared.projectPrPath(p));
1811
+ }
1812
+ return files;
1813
+ }
1814
+
1752
1815
  // ── Exports ─────────────────────────────────────────────────────────────────
1753
1816
 
1754
1817
  module.exports = {
@@ -1766,6 +1829,8 @@ module.exports = {
1766
1829
  getProjectGitStatus,
1767
1830
  warmProjectGitStatus,
1768
1831
  _awaitPendingProjectGitStatusProbes,
1832
+ // W-mpftp7na000td0f4 — engine→dashboard cache-invalidation registry
1833
+ getStatusFastStateMtimePaths,
1769
1834
 
1770
1835
  // Core state
1771
1836
  getConfig, getControl, getDispatch, getDispatchQueue, getDispatchCompletionReport, invalidateDispatchCache,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.2006",
3
+ "version": "0.1.2008",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"