@yemi33/minions 0.1.2006 → 0.1.2007
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/js/render-dispatch.js +120 -26
- package/dashboard.js +18 -16
- package/engine/queries.js +65 -0
- package/package.json +1 -1
|
@@ -10,6 +10,32 @@ function _completedNext() { _completedPage++; refresh(); } // clamped in renderD
|
|
|
10
10
|
function _logPrev() { if (_logPage > 0) { _logPage--; refresh(); } }
|
|
11
11
|
function _logNext() { _logPage++; refresh(); } // clamped in renderEngineLog
|
|
12
12
|
|
|
13
|
+
// Engine restart grace state (W-mpfw3hgm001gc594). After the operator clicks
|
|
14
|
+
// "Restart engine" we suppress the STALE indicators for _ENGINE_RESTART_GRACE_MS
|
|
15
|
+
// while the new engine spawns and writes its first heartbeat. The state is
|
|
16
|
+
// module-scoped (not on window) so any re-render during the grace window can
|
|
17
|
+
// reconstruct the success indicator — earlier code mutated textContent on the
|
|
18
|
+
// click target, which got blown away on the next /api/status render.
|
|
19
|
+
const _ENGINE_RESTART_GRACE_MS = 30000;
|
|
20
|
+
const _ENGINE_RESTART_MAX_RETRIES = 3;
|
|
21
|
+
let _engineRestartState = null; // { restartedAt: number, pid: number|string, retryCount: number }
|
|
22
|
+
|
|
23
|
+
function _isEngineRestartGrace(now) {
|
|
24
|
+
const t = (typeof now === 'number') ? now : Date.now();
|
|
25
|
+
return !!(_engineRestartState && (t - _engineRestartState.restartedAt < _ENGINE_RESTART_GRACE_MS));
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
function _shouldSuppressStaleSignals(state, now) {
|
|
29
|
+
// The badge consults this helper; the alert and version warning call
|
|
30
|
+
// _isEngineRestartGrace() directly, so all three share one grace-window check.
|
|
31
|
+
if (state === 'running') return false; // engine recovered — show real state
|
|
32
|
+
return _isEngineRestartGrace(now);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
function _resetEngineRestartStateForTest() {
|
|
36
|
+
_engineRestartState = null;
|
|
37
|
+
}
|
|
38
|
+
|
|
13
39
|
function renderEngineStatus(engine) {
|
|
14
40
|
const badge = document.getElementById('engine-badge');
|
|
15
41
|
let state = engine?.state || 'stopped';
|
|
@@ -21,40 +47,48 @@ function renderEngineStatus(engine) {
|
|
|
21
47
|
if (staleMs > 120000) state = 'stale';
|
|
22
48
|
}
|
|
23
49
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
50
|
+
// Clear restart grace as soon as the engine reports a fresh heartbeat — the
|
|
51
|
+
// new engine has caught up, so STALE/restart banners should vanish.
|
|
52
|
+
if (state === 'running' && _engineRestartState) {
|
|
53
|
+
_engineRestartState = null;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
const suppressStale = _shouldSuppressStaleSignals(state);
|
|
57
|
+
let displayState = state;
|
|
58
|
+
if (state === 'stale' && suppressStale) displayState = 'restarting';
|
|
59
|
+
|
|
60
|
+
badge.className = 'engine-badge ' + (displayState === 'stale' ? 'stopped' : (displayState === 'restarting' ? 'paused' : displayState));
|
|
61
|
+
badge.textContent = displayState === 'stale' ? 'STALE' : displayState.toUpperCase();
|
|
62
|
+
badge.title = displayState === 'stale'
|
|
27
63
|
? 'Engine claims running but heartbeat is stale (>2min). It may have crashed. Run: node engine.js start'
|
|
28
|
-
:
|
|
64
|
+
: displayState === 'restarting'
|
|
65
|
+
? 'Engine restart in progress (PID ' + (_engineRestartState?.pid || '?') + ') — waiting for first heartbeat.'
|
|
66
|
+
: displayState === 'stopped' ? 'Engine is stopped. Run: node engine.js start' : '';
|
|
29
67
|
renderEngineAlert(state, staleMs);
|
|
30
68
|
}
|
|
31
69
|
|
|
32
|
-
function
|
|
33
|
-
|
|
34
|
-
if (!el) return;
|
|
35
|
-
if (state !== 'stale' || (window._engineRestartedAt && Date.now() - window._engineRestartedAt < 30000)) {
|
|
36
|
-
el.style.display = 'none';
|
|
37
|
-
el.innerHTML = '';
|
|
38
|
-
return;
|
|
39
|
-
}
|
|
40
|
-
const mins = Math.max(1, Math.round(staleMs / 60000));
|
|
41
|
-
el.innerHTML =
|
|
42
|
-
'<span class="engine-alert-msg">⚠️ Engine heartbeat is stale (' + mins + 'm old). Dispatch may be stuck.</span>' +
|
|
43
|
-
'<span class="engine-alert-action" id="engine-alert-restart">Restart engine</span>';
|
|
44
|
-
document.getElementById('engine-alert-restart').onclick = async function() {
|
|
70
|
+
function _wireEngineRestartClick(button) {
|
|
71
|
+
button.onclick = async function() {
|
|
45
72
|
this.classList.add('clicked');
|
|
46
73
|
this.textContent = 'Restarting...';
|
|
74
|
+
const priorRetries = _engineRestartState?.retryCount || 0;
|
|
47
75
|
try {
|
|
48
76
|
const res = await fetch('/api/engine/restart', { method: 'POST' });
|
|
49
77
|
const data = await res.json();
|
|
50
78
|
if (data.ok) {
|
|
51
|
-
|
|
52
|
-
this
|
|
53
|
-
|
|
79
|
+
// Persist restart state so re-renders during the grace window can
|
|
80
|
+
// reconstruct the success message even after this DOM node is gone.
|
|
81
|
+
_engineRestartState = {
|
|
82
|
+
restartedAt: Date.now(),
|
|
83
|
+
pid: data.pid,
|
|
84
|
+
retryCount: priorRetries + 1,
|
|
85
|
+
};
|
|
54
86
|
showToast('cmd-toast', 'Engine restarted — PID ' + data.pid, true);
|
|
55
|
-
//
|
|
56
|
-
|
|
57
|
-
|
|
87
|
+
// Trigger a refresh sooner than the 4s poll cadence so the success
|
|
88
|
+
// indicator surfaces quickly. The grace window itself (30s) plus
|
|
89
|
+
// ongoing 4s polling will catch the heartbeat advance whenever it
|
|
90
|
+
// lands; no fixed timeout assumption needed.
|
|
91
|
+
setTimeout(() => refresh(), 1500);
|
|
58
92
|
} else {
|
|
59
93
|
this.textContent = 'Failed: ' + (data.error || 'unknown');
|
|
60
94
|
this.classList.remove('clicked');
|
|
@@ -64,9 +98,62 @@ function renderEngineAlert(state, staleMs) {
|
|
|
64
98
|
this.classList.remove('clicked');
|
|
65
99
|
}
|
|
66
100
|
};
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
function _renderEngineRestartSuccessBanner(el) {
|
|
104
|
+
const pid = _engineRestartState?.pid || '?';
|
|
105
|
+
el.innerHTML =
|
|
106
|
+
'<span class="engine-alert-msg" style="color:var(--green)">✓ Engine restarted (PID ' + pid + ') — waiting for first heartbeat...</span>';
|
|
67
107
|
el.style.display = 'flex';
|
|
68
108
|
}
|
|
69
109
|
|
|
110
|
+
function _renderEngineRestartRetryBanner(el) {
|
|
111
|
+
const attempts = _engineRestartState?.retryCount || 0;
|
|
112
|
+
el.innerHTML =
|
|
113
|
+
'<span class="engine-alert-msg">⚠️ Engine restart didn\'t take — heartbeat still stale (attempt ' + attempts + ' of ' + _ENGINE_RESTART_MAX_RETRIES + ').</span>' +
|
|
114
|
+
'<span class="engine-alert-action" id="engine-alert-restart">Retry restart</span>';
|
|
115
|
+
_wireEngineRestartClick(document.getElementById('engine-alert-restart'));
|
|
116
|
+
el.style.display = 'flex';
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
function _renderEngineStaleBanner(el, staleMs) {
|
|
120
|
+
const mins = Math.max(1, Math.round(staleMs / 60000));
|
|
121
|
+
el.innerHTML =
|
|
122
|
+
'<span class="engine-alert-msg">⚠️ Engine heartbeat is stale (' + mins + 'm old). Dispatch may be stuck.</span>' +
|
|
123
|
+
'<span class="engine-alert-action" id="engine-alert-restart">Restart engine</span>';
|
|
124
|
+
_wireEngineRestartClick(document.getElementById('engine-alert-restart'));
|
|
125
|
+
el.style.display = 'flex';
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
function renderEngineAlert(state, staleMs) {
|
|
129
|
+
const el = document.getElementById('engine-alert');
|
|
130
|
+
if (!el) return;
|
|
131
|
+
|
|
132
|
+
// Any non-stale state — hide and empty the alert. (Restart state is nulled in
|
|
133
|
+
// renderEngineStatus only when state is 'running', not for other states.)
|
|
134
|
+
if (state !== 'stale') {
|
|
135
|
+
el.style.display = 'none';
|
|
136
|
+
el.innerHTML = '';
|
|
137
|
+
return;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// Stale + within grace window → persistent success indicator.
|
|
141
|
+
if (_isEngineRestartGrace()) {
|
|
142
|
+
_renderEngineRestartSuccessBanner(el);
|
|
143
|
+
return;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Stale + past grace + we previously restarted but didn't recover.
|
|
147
|
+
// Offer a retry banner instead of snapping back to the original alert,
|
|
148
|
+
// up to MAX_RETRIES. After that, fall back to the original alert.
|
|
149
|
+
if (_engineRestartState && _engineRestartState.retryCount < _ENGINE_RESTART_MAX_RETRIES) {
|
|
150
|
+
_renderEngineRestartRetryBanner(el);
|
|
151
|
+
return;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
_renderEngineStaleBanner(el, staleMs);
|
|
155
|
+
}
|
|
156
|
+
|
|
70
157
|
function renderAdoThrottleAlert(adoThrottle) {
|
|
71
158
|
const el = document.getElementById('ado-throttle-alert');
|
|
72
159
|
if (!el) return;
|
|
@@ -272,11 +359,18 @@ function renderVersionBanner(version) {
|
|
|
272
359
|
const commitLabel = version.dashboardRunningCommit ? ' (' + version.dashboardRunningCommit + ')' : '';
|
|
273
360
|
const warnStyle = 'font-size:9px;padding:2px 8px;background:rgba(210,153,34,0.15);border:1px solid rgba(210,153,34,0.3);border-radius:4px;color:var(--yellow);cursor:help';
|
|
274
361
|
|
|
275
|
-
|
|
362
|
+
// During the post-restart grace window the old engine's reported codeVersion
|
|
363
|
+
// can still be in the cached payload — silently swallow the engineStale flag
|
|
364
|
+
// so the user doesn't see "Engine running v… disk has v…" right after a
|
|
365
|
+
// successful restart. The new engine clears the flag once it writes its
|
|
366
|
+
// first heartbeat. dashboardStale is unrelated and must still surface.
|
|
367
|
+
const engineStale = version.engineStale && !_isEngineRestartGrace();
|
|
368
|
+
|
|
369
|
+
if (engineStale && version.dashboardStale) {
|
|
276
370
|
el.style.cssText = warnStyle;
|
|
277
371
|
el.textContent = '\u26A0 Engine + Dashboard running old code. Run: minions restart';
|
|
278
372
|
el.title = 'Both processes are running v' + (version.running || '?') + ' but disk has v' + (version.disk || '?');
|
|
279
|
-
} else if (
|
|
373
|
+
} else if (engineStale) {
|
|
280
374
|
el.style.cssText = warnStyle;
|
|
281
375
|
el.textContent = '\u26A0 Engine running v' + (version.running || '?') + ' — disk has v' + (version.disk || '?') + '. Restart engine.';
|
|
282
376
|
el.title = 'The engine process is running older code. Run: minions restart';
|
|
@@ -295,4 +389,4 @@ function renderVersionBanner(version) {
|
|
|
295
389
|
}
|
|
296
390
|
}
|
|
297
391
|
|
|
298
|
-
window.MinionsDispatch = { renderEngineStatus, renderEngineAlert, renderAdoThrottleAlert, renderGhThrottleAlert, renderVersionBanner, renderDispatch, renderEngineLog, shortTime, showErrorDetails };
|
|
392
|
+
window.MinionsDispatch = { renderEngineStatus, renderEngineAlert, renderAdoThrottleAlert, renderGhThrottleAlert, renderVersionBanner, renderDispatch, renderEngineLog, shortTime, showErrorDetails, _isEngineRestartGrace, _shouldSuppressStaleSignals, _resetEngineRestartStateForTest };
|
package/dashboard.js
CHANGED
|
@@ -1525,22 +1525,24 @@ function _ifNoneMatchHasEtag(headerValue, currentEtag) {
|
|
|
1525
1525
|
return false;
|
|
1526
1526
|
}
|
|
1527
1527
|
|
|
1528
|
-
// mtime-based cache invalidation
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1528
|
+
// mtime-based cache invalidation (W-mpftp7na000td0f4).
|
|
1529
|
+
//
|
|
1530
|
+
// Engine and dashboard are independent processes; `invalidateStatusCache()`
|
|
1531
|
+
// lives in dashboard.js memory and is unreachable from engine code. The
|
|
1532
|
+
// fast-state TTL is 10 s, but we want sub-second visibility for engine-side
|
|
1533
|
+
// state flips (work-item pending→done, PR status changes, dispatch.json
|
|
1534
|
+
// mutations). The fix: every `getStatus()` call statSyncs a small set of
|
|
1535
|
+
// tracked files; if any mtime advanced since the last rebuild, fast-state
|
|
1536
|
+
// is rebuilt and `_statusCacheVersion` bumps (which busts the ETag, so the
|
|
1537
|
+
// next /api/status poll sees a 200 + fresh body instead of a 304).
|
|
1538
|
+
//
|
|
1539
|
+
// The tracked list lives in `engine/queries.js → getStatusFastStateMtimePaths()`
|
|
1540
|
+
// as a single source of truth — colocated with the read-side aggregation
|
|
1541
|
+
// layer that owns the rest of the fast-state assembly. Add a new tracked
|
|
1542
|
+
// file there (one line), NOT here. The dashboard side stays a thin
|
|
1543
|
+
// delegate so any module that contributes to `_buildStatusFastState()` can
|
|
1544
|
+
// register its mtime inputs in one place.
|
|
1545
|
+
const _mtimeTrackedFiles = () => queries.getStatusFastStateMtimePaths(CONFIG);
|
|
1544
1546
|
let _lastMtimes = {}; // { filePath: mtimeMs }
|
|
1545
1547
|
|
|
1546
1548
|
function _getMtimes() {
|
package/engine/queries.js
CHANGED
|
@@ -1749,6 +1749,69 @@ function resetProjectGitStatusCache() {
|
|
|
1749
1749
|
_projectGitStatusCache.clear();
|
|
1750
1750
|
}
|
|
1751
1751
|
|
|
1752
|
+
/**
|
|
1753
|
+
* Files whose mtime must trigger a dashboard `_fastState` rebuild
|
|
1754
|
+
* (W-mpftp7na000td0f4). Single source of truth for the dashboard's
|
|
1755
|
+
* cache-invalidation tracker — adding a new fast-state-surfaced JSON file
|
|
1756
|
+
* is a **one-line change here**, next to the related fast-state getters.
|
|
1757
|
+
*
|
|
1758
|
+
* Contract for additions:
|
|
1759
|
+
* 1. The file must be read (directly or transitively) by something in
|
|
1760
|
+
* `dashboard._buildStatusFastState` (e.g. `getPullRequests`,
|
|
1761
|
+
* `getDispatchQueue`, `watchesMod.getWatches`). Files surfaced only
|
|
1762
|
+
* in `_buildStatusSlowState` (60 s TTL) should NOT be added — fast-
|
|
1763
|
+
* state mtime invalidation has no effect on slow state, and they
|
|
1764
|
+
* will silently no-op.
|
|
1765
|
+
* 2. The file must be mutated through a `mutate*` helper (so writes
|
|
1766
|
+
* actually advance mtime via `safeWrite`'s rename). Append-only logs
|
|
1767
|
+
* can also be added, but expect rebuilds on every line write.
|
|
1768
|
+
* 3. Per-project files must use `shared.projectPrPath` / `shared.
|
|
1769
|
+
* projectWorkItemsPath` so newly-added projects are picked up
|
|
1770
|
+
* automatically.
|
|
1771
|
+
*
|
|
1772
|
+
* Files intentionally NOT tracked:
|
|
1773
|
+
* - `engine/state.json` — surfaced via `getEngineState()` but changes
|
|
1774
|
+
* only on engine startup / reconcile. Negligible benefit.
|
|
1775
|
+
* - `engine/cooldowns.json`, `engine/pr-links.json`, `engine/pending-
|
|
1776
|
+
* rebases.json`, `agents/<id>/managed-spawn.json` — not in the
|
|
1777
|
+
* `/api/status` payload.
|
|
1778
|
+
* - `pinned.md`, `schedules`, `pipeline-runs.json`, `schedule-runs.json`,
|
|
1779
|
+
* PRD JSON — slow-state only.
|
|
1780
|
+
* - `meetings/` directory — dir mtime semantics are flaky on Windows
|
|
1781
|
+
* for file-content changes inside the dir; meeting transitions also
|
|
1782
|
+
* mutate work items, which already invalidate fast state.
|
|
1783
|
+
*
|
|
1784
|
+
* Performance: `_getMtimes()` in dashboard.js does `fs.statSync` per path
|
|
1785
|
+
* per `getStatus()` call. That is 6 fixed paths + 2 per project today,
|
|
1786
|
+
* so a 5-project fleet runs 16 statSync calls per cache miss — bounded
|
|
1787
|
+
* and unmeasurable in benchmarks.
|
|
1788
|
+
*/
|
|
1789
|
+
function getStatusFastStateMtimePaths(config) {
|
|
1790
|
+
const projects = getProjects(config || getConfig());
|
|
1791
|
+
const files = [
|
|
1792
|
+
// Engine-level state surfaced by getDispatchQueue / inline engine block /
|
|
1793
|
+
// getEngineLog / getMetrics.
|
|
1794
|
+
DISPATCH_PATH,
|
|
1795
|
+
CONTROL_PATH,
|
|
1796
|
+
LOG_PATH,
|
|
1797
|
+
path.join(ENGINE_DIR, 'metrics.json'),
|
|
1798
|
+
// Watches surfaced by watchesMod.getWatches() (W-mpftp7na000td0f4 fix).
|
|
1799
|
+
path.join(ENGINE_DIR, 'watches.json'),
|
|
1800
|
+
// Central work-items.json surfaced by getWorkItems().
|
|
1801
|
+
path.join(MINIONS_DIR, 'work-items.json'),
|
|
1802
|
+
];
|
|
1803
|
+
// Per-project work-items (surfaced by getWorkItems) and pull-requests
|
|
1804
|
+
// (surfaced by getPullRequests). The PR file was the biggest miss in the
|
|
1805
|
+
// original tracked list — PR status flips (running → passing, waiting →
|
|
1806
|
+
// approved) were waiting on the 10 s SSE backstop instead of the next
|
|
1807
|
+
// 4 s SPA poll.
|
|
1808
|
+
for (const p of projects) {
|
|
1809
|
+
files.push(shared.projectWorkItemsPath(p));
|
|
1810
|
+
files.push(shared.projectPrPath(p));
|
|
1811
|
+
}
|
|
1812
|
+
return files;
|
|
1813
|
+
}
|
|
1814
|
+
|
|
1752
1815
|
// ── Exports ─────────────────────────────────────────────────────────────────
|
|
1753
1816
|
|
|
1754
1817
|
module.exports = {
|
|
@@ -1766,6 +1829,8 @@ module.exports = {
|
|
|
1766
1829
|
getProjectGitStatus,
|
|
1767
1830
|
warmProjectGitStatus,
|
|
1768
1831
|
_awaitPendingProjectGitStatusProbes,
|
|
1832
|
+
// W-mpftp7na000td0f4 — engine→dashboard cache-invalidation registry
|
|
1833
|
+
getStatusFastStateMtimePaths,
|
|
1769
1834
|
|
|
1770
1835
|
// Core state
|
|
1771
1836
|
getConfig, getControl, getDispatch, getDispatchQueue, getDispatchCompletionReport, invalidateDispatchCache,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.2007",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|