@yemi33/minions 0.1.2053 → 0.1.2055
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/js/command-center.js +66 -3
- package/dashboard/js/settings.js +6 -0
- package/dashboard/styles.css +18 -2
- package/dashboard.js +98 -8
- package/engine/lifecycle.js +97 -0
- package/engine/shared.js +10 -2
- package/engine.js +10 -9
- package/package.json +1 -1
|
@@ -1064,7 +1064,18 @@ async function _ccDoSend(message, skipUserMsg, forceTabId, intentMetadata) {
|
|
|
1064
1064
|
activeTab._429retries = 0;
|
|
1065
1065
|
var errText = await res.text();
|
|
1066
1066
|
if (isReconnect && res.status === 409) return { interrupted: true, reconnectable: false, reason: errText || 'No live stream' };
|
|
1067
|
-
|
|
1067
|
+
// W-mpmwxni2000c25c7-d — try to parse the canonical error envelope from
|
|
1068
|
+
// the non-2xx body. Backend wraps every pre-stream error in
|
|
1069
|
+
// _buildCcErrorEnvelope, so when the JSON parses we surface
|
|
1070
|
+
// envelope.message / envelope.code; otherwise fall back to raw text.
|
|
1071
|
+
var ccEnvelope = null;
|
|
1072
|
+
try {
|
|
1073
|
+
var parsed = JSON.parse(errText);
|
|
1074
|
+
if (parsed && parsed.type === 'error' && typeof parsed.message === 'string') ccEnvelope = parsed;
|
|
1075
|
+
} catch (_e) { /* not JSON — keep raw text */ }
|
|
1076
|
+
var thrown = new Error((ccEnvelope && ccEnvelope.message) || errText || 'CC error');
|
|
1077
|
+
if (ccEnvelope) thrown._ccErrorEnvelope = ccEnvelope;
|
|
1078
|
+
throw thrown;
|
|
1068
1079
|
}
|
|
1069
1080
|
|
|
1070
1081
|
activeTab._429retries = 0;
|
|
@@ -1075,6 +1086,11 @@ async function _ccDoSend(message, skipUserMsg, forceTabId, intentMetadata) {
|
|
|
1075
1086
|
var decoder = new TextDecoder();
|
|
1076
1087
|
var buf = '';
|
|
1077
1088
|
var terminalEventSeen = false;
|
|
1089
|
+
// W-mpmwxni2000c25c7-d — SSE spec: `event:` lines name the event type for
|
|
1090
|
+
// the data lines that follow, reset on a blank line. Tracked so backend
|
|
1091
|
+
// `event: error` frames are recognized even by clients that don't read
|
|
1092
|
+
// `data.type` (and so the integration test can assert wire format).
|
|
1093
|
+
var pendingEventName = '';
|
|
1078
1094
|
|
|
1079
1095
|
async function _handleEvent(evt) {
|
|
1080
1096
|
if (evt.type === 'chunk') {
|
|
@@ -1148,7 +1164,23 @@ async function _ccDoSend(message, skipUserMsg, forceTabId, intentMetadata) {
|
|
|
1148
1164
|
} else if (evt.type === 'error') {
|
|
1149
1165
|
terminalEventSeen = true;
|
|
1150
1166
|
_cleanupStreamDiv();
|
|
1151
|
-
|
|
1167
|
+
// W-mpmwxni2000c25c7-d — render the typed error envelope as an
|
|
1168
|
+
// accessible red bubble (role=alert) with a Retry button. We honor
|
|
1169
|
+
// `evt.message` (canonical envelope) and fall back to `evt.error` for
|
|
1170
|
+
// any pre-envelope frames still in flight from older backends.
|
|
1171
|
+
var ccErrMsg = (typeof evt.message === 'string' && evt.message) ? evt.message
|
|
1172
|
+
: (typeof evt.error === 'string' && evt.error) ? evt.error
|
|
1173
|
+
: 'Command Center reported an unknown error.';
|
|
1174
|
+
var ccErrCode = typeof evt.code === 'string' ? evt.code : '';
|
|
1175
|
+
var ccRetry = _ccStoreRetryRequest(activeTab, activeTabId, message);
|
|
1176
|
+
var codeChip = ccErrCode
|
|
1177
|
+
? '<span style="display:inline-block;margin-left:6px;padding:1px 6px;font-size:9px;color:var(--muted);background:var(--surface2);border:1px solid var(--border);border-radius:3px;font-family:monospace">' + escHtml(ccErrCode) + '</span>'
|
|
1178
|
+
: '';
|
|
1179
|
+
var availList = Array.isArray(evt.availableModels) && evt.availableModels.length
|
|
1180
|
+
? '<div style="font-size:10px;color:var(--muted);margin-top:6px">Available models: ' + escHtml(evt.availableModels.slice(0, 8).join(', ')) + (evt.availableModels.length > 8 ? '…' : '') + '</div>'
|
|
1181
|
+
: '';
|
|
1182
|
+
var errorBubble = '<div class="cc-error" role="alert" aria-live="assertive" style="padding:8px 12px;background:rgba(220,80,80,0.08);border-left:3px solid var(--red);border-radius:4px;color:var(--red);font-size:12px"><strong>Error</strong>' + codeChip + '<div style="margin-top:4px;color:var(--text)">' + escHtml(ccErrMsg) + '</div>' + availList + '</div>';
|
|
1183
|
+
addMsg('assistant', errorBubble + _ccRetryControls(ccRetry, '', false), false, { retryId: ccRetry.id });
|
|
1152
1184
|
}
|
|
1153
1185
|
}
|
|
1154
1186
|
|
|
@@ -1160,6 +1192,13 @@ async function _ccDoSend(message, skipUserMsg, forceTabId, intentMetadata) {
|
|
|
1160
1192
|
buf = lines.pop();
|
|
1161
1193
|
for (var li = 0; li < lines.length; li++) {
|
|
1162
1194
|
var line = lines[li];
|
|
1195
|
+
// W-mpmwxni2000c25c7-d — track SSE `event:` lines per spec. The
|
|
1196
|
+
// event-name applies to the next data line and resets on a blank
|
|
1197
|
+
// line. The backend emits `event: error\ndata: {...}` for errors;
|
|
1198
|
+
// listeners that prefer event-typed dispatch see them as named
|
|
1199
|
+
// events rather than having to sniff `data.type`.
|
|
1200
|
+
if (line === '') { pendingEventName = ''; continue; }
|
|
1201
|
+
if (line.startsWith('event: ')) { pendingEventName = line.slice(7).trim(); continue; }
|
|
1163
1202
|
if (!line.startsWith('data: ')) continue;
|
|
1164
1203
|
// W-mpdavudb000v8446 — these used to swallow ALL errors via `catch {}`,
|
|
1165
1204
|
// hiding JSON.parse failures AND any DOM/render exception thrown by
|
|
@@ -1178,6 +1217,9 @@ async function _ccDoSend(message, skipUserMsg, forceTabId, intentMetadata) {
|
|
|
1178
1217
|
try { console.error('[cc-sse] parse-failed', { tab: activeTabId, len: rawJson.length, error: String(parseErr && parseErr.message || parseErr) }); } catch (_e) {}
|
|
1179
1218
|
continue;
|
|
1180
1219
|
}
|
|
1220
|
+
// If the server named the event but the payload didn't carry a `type`,
|
|
1221
|
+
// backfill from the event line so `_handleEvent` dispatch still works.
|
|
1222
|
+
if (pendingEventName && evt && typeof evt === 'object' && !evt.type) evt.type = pendingEventName;
|
|
1181
1223
|
try { await _handleEvent(evt); }
|
|
1182
1224
|
catch (handleErr) {
|
|
1183
1225
|
try { console.error('[cc-sse] handle-failed', { tab: activeTabId, type: evt && evt.type, error: String(handleErr && handleErr.message || handleErr), stack: handleErr && handleErr.stack }); } catch (_e) {}
|
|
@@ -1188,6 +1230,8 @@ async function _ccDoSend(message, skipUserMsg, forceTabId, intentMetadata) {
|
|
|
1188
1230
|
var remainingLines = buf.split('\n');
|
|
1189
1231
|
for (var ri = 0; ri < remainingLines.length; ri++) {
|
|
1190
1232
|
var rline = remainingLines[ri];
|
|
1233
|
+
if (rline === '') { pendingEventName = ''; continue; }
|
|
1234
|
+
if (rline.startsWith('event: ')) { pendingEventName = rline.slice(7).trim(); continue; }
|
|
1191
1235
|
if (!rline.startsWith('data: ')) continue;
|
|
1192
1236
|
var trailRaw = rline.slice(6);
|
|
1193
1237
|
var trailEvt;
|
|
@@ -1196,6 +1240,7 @@ async function _ccDoSend(message, skipUserMsg, forceTabId, intentMetadata) {
|
|
|
1196
1240
|
try { console.error('[cc-sse] parse-failed-trailing', { tab: activeTabId, len: trailRaw.length, error: String(parseErr && parseErr.message || parseErr) }); } catch (_e) {}
|
|
1197
1241
|
continue;
|
|
1198
1242
|
}
|
|
1243
|
+
if (pendingEventName && trailEvt && typeof trailEvt === 'object' && !trailEvt.type) trailEvt.type = pendingEventName;
|
|
1199
1244
|
try { await _handleEvent(trailEvt); }
|
|
1200
1245
|
catch (handleErr) {
|
|
1201
1246
|
try { console.error('[cc-sse] handle-failed-trailing', { tab: activeTabId, type: trailEvt && trailEvt.type, error: String(handleErr && handleErr.message || handleErr), stack: handleErr && handleErr.stack }); } catch (_e) {}
|
|
@@ -1265,8 +1310,26 @@ async function _ccDoSend(message, skipUserMsg, forceTabId, intentMetadata) {
|
|
|
1265
1310
|
: '<div style="font-size:10px;color:var(--muted);margin-top:4px">Dashboard connection lost. Restart Minions to reconnect.</div>';
|
|
1266
1311
|
}
|
|
1267
1312
|
var errorRetry = _ccStoreRetryRequest(activeTab, activeTabId, message);
|
|
1313
|
+
// W-mpmwxni2000c25c7-d — if the thrower attached a parsed CC error
|
|
1314
|
+
// envelope (non-2xx body with `{type:'error', message, code}` shape),
|
|
1315
|
+
// render the styled bubble + code chip + available-models hint to
|
|
1316
|
+
// match the SSE error path. Bare network errors keep the legacy red
|
|
1317
|
+
// "Error: <msg>" span so connection-loss UX is unchanged.
|
|
1318
|
+
var ccEnv = e && e._ccErrorEnvelope;
|
|
1319
|
+
var errorRendered;
|
|
1320
|
+
if (ccEnv) {
|
|
1321
|
+
var ccCodeChip = ccEnv.code
|
|
1322
|
+
? '<span style="display:inline-block;margin-left:6px;padding:1px 6px;font-size:9px;color:var(--muted);background:var(--surface2);border:1px solid var(--border);border-radius:3px;font-family:monospace">' + escHtml(ccEnv.code) + '</span>'
|
|
1323
|
+
: '';
|
|
1324
|
+
var ccAvail = Array.isArray(ccEnv.availableModels) && ccEnv.availableModels.length
|
|
1325
|
+
? '<div style="font-size:10px;color:var(--muted);margin-top:6px">Available models: ' + escHtml(ccEnv.availableModels.slice(0, 8).join(', ')) + (ccEnv.availableModels.length > 8 ? '…' : '') + '</div>'
|
|
1326
|
+
: '';
|
|
1327
|
+
errorRendered = '<div class="cc-error" role="alert" aria-live="assertive" style="padding:8px 12px;background:rgba(220,80,80,0.08);border-left:3px solid var(--red);border-radius:4px;color:var(--red);font-size:12px"><strong>Error</strong>' + ccCodeChip + '<div style="margin-top:4px;color:var(--text)">' + escHtml(ccEnv.message) + '</div>' + ccAvail + '</div>';
|
|
1328
|
+
} else {
|
|
1329
|
+
errorRendered = '<span style="color:var(--red)">Error: ' + escHtml(e.message) + '</span>';
|
|
1330
|
+
}
|
|
1268
1331
|
addMsg('assistant', (streamedText ? renderMd(streamedText) + _ccElapsedFooter('Stream interrupted after {seconds}s') : '') +
|
|
1269
|
-
|
|
1332
|
+
errorRendered +
|
|
1270
1333
|
_ccRetryControls(errorRetry, connectionHint, isNetworkError && (!dashboardHealth.reachable || dashboardHealth.restarted)), false, { retryId: errorRetry.id });
|
|
1271
1334
|
}
|
|
1272
1335
|
} finally {
|
package/dashboard/js/settings.js
CHANGED
|
@@ -130,6 +130,11 @@ async function openSettings() {
|
|
|
130
130
|
'<div style="font-size:9px;color:var(--muted);margin-top:1px">CC reasoning depth</div>' +
|
|
131
131
|
'</div>' +
|
|
132
132
|
'</div>' +
|
|
133
|
+
// W-mpmwxni2000c25c7-d — per-turn watchdog. Surfaced under CC overrides
|
|
134
|
+
// because it gates CC/doc-chat error visibility (not the agent fleet).
|
|
135
|
+
'<div style="display:grid;grid-template-columns:1fr;gap:8px;margin-top:8px">' +
|
|
136
|
+
settingsField('CC Turn Timeout', 'set-ccTurnTimeoutMs', e.ccTurnTimeoutMs || 300000, 'ms', 'Per-turn watchdog for CC + doc-chat. If no terminal SSE event arrives within this window the handler emits event: error with code: cc-turn-timeout, the spinner stops, and a Retry button is shown. Clamped to 10000–3600000 ms.') +
|
|
137
|
+
'</div>' +
|
|
133
138
|
'</details>' +
|
|
134
139
|
'</div>' +
|
|
135
140
|
'<h4>Agents</h4>' +
|
|
@@ -833,6 +838,7 @@ async function saveSettings() {
|
|
|
833
838
|
ccCli: (document.getElementById('set-ccCli')?.value ?? '').trim(),
|
|
834
839
|
ccModel: (document.getElementById('set-ccModel')?.value ?? '').trim(),
|
|
835
840
|
ccEffort: document.getElementById('set-ccEffort').value || null,
|
|
841
|
+
ccTurnTimeoutMs: document.getElementById('set-ccTurnTimeoutMs')?.value,
|
|
836
842
|
claudeBareMode: !!document.getElementById('set-claudeBareMode')?.checked,
|
|
837
843
|
claudeFallbackModel: (document.getElementById('set-claudeFallbackModel')?.value ?? '').trim(),
|
|
838
844
|
copilotFallbackModel: (document.getElementById('set-copilotFallbackModel')?.value ?? '').trim(),
|
package/dashboard/styles.css
CHANGED
|
@@ -715,8 +715,13 @@
|
|
|
715
715
|
vertical nav + per-tab pane. Search input filters control rows across all tabs
|
|
716
716
|
by data-search attribute. .modal.modal-wide is added by openSettings() so the
|
|
717
717
|
rail + content fit comfortably side-by-side. */
|
|
718
|
-
|
|
719
|
-
|
|
718
|
+
/* Lock the settings body so the dialog dimensions stay constant across tabs.
|
|
719
|
+
`overflow: hidden` on the body suppresses the inherited `.modal-body`
|
|
720
|
+
scroll-y; only `.settings-content` should ever show a scrollbar. The
|
|
721
|
+
layout is pinned to a single fixed height (was min/max range) so empty
|
|
722
|
+
tabs don't shrink the dialog and full tabs don't stretch it. */
|
|
723
|
+
.modal-body.settings-body { padding: 0; white-space: normal; font-size: var(--text-md); line-height: 1.45; color: var(--text); font-family: 'Segoe UI', system-ui, sans-serif; overflow: hidden; }
|
|
724
|
+
.settings-layout { display: flex; height: calc(80vh - 64px); }
|
|
720
725
|
.settings-rail { width: 220px; min-width: 220px; background: var(--surface2); border-right: 1px solid var(--border); display: flex; flex-direction: column; overflow: hidden; }
|
|
721
726
|
.settings-search-wrap { padding: var(--space-5) var(--space-5) var(--space-4); border-bottom: 1px solid var(--border); }
|
|
722
727
|
.settings-search { width: 100%; padding: var(--space-3) var(--space-4); background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius-sm); color: var(--text); font-size: var(--text-md); font-family: inherit; }
|
|
@@ -959,3 +964,14 @@
|
|
|
959
964
|
max-height: 160px; overflow: auto; white-space: pre;
|
|
960
965
|
}
|
|
961
966
|
.qa-artifact-log { max-width: 480px; }
|
|
967
|
+
|
|
968
|
+
/* W-mpmwxni2000c25c7-d - Command Center / doc-chat typed error bubble. */
|
|
969
|
+
/* Token-only styling so dark/light themes stay consistent; the inline */
|
|
970
|
+
/* styles emitted by command-center.js use the same vars and are kept */
|
|
971
|
+
/* for backward compat with existing render paths. */
|
|
972
|
+
.cc-error { padding: 8px 12px; background: rgba(248, 81, 73, 0.08);
|
|
973
|
+
border-left: 3px solid var(--red); border-radius: var(--radius-sm);
|
|
974
|
+
color: var(--red); font-size: var(--text-sm); }
|
|
975
|
+
.cc-error strong { color: var(--red); }
|
|
976
|
+
.cc-error code { font-family: monospace; font-size: var(--text-xs);
|
|
977
|
+
color: var(--muted); }
|
package/dashboard.js
CHANGED
|
@@ -2400,6 +2400,37 @@ const DOC_CHAT_TIMEOUT_MS = 60 * 60 * 1000;
|
|
|
2400
2400
|
// reconnect-replay protocol (dashboard.js:7048-7083).
|
|
2401
2401
|
const SSE_MAX_QUEUE_BYTES = 4 * 1024 * 1024; // 4 MB per-tab — conservative, tunable
|
|
2402
2402
|
const SSE_STUCK_KILL_MS = 30 * 1000; // 30s of continuous backpressure → res.destroy()
|
|
2403
|
+
|
|
2404
|
+
// W-mpmwxni2000c25c7-d — CC + doc-chat error envelope contract.
|
|
2405
|
+
// Canonical shape: `{ type: 'error', message, code, retryable, ...extra }`.
|
|
2406
|
+
// `code` is one of: 'model-unavailable', 'auth-failure', 'context-limit',
|
|
2407
|
+
// 'budget-exceeded', 'crash', 'cc-turn-timeout', 'worker-spawn-failed',
|
|
2408
|
+
// 'acp-handshake-failed', 'worker-died'. `retryable` tells the client whether
|
|
2409
|
+
// the same input has a chance of succeeding without operator intervention
|
|
2410
|
+
// (e.g. a transient overload is retryable; auth, budget, and timeout failures are not). Extra fields
|
|
2411
|
+
// (`availableModels`, `runtime`) are envelope-shape-stable so the client can
|
|
2412
|
+
// surface them without sniffing types.
|
|
2413
|
+
/**
 * Closed set of values allowed in the `code` field of a CC / doc-chat error
 * envelope. Frozen so the list cannot be mutated at runtime.
 */
const CC_ERROR_CODES = Object.freeze([
  'model-unavailable',
  'auth-failure',
  'context-limit',
  'budget-exceeded',
  'crash',
  'cc-turn-timeout',
  'worker-spawn-failed',
  'acp-handshake-failed',
  'worker-died',
]);

/**
 * Build the canonical error envelope
 * `{ type: 'error', message, code, retryable, ...extra }`.
 *
 * @param {object} [fields] - Envelope inputs; omitted entirely yields a generic crash envelope.
 * @param {*} [fields.message] - Error text. Coerced with `String()`; null,
 *   undefined, or an empty result becomes `'Unknown error'`.
 * @param {string} [fields.code] - One of `CC_ERROR_CODES`. Any other value
 *   (including a missing one) is normalized to `'crash'`.
 * @param {*} [fields.retryable] - Coerced to a boolean.
 * @param {...*} [fields.extra] - Any additional keys (e.g. `availableModels`,
 *   `runtime`) are copied onto the envelope after the canonical fields.
 * @returns {{type: 'error', message: string, code: string, retryable: boolean}}
 *   The envelope, plus any extra keys supplied.
 */
function _buildCcErrorEnvelope({ message, code, retryable, type: _ignoredType, ...extra } = {}) {
  // `type` is pulled out of the rest-spread and discarded: the extra keys are
  // spread last, so a caller-supplied `type` would otherwise overwrite the
  // canonical 'error' discriminator. Discarding it (rather than spreading
  // `extra` first) keeps the key order of the result unchanged.
  const normalizedCode = CC_ERROR_CODES.includes(code) ? code : 'crash';
  return {
    type: 'error',
    message: String(message == null ? '' : message) || 'Unknown error',
    code: normalizedCode,
    retryable: !!retryable,
    ...extra,
  };
}
|
|
2403
2434
|
function _releaseCCTab(tabId) { ccInFlightTabs.delete(tabId); ccInFlightAborts.delete(tabId); }
|
|
2404
2435
|
function _getCcLiveStream(tabId) {
|
|
2405
2436
|
return ccLiveStreams.get(tabId) || null;
|
|
@@ -6981,7 +7012,12 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
6981
7012
|
// heartbeat force-close pattern from the writeCcEvent closure
|
|
6982
7013
|
// (dashboard.js, search for SSE_MAX_QUEUE_BYTES).
|
|
6983
7014
|
try {
|
|
6984
|
-
|
|
7015
|
+
const type = payload && payload.type;
|
|
7016
|
+
// W-mpmwxni2000c25c7-d — mirror the writeCcEvent change so doc-chat
|
|
7017
|
+
// also emits `event: error` for terminal errors. Same back-compat:
|
|
7018
|
+
// the JSON still carries `type: 'error'` for data-line parsers.
|
|
7019
|
+
const eventLine = (type === 'error') ? 'event: error\n' : '';
|
|
7020
|
+
res.write(eventLine + 'data: ' + JSON.stringify(payload) + '\n\n');
|
|
6985
7021
|
return true;
|
|
6986
7022
|
} catch {
|
|
6987
7023
|
return false;
|
|
@@ -7117,9 +7153,11 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
7117
7153
|
if (!res.headersSent) {
|
|
7118
7154
|
res.statusCode = e.statusCode || 500;
|
|
7119
7155
|
res.setHeader('Content-Type', 'application/json');
|
|
7120
|
-
|
|
7156
|
+
// W-mpmwxni2000c25c7-d — non-SSE error path mirrors the envelope shape
|
|
7157
|
+
// so the frontend's non-2xx branch can render the same red bubble.
|
|
7158
|
+
try { res.end(JSON.stringify(_buildCcErrorEnvelope({ message: e.message, code: e.code || 'crash', retryable: false }))); } catch {}
|
|
7121
7159
|
} else {
|
|
7122
|
-
writeDocEvent({
|
|
7160
|
+
writeDocEvent(_buildCcErrorEnvelope({ message: e.message, code: e.code || 'crash', retryable: false }));
|
|
7123
7161
|
_docStreamEnded = true;
|
|
7124
7162
|
try { res.end(); } catch {}
|
|
7125
7163
|
}
|
|
@@ -8031,7 +8069,16 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
8031
8069
|
return false;
|
|
8032
8070
|
}
|
|
8033
8071
|
let wire;
|
|
8034
|
-
try {
|
|
8072
|
+
try {
|
|
8073
|
+
// W-mpmwxni2000c25c7-d — terminal error frames go out as `event: error`
|
|
8074
|
+
// so SSE consumers using addEventListener('error', …) and tests
|
|
8075
|
+
// matching the raw wire format can target them directly. The JSON
|
|
8076
|
+
// payload still carries `type: 'error'` so the existing
|
|
8077
|
+
// data-line parser (and any client code that only reads `data:`
|
|
8078
|
+
// lines) keeps working.
|
|
8079
|
+
const eventLine = (type === 'error') ? 'event: error\n' : '';
|
|
8080
|
+
wire = eventLine + 'data: ' + JSON.stringify(payload) + '\n\n';
|
|
8081
|
+
}
|
|
8035
8082
|
catch (err) {
|
|
8036
8083
|
_logFail('json-serialize-failed', { error: String((err && err.message) || err).slice(0, 200) });
|
|
8037
8084
|
return false;
|
|
@@ -8312,13 +8359,48 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
8312
8359
|
const streamModel = CONFIG.engine?.ccModel || shared.ENGINE_DEFAULTS.ccModel;
|
|
8313
8360
|
const streamEffort = CONFIG.engine?.ccEffort || shared.ENGINE_DEFAULTS.ccEffort;
|
|
8314
8361
|
const ccMaxTurns = CONFIG.engine?.ccMaxTurns || shared.ENGINE_DEFAULTS.ccMaxTurns;
|
|
8362
|
+
|
|
8363
|
+
// W-mpmwxni2000c25c7-d — preflight model check inside the streaming
|
|
8364
|
+
// path. ccCall() runs this guard for the non-stream surface; the SSE
|
|
8365
|
+
// handler historically skipped it because the legacy "errorClass:
|
|
8366
|
+
// unknown-model" envelope wasn't surfaced through writeCcEvent. Now
|
|
8367
|
+
// that we have a typed error envelope, fail fast with
|
|
8368
|
+
// `code: 'model-unavailable'` and the runtime-discovered catalog in
|
|
8369
|
+
// `availableModels` / `message` — saves the user one round-trip into
|
|
8370
|
+
// the CLI process that we already know cannot run the requested model.
|
|
8371
|
+
const preflightFailure = await _preflightModelCheck({
|
|
8372
|
+
model: streamModel, engineConfig: CONFIG.engine,
|
|
8373
|
+
});
|
|
8374
|
+
if (preflightFailure) {
|
|
8375
|
+
const known = (preflightFailure.errorMessage || '').match(/known:\s*([^)]+)/);
|
|
8376
|
+
const availableModels = known ? known[1].replace(/[…\u2026]\s*$/, '').split(',').map(s => s.trim()).filter(Boolean) : [];
|
|
8377
|
+
const envelope = _buildCcErrorEnvelope({
|
|
8378
|
+
message: preflightFailure.errorMessage,
|
|
8379
|
+
code: 'model-unavailable',
|
|
8380
|
+
retryable: false,
|
|
8381
|
+
runtime: preflightFailure.runtime || null,
|
|
8382
|
+
availableModels,
|
|
8383
|
+
});
|
|
8384
|
+
writeCcEvent(envelope);
|
|
8385
|
+
liveState.donePayload = envelope;
|
|
8386
|
+
_ccStreamEnded = true;
|
|
8387
|
+
if (liveState.endResponse) liveState.endResponse();
|
|
8388
|
+
_scheduleCcLiveCleanup(tabId);
|
|
8389
|
+
_logCcStreamEnd(_ccTelemetry, 'error-preflight-model-unavailable', { runtime: preflightFailure.runtime });
|
|
8390
|
+
return;
|
|
8391
|
+
}
|
|
8392
|
+
|
|
8315
8393
|
let toolUses = [];
|
|
8316
|
-
// W-mpmwxni2000c25c7-b — turn-level watchdog. Wraps the initial
|
|
8394
|
+
// W-mpmwxni2000c25c7-b/-d — turn-level watchdog. Wraps the initial
|
|
8317
8395
|
// _invokeCcStream PLUS the post-resume-fail retry so the wall clock
|
|
8318
8396
|
// covers the entire CC turn (not just one underlying LLM call). On
|
|
8319
8397
|
// expiry, whichever call is in flight is aborted; the watchdog
|
|
8320
8398
|
// resolves with a synthetic `{ error: { code: 'cc-turn-timeout' } }`
|
|
8321
|
-
// envelope so the SSE error path below kicks in.
|
|
8399
|
+
// envelope so the SSE error path below kicks in. The frontend
|
|
8400
|
+
// (dashboard/js/command-center.js) recognizes `cc-turn-timeout` as a
|
|
8401
|
+
// retryable typed error and offers a Retry affordance instead of
|
|
8402
|
+
// hanging the spinner. The per-turn cap is configurable via
|
|
8403
|
+
// `engine.ccTurnTimeoutMs` (Settings UI; clamped 10s..1h).
|
|
8322
8404
|
const turnTimeoutMs = _resolveCcTurnTimeoutMs();
|
|
8323
8405
|
const result = await withTimeout({
|
|
8324
8406
|
timeoutMs: turnTimeoutMs, label: 'command-center-stream',
|
|
@@ -8465,10 +8547,13 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
8465
8547
|
if (!res.headersSent) {
|
|
8466
8548
|
res.statusCode = e.statusCode || 500;
|
|
8467
8549
|
res.setHeader('Content-Type', 'application/json');
|
|
8468
|
-
|
|
8550
|
+
// W-mpmwxni2000c25c7-d — non-2xx response carries the same envelope
|
|
8551
|
+
// so the frontend's `if (!res.ok)` branch can render the red error
|
|
8552
|
+
// bubble with the same code/message/retryable surface.
|
|
8553
|
+
try { res.end(JSON.stringify(_buildCcErrorEnvelope({ message: e.message, code: e.code || 'crash', retryable: false }))); } catch {}
|
|
8469
8554
|
_logCcStreamEnd(_ccTelemetry, 'error-pre-stream', { error: (e && e.message ? e.message.slice(0, CC_LOG_ERROR_MAX_LEN) : 'unknown') });
|
|
8470
8555
|
} else {
|
|
8471
|
-
writeCcEvent({
|
|
8556
|
+
writeCcEvent(_buildCcErrorEnvelope({ message: e.message, code: e.code || 'crash', retryable: false }));
|
|
8472
8557
|
_ccStreamEnded = true; try { res.end(); } catch {}
|
|
8473
8558
|
_logCcStreamEnd(_ccTelemetry, 'error-mid-stream', { error: (e && e.message ? e.message.slice(0, CC_LOG_ERROR_MAX_LEN) : 'unknown') });
|
|
8474
8559
|
}
|
|
@@ -8858,6 +8943,11 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
8858
8943
|
prPollStatusEvery: [1], prPollCommentsEvery: [1],
|
|
8859
8944
|
agentBusyReassignMs: [0],
|
|
8860
8945
|
maxRetriesPerAgent: [1, 20],
|
|
8946
|
+
// W-mpmwxni2000c25c7-d — per-turn CC/doc-chat watchdog. Min 10s
|
|
8947
|
+
// (anything shorter would fire on legitimate first-token latency for
|
|
8948
|
+
// larger models); max 1h (matches CC_CALL_TIMEOUT_MS so the watchdog
|
|
8949
|
+
// never outlives the outer abort).
|
|
8950
|
+
ccTurnTimeoutMs: [10000, 3600000],
|
|
8861
8951
|
};
|
|
8862
8952
|
for (const [key, [min, max]] of Object.entries(numericFields)) {
|
|
8863
8953
|
if (e[key] !== undefined) {
|
package/engine/lifecycle.js
CHANGED
|
@@ -2067,6 +2067,90 @@ function updatePrAfterFix(pr, project, source, options = {}, legacyDispatchId =
|
|
|
2067
2067
|
delete next.fixedAt;
|
|
2068
2068
|
target.minionsReview = next;
|
|
2069
2069
|
};
|
|
2070
|
+
// W-mpoeirqx0007712a — Build-fix push verification. The agent may report
|
|
2071
|
+
// SUCCESS while the git push silently failed to advance the remote head
|
|
2072
|
+
// (stale-worktree push rejected non-fast-forward, agent ignores non-zero
|
|
2073
|
+
// `git push` exit, etc). detectPrFixBranchChange falls back to
|
|
2074
|
+
// local-head / worktree-diff evidence in those scenarios and returns
|
|
2075
|
+
// `changed: true` even though origin/<branch> never moved. Without a
|
|
2076
|
+
// guard here, the optimistic stamp + 10-min buildFixGracePeriod
|
|
2077
|
+
// suppresses re-dispatch against a still-failing build that was never
|
|
2078
|
+
// actually fixed (live repro: opg-microsoft/minions PR #57).
|
|
2079
|
+
//
|
|
2080
|
+
// Only `evidence: 'remote-head'` proves the push landed. For
|
|
2081
|
+
// BUILD_FAILURE with changed=true AND evidence explicitly set to one of
|
|
2082
|
+
// the unverified types, increment `_buildFixPushFailedCount`, write an
|
|
2083
|
+
// inbox alert, route through recordPrNoOpFixAttempt so the cause stays
|
|
2084
|
+
// unhandled, and never write `_buildFixPushedAt`. When the counter
|
|
2085
|
+
// reaches `engine.maxBuildFixRetries`, flip `_buildFixNeedsHumanRebase`
|
|
2086
|
+
// so the engine stops retrying.
|
|
2087
|
+
//
|
|
2088
|
+
// Note: callers that omit `branchChange.evidence` (legacy / tests
|
|
2089
|
+
// predating evidence plumbing) still hit the trusted-push path below to
|
|
2090
|
+
// preserve backward compatibility — only the explicitly unverified
|
|
2091
|
+
// evidence kinds trigger this guard.
|
|
2092
|
+
const _unverifiedPushEvidence = new Set(['local-head', 'worktree-diff']);
|
|
2093
|
+
if (cause === shared.PR_FIX_CAUSE.BUILD_FAILURE
|
|
2094
|
+
&& explicitlyChangedBranch
|
|
2095
|
+
&& options.branchChange?.changed === true
|
|
2096
|
+
&& _unverifiedPushEvidence.has(options.branchChange?.evidence)) {
|
|
2097
|
+
const maxRetries = options.config?.engine?.maxBuildFixRetries
|
|
2098
|
+
?? ENGINE_DEFAULTS.maxBuildFixRetries;
|
|
2099
|
+
target._buildFixPushFailedCount = (Number(target._buildFixPushFailedCount) || 0) + 1;
|
|
2100
|
+
const reachedCap = target._buildFixPushFailedCount >= maxRetries;
|
|
2101
|
+
if (reachedCap) {
|
|
2102
|
+
target._buildFixNeedsHumanRebase = ts();
|
|
2103
|
+
}
|
|
2104
|
+
const beforeHeadStr = String(options.branchChange?.beforeHead || '').slice(0, 40);
|
|
2105
|
+
const afterHeadStr = String(options.branchChange?.afterHead || '').slice(0, 40);
|
|
2106
|
+
const evidenceStr = String(options.branchChange?.evidence || 'unknown');
|
|
2107
|
+
try {
|
|
2108
|
+
const wiId = options.dispatchItem?.meta?.item?.id || null;
|
|
2109
|
+
const noteBody = `# Build-fix push not verified for ${pr.id}\n\n`
|
|
2110
|
+
+ `**PR:** ${pr.url || pr.id}\n`
|
|
2111
|
+
+ `**Branch:** ${pr.branch || '(unknown)'}\n`
|
|
2112
|
+
+ `**Cause:** build-failure\n`
|
|
2113
|
+
+ `**Pre-dispatch head:** ${beforeHeadStr || '(unknown)'}\n`
|
|
2114
|
+
+ `**Post-completion head (live):** ${afterHeadStr || '(unknown)'}\n`
|
|
2115
|
+
+ `**Branch-change evidence:** ${evidenceStr}\n`
|
|
2116
|
+
+ `**Attempt:** ${target._buildFixPushFailedCount}/${maxRetries}\n\n`
|
|
2117
|
+
+ (reachedCap
|
|
2118
|
+
? `⚠️ **Reached \`engine.maxBuildFixRetries\` (${maxRetries}).** PR flagged \`_buildFixNeedsHumanRebase\` — engine will stop auto-retrying. Likely root cause: worktree stale vs origin/master, push rejected non-fast-forward, or branch protection blocks the engine identity.\n`
|
|
2119
|
+
: `_Engine will re-dispatch on the next \`discoverFromPrs\` pass (counter < cap)._\n`)
|
|
2120
|
+
+ `\nThe agent reported SUCCESS but the remote head did not advance — the optimistic \`_buildFixPushedAt\` stamp was suppressed to avoid the ${(ENGINE_DEFAULTS.buildFixGracePeriod / 60000) | 0}-minute grace-period blackout.\n`;
|
|
2121
|
+
shared.writeToInbox(
|
|
2122
|
+
'engine',
|
|
2123
|
+
`build-fix-push-unverified-${pr.prNumber || pr.id}`,
|
|
2124
|
+
noteBody,
|
|
2125
|
+
null,
|
|
2126
|
+
{ wi: wiId, pr: pr.id, cause: shared.PR_FIX_CAUSE.BUILD_FAILURE }
|
|
2127
|
+
);
|
|
2128
|
+
} catch (err) {
|
|
2129
|
+
log('warn', `build-fix push-verify inbox alert for ${pr.id}: ${err.message}`);
|
|
2130
|
+
}
|
|
2131
|
+
// Route through the noop path so the cause stays unhandled, the noop
|
|
2132
|
+
// counter advances symmetrically with the genuine-noop case, and the
|
|
2133
|
+
// existing `delete target._buildFixPushedAt` cleanup (line ~2016) runs.
|
|
2134
|
+
const verifyBranchChange = {
|
|
2135
|
+
changed: false,
|
|
2136
|
+
beforeHead: options.branchChange?.beforeHead,
|
|
2137
|
+
afterHead: options.branchChange?.afterHead,
|
|
2138
|
+
evidence: 'push-unverified',
|
|
2139
|
+
};
|
|
2140
|
+
const noopReason = `build-fix push unverified (evidence: ${evidenceStr}); attempt ${target._buildFixPushFailedCount}/${maxRetries}${reachedCap ? ' — needs-human-rebase' : ''}`;
|
|
2141
|
+
const record = recordPrNoOpFixAttempt(target, cause, source, options.dispatchItem, verifyBranchChange, options.config, noopReason);
|
|
2142
|
+
result = {
|
|
2143
|
+
noOp: true,
|
|
2144
|
+
cause,
|
|
2145
|
+
paused: !!record.paused,
|
|
2146
|
+
count: record.count,
|
|
2147
|
+
pushUnverified: true,
|
|
2148
|
+
pushFailedCount: target._buildFixPushFailedCount,
|
|
2149
|
+
needsHumanRebase: reachedCap,
|
|
2150
|
+
};
|
|
2151
|
+
log('warn', `Updated ${pr.id} → build-fix push unverified (${target._buildFixPushFailedCount}/${maxRetries}, evidence=${evidenceStr})${reachedCap ? ' [needs-human-rebase]' : ''}; remote head ${beforeHeadStr.slice(0, 8)} did not advance — inbox alert written, cause left unhandled for re-dispatch`);
|
|
2152
|
+
return prs;
|
|
2153
|
+
}
|
|
2070
2154
|
if (explicitlyChangedBranch && options.branchChange?.changed === false) {
|
|
2071
2155
|
const record = recordPrNoOpFixAttempt(target, cause, source, options.dispatchItem, options.branchChange, options.config, options.noopReason);
|
|
2072
2156
|
result = { noOp: true, cause, paused: !!record.paused, count: record.count };
|
|
@@ -2086,6 +2170,19 @@ function updatePrAfterFix(pr, project, source, options = {}, legacyDispatchId =
|
|
|
2086
2170
|
return prs;
|
|
2087
2171
|
}
|
|
2088
2172
|
clearPrNoOpFixAttempt(target, cause);
|
|
2173
|
+
// W-mpoeirqx0007712a — verified-push stamping for BUILD_FAILURE. Reaching
|
|
2174
|
+
// this point with explicitlyChangedBranch=true means the unverified-push
|
|
2175
|
+
// guard above did NOT trigger, so either evidence === 'remote-head'
|
|
2176
|
+
// (live remote refs prove the branch advanced) OR no branchChange info
|
|
2177
|
+
// was supplied (legacy callers that didn't pass branchChange — keep
|
|
2178
|
+
// existing behavior of trusting the agent's branchChanged claim).
|
|
2179
|
+
// Clear the push-failure counter on confirmed success so future
|
|
2180
|
+
// regressions start fresh.
|
|
2181
|
+
if (cause === shared.PR_FIX_CAUSE.BUILD_FAILURE && explicitlyChangedBranch) {
|
|
2182
|
+
target._buildFixPushedAt = ts();
|
|
2183
|
+
delete target._buildFixPushFailedCount;
|
|
2184
|
+
delete target._buildFixNeedsHumanRebase;
|
|
2185
|
+
}
|
|
2089
2186
|
if (source === 'pr-human-feedback') {
|
|
2090
2187
|
const clearPendingFix = shouldClearHumanFeedbackPendingFix(target, pr, automationCauseKey);
|
|
2091
2188
|
if (target.humanFeedback && clearPendingFix) target.humanFeedback.pendingFix = false;
|
package/engine/shared.js
CHANGED
|
@@ -1800,7 +1800,15 @@ const ENGINE_DEFAULTS = {
|
|
|
1800
1800
|
logBufferSize: 50, // flush immediately when buffer exceeds this many entries
|
|
1801
1801
|
lockRetries: 0, // no retries — single 5s timeout window with 25ms polling (200 attempts) is sufficient; stale lock recovery at 60s handles crashes
|
|
1802
1802
|
lockRetryBackoffMs: 500, // base backoff between lock retries (doubles each attempt: 500ms, 1s, 2s, ...)
|
|
1803
|
-
buildFixGracePeriod: 600000, // 10min — wait for CI to run after build
|
|
1803
|
+
buildFixGracePeriod: 600000, // 10min — wait for CI to run after a verified build-fix push before re-dispatching
|
|
1804
|
+
// W-mpoeirqx0007712a: cap re-dispatch attempts when build-fix pushes
|
|
1805
|
+
// silently fail to advance the remote head (stale-worktree push rejected,
|
|
1806
|
+
// agent ignores non-zero git push exit and reports SUCCESS, etc.).
|
|
1807
|
+
// updatePrAfterFix increments `_buildFixPushFailedCount` whenever the
|
|
1808
|
+
// post-completion branchChange has non-remote-head evidence; when the
|
|
1809
|
+
// counter reaches this cap, the PR is flagged `_buildFixNeedsHumanRebase`
|
|
1810
|
+
// so the dispatcher stops auto-retrying and a human can rescue the branch.
|
|
1811
|
+
maxBuildFixRetries: 3,
|
|
1804
1812
|
adoPollEnabled: true, // poll ADO PR status, comments, and reconciliation on each tick cycle
|
|
1805
1813
|
ghPollEnabled: true, // poll GitHub PR status, comments, and reconciliation on each tick cycle
|
|
1806
1814
|
prPollStatusEvery: 12, // poll PR build/review/merge status every N ticks for both ADO and GitHub (~12 min at default interval)
|
|
@@ -1879,7 +1887,7 @@ const ENGINE_DEFAULTS = {
|
|
|
1879
1887
|
removeWorktreeFailureTtlMs: 24 * 60 * 60 * 1000, // stale failed paths are forgotten after a day
|
|
1880
1888
|
removeWorktreeFailureMaxEntries: 1000, // bound failed-worktree retry suppression cache
|
|
1881
1889
|
ccMaxTurns: 50, // max tool-use turns per CC/doc-chat call before CLI stops (per response, not per session)
|
|
1882
|
-
ccTurnTimeoutMs: 300000, // W-mpmwxni2000c25c7-b:
|
|
1890
|
+
ccTurnTimeoutMs: 300000, // W-mpmwxni2000c25c7-b/-d: 5min per-turn watchdog. Wall-clock cap per CC/doc-chat turn; on expiry the in-flight LLM call is aborted and the handler surfaces `{code:'cc-turn-timeout', retryable:true}` via the typed error envelope so the UI can stop the spinner and offer Retry. Clamped to [10000, 3600000] in the settings POST handler. Independent of CC_CALL_TIMEOUT_MS (the outer hour-long abort) — this is the visible-to-user no-progress cap.
|
|
1883
1891
|
docSessionMaxEntries: 200, // cap doc-chat session map/disk store by least-recent activity (LRU; sessions are non-expiring otherwise)
|
|
1884
1892
|
ccLiveStreamMaxAgeMs: 30 * 60 * 1000, // hard cap reconnect buffers if abort/cleanup stalls
|
|
1885
1893
|
metricsFlushIntervalMs: 10000, // batch trackEngineUsage writes to metrics.json — flushed every 10s instead of per-call to cut lock contention and dashboard mtime churn
|
package/engine.js
CHANGED
|
@@ -4929,15 +4929,16 @@ async function discoverFromPrs(config, project) {
|
|
|
4929
4929
|
}, `Fix build failure on ${pr.id}: ${pr.title || ''}`, { dispatchKey: key, cooldownKey: key, automationCauseKey: buildCauseKey, source: 'pr', pr, branch: prBranch, project: projMeta });
|
|
4930
4930
|
if (item) {
|
|
4931
4931
|
newWork.push(item); fixDispatched = true;
|
|
4932
|
-
|
|
4933
|
-
|
|
4934
|
-
|
|
4935
|
-
|
|
4936
|
-
|
|
4937
|
-
|
|
4938
|
-
|
|
4939
|
-
|
|
4940
|
-
|
|
4932
|
+
// W-mpoeirqx0007712a — DO NOT stamp `_buildFixPushedAt` at dispatch
|
|
4933
|
+
// time. The optimistic stamp here used to suppress re-dispatch for
|
|
4934
|
+
// the buildFixGracePeriod window even when the agent never pushed
|
|
4935
|
+
// (stale-worktree push silently rejected, agent reported SUCCESS
|
|
4936
|
+
// anyway). `_buildFixPushedAt` is now written only by
|
|
4937
|
+
// lifecycle.updatePrAfterFix after the post-completion branchChange
|
|
4938
|
+
// confirms the remote head actually advanced (evidence ===
|
|
4939
|
+
// 'remote-head'). In-flight dispatches are already deduplicated by
|
|
4940
|
+
// `isPrAutomationCausePending` + `isAlreadyDispatched` above, so no
|
|
4941
|
+
// race window opens by removing the optimistic stamp.
|
|
4941
4942
|
}
|
|
4942
4943
|
|
|
4943
4944
|
if (pr.agent && !pr._buildFailNotified) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.2053",
|
|
3
|
+
"version": "0.1.2055",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|