@yemi33/minions 0.1.2098 → 0.1.2100
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/js/settings.js +1 -1
- package/dashboard.js +111 -47
- package/docs/command-center.md +1 -1
- package/docs/copilot-cli-schema.md +17 -6
- package/docs/pr-review-fix-loop.md +18 -0
- package/engine/llm.js +57 -29
- package/engine/shared.js +23 -1
- package/package.json +1 -1
package/dashboard/js/settings.js
CHANGED
|
@@ -145,7 +145,7 @@ async function openSettings() {
|
|
|
145
145
|
// W-mpmwxni2000c25c7-d — per-turn watchdog. Surfaced under CC overrides
|
|
146
146
|
// because it gates CC/doc-chat error visibility (not the agent fleet).
|
|
147
147
|
'<div style="display:grid;grid-template-columns:1fr;gap:8px;margin-top:8px">' +
|
|
148
|
-
settingsField('CC Turn Timeout', 'set-ccTurnTimeoutMs', e.ccTurnTimeoutMs || 300000, 'ms', 'Per-turn watchdog for CC + doc-chat.
|
|
148
|
+
settingsField('CC Turn Timeout', 'set-ccTurnTimeoutMs', e.ccTurnTimeoutMs || 300000, 'ms', 'Per-turn no-progress watchdog for CC + doc-chat. Resets on every liveness signal (token chunk, tool call, tool update) so an actively-streaming turn — long shell command, deep search, sub-agent loop — survives indefinitely up to the outer 1h hard ceiling. Only true silence past this window fires event: error with code: cc-turn-timeout, stops the spinner, and offers Retry. Clamped to 10000–3600000 ms.') +
|
|
149
149
|
'</div>' +
|
|
150
150
|
'</details>' +
|
|
151
151
|
'</div>' +
|
package/dashboard.js
CHANGED
|
@@ -2566,7 +2566,19 @@ function _getCcLiveStream(tabId) {
|
|
|
2566
2566
|
return ccLiveStreams.get(tabId) || null;
|
|
2567
2567
|
}
|
|
2568
2568
|
function _touchCcLiveStream(state) {
|
|
2569
|
-
if (state)
|
|
2569
|
+
if (!state) return;
|
|
2570
|
+
state.updatedAt = Date.now();
|
|
2571
|
+
// W-mpmwxni2000c25c7-b — every CC streaming progress event (onChunk,
|
|
2572
|
+
// onToolUse, onToolUpdate in both _invokeCcStream paths) already routes
|
|
2573
|
+
// through here for the stall detector. Piggy-back the per-turn watchdog's
|
|
2574
|
+
// bumpTimer on the same heartbeat so a turn that's actively streaming
|
|
2575
|
+
// tokens or running long tools can't be killed by stale inactivity. The
|
|
2576
|
+
// watchdog installs `_bumpTimer` for the duration of the turn and removes
|
|
2577
|
+
// it in `finally`, so late progress callbacks delivered after resolution
|
|
2578
|
+
// are a no-op.
|
|
2579
|
+
if (typeof state._bumpTimer === 'function') {
|
|
2580
|
+
try { state._bumpTimer(); } catch { /* swallow */ }
|
|
2581
|
+
}
|
|
2570
2582
|
}
|
|
2571
2583
|
function _clearCcLiveTimers(tabId) {
|
|
2572
2584
|
const state = _getCcLiveStream(tabId);
|
|
@@ -4225,43 +4237,71 @@ function _buildDocChatErrorEnvelope(result) {
|
|
|
4225
4237
|
// with a doc-chat-shaped failure payload that flows through the existing
|
|
4226
4238
|
// _docChatFailureResponse / SSE error event paths. timeoutMs <= 0 disables
|
|
4227
4239
|
// the watchdog (passthrough).
|
|
4228
|
-
async function _raceCcDocChatTimeout(
|
|
4229
|
-
|
|
4240
|
+
async function _raceCcDocChatTimeout(callPromiseOrFactory, timeoutMs, abortFn, label) {
|
|
4241
|
+
// W-mpmwxni2000c25c7-b — no-progress semantics. Callers that want a
|
|
4242
|
+
// self-resetting watchdog pass `callPromiseOrFactory` as a function
|
|
4243
|
+
// `(bumpTimer) => Promise<result>`. The factory is invoked on the next
|
|
4244
|
+
// microtask, after the timer is armed, so any onChunk/onToolUse closures defined
|
|
4245
|
+
// inside can capture `bumpTimer` and reset the timer on each liveness signal.
|
|
4246
|
+
// Legacy callers that pass a pre-started Promise get the old wall-clock
|
|
4247
|
+
// behavior — kept for the non-streaming doc-chat path which has no
|
|
4248
|
+
// progress hooks to wire bump into.
|
|
4249
|
+
const isFactory = typeof callPromiseOrFactory === 'function';
|
|
4250
|
+
if (!timeoutMs || timeoutMs <= 0) {
|
|
4251
|
+
return isFactory ? callPromiseOrFactory(() => {}) : callPromiseOrFactory;
|
|
4252
|
+
}
|
|
4230
4253
|
let timer = null;
|
|
4231
4254
|
let timedOut = false;
|
|
4255
|
+
let done = false;
|
|
4256
|
+
let timeoutResolve = null;
|
|
4257
|
+
const fire = () => {
|
|
4258
|
+
if (done) return;
|
|
4259
|
+
timedOut = true;
|
|
4260
|
+
try { if (abortFn) abortFn(); } catch { /* swallow */ }
|
|
4261
|
+
if (timeoutResolve) timeoutResolve(null);
|
|
4262
|
+
};
|
|
4263
|
+
const bumpTimer = () => {
|
|
4264
|
+
if (timedOut || done) return;
|
|
4265
|
+
if (timer) clearTimeout(timer);
|
|
4266
|
+
timer = setTimeout(fire, timeoutMs);
|
|
4267
|
+
};
|
|
4232
4268
|
const timeoutPromise = new Promise((resolve) => {
|
|
4233
|
-
|
|
4234
|
-
|
|
4235
|
-
try { if (abortFn) abortFn(); } catch { /* swallow */ }
|
|
4236
|
-
resolve(null);
|
|
4237
|
-
}, timeoutMs);
|
|
4269
|
+
timeoutResolve = resolve;
|
|
4270
|
+
timer = setTimeout(fire, timeoutMs);
|
|
4238
4271
|
// NOTE: do NOT unref — Node would exit the event loop while awaiting the
|
|
4239
4272
|
// call promise (Promises don't keep the loop open; timers/I/O do). Cleared
|
|
4240
4273
|
// immediately on the success path below.
|
|
4241
4274
|
});
|
|
4242
|
-
|
|
4243
|
-
|
|
4244
|
-
|
|
4245
|
-
|
|
4246
|
-
|
|
4247
|
-
|
|
4248
|
-
|
|
4249
|
-
|
|
4250
|
-
|
|
4251
|
-
|
|
4252
|
-
|
|
4253
|
-
|
|
4254
|
-
|
|
4255
|
-
|
|
4256
|
-
|
|
4257
|
-
|
|
4258
|
-
|
|
4259
|
-
|
|
4260
|
-
|
|
4261
|
-
|
|
4262
|
-
|
|
4263
|
-
|
|
4264
|
-
|
|
4275
|
+
// Factory branch: invoked on a microtask with bumpTimer already in scope
|
|
4276
|
+
// before any progress event fires. Promise branch: legacy passthrough.
|
|
4277
|
+
const callPromise = isFactory
|
|
4278
|
+
? Promise.resolve().then(() => callPromiseOrFactory(bumpTimer))
|
|
4279
|
+
: callPromiseOrFactory;
|
|
4280
|
+
try {
|
|
4281
|
+
const winner = await Promise.race([callPromise, timeoutPromise]);
|
|
4282
|
+
if (!timedOut) return winner;
|
|
4283
|
+
// Drain the in-flight call so its cleanup runs before we hand back the
|
|
4284
|
+
// synthetic envelope.
|
|
4285
|
+
await callPromise.catch(() => null);
|
|
4286
|
+
const message = `${label || 'doc-chat'} turn timed out after ${timeoutMs}ms with no progress`;
|
|
4287
|
+
return {
|
|
4288
|
+
answer: 'Document chat request timed out — try again.',
|
|
4289
|
+
toolUses: [],
|
|
4290
|
+
error: {
|
|
4291
|
+
code: 'cc-turn-timeout',
|
|
4292
|
+
stderr: '',
|
|
4293
|
+
errorClass: 'cc-turn-timeout',
|
|
4294
|
+
errorMessage: message,
|
|
4295
|
+
runtime: null,
|
|
4296
|
+
typedCode: 'cc-turn-timeout',
|
|
4297
|
+
typedMessage: message,
|
|
4298
|
+
retriable: true,
|
|
4299
|
+
},
|
|
4300
|
+
};
|
|
4301
|
+
} finally {
|
|
4302
|
+
done = true;
|
|
4303
|
+
if (timer) { clearTimeout(timer); timer = null; }
|
|
4304
|
+
}
|
|
4265
4305
|
}
|
|
4266
4306
|
|
|
4267
4307
|
function _docChatFailureResponse(label, filePath, result, sessionPreserved = false) {
|
|
@@ -7104,12 +7144,17 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
7104
7144
|
const ccTurnId = 'cct-' + shared.uid();
|
|
7105
7145
|
const turnSystemPrompt = renderDocChatSystemPromptForTurn(ccTurnId);
|
|
7106
7146
|
|
|
7107
|
-
// W-mpmwxni2000c25c7-b — wall-clock turn watchdog. The
|
|
7108
|
-
//
|
|
7109
|
-
//
|
|
7147
|
+
// W-mpmwxni2000c25c7-b — wall-clock turn watchdog. The non-streaming
|
|
7148
|
+
// doc-chat path has no progress callbacks to wire `bumpTimer` into,
|
|
7149
|
+
// so this caller intentionally uses the legacy promise shape of
|
|
7150
|
+
// _raceCcDocChatTimeout (wall-clock cap, not no-progress). The
|
|
7151
|
+
// doc-chat call can internally spawn resume + fresh + final-retry LLM
|
|
7152
|
+
// calls; one wall-clock cap covers the whole turn so a runtime stuck
|
|
7110
7153
|
// mid-stream can't outlive ccTurnTimeoutMs. On expiry the watchdog
|
|
7111
|
-
// calls _docAbort (kills the in-flight CLI) and the synthesized
|
|
7112
|
-
// below flows through the existing _docChatFailureResponse
|
|
7154
|
+
// calls _docAbort (kills the in-flight CLI) and the synthesized
|
|
7155
|
+
// payload below flows through the existing _docChatFailureResponse
|
|
7156
|
+
// path. Streaming consumers (handleDocChatStream) get the proper
|
|
7157
|
+
// no-progress behavior via the factory shape.
|
|
7113
7158
|
const _docTurnTimeoutMs = _resolveCcTurnTimeoutMs();
|
|
7114
7159
|
const _docCallPromise = ccDocCall({
|
|
7115
7160
|
message: body.message, document: currentContent, title: body.title,
|
|
@@ -7248,25 +7293,27 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
7248
7293
|
const ccTurnId = 'cct-' + shared.uid();
|
|
7249
7294
|
const turnSystemPrompt = renderDocChatSystemPromptForTurn(ccTurnId);
|
|
7250
7295
|
|
|
7251
|
-
// W-mpmwxni2000c25c7-b —
|
|
7252
|
-
// non-stream handleDocChat path).
|
|
7253
|
-
//
|
|
7254
|
-
//
|
|
7296
|
+
// W-mpmwxni2000c25c7-b — no-progress turn watchdog (streaming counterpart of
|
|
7297
|
+
// the wall-clock cap in the non-stream handleDocChat path). The factory shape lets the
|
|
7298
|
+
// onChunk/onToolUse callbacks close over `bumpTimer` so each chunk
|
|
7299
|
+
// or tool event resets the timer; only true silence past
|
|
7300
|
+
// `_docTurnTimeoutMs` (no chunks, no tools) fires `_docAbort` and
|
|
7301
|
+
// surfaces the synthesized payload below through the SSE done frame
|
|
7302
|
+
// the client already expects with `error` set.
|
|
7255
7303
|
const _docTurnTimeoutMs = _resolveCcTurnTimeoutMs();
|
|
7256
|
-
const
|
|
7304
|
+
const _docStreamResult = await _raceCcDocChatTimeout((bumpTimer) => ccDocCallStreaming({
|
|
7257
7305
|
message: body.message, document: currentContent, title: body.title,
|
|
7258
7306
|
filePath: body.filePath, selection: body.selection, canEdit, isJson,
|
|
7259
7307
|
model: body.model || undefined,
|
|
7260
7308
|
freshSession: !!body.freshSession,
|
|
7261
7309
|
transcript: body.transcript,
|
|
7262
7310
|
onAbortReady: (abort) => { _docAbort = abort; },
|
|
7263
|
-
onChunk: (text) => { writeDocEvent({ type: 'chunk', text }); },
|
|
7264
|
-
onToolUse: (name, input) => { writeDocEvent({ type: 'tool', name, input: _lightToolInput(input) }); },
|
|
7265
|
-
onRetry: (attempt) => { writeDocEvent({ type: 'progress', attempt }); },
|
|
7311
|
+
onChunk: (text) => { bumpTimer(); writeDocEvent({ type: 'chunk', text }); },
|
|
7312
|
+
onToolUse: (name, input) => { bumpTimer(); writeDocEvent({ type: 'tool', name, input: _lightToolInput(input) }); },
|
|
7313
|
+
onRetry: (attempt) => { bumpTimer(); writeDocEvent({ type: 'progress', attempt }); },
|
|
7266
7314
|
systemPrompt: turnSystemPrompt,
|
|
7267
7315
|
turnId: ccTurnId,
|
|
7268
|
-
});
|
|
7269
|
-
const _docStreamResult = await _raceCcDocChatTimeout(_docStreamCallPromise, _docTurnTimeoutMs, () => _docAbort && _docAbort(), 'doc-chat-stream');
|
|
7316
|
+
}), _docTurnTimeoutMs, () => _docAbort && _docAbort(), 'doc-chat-stream');
|
|
7270
7317
|
let { answer, partial, warning, toolUses, error: ccError } = _docStreamResult;
|
|
7271
7318
|
const finalize = _finalizeDocChatEdit({
|
|
7272
7319
|
filePath: body.filePath, fullPath, isJson, canEdit,
|
|
@@ -8565,7 +8612,17 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
8565
8612
|
const turnTimeoutMs = _resolveCcTurnTimeoutMs();
|
|
8566
8613
|
const result = await withTimeout({
|
|
8567
8614
|
timeoutMs: turnTimeoutMs, label: 'command-center-stream',
|
|
8568
|
-
}, async (registerAbort) => {
|
|
8615
|
+
}, async (registerAbort, bumpTimer) => {
|
|
8616
|
+
// W-mpmwxni2000c25c7-b — no-progress semantics: install bumpTimer
|
|
8617
|
+
// on liveState so _touchCcLiveStream (called from every onChunk /
|
|
8618
|
+
// onToolUse / onToolUpdate in both _invokeCcStream paths) resets
|
|
8619
|
+
// the watchdog on every liveness signal. A turn that's actively
|
|
8620
|
+
// streaming tokens or running long tools survives indefinitely
|
|
8621
|
+
// up to the outer CC_CALL_TIMEOUT_MS ceiling. Cleared in
|
|
8622
|
+
// `finally` so a late progress event after resolution can't
|
|
8623
|
+
// re-arm against a stale abort target.
|
|
8624
|
+
liveState._bumpTimer = bumpTimer;
|
|
8625
|
+
try {
|
|
8569
8626
|
const llmPromise = _invokeCcStream({
|
|
8570
8627
|
prompt, sessionId, liveState, toolUses,
|
|
8571
8628
|
model: streamModel, effort: streamEffort, maxTurns: ccMaxTurns,
|
|
@@ -8614,6 +8671,13 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
8614
8671
|
}
|
|
8615
8672
|
}
|
|
8616
8673
|
return initial;
|
|
8674
|
+
} finally {
|
|
8675
|
+
// Only clear if this turn's bumpTimer is still installed —
|
|
8676
|
+
// belt-and-suspenders for any future code that might swap a
|
|
8677
|
+
// newer bumpTimer in mid-turn (shouldn't happen today, but
|
|
8678
|
+
// protects against silent overwrite).
|
|
8679
|
+
if (liveState._bumpTimer === bumpTimer) delete liveState._bumpTimer;
|
|
8680
|
+
}
|
|
8617
8681
|
});
|
|
8618
8682
|
if (result.missingRuntime) {
|
|
8619
8683
|
finishMissingRuntime(result, liveState);
|
package/docs/command-center.md
CHANGED
|
@@ -27,7 +27,7 @@ Canonical envelope (`_buildCcErrorEnvelope` in `dashboard.js`):
|
|
|
27
27
|
|
|
28
28
|
`code` is clamped to the allowlist (`CC_ERROR_CODES` constant); unknown codes collapse to `crash`. `retryable: true` is informational — there is **no auto-retry**; the dashboard always offers a manual Retry button instead. Auto-retrying these errors is a footgun because most are operator-fix categories (auth, budget, missing model) where re-spawning makes no progress.
|
|
29
29
|
|
|
30
|
-
**Watchdog (`engine.ccTurnTimeoutMs`, default 5 min, clamped 10s–1h).** Each turn arms a `setTimeout` that fires `event: error` with `code: 'cc-turn-timeout'`, aborts the in-flight LLM call, and ends the stream when no
|
|
30
|
+
**Watchdog (`engine.ccTurnTimeoutMs`, default 5 min, clamped 10s–1h).** Per-turn **no-progress** cap. Each turn arms a `setTimeout` that fires `event: error` with `code: 'cc-turn-timeout'`, aborts the in-flight LLM call, and ends the stream when no liveness signal arrives within the window. The timer **resets on every progress event** — token chunk, tool-call notification, tool-update — so a turn that's actively streaming or running long tools (e.g. `Bash` running `npm test`, `WebFetch`/`WebSearch` against slow targets, agent sub-dispatches) survives indefinitely up to the outer 1h `CC_CALL_TIMEOUT_MS` hard ceiling. Wired via `liveState._bumpTimer` (CC stream path) and the factory shape of `_raceCcDocChatTimeout` (doc-chat stream path); the non-streaming doc-chat path has no progress hooks and retains wall-clock semantics. Surfaced in Settings → CC overrides.
|
|
31
31
|
|
|
32
32
|
**No auto-retry policy.** The backend never re-spawns the LLM after an error envelope. The client never silently resends the user's turn. Retry is a single-click manual action — guards against silent budget burn on `budget-exceeded`, infinite loops on `auth-failure`, and accidental re-charges on `context-limit`. The 429 + reconnect paths (rate-limited fetch retry, SSE reconnect-after-disconnect) remain — those are transport-level, not error-envelope-level.
|
|
33
33
|
|
|
@@ -63,14 +63,22 @@ The `gh-copilot` extension is documented at
|
|
|
63
63
|
<https://docs.github.com/en/copilot/github-copilot-in-the-cli>. On this test
|
|
64
64
|
machine `gh extension list` returned empty, so this path was **not exercised
|
|
65
65
|
empirically**. The adapter contract still needs to support it for hosts without
|
|
66
|
-
the WinGet standalone install
|
|
66
|
+
the WinGet standalone install.
|
|
67
|
+
|
|
68
|
+
**Detection note (#2965):** `gh ≥ ~2.90` ships Copilot as a built-in preview
|
|
69
|
+
subcommand, NOT as an extension — `gh extension list` does not list it, and
|
|
70
|
+
`gh extension install github/gh-copilot` is rejected because the command is
|
|
71
|
+
already provided. The adapter therefore probes with `gh copilot --help` (with
|
|
72
|
+
`--no-update-notifier`); exit 0 means the subcommand is available via either
|
|
73
|
+
the modern built-in OR a legacy installed extension, and exit non-zero means
|
|
74
|
+
neither path is present. See `engine/runtimes/copilot.js#_findGhCopilotPath`.
|
|
67
75
|
|
|
68
76
|
```text
|
|
69
|
-
gh extension install github/gh-copilot
|
|
70
|
-
gh copilot --help
|
|
77
|
+
gh extension install github/gh-copilot # only needed for gh < 2.90
|
|
78
|
+
gh copilot --help # detection probe (built-in OR extension)
|
|
71
79
|
```
|
|
72
80
|
|
|
73
|
-
When falling back to the
|
|
81
|
+
When falling back to the gh-hosted form, the adapter must return:
|
|
74
82
|
|
|
75
83
|
```js
|
|
76
84
|
{ bin: '<path-to-gh.exe>', native: true, leadingArgs: ['copilot'] }
|
|
@@ -584,8 +592,11 @@ When implementing `engine/runtimes/copilot.js`:
|
|
|
584
592
|
2. `resolveBinary()`:
|
|
585
593
|
- PATH → standalone first; cache to `engine/copilot-caps.json` with
|
|
586
594
|
`{ copilotBin, copilotIsNative, leadingArgs: [] }`.
|
|
587
|
-
- `gh
|
|
588
|
-
`leadingArgs: ['copilot']`.
|
|
595
|
+
- `gh copilot --help` (exit 0) → fallback with
|
|
596
|
+
`leadingArgs: ['copilot']`. This covers BOTH the legacy `gh-copilot`
|
|
597
|
+
extension AND the modern built-in (gh ≥ ~2.90 ships `copilot` as a
|
|
598
|
+
built-in subcommand, not an extension — so `gh extension list` no longer
|
|
599
|
+
surfaces it; #2965). Mark the result as `bestEffort: true` so
|
|
589
600
|
preflight can warn.
|
|
590
601
|
- **Never** probe npm. Document this in the file header.
|
|
591
602
|
3. `buildArgs(opts)` always emits:
|
|
@@ -65,6 +65,22 @@ The engine does not cap review→fix cycles or build-fix attempts. Each trigger
|
|
|
65
65
|
- Routes to the PR author to resolve target-branch conflicts
|
|
66
66
|
- Runs after review, human, and build triggers; if any earlier trigger enqueued a fix for this PR, the conflict fix waits for a later discovery pass
|
|
67
67
|
|
|
68
|
+
### E. Paused causes (`_noOpFixes[cause].paused`)
|
|
69
|
+
|
|
70
|
+
Each fix cause (build-failure, review-feedback, human-feedback, merge-conflict, pr-fix) tracks repeated no-op outcomes in `target._noOpFixes[cause]`. When `recordPrNoOpFixAttempt` flips `paused` from false to true (typically after multiple no-op dispatches with the same evidence fingerprint), the engine:
|
|
71
|
+
|
|
72
|
+
- Writes a per-PR per-cause inbox alert with PR URL, branch, attempt count, before/after heads, the underlying reason, and three recovery paths.
|
|
73
|
+
- Enriches the PR record returned from `queries.getPullRequests()` with `_pausedCauses` (the list of currently-paused cause keys).
|
|
74
|
+
- Renders a red `_pausedCauses` chip per cause in the dashboard PR list (`dashboard/js/render-prs.js`).
|
|
75
|
+
|
|
76
|
+
A paused cause **suppresses further auto-dispatch for that cause** until cleared. Recovery paths (any one):
|
|
77
|
+
|
|
78
|
+
1. **Push a new SHA to the PR branch** — the next poll refreshes `pr.headSha`, the evidence fingerprint shifts, and `clearPrNoOpFixAttempt` runs on the next non-noop completion.
|
|
79
|
+
2. **Click the red chip on the dashboard** — once you confirm, `POST /api/pull-requests/clear-paused-cause` (`dashboard.js`, issue #2969) validates the cause against `shared.PR_FIX_CAUSE`, locates the PR via `mutatePullRequests` across project/central files, and calls `clearPrNoOpFixAttempt` to wipe the cause record.
|
|
80
|
+
3. **Direct API call** — `POST /api/pull-requests/clear-paused-cause` with `{ prId, cause }`.
|
|
81
|
+
|
|
82
|
+
The exported `recordPrNoOpFixAttempt` / `clearPrNoOpFixAttempt` helpers in `engine/lifecycle.js` are the only sanctioned entry points; the dispatch evaluator in `engine/lifecycle.js` (search `_noOpFixes[cause]`) gates re-dispatch on `record.paused === true`.
|
|
83
|
+
|
|
68
84
|
## 5. Fix completes
|
|
69
85
|
|
|
70
86
|
- `updatePrAfterFix()` (lifecycle.js) sets `reviewStatus = 'waiting'` + `fixedAt = ts()`
|
|
@@ -128,6 +144,8 @@ The engine does not cap review→fix cycles or build-fix attempts. Each trigger
|
|
|
128
144
|
| `lastReviewedAt` | `updatePrAfterReview()` | Prevents re-dispatch if reviewed |
|
|
129
145
|
| `minionsReview` | Post-completion hooks | `{ reviewer, reviewedAt, note, fixedAt }` |
|
|
130
146
|
| `humanFeedback` | `pollPrHumanComments()` | `{ pendingFix, feedbackContent, lastProcessedCommentDate }` |
|
|
147
|
+
| `_noOpFixes[cause]` | `recordPrNoOpFixAttempt()` | Per-cause record `{ count, paused, fingerprint, beforeHead, afterHead }` driving the issue #2969 pause loop |
|
|
148
|
+
| `_pausedCauses` | `queries.getPullRequests()` enrichment | Read-only list of currently-paused cause keys for UI rendering (computed from `_noOpFixes`) |
|
|
131
149
|
|
|
132
150
|
## Platform differences
|
|
133
151
|
|
package/engine/llm.js
CHANGED
|
@@ -913,60 +913,88 @@ function callLLMStreaming(promptText, sysPromptText, opts = {}) {
|
|
|
913
913
|
|
|
914
914
|
// ─── CC turn watchdog ────────────────────────────────────────────────────────
|
|
915
915
|
//
|
|
916
|
-
// W-mpmwxni2000c25c7-b —
|
|
916
|
+
// W-mpmwxni2000c25c7-b — no-progress cap for a single CC/doc-chat turn. CC turns
|
|
917
917
|
// are a higher-level concept than the per-LLM-call `timeout` opt: a turn can
|
|
918
918
|
// internally retry (resume → fresh → final retry) and each retry has its own
|
|
919
919
|
// per-call timer. Without a turn-level watchdog, a runtime stuck mid-stream
|
|
920
920
|
// (no exit, no chunks, no errors) leaves the SSE handler waiting for the
|
|
921
921
|
// per-call timer to fire and the user staring at the typing dots.
|
|
922
922
|
//
|
|
923
|
-
//
|
|
923
|
+
// Semantics: `timeoutMs` is a *no-progress* window, not a wall-clock cap.
|
|
924
|
+
// Callers reset it by invoking `bumpTimer()` whenever a liveness signal
|
|
925
|
+
// arrives (token chunk, tool-call notification, tool-update). A long but
|
|
926
|
+
// actively-streaming turn survives indefinitely up to the outer call-level
|
|
927
|
+
// hard ceiling (CC_CALL_TIMEOUT_MS, ~1h). Only true silence past `timeoutMs`
|
|
928
|
+
// — no chunks, no tool events, no exit — fires the cancel. Without
|
|
929
|
+
// `bumpTimer()` calls the behavior degrades to the old wall-clock cap.
|
|
930
|
+
//
|
|
931
|
+
// Usage: `result = await withCcTurnTimeout({ timeoutMs, label, onAbortReady },
|
|
932
|
+
// (registerAbort, bumpTimer) => callerThatReturnsResultPromise(registerAbort, bumpTimer))`.
|
|
924
933
|
// The caller plumbs `registerAbort(abortFn)` into every nested LLM call's
|
|
925
934
|
// `onAbortReady` so the watchdog can kill whichever attempt is in flight on
|
|
926
|
-
// expiry
|
|
935
|
+
// expiry, and invokes `bumpTimer()` from every progress callback so a still-
|
|
936
|
+
// productive turn doesn't get killed by stale inactivity. Returns the original
|
|
937
|
+
// result on success or a synthetic envelope
|
|
927
938
|
// `{ text:'', error:{ code:'cc-turn-timeout', retriable:true } }` on expiry.
|
|
928
939
|
async function withCcTurnTimeout({ timeoutMs, label = 'cc-turn', onAbortReady } = {}, callFn) {
|
|
929
|
-
if (!timeoutMs || timeoutMs <= 0) return callFn(onAbortReady || (() => {}));
|
|
940
|
+
if (!timeoutMs || timeoutMs <= 0) return callFn(onAbortReady || (() => {}), () => {});
|
|
930
941
|
let currentAbort = null;
|
|
931
942
|
let timedOut = false;
|
|
943
|
+
let done = false;
|
|
932
944
|
let timer = null;
|
|
945
|
+
let timeoutResolve = null;
|
|
933
946
|
const registerAbort = (abort) => {
|
|
934
947
|
currentAbort = abort;
|
|
935
948
|
if (onAbortReady) onAbortReady(abort);
|
|
936
949
|
};
|
|
937
|
-
const
|
|
950
|
+
const fire = () => {
|
|
951
|
+
if (done) return;
|
|
952
|
+
timedOut = true;
|
|
953
|
+
try { if (currentAbort) currentAbort(); } catch { /* swallow */ }
|
|
954
|
+
if (timeoutResolve) timeoutResolve(null);
|
|
955
|
+
};
|
|
956
|
+
// bumpTimer is a no-op after the turn settles (done=true) so any late
|
|
957
|
+
// progress callback that fires post-resolution (e.g. a queued onChunk
|
|
958
|
+
// delivered after the SSE finished) cannot re-arm the watchdog and
|
|
959
|
+
// accidentally cancel a *future* unrelated abort target via a stale
|
|
960
|
+
// currentAbort reference.
|
|
961
|
+
const bumpTimer = () => {
|
|
962
|
+
if (timedOut || done) return;
|
|
963
|
+
if (timer) clearTimeout(timer);
|
|
964
|
+
timer = setTimeout(fire, timeoutMs);
|
|
965
|
+
};
|
|
938
966
|
const timeoutPromise = new Promise((resolve) => {
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
try { if (currentAbort) currentAbort(); } catch { /* swallow */ }
|
|
942
|
-
resolve(null);
|
|
943
|
-
}, timeoutMs);
|
|
967
|
+
timeoutResolve = resolve;
|
|
968
|
+
timer = setTimeout(fire, timeoutMs);
|
|
944
969
|
// NOTE: do NOT unref this timer. If we did, Node would exit the event
|
|
945
970
|
// loop while waiting on the inflight promise (Promises themselves don't
|
|
946
971
|
// hold the loop open — only timers/I/O do). The race below clears the
|
|
947
972
|
// timer immediately on success, so a still-armed timer never leaks past
|
|
948
973
|
// the resolution.
|
|
949
974
|
});
|
|
950
|
-
const
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
return winner;
|
|
975
|
+
const inflight = Promise.resolve().then(() => callFn(registerAbort, bumpTimer));
|
|
976
|
+
try {
|
|
977
|
+
const winner = await Promise.race([inflight, timeoutPromise]);
|
|
978
|
+
if (!timedOut) return winner;
|
|
979
|
+
// Let the in-flight call settle so its cleanup (cleanupFiles/Dirs, kill
|
|
980
|
+
// sweeps) actually runs before we hand a synthetic envelope to the caller.
|
|
981
|
+
const settled = await inflight.catch((err) => ({
|
|
982
|
+
text: '', usage: null, sessionId: null, code: 1, stderr: String(err && err.message || err), raw: '', toolUses: [],
|
|
983
|
+
}));
|
|
984
|
+
const message = `CC turn ${label} timed out after ${timeoutMs}ms with no progress`;
|
|
985
|
+
return {
|
|
986
|
+
...settled,
|
|
987
|
+
text: '',
|
|
988
|
+
code: settled?.code || 1,
|
|
989
|
+
errorClass: 'cc-turn-timeout',
|
|
990
|
+
errorMessage: message,
|
|
991
|
+
error: { message, code: 'cc-turn-timeout', retriable: true },
|
|
992
|
+
ok: false,
|
|
993
|
+
};
|
|
994
|
+
} finally {
|
|
995
|
+
done = true;
|
|
996
|
+
if (timer) { clearTimeout(timer); timer = null; }
|
|
954
997
|
}
|
|
955
|
-
// Let the in-flight call settle so its cleanup (cleanupFiles/Dirs, kill
|
|
956
|
-
// sweeps) actually runs before we hand a synthetic envelope to the caller.
|
|
957
|
-
const settled = await inflight.catch((err) => ({
|
|
958
|
-
text: '', usage: null, sessionId: null, code: 1, stderr: String(err && err.message || err), raw: '', toolUses: [],
|
|
959
|
-
}));
|
|
960
|
-
const message = `CC turn ${label} timed out after ${timeoutMs}ms`;
|
|
961
|
-
return {
|
|
962
|
-
...settled,
|
|
963
|
-
text: '',
|
|
964
|
-
code: settled?.code || 1,
|
|
965
|
-
errorClass: 'cc-turn-timeout',
|
|
966
|
-
errorMessage: message,
|
|
967
|
-
error: { message, code: 'cc-turn-timeout', retriable: true },
|
|
968
|
-
ok: false,
|
|
969
|
-
};
|
|
970
998
|
}
|
|
971
999
|
|
|
972
1000
|
module.exports = {
|
package/engine/shared.js
CHANGED
|
@@ -2072,7 +2072,7 @@ const ENGINE_DEFAULTS = {
|
|
|
2072
2072
|
removeWorktreeFailureTtlMs: 24 * 60 * 60 * 1000, // stale failed paths are forgotten after a day
|
|
2073
2073
|
removeWorktreeFailureMaxEntries: 1000, // bound failed-worktree retry suppression cache
|
|
2074
2074
|
ccMaxTurns: 50, // max tool-use turns per CC/doc-chat call before CLI stops (per response, not per session)
|
|
2075
|
-
ccTurnTimeoutMs: 300000, // W-mpmwxni2000c25c7-b/-d: 5min per-turn watchdog.
|
|
2075
|
+
ccTurnTimeoutMs: 300000, // W-mpmwxni2000c25c7-b/-d: 5min per-turn no-progress watchdog. The window resets on every liveness signal — token chunk, tool-call notification, tool-update — so an actively-streaming CC/doc-chat turn (long shell command, deep search, sub-agent loop) survives indefinitely up to the outer CC_CALL_TIMEOUT_MS (~1h) ceiling. Only true silence past this window with no progress fires the cancel: the in-flight LLM call is aborted and the handler surfaces `{code:'cc-turn-timeout', retryable:true}` via the typed error envelope so the UI can stop the spinner and offer Retry. Clamped to [10000, 3600000] in the settings POST handler. Independent of CC_CALL_TIMEOUT_MS. Non-streaming doc-chat is the lone wall-clock exception (no progress hooks); see _raceCcDocChatTimeout in dashboard.js for the dual factory/promise shape.
|
|
2076
2076
|
docSessionMaxEntries: 200, // cap doc-chat session map/disk store by least-recent activity (LRU; sessions are non-expiring otherwise)
|
|
2077
2077
|
ccLiveStreamMaxAgeMs: 30 * 60 * 1000, // hard cap reconnect buffers if abort/cleanup stalls
|
|
2078
2078
|
metricsFlushIntervalMs: 10000, // batch trackEngineUsage writes to metrics.json — flushed every 10s instead of per-call to cut lock contention and dashboard mtime churn
|
|
@@ -5202,6 +5202,14 @@ function getPrFixAutomationCause({ dispatchKey = '', source = '', task = '' } =
|
|
|
5202
5202
|
return PR_FIX_CAUSE.UNKNOWN;
|
|
5203
5203
|
}
|
|
5204
5204
|
|
|
5205
|
+
// Source-branch head SHA, normalized across hosts. GitHub PRs carry
|
|
5206
|
+
// `headSha`/`headRefOid` (engine/github.js:718-742 keeps both in sync); ADO PRs
|
|
5207
|
+
// carry `_adoSourceCommit`/`headRefOid` (engine/ado.js:1083-1129) and a legacy
|
|
5208
|
+
// `_adoHeadCommit`. Mirrors engine/lifecycle.js:1849 getPrFixBaselineHead.
|
|
5209
|
+
function _prHeadSha(pr) {
|
|
5210
|
+
return String(pr?.headRefOid || pr?.headSha || pr?._adoSourceCommit || pr?._adoHeadCommit || '').trim();
|
|
5211
|
+
}
|
|
5212
|
+
|
|
5205
5213
|
function prFixEvidenceFingerprint(pr, cause = PR_FIX_CAUSE.UNKNOWN) {
|
|
5206
5214
|
const review = pr?.minionsReview || {};
|
|
5207
5215
|
const feedback = pr?.humanFeedback || {};
|
|
@@ -5214,6 +5222,13 @@ function prFixEvidenceFingerprint(pr, cause = PR_FIX_CAUSE.UNKNOWN) {
|
|
|
5214
5222
|
evidence.buildFailReason = pr?.buildFailReason || '';
|
|
5215
5223
|
evidence.buildErrorLog = pr?.buildErrorLog || '';
|
|
5216
5224
|
evidence.buildStatusDetail = pr?._buildStatusDetail || '';
|
|
5225
|
+
// #2979 — head SHA + lastPushedAt are the only fingerprint components that
|
|
5226
|
+
// change across a rebase + force-push. Without them, a no-op-fix pause was
|
|
5227
|
+
// sticky forever because failing pipeline name / fail reason / error log
|
|
5228
|
+
// are unchanged across the push. Existing paused records re-fingerprint on
|
|
5229
|
+
// the next poll and clear naturally when the head moves.
|
|
5230
|
+
evidence.headRefOid = _prHeadSha(pr);
|
|
5231
|
+
evidence.lastPushedAt = pr?.lastPushedAt || '';
|
|
5217
5232
|
} else if (cause === PR_FIX_CAUSE.MERGE_CONFLICT) {
|
|
5218
5233
|
evidence.mergeConflict = !!pr?._mergeConflict;
|
|
5219
5234
|
evidence.mergeStatus = pr?.mergeStatus || '';
|
|
@@ -5223,6 +5238,13 @@ function prFixEvidenceFingerprint(pr, cause = PR_FIX_CAUSE.UNKNOWN) {
|
|
|
5223
5238
|
evidence.lastReviewedAt = pr?.lastReviewedAt || '';
|
|
5224
5239
|
evidence.reviewedAt = review.reviewedAt || '';
|
|
5225
5240
|
evidence.reviewNote = review.note || pr?.reviewNote || '';
|
|
5241
|
+
// #2979 — same rationale as BUILD_FAILURE: review feedback fingerprints
|
|
5242
|
+
// were sticky across force-push because reviewStatus / reviewedAt /
|
|
5243
|
+
// reviewNote don't change when the author rebases. Adding the head SHA
|
|
5244
|
+
// gives REVIEW_FEEDBACK the same natural-unsticking property HUMAN_FEEDBACK
|
|
5245
|
+
// already has via lastProcessedCommentDate.
|
|
5246
|
+
evidence.headRefOid = _prHeadSha(pr);
|
|
5247
|
+
evidence.lastPushedAt = pr?.lastPushedAt || '';
|
|
5226
5248
|
}
|
|
5227
5249
|
return crypto.createHash('sha1').update(JSON.stringify(evidence)).digest('hex').slice(0, 16);
|
|
5228
5250
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.2100",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|