agentxchain 2.151.0 → 2.153.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agentxchain.js +4 -0
- package/package.json +1 -1
- package/src/commands/resume.js +48 -1
- package/src/lib/continuous-run.js +271 -4
- package/src/lib/ghost-retry.js +447 -0
- package/src/lib/governed-state.js +177 -10
- package/src/lib/normalized-config.js +38 -0
- package/src/lib/run-events.js +3 -0
- package/src/lib/schemas/agentxchain-config.schema.json +28 -0
package/bin/agentxchain.js
CHANGED
|
@@ -753,6 +753,10 @@ program
|
|
|
753
753
|
.option('--triage-approval <mode>', 'Triage policy for vision-derived intents: auto or human (default: config or auto)')
|
|
754
754
|
.option('--max-idle-cycles <n>', 'Stop after N consecutive idle cycles with no derivable work (default: 3)', parseInt)
|
|
755
755
|
.option('--session-budget <usd>', 'Cumulative session-level budget cap in USD for continuous mode', parseFloat)
|
|
756
|
+
.option('--auto-retry-on-ghost', 'Enable bounded automatic retry for continuous-mode startup ghost turns')
|
|
757
|
+
.option('--no-auto-retry-on-ghost', 'Disable bounded automatic retry for continuous-mode startup ghost turns')
|
|
758
|
+
.option('--auto-retry-on-ghost-max-retries <n>', 'Maximum startup ghost retries per continuous run (default: config or 3)', parseInt)
|
|
759
|
+
.option('--auto-retry-on-ghost-cooldown-seconds <n>', 'Seconds to wait between startup ghost retries (default: config or 5)', parseInt)
|
|
756
760
|
.option('--auto-checkpoint', 'Auto-commit accepted writable turns after acceptance')
|
|
757
761
|
.option('--no-auto-checkpoint', 'Disable automatic checkpointing after accepted writable turns')
|
|
758
762
|
.action(runCommand);
|
package/package.json
CHANGED
package/src/commands/resume.js
CHANGED
|
@@ -94,6 +94,7 @@ export async function resumeCommand(opts) {
|
|
|
94
94
|
const activeTurns = getActiveTurns(state);
|
|
95
95
|
const resumeVia = opts?._via || 'resume';
|
|
96
96
|
const turnResumeVia = opts?._via || 'resume --turn';
|
|
97
|
+
let skipRetainedRedispatch = false;
|
|
97
98
|
|
|
98
99
|
if (state.status === 'active' && activeCount > 0) {
|
|
99
100
|
if (activeCount === 1) {
|
|
@@ -142,7 +143,53 @@ export async function resumeCommand(opts) {
|
|
|
142
143
|
// patched defensively) once the schema citation + migration citation are
|
|
143
144
|
// documented in code and the coverage matrix.
|
|
144
145
|
|
|
145
|
-
if (state.status === 'blocked' && activeCount > 0) {
|
|
146
|
+
if (state.status === 'blocked' && activeCount > 0 && resumeVia === 'operator_unblock') {
|
|
147
|
+
const reactivated = reactivateGovernedRun(root, state, { via: resumeVia, notificationConfig: config });
|
|
148
|
+
if (!reactivated.ok) {
|
|
149
|
+
console.log(chalk.red(`Failed to reactivate blocked run: ${reactivated.error}`));
|
|
150
|
+
process.exit(1);
|
|
151
|
+
}
|
|
152
|
+
state = reactivated.state;
|
|
153
|
+
console.log(chalk.green(`Resumed blocked run: ${state.run_id}`));
|
|
154
|
+
if (reactivated.migration_notice) {
|
|
155
|
+
console.log(chalk.yellow(reactivated.migration_notice));
|
|
156
|
+
}
|
|
157
|
+
if (reactivated.phantom_notice) {
|
|
158
|
+
console.log(chalk.yellow(reactivated.phantom_notice));
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
const phaseReconciliation = reconcilePhaseAdvanceBeforeDispatch(root, config, state, {
|
|
162
|
+
allow_active_turn_cleanup: true,
|
|
163
|
+
allow_standing_gate: true,
|
|
164
|
+
});
|
|
165
|
+
if (!phaseReconciliation.ok && !phaseReconciliation.state) {
|
|
166
|
+
console.log(chalk.red(`Failed to reconcile phase gate before dispatch: ${phaseReconciliation.error}`));
|
|
167
|
+
process.exit(1);
|
|
168
|
+
}
|
|
169
|
+
state = phaseReconciliation.state || state;
|
|
170
|
+
if (phaseReconciliation.advanced) {
|
|
171
|
+
console.log(chalk.green(`Advanced phase before dispatch: ${phaseReconciliation.from_phase} → ${phaseReconciliation.to_phase}`));
|
|
172
|
+
skipRetainedRedispatch = true;
|
|
173
|
+
} else {
|
|
174
|
+
markRunBlocked(root, {
|
|
175
|
+
category: 'needs_human',
|
|
176
|
+
blockedOn: state.blocked_on || 'human:unblock_reconcile_failed',
|
|
177
|
+
recovery: {
|
|
178
|
+
typed_reason: 'needs_human',
|
|
179
|
+
owner: 'human',
|
|
180
|
+
recovery_action: 'agentxchain approve-transition or agentxchain gate show <gate>',
|
|
181
|
+
turn_retained: true,
|
|
182
|
+
detail: 'Operator unblock resolved the escalation, but no phase transition could be materialized from the current gate state.',
|
|
183
|
+
},
|
|
184
|
+
turnId: opts.turn || null,
|
|
185
|
+
notificationConfig: config,
|
|
186
|
+
});
|
|
187
|
+
console.log(chalk.red('Unblock did not materialize a phase transition; leaving the run blocked for manual recovery.'));
|
|
188
|
+
process.exit(1);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
if (state.status === 'blocked' && activeCount > 0 && !skipRetainedRedispatch) {
|
|
146
193
|
let retainedTurn = null;
|
|
147
194
|
if (opts.turn) {
|
|
148
195
|
retainedTurn = activeTurns[opts.turn];
|
|
@@ -25,6 +25,14 @@ import {
|
|
|
25
25
|
import { loadProjectState } from './config.js';
|
|
26
26
|
import { safeWriteJson } from './safe-write.js';
|
|
27
27
|
import { emitRunEvent } from './run-events.js';
|
|
28
|
+
import { reissueTurn } from './governed-state.js';
|
|
29
|
+
import {
|
|
30
|
+
applyGhostRetryAttempt,
|
|
31
|
+
applyGhostRetryExhaustion,
|
|
32
|
+
buildGhostRetryDiagnosticBundle,
|
|
33
|
+
buildGhostRetryExhaustionMirror,
|
|
34
|
+
classifyGhostRetryDecision,
|
|
35
|
+
} from './ghost-retry.js';
|
|
28
36
|
import {
|
|
29
37
|
archiveStaleIntentsForRun,
|
|
30
38
|
formatLegacyIntentMigrationNotice,
|
|
@@ -127,6 +135,178 @@ function getBlockedCategory(state) {
|
|
|
127
135
|
return state?.blocked_reason?.category || null;
|
|
128
136
|
}
|
|
129
137
|
|
|
138
|
+
/**
 * Persist the governed run state to `<root>/.agentxchain/state.json`.
 *
 * @param {string} root - project root directory
 * @param {object} state - governed state object handed to `safeWriteJson`
 */
function writeGovernedState(root, state) {
  const statePath = join(root, '.agentxchain', 'state.json');
  safeWriteJson(statePath, state);
}
|
|
141
|
+
|
|
142
|
+
/**
 * Build an unblocked copy of the governed state after a ghost turn has been
 * reissued, persist it, and return it.
 *
 * The input state is not mutated: a shallow copy is made with `status` forced
 * to `'active'` and the blocker fields (`blocked_on`, `blocked_reason`,
 * `escalation`) reset to `null`.
 *
 * @param {string} root - project root directory
 * @param {object} state - governed state returned by the reissue step
 * @returns {object} the state object that was written
 */
function clearGhostBlockerAfterReissue(root, state) {
  const clearedBlockerFields = {
    status: 'active',
    blocked_on: null,
    blocked_reason: null,
    escalation: null,
  };
  const unblockedState = { ...state, ...clearedBlockerFields };
  writeGovernedState(root, unblockedState);
  return unblockedState;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Decide whether a blocked governed run is a retryable startup ghost and act
 * on that decision.
 *
 * The decision itself comes from `classifyGhostRetryDecision()`:
 *   - `'retry'`     → reissue the ghost turn, unblock the state, record the
 *                     attempt on the session and return a "running" result.
 *   - `'exhausted'` → mirror the exhaustion detail into the governed state,
 *                     pause the session, emit `ghost_retry_exhausted` and
 *                     return `null` so the caller falls through to its normal
 *                     blocked handling.
 *   - anything else → return `null` without side effects.
 *
 * @param {{root: string, config: object}} context
 * @param {object} session - continuous session; mutated in place and persisted
 * @param {object} contOpts - resolved continuous options (reads `autoRetryOnGhost`)
 * @param {object} blockedState - governed state observed as blocked
 * @param {Function} [log=console.log]
 * @returns {Promise<object|null>} step result when a retry was dispatched, else null
 */
async function maybeAutoRetryGhostBlocker(context, session, contOpts, blockedState, log = console.log) {
  const { root, config } = context;
  const decision = classifyGhostRetryDecision({
    state: blockedState,
    session,
    autoRetryOnGhost: contOpts.autoRetryOnGhost,
    runId: session.current_run_id || blockedState?.run_id || null,
  });

  switch (decision.decision) {
    case 'retry':
      return reissueGhostTurnForRetry(root, config, session, contOpts, blockedState, decision, log);
    case 'exhausted':
      return pauseOnGhostRetryExhaustion(root, session, blockedState, decision, log);
    default:
      return null;
  }
}

/**
 * Retry lane: reissue the ghost turn and move both governed state and the
 * continuous session back to a running shape. Returns `null` when the reissue
 * itself fails.
 */
async function reissueGhostTurnForRetry(root, config, session, contOpts, blockedState, decision, log) {
  const oldTurnId = decision.ghost.turn_id;
  const oldTurn = blockedState?.active_turns?.[oldTurnId] || {};

  const reissued = reissueTurn(root, config, { turnId: oldTurnId, reason: 'auto_retry_ghost' });
  if (!reissued.ok) {
    log(`Ghost auto-retry skipped: ${reissued.error}`);
    return null;
  }

  const { newTurn } = reissued;
  const runId = session.current_run_id || blockedState?.run_id || reissued.state?.run_id || null;
  const attempt = decision.attempts + 1;
  const nowIso = new Date().toISOString();
  const nextState = clearGhostBlockerAfterReissue(root, reissued.state);

  // Slice 2c: runtime/role/timing fields feed the fingerprint log so later
  // invocations can detect a same-signature repeat and stop early.
  const runtimeId = oldTurn.runtime_id || newTurn.runtime_id || null;
  const roleId = oldTurn.assigned_role || newTurn.assigned_role || null;
  const runningMs = oldTurn.failed_start_running_ms ?? null;
  const thresholdMs = oldTurn.failed_start_threshold_ms ?? null;

  const nextSession = applyGhostRetryAttempt(session, {
    runId,
    oldTurnId,
    newTurnId: newTurn.turn_id,
    failureType: decision.ghost.failure_type,
    maxRetries: decision.maxRetries,
    nowIso,
    runtimeId,
    roleId,
    runningMs,
    thresholdMs,
  });
  Object.assign(session, nextSession, { status: 'running', current_run_id: runId });
  writeContinuousSession(root, session);

  emitRunEvent(root, 'auto_retried_ghost', {
    run_id: runId,
    phase: nextState.phase || blockedState?.phase || null,
    status: 'active',
    turn: { turn_id: newTurn.turn_id, role_id: newTurn.assigned_role },
    intent_id: oldTurn.intake_context?.intent_id || null,
    payload: {
      old_turn_id: oldTurnId,
      new_turn_id: newTurn.turn_id,
      failure_type: decision.ghost.failure_type,
      attempt,
      max_retries_per_run: decision.maxRetries,
      runtime_id: runtimeId,
      running_ms: runningMs,
      threshold_ms: thresholdMs,
    },
  });

  log(`Ghost turn auto-retried (${attempt}/${decision.maxRetries}): ${oldTurnId} -> ${newTurn.turn_id}`);

  // Optional pause before handing control back to the loop.
  const cooldownSeconds = contOpts.autoRetryOnGhost?.cooldownSeconds ?? 0;
  if (cooldownSeconds > 0) {
    await new Promise((resolve) => setTimeout(resolve, cooldownSeconds * 1000));
  }

  return {
    ok: true,
    status: 'running',
    action: 'auto_retried_ghost',
    run_id: runId,
    old_turn_id: oldTurnId,
    new_turn_id: newTurn.turn_id,
    attempt,
    max_retries_per_run: decision.maxRetries,
  };
}

/**
 * Exhaustion lane: write the exhaustion detail into the governed state's
 * `blocked_reason.recovery.detail`, pause the session and emit
 * `ghost_retry_exhausted`. Always returns `null`.
 */
function pauseOnGhostRetryExhaustion(root, session, blockedState, decision, log) {
  const runId = session.current_run_id || blockedState?.run_id || null;
  const oldTurnId = decision.ghost.turn_id;
  const oldTurn = blockedState?.active_turns?.[oldTurnId] || {};
  const priorRecovery = blockedState?.blocked_reason?.recovery;
  const manualDetail = priorRecovery?.detail || priorRecovery?.recovery_action || null;

  // Slice 2c: the diagnostic bundle is built from the session; signatureRepeat
  // is passed into the mirror and the event so raw budget exhaustion can be
  // told apart from a same-signature early stop.
  const diagnosticBundle = buildGhostRetryDiagnosticBundle(session);
  const signatureRepeat = decision.signatureRepeat || null;
  const detail = buildGhostRetryExhaustionMirror({
    attempts: decision.attempts,
    maxRetries: decision.maxRetries,
    failureType: decision.ghost.failure_type,
    manualRecoveryDetail: manualDetail,
    signatureRepeat,
  });

  const mirroredRecovery = { ...(blockedState.blocked_reason?.recovery || {}), detail };
  const mirroredReason = { ...(blockedState.blocked_reason || {}), recovery: mirroredRecovery };
  writeGovernedState(root, { ...blockedState, blocked_reason: mirroredReason });

  const nextSession = applyGhostRetryExhaustion(session, {
    runId,
    failureType: decision.ghost.failure_type,
    turnId: oldTurnId,
    maxRetries: decision.maxRetries,
    nowIso: new Date().toISOString(),
  });
  Object.assign(session, nextSession, { status: 'paused' });
  writeContinuousSession(root, session);

  emitRunEvent(root, 'ghost_retry_exhausted', {
    run_id: runId,
    phase: blockedState?.phase || null,
    status: 'blocked',
    turn: { turn_id: oldTurnId, role_id: oldTurn.assigned_role || null },
    intent_id: oldTurn.intake_context?.intent_id || null,
    payload: {
      turn_id: oldTurnId,
      attempts: decision.attempts,
      max_retries_per_run: decision.maxRetries,
      failure_type: decision.ghost.failure_type,
      runtime_id: oldTurn.runtime_id || null,
      exhaustion_reason: signatureRepeat ? 'same_signature_repeat' : 'retry_budget_exhausted',
      signature_repeat: signatureRepeat,
      diagnostic_bundle: diagnosticBundle,
      diagnostic_refs: {
        recovery_action: blockedState?.blocked_reason?.recovery?.recovery_action || null,
      },
    },
  });

  let tag;
  if (signatureRepeat) {
    tag = `same_signature_repeat [${signatureRepeat.signature}] after ${signatureRepeat.consecutive} attempts`;
  } else {
    tag = `${decision.attempts}/${decision.maxRetries}`;
  }
  log(`Ghost auto-retry exhausted (${tag}) for ${oldTurnId}.`);
  return null;
}
|
|
309
|
+
|
|
130
310
|
// ---------------------------------------------------------------------------
|
|
131
311
|
// Intake queue check
|
|
132
312
|
// ---------------------------------------------------------------------------
|
|
@@ -301,6 +481,11 @@ export function seedFromVision(root, visionPath, options = {}) {
|
|
|
301
481
|
|
|
302
482
|
export function resolveContinuousOptions(opts, config) {
|
|
303
483
|
const configCont = config?.run_loop?.continuous || {};
|
|
484
|
+
const configGhostRetry = configCont.auto_retry_on_ghost || {};
|
|
485
|
+
const explicitConfigGhostEnabled = Object.prototype.hasOwnProperty.call(configGhostRetry, 'enabled');
|
|
486
|
+
const fullAutoGhostDefault = Boolean((opts.continuous ?? configCont.enabled ?? false) && isFullAutoApprovalPolicy(config));
|
|
487
|
+
const resolvedGhostEnabled = opts.autoRetryOnGhost
|
|
488
|
+
?? (explicitConfigGhostEnabled ? configGhostRetry.enabled : fullAutoGhostDefault);
|
|
304
489
|
|
|
305
490
|
return {
|
|
306
491
|
enabled: opts.continuous ?? configCont.enabled ?? false,
|
|
@@ -313,9 +498,25 @@ export function resolveContinuousOptions(opts, config) {
|
|
|
313
498
|
cooldownSeconds: opts.cooldownSeconds ?? configCont.cooldown_seconds ?? 5,
|
|
314
499
|
perSessionMaxUsd: opts.sessionBudget ?? configCont.per_session_max_usd ?? null,
|
|
315
500
|
autoCheckpoint: opts.autoCheckpoint ?? configCont.auto_checkpoint ?? true,
|
|
501
|
+
autoRetryOnGhost: {
|
|
502
|
+
enabled: resolvedGhostEnabled ?? false,
|
|
503
|
+
maxRetriesPerRun: opts.autoRetryOnGhostMaxRetries
|
|
504
|
+
?? configGhostRetry.max_retries_per_run
|
|
505
|
+
?? 3,
|
|
506
|
+
cooldownSeconds: opts.autoRetryOnGhostCooldownSeconds
|
|
507
|
+
?? configGhostRetry.cooldown_seconds
|
|
508
|
+
?? 5,
|
|
509
|
+
},
|
|
316
510
|
};
|
|
317
511
|
}
|
|
318
512
|
|
|
513
|
+
/**
 * Report whether the project config auto-approves both phase transitions and
 * run completion.
 *
 * @param {object} [config] - project config; reads `config.approval_policy`
 * @returns {boolean} true only when `approval_policy` is an object whose
 *   `phase_transitions.default` and `run_completion.action` are both
 *   `'auto_approve'`
 */
export function isFullAutoApprovalPolicy(config) {
  const approvalPolicy = config?.approval_policy;
  if (approvalPolicy === null || typeof approvalPolicy !== 'object') {
    return false;
  }
  if (approvalPolicy.phase_transitions?.default !== 'auto_approve') {
    return false;
  }
  return approvalPolicy.run_completion?.action === 'auto_approve';
}
|
|
519
|
+
|
|
319
520
|
// ---------------------------------------------------------------------------
|
|
320
521
|
// Single-step continuous advancement primitive
|
|
321
522
|
// ---------------------------------------------------------------------------
|
|
@@ -370,6 +571,8 @@ export async function advanceContinuousRunOnce(context, session, contOpts, execu
|
|
|
370
571
|
if (session.status === 'paused') {
|
|
371
572
|
const governedState = loadProjectState(root, context.config);
|
|
372
573
|
if (governedState?.status === 'blocked') {
|
|
574
|
+
const retried = await maybeAutoRetryGhostBlocker(context, session, contOpts, governedState, log);
|
|
575
|
+
if (retried) return retried;
|
|
373
576
|
// Still blocked — stay paused, do not attempt new work
|
|
374
577
|
writeContinuousSession(root, session);
|
|
375
578
|
return {
|
|
@@ -406,7 +609,10 @@ export async function advanceContinuousRunOnce(context, session, contOpts, execu
|
|
|
406
609
|
const resumeStopReason = execution.result?.stop_reason;
|
|
407
610
|
|
|
408
611
|
if (isBlockedContinuousExecution(execution)) {
|
|
409
|
-
const
|
|
612
|
+
const blockedState = execution?.result?.state || loadProjectState(root, context.config);
|
|
613
|
+
const retried = await maybeAutoRetryGhostBlocker(context, session, contOpts, blockedState, log);
|
|
614
|
+
if (retried) return retried;
|
|
615
|
+
const blockedRecoveryAction = getBlockedRecoveryAction(blockedState);
|
|
410
616
|
session.status = 'paused';
|
|
411
617
|
log(blockedRecoveryAction
|
|
412
618
|
? `Resumed run blocked again — continuous loop re-paused. Recovery: ${blockedRecoveryAction}`
|
|
@@ -418,7 +624,7 @@ export async function advanceContinuousRunOnce(context, session, contOpts, execu
|
|
|
418
624
|
action: 'run_blocked',
|
|
419
625
|
run_id: session.current_run_id,
|
|
420
626
|
recovery_action: blockedRecoveryAction,
|
|
421
|
-
blocked_category: getBlockedCategory(
|
|
627
|
+
blocked_category: getBlockedCategory(blockedState),
|
|
422
628
|
};
|
|
423
629
|
}
|
|
424
630
|
|
|
@@ -435,6 +641,64 @@ export async function advanceContinuousRunOnce(context, session, contOpts, execu
|
|
|
435
641
|
return { ok: true, status: 'running', action: 'resumed_after_unblock', run_id: session.current_run_id };
|
|
436
642
|
}
|
|
437
643
|
|
|
644
|
+
const activeGovernedState = loadProjectState(root, context.config);
|
|
645
|
+
if (
|
|
646
|
+
session.current_run_id
|
|
647
|
+
&& activeGovernedState?.status === 'active'
|
|
648
|
+
&& activeGovernedState.run_id === session.current_run_id
|
|
649
|
+
&& Object.keys(activeGovernedState.active_turns || {}).length > 0
|
|
650
|
+
) {
|
|
651
|
+
log('Continuing active governed run.');
|
|
652
|
+
let execution;
|
|
653
|
+
try {
|
|
654
|
+
execution = await executeGovernedRun(context, {
|
|
655
|
+
autoApprove: true,
|
|
656
|
+
autoCheckpoint: contOpts.autoCheckpoint,
|
|
657
|
+
report: true,
|
|
658
|
+
log,
|
|
659
|
+
});
|
|
660
|
+
} catch (err) {
|
|
661
|
+
session.status = 'failed';
|
|
662
|
+
writeContinuousSession(root, session);
|
|
663
|
+
return { ok: false, status: 'failed', action: 'run_failed', stop_reason: err.message, run_id: session.current_run_id };
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
session.cumulative_spent_usd = (session.cumulative_spent_usd || 0) + getExecutionRunSpentUsd(execution);
|
|
667
|
+
const resumeStopReason = execution.result?.stop_reason;
|
|
668
|
+
|
|
669
|
+
if (isBlockedContinuousExecution(execution)) {
|
|
670
|
+
const blockedState = execution?.result?.state || loadProjectState(root, context.config);
|
|
671
|
+
const retried = await maybeAutoRetryGhostBlocker(context, session, contOpts, blockedState, log);
|
|
672
|
+
if (retried) return retried;
|
|
673
|
+
const blockedRecoveryAction = getBlockedRecoveryAction(blockedState);
|
|
674
|
+
session.status = 'paused';
|
|
675
|
+
log(blockedRecoveryAction
|
|
676
|
+
? `Active run blocked — continuous loop paused. Recovery: ${blockedRecoveryAction}`
|
|
677
|
+
: 'Active run blocked — continuous loop paused.');
|
|
678
|
+
writeContinuousSession(root, session);
|
|
679
|
+
return {
|
|
680
|
+
ok: true,
|
|
681
|
+
status: 'blocked',
|
|
682
|
+
action: 'run_blocked',
|
|
683
|
+
run_id: session.current_run_id,
|
|
684
|
+
recovery_action: blockedRecoveryAction,
|
|
685
|
+
blocked_category: getBlockedCategory(blockedState),
|
|
686
|
+
};
|
|
687
|
+
}
|
|
688
|
+
|
|
689
|
+
if (execution.exitCode !== 0 || !execution.result) {
|
|
690
|
+
session.status = 'failed';
|
|
691
|
+
writeContinuousSession(root, session);
|
|
692
|
+
return { ok: false, status: 'failed', action: 'run_failed', stop_reason: resumeStopReason || `exit_code_${execution.exitCode}`, run_id: session.current_run_id };
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
session.runs_completed += 1;
|
|
696
|
+
session.current_run_id = execution.result?.state?.run_id || session.current_run_id;
|
|
697
|
+
log(`Active run completed (${session.runs_completed}/${contOpts.maxRuns}): ${resumeStopReason || 'completed'}`);
|
|
698
|
+
writeContinuousSession(root, session);
|
|
699
|
+
return { ok: true, status: 'running', action: 'continued_active_run', run_id: session.current_run_id };
|
|
700
|
+
}
|
|
701
|
+
|
|
438
702
|
// Validate vision file
|
|
439
703
|
if (!existsSync(absVisionPath)) {
|
|
440
704
|
session.status = 'failed';
|
|
@@ -573,7 +837,10 @@ export async function advanceContinuousRunOnce(context, session, contOpts, execu
|
|
|
573
837
|
}
|
|
574
838
|
|
|
575
839
|
if (isBlockedContinuousExecution(execution)) {
|
|
576
|
-
const
|
|
840
|
+
const blockedState = execution?.result?.state || loadProjectState(root, context.config);
|
|
841
|
+
const retried = await maybeAutoRetryGhostBlocker(context, session, contOpts, blockedState, log);
|
|
842
|
+
if (retried) return retried;
|
|
843
|
+
const blockedRecoveryAction = getBlockedRecoveryAction(blockedState);
|
|
577
844
|
const resolved = resolveIntent(root, targetIntentId);
|
|
578
845
|
if (!resolved.ok) {
|
|
579
846
|
log(`Continuous resolve error: ${resolved.error}`);
|
|
@@ -593,7 +860,7 @@ export async function advanceContinuousRunOnce(context, session, contOpts, execu
|
|
|
593
860
|
run_id: session.current_run_id,
|
|
594
861
|
intent_id: targetIntentId,
|
|
595
862
|
recovery_action: blockedRecoveryAction,
|
|
596
|
-
blocked_category: getBlockedCategory(
|
|
863
|
+
blocked_category: getBlockedCategory(blockedState),
|
|
597
864
|
};
|
|
598
865
|
}
|
|
599
866
|
|
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ghost-retry.js — Pure decision helper for BUG-61 continuous-mode ghost-turn
|
|
3
|
+
* auto-recovery.
|
|
4
|
+
*
|
|
5
|
+
* This module is deliberately pure (no disk I/O, no subprocess spawn): it takes
|
|
6
|
+
* the blocked governed state plus the continuous session snapshot and returns a
|
|
7
|
+
* decision record the continuous loop can act on.
|
|
8
|
+
*
|
|
9
|
+
* Slice 2a ships the decision helper + state-shape primitives. Slice 2b wires
|
|
10
|
+
* it into `advanceContinuousRunOnce()` and covers `reissueTurn()` side-effects
|
|
11
|
+
* + cooldowns + command-chain beta scenarios.
|
|
12
|
+
*
|
|
13
|
+
* Contracts:
|
|
14
|
+
* - Retry is eligible ONLY when `blocked_reason.category === "ghost_turn"`
|
|
15
|
+
* AND an active turn exists with `status === "failed_start"` AND a typed
|
|
16
|
+
* BUG-51 startup failure (`runtime_spawn_failed` or `stdout_attach_failed`).
|
|
17
|
+
* - Retry budget is run-scoped: switching `run_id` resets the counter to 0.
|
|
18
|
+
* - Staged results on the ghost turn disqualify retry (defer to accept flow).
|
|
19
|
+
* - Exhaustion returns `decision: "exhausted"` — the caller is responsible
|
|
20
|
+
* for mirroring the outcome into governed state's
|
|
21
|
+
* `blocked_reason.recovery.detail` per DEC-BUG61-GHOST-RETRY-STATE-OWNERSHIP-001.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
/**
 * Startup failure reasons that make a `failed_start` turn eligible for ghost
 * auto-retry. Frozen so the list cannot be altered at runtime.
 */
export const GHOST_FAILURE_TYPES = Object.freeze(['runtime_spawn_failed', 'stdout_attach_failed']);
|
|
28
|
+
|
|
29
|
+
/**
 * Slice 2c: number of consecutive identical attempt fingerprints that triggers
 * the same-signature early stop.
 *
 * A fingerprint is `(runtime_id, role_id, failure_type)`. When this many
 * recorded attempts in a row share one fingerprint, the raw retry budget may
 * still have room, but the repetition points at a systematic failure that
 * more retries will not clear. The loop then stops early with
 * `decision: "exhausted"` and `reason: "same_signature_repeat"`.
 *
 * The value is deliberately low (2): the BUG-61 contract is "retry transient
 * ghosts", and a second identical signature is already evidence that the
 * failure is not transient.
 *
 * This is a framework invariant in v1 and is not configurable through
 * `auto_retry_on_ghost`. If 2 proves too aggressive, promote it to config via
 * a new DEC instead of silently widening it.
 */
export const SIGNATURE_REPEAT_THRESHOLD = 2;
|
|
45
|
+
|
|
46
|
+
/**
 * Return a normalized view of `session.ghost_retry`.
 *
 * When the session has no `ghost_retry` object, every field takes its default
 * (`null`, `0`, `false`, or an empty `attempts_log`). Otherwise each field is
 * read from the stored object and coerced:
 *   - `attempts` must be a non-negative integer, else `0`
 *   - `max_retries_per_run` must be an integer, else `null`
 *   - `exhausted` is coerced to a boolean
 *   - `attempts_log` must be an array, else `[]`
 *
 * A stored `attempts_log` array is returned by reference, so callers should
 * spread/clone before mutating.
 *
 * @param {object} [session] - continuous session snapshot
 * @returns {object} plain ghost-retry state object
 */
export function readGhostRetryState(session) {
  const stored = session?.ghost_retry;
  // Anything that is not a real object is treated as "nothing recorded".
  const source = stored && typeof stored === 'object' ? stored : {};

  const hasValidAttempts = Number.isInteger(source.attempts) && source.attempts >= 0;
  const hasValidMax = Number.isInteger(source.max_retries_per_run);
  const hasValidLog = Array.isArray(source.attempts_log);

  return {
    run_id: source.run_id ?? null,
    attempts: hasValidAttempts ? source.attempts : 0,
    max_retries_per_run: hasValidMax ? source.max_retries_per_run : null,
    last_old_turn_id: source.last_old_turn_id ?? null,
    last_new_turn_id: source.last_new_turn_id ?? null,
    last_failure_type: source.last_failure_type ?? null,
    last_retried_at: source.last_retried_at ?? null,
    exhausted: Boolean(source.exhausted),
    attempts_log: hasValidLog ? source.attempts_log : [],
  };
}
|
|
77
|
+
|
|
78
|
+
/**
 * Reset the ghost_retry counter when the active run differs from the run the
 * counter was recorded for. Does not mutate `session`.
 *
 * `runId` is normalized with `?? null` before the comparison. The state read
 * by `readGhostRetryState()` always carries `run_id` as a value or `null`, and
 * the reset state below also stores `runId ?? null`, so an `undefined` runId
 * must be treated as the same "no run" value rather than as a run switch that
 * wipes the counter and attempts log.
 *
 * @param {object} [session] - continuous session snapshot
 * @param {string|null} [runId] - run the continuous loop believes is active
 * @returns {object} the current normalized state when the run is unchanged,
 *   otherwise a fresh state for `runId` that keeps only `max_retries_per_run`
 */
export function resetGhostRetryForRun(session, runId) {
  const current = readGhostRetryState(session);
  const targetRunId = runId ?? null;
  if (current.run_id === targetRunId) return current;
  return {
    run_id: targetRunId,
    attempts: 0,
    max_retries_per_run: current.max_retries_per_run,
    last_old_turn_id: null,
    last_new_turn_id: null,
    last_failure_type: null,
    last_retried_at: null,
    exhausted: false,
    attempts_log: [],
  };
}
|
|
97
|
+
|
|
98
|
+
/**
 * Build the fingerprint string for a recorded retry attempt.
 *
 * The key is `runtime_id|role_id|failure_type`, following the HUMAN-ROADMAP
 * "same runtime, same role, same prompt shape" guidance. Prompt shape is not
 * part of the key: it is treated as stable across same-turn reissues because
 * `reissueTurn()` re-renders the same dispatch bundle.
 *
 * A `null` or missing field is rendered as `?`, so a partial record still
 * yields a well-formed three-part key.
 *
 * @param {object} [attempt] - entry from the session's `attempts_log`
 * @returns {string} fingerprint in the form `runtime|role|failure`
 */
export function buildAttemptFingerprint(attempt) {
  const parts = [attempt?.runtime_id, attempt?.role_id, attempt?.failure_type];
  return parts.map((part) => part ?? '?').join('|');
}
|
|
114
|
+
|
|
115
|
+
/**
 * Check whether the last `threshold` entries of `attemptsLog` all share one
 * fingerprint (see `buildAttemptFingerprint`).
 *
 * Returns `{ triggered: true, signature, consecutive: threshold }` on a hit
 * and `{ triggered: false, signature: null, consecutive: 0 }` otherwise. The
 * check never triggers when:
 *   - `attemptsLog` is not an array or has fewer than `threshold` entries
 *   - `threshold` is not an integer of at least 2
 *   - the first fingerprint in the tail is fully unknown (`?|?|?`)
 *
 * What to do with a hit is left to the caller (slice 2c routes it into
 * `decision: "exhausted"` with `reason: "same_signature_repeat"`).
 *
 * @param {Array<object>} attemptsLog - recorded attempts, oldest first
 * @param {number} [threshold=SIGNATURE_REPEAT_THRESHOLD]
 * @returns {{triggered: boolean, signature: string|null, consecutive: number}}
 */
export function classifySameSignatureExhaustion(attemptsLog, threshold = SIGNATURE_REPEAT_THRESHOLD) {
  // Every miss returns a fresh object of the same shape.
  const notTriggered = () => ({ triggered: false, signature: null, consecutive: 0 });

  const thresholdUsable = Number.isInteger(threshold) && threshold >= 2;
  if (!Array.isArray(attemptsLog) || !thresholdUsable || attemptsLog.length < threshold) {
    return notTriggered();
  }

  const [reference, ...others] = attemptsLog
    .slice(-threshold)
    .map((entry) => buildAttemptFingerprint(entry));

  if (!reference || reference === '?|?|?') {
    return notTriggered();
  }
  if (others.some((signature) => signature !== reference)) {
    return notTriggered();
  }
  return { triggered: true, signature: reference, consecutive: threshold };
}
|
|
143
|
+
|
|
144
|
+
/**
 * Locate the primary ghost turn in governed state, using in-memory data only.
 *
 * Expected input shape (as written by `stale-turn-watchdog.js`):
 *   - `state.blocked_reason.category === "ghost_turn"`
 *   - `state.blocked_reason.turn_id` (optional hint)
 *   - `state.active_turns[turnId].status === "failed_start"`
 *   - `state.active_turns[turnId].failed_start_reason` is one of
 *     GHOST_FAILURE_TYPES
 *
 * When the hinted turn id is present in `active_turns`, it is the only
 * candidate; otherwise every active turn is examined in key order. A turn
 * that already carries a meaningful staged result is never selected.
 *
 * @param {object} state - governed state
 * @returns {{turn_id: string, turn: object, failure_type: string}|null}
 *   the first eligible turn, or null when none qualifies
 */
export function findPrimaryGhostTurn(state) {
  if (!state || typeof state !== 'object') return null;

  const blocker = state.blocked_reason;
  if (blocker?.category !== 'ghost_turn') return null;

  const turns = state.active_turns || {};
  const hintedId = blocker.turn_id;
  const candidateIds = hintedId && turns[hintedId] ? [hintedId] : Object.keys(turns);

  const isRetryableGhost = (turn) => Boolean(turn)
    && turn.status === 'failed_start'
    && GHOST_FAILURE_TYPES.includes(turn.failed_start_reason)
    && !hasMeaningfulStagedResult(turn);

  const ghostId = candidateIds.find((id) => isRetryableGhost(turns[id]));
  if (ghostId === undefined) return null;

  const turn = turns[ghostId];
  return { turn_id: ghostId, turn, failure_type: turn.failed_start_reason };
}
|
|
179
|
+
|
|
180
|
+
/**
 * Best-effort check for a meaningful staged result on a turn. A turn that has
 * already produced a structured result should NOT be auto-retried; the accept
 * pipeline owns that path.
 *
 * Reads `turn.staged_result`, falling back to `turn.result`. A truthy
 * non-object value counts as meaningful. An object counts only when at least
 * one of its values is something other than `null`, `undefined` or `''`.
 *
 * @param {object} [turn]
 * @returns {boolean}
 */
function hasMeaningfulStagedResult(turn) {
  if (!turn) return false;

  const staged = turn.staged_result ?? turn.result ?? null;
  if (!staged) return false;
  if (typeof staged !== 'object') return true;

  // Purely-null / empty shells the watchdog may leave behind do not count.
  return Object.values(staged).some((value) => !(value == null || value === ''));
}
|
|
196
|
+
|
|
197
|
+
/**
 * Decide whether the continuous loop should auto-retry a ghost turn, based on
 * the blocked governed state, the continuous session and the resolved retry
 * options.
 *
 * @param {object} params
 * @param {object} params.state - governed state (has blocked_reason + active_turns)
 * @param {object} params.session - continuous session (source of truth for retry counter)
 * @param {object} params.autoRetryOnGhost - resolved continuous options block: { enabled, maxRetriesPerRun, cooldownSeconds }
 * @param {string|null} [params.runId] - the run_id the continuous loop believes is active (defaults to state.run_id)
 * @returns {{
 *   decision: 'retry' | 'exhausted' | 'skip_non_ghost' | 'missing_active_ghost' | 'disabled' | 'missing_run_id',
 *   reason: string,
 *   attempts: number,
 *   maxRetries: number,
 *   retryState: object,
 *   ghost?: { turn_id: string, failure_type: string },
 *   signatureRepeat?: { signature: string, consecutive: number }
 * }}
 *
 * Checks run in this order: disabled → non-ghost block → no eligible ghost
 * turn → no run id → raw budget exhausted → same-signature early stop →
 * retry. `maxRetries` falls back to 3 unless `maxRetriesPerRun` is a positive
 * integer.
 *
 * Exhaustion lanes (slice 2c):
 *   - `reason: "retry budget exhausted (N/N)"` — raw counter cap hit
 *   - `reason: "same_signature_repeat (<signature>)"` — the attempts log shows
 *     a run of consecutive identical fingerprints, so continuing is unlikely
 *     to help. This lane can fire BEFORE the raw counter cap.
 */
export function classifyGhostRetryDecision({ state, session, autoRetryOnGhost, runId } = {}) {
  const policy = autoRetryOnGhost || {};
  const isEnabled = Boolean(policy.enabled);
  const configuredCap = policy.maxRetriesPerRun;
  const maxRetries = Number.isInteger(configuredCap) && configuredCap > 0 ? configuredCap : 3;

  // Shared shape for every outcome reached before the per-run counter is
  // consulted: zero attempts plus the session's retry state as it stands.
  const bail = (decision, reason, extra = {}) => ({
    decision,
    reason,
    attempts: 0,
    maxRetries,
    retryState: readGhostRetryState(session),
    ...extra,
  });

  if (!isEnabled) {
    return bail('disabled', 'auto_retry_on_ghost.enabled is false');
  }

  const category = state?.blocked_reason?.category;
  if (category !== 'ghost_turn') {
    return bail('skip_non_ghost', `blocked_reason.category=${category ?? 'null'} is not ghost_turn`);
  }

  const ghost = findPrimaryGhostTurn(state);
  if (!ghost) {
    return bail(
      'missing_active_ghost',
      'blocked_reason names a ghost but no active turn has a typed BUG-51 failed_start',
    );
  }
  const ghostRef = { turn_id: ghost.turn_id, failure_type: ghost.failure_type };

  const effectiveRunId = runId ?? state?.run_id ?? null;
  if (!effectiveRunId) {
    return bail('missing_run_id', 'cannot scope retry counter without a run_id', { ghost: ghostRef });
  }

  const counter = resetGhostRetryForRun(session, effectiveRunId);
  const attempts = counter.attempts;

  if (attempts >= maxRetries) {
    return {
      decision: 'exhausted',
      reason: `retry budget exhausted (${attempts}/${maxRetries})`,
      attempts,
      maxRetries,
      retryState: { ...counter, max_retries_per_run: maxRetries, exhausted: true },
      ghost: ghostRef,
    };
  }

  // Slice 2c: same-signature early stop. A distinct reason lets the caller
  // surface "pattern detected, not transient" in the exhaustion bundle.
  const sigCheck = classifySameSignatureExhaustion(counter.attempts_log, SIGNATURE_REPEAT_THRESHOLD);
  if (sigCheck.triggered) {
    return {
      decision: 'exhausted',
      reason: `same_signature_repeat (${sigCheck.signature})`,
      attempts,
      maxRetries,
      retryState: { ...counter, max_retries_per_run: maxRetries, exhausted: true },
      ghost: ghostRef,
      signatureRepeat: { signature: sigCheck.signature, consecutive: sigCheck.consecutive },
    };
  }

  return {
    decision: 'retry',
    reason: `retry budget available (${attempts}/${maxRetries})`,
    attempts,
    maxRetries,
    retryState: { ...counter, max_retries_per_run: maxRetries },
    ghost: ghostRef,
  };
}
|
|
314
|
+
|
|
315
|
+
/**
 * Apply a successful auto-retry to a session snapshot. Returns a NEW session
 * object whose `ghost_retry` block has the counter incremented, the `last_*`
 * fields updated, `exhausted` cleared and a fingerprint record appended to
 * `attempts_log`. Does not write to disk; the caller owns persistence.
 *
 * @param {object|null|undefined} session - continuous session snapshot
 * @param {object} details
 * @param {string|null} details.runId - run the counter is scoped to
 * @param {string|null} details.oldTurnId - ghost turn that was retried
 * @param {string|null} details.newTurnId - turn issued in its place
 * @param {string|null} details.failureType - failure type of the ghost turn
 * @param {number} [details.maxRetries] - cap to record; the existing value is kept when this is not an integer
 * @param {string} [details.nowIso] - timestamp to record; defaults to the current time
 * @param {string|null} [details.runtimeId]
 * @param {string|null} [details.roleId]
 * @param {number|null} [details.runningMs]
 * @param {number|null} [details.thresholdMs]
 * @returns {object} copy of `session` with a replaced `ghost_retry` block
 */
export function applyGhostRetryAttempt(session, {
  runId,
  oldTurnId,
  newTurnId,
  failureType,
  maxRetries,
  nowIso,
  runtimeId = null,
  roleId = null,
  runningMs = null,
  thresholdMs = null,
}) {
  const base = resetGhostRetryForRun(session, runId);
  const at = nowIso || new Date().toISOString();
  const attemptNumber = base.attempts + 1;

  // Tolerate a missing or malformed log the same way applyGhostRetryExhaustion
  // does, so a bad snapshot cannot make the retry itself throw on spread.
  const priorLog = Array.isArray(base.attempts_log) ? base.attempts_log : [];

  // Slice 2c: append a per-attempt fingerprint record. The log is the source
  // of truth for same-signature early-stop detection and the exhaustion
  // diagnostic bundle.
  const nextEntry = {
    attempt: attemptNumber,
    old_turn_id: oldTurnId ?? null,
    new_turn_id: newTurnId ?? null,
    runtime_id: runtimeId ?? null,
    role_id: roleId ?? null,
    failure_type: failureType ?? null,
    running_ms: runningMs ?? null,
    threshold_ms: thresholdMs ?? null,
    retried_at: at,
  };

  // Cap the log at 10 entries to prevent unbounded growth on misbehaving
  // projects — the tail is what matters for pattern detection.
  const attemptsLog = [...priorLog, nextEntry].slice(-10);

  const ghost_retry = {
    run_id: runId ?? null,
    attempts: attemptNumber,
    max_retries_per_run: Number.isInteger(maxRetries) ? maxRetries : base.max_retries_per_run,
    last_old_turn_id: oldTurnId ?? null,
    last_new_turn_id: newTurnId ?? null,
    last_failure_type: failureType ?? null,
    last_retried_at: at,
    exhausted: false,
    attempts_log: attemptsLog,
  };
  return { ...(session || {}), ghost_retry };
}
|
|
364
|
+
|
|
365
|
+
/**
 * Apply an exhaustion outcome to a session snapshot. Returns a NEW session
 * whose `ghost_retry` block keeps the attempt counter, sets `exhausted: true`
 * and records the last-failure metadata. Fields not supplied by the caller
 * fall back to the values already held for the run.
 *
 * @param {object|null|undefined} session - continuous session snapshot
 * @param {object} details
 * @param {string|null} details.runId - run the counter is scoped to
 * @param {string} [details.failureType] - failure type of the final ghost turn
 * @param {string} [details.turnId] - id of the final ghost turn
 * @param {number} [details.maxRetries] - cap to record; the existing value is kept when this is not an integer
 * @param {string} [details.nowIso] - timestamp to record; the existing value is kept when falsy
 * @returns {object} copy of `session` with a replaced `ghost_retry` block
 */
export function applyGhostRetryExhaustion(session, { runId, failureType, turnId, maxRetries, nowIso }) {
  const prior = resetGhostRetryForRun(session, runId);
  const recordedCap = Number.isInteger(maxRetries) ? maxRetries : prior.max_retries_per_run;

  // Slice 2c: carry the per-attempt fingerprint log into the exhausted state
  // so the operator-facing session.json still has the diagnostic payload
  // after the loop pauses. Without this, the log would be dropped exactly
  // when it is most useful.
  const preservedLog = Array.isArray(prior.attempts_log) ? prior.attempts_log : [];

  return {
    ...(session || {}),
    ghost_retry: {
      run_id: runId ?? null,
      attempts: prior.attempts,
      max_retries_per_run: recordedCap,
      last_old_turn_id: turnId ?? prior.last_old_turn_id,
      last_new_turn_id: null,
      last_failure_type: failureType ?? prior.last_failure_type,
      last_retried_at: nowIso || prior.last_retried_at,
      exhausted: true,
      attempts_log: preservedLog,
    },
  };
}
|
|
388
|
+
|
|
389
|
+
/**
 * Build the human-readable mirror string the continuous loop should write
 * into governed state's `blocked_reason.recovery.detail` at exhaustion time.
 * Matches the shape `stale-turn-watchdog.js` already uses for that field.
 *
 * Slice 2c: when `signatureRepeat` carries a signature, the message says the
 * retry stopped early on a repeated pattern, so operators can tell that apart
 * from raw-budget exhaustion in the status surface.
 *
 * @param {object} params
 * @param {number} params.attempts - attempts used so far
 * @param {number} params.maxRetries - configured cap
 * @param {string} [params.failureType] - defaults to 'ghost_turn' when falsy
 * @param {string} [params.manualRecoveryDetail] - appended after a space when truthy
 * @param {{ signature: string, consecutive?: number }|null} [params.signatureRepeat]
 * @returns {string}
 */
export function buildGhostRetryExhaustionMirror({
  attempts,
  maxRetries,
  failureType,
  manualRecoveryDetail,
  signatureRepeat = null,
}) {
  const ratio = `${attempts}/${maxRetries}`;
  const failureLabel = failureType || 'ghost_turn';
  const recoveryTail = manualRecoveryDetail ? ` ${manualRecoveryDetail}` : '';

  const signature = signatureRepeat?.signature;
  if (!signature) {
    return `Auto-retry exhausted after ${ratio} attempts (${failureLabel}).${recoveryTail}`;
  }

  const streak = signatureRepeat.consecutive || 2;
  return `Auto-retry stopped early after ${streak} consecutive same-signature attempts [${signature}] (${failureLabel}); last attempt ${ratio}.${recoveryTail}`;
}
|
|
415
|
+
|
|
416
|
+
/**
 * Slice 2c: build the per-attempt diagnostic bundle that rides on the
 * `ghost_retry_exhausted` event payload AND gets surfaced in CLI status, so
 * the operator has enough evidence to decide between (a) bumping
 * `max_retries_per_run`, (b) changing the runtime, (c) raising
 * `startup_watchdog_ms`, or (d) filing a new BUG-54-class regression.
 *
 * Accepts either a session (an object with a `ghost_retry` block) or an
 * object that directly carries an `attempts_log` array; anything else yields
 * an empty bundle.
 *
 * Output shape:
 *   {
 *     attempts_log: [...per-attempt records, most recent last...],
 *     fingerprint_summary: [{ signature, count }, ...] sorted by count desc,
 *     final_signature: string | null
 *   }
 *
 * @param {object|null|undefined} sessionOrState
 * @returns {{ attempts_log: object[], fingerprint_summary: { signature: string, count: number }[], final_signature: string|null }}
 */
export function buildGhostRetryDiagnosticBundle(sessionOrState) {
  let source;
  if (sessionOrState && typeof sessionOrState === 'object' && sessionOrState.ghost_retry) {
    source = readGhostRetryState(sessionOrState);
  } else if (Array.isArray(sessionOrState?.attempts_log)) {
    source = { attempts_log: sessionOrState.attempts_log };
  } else {
    source = { attempts_log: [] };
  }

  const log = Array.isArray(source.attempts_log) ? source.attempts_log : [];

  // Tally how often each fingerprint occurs across the recorded attempts.
  const tally = log.reduce((acc, entry) => {
    const signature = buildAttemptFingerprint(entry);
    return acc.set(signature, (acc.get(signature) || 0) + 1);
  }, new Map());

  const fingerprint_summary = [...tally]
    .map(([signature, count]) => ({ signature, count }))
    .sort((left, right) => right.count - left.count);

  const final_signature = log.length > 0 ? buildAttemptFingerprint(log[log.length - 1]) : null;

  return { attempts_log: log, fingerprint_summary, final_signature };
}
|
|
@@ -1514,7 +1514,12 @@ function buildConflictDetail(conflict) {
|
|
|
1514
1514
|
}
|
|
1515
1515
|
|
|
1516
1516
|
/**
 * Report whether any active turn is in a status that blocks the run:
 * 'failed', 'conflicted', 'failed_start' or 'stalled'.
 *
 * @param {object|null|undefined} activeTurns - map of turn id → turn record
 * @returns {boolean}
 */
function hasBlockingActiveTurn(activeTurns) {
  const blockingStatuses = new Set(['failed', 'conflicted', 'failed_start', 'stalled']);
  for (const turn of Object.values(activeTurns || {})) {
    if (blockingStatuses.has(turn?.status)) return true;
  }
  return false;
}
|
|
1519
1524
|
|
|
1520
1525
|
function findHistoryTurnRequest(historyEntries, turnId, kind) {
|
|
@@ -1600,6 +1605,95 @@ function resolvePhaseTransitionSource(historyEntries, gateFailure, fallbackTurnI
|
|
|
1600
1605
|
return requestedSource;
|
|
1601
1606
|
}
|
|
1602
1607
|
|
|
1608
|
+
/**
 * Build a synthetic phase-transition source for the current phase's exit
 * gate when that gate is still marked 'pending' in `state.phase_gate_status`.
 *
 * Returns null when the state has no phase, the phase's routing has no
 * `exit_gate`, `getNextPhase` yields no next phase, or the gate status is
 * anything other than 'pending'.
 *
 * @param {object|null|undefined} state - governed state
 * @param {object|null|undefined} config - config whose `routing` maps phase → routing entry
 * @returns {object|null} a completed-turn-shaped record requesting the next phase
 */
function buildStandingPhaseTransitionSource(state, config) {
  const phase = state?.phase;
  const phaseRouting = phase ? config?.routing?.[phase] : null;
  const gateId = phaseRouting?.exit_gate || null;
  const nextPhase = getNextPhase(phase, config?.routing || {});

  const haveAllParts = Boolean(phase) && Boolean(gateId) && Boolean(nextPhase);
  if (!haveAllParts) return null;

  const gateStatuses = state?.phase_gate_status || {};
  if (gateStatuses[gateId] !== 'pending') return null;

  const sourceTurnId = state?.last_completed_turn_id || state?.blocked_reason?.turn_id || null;
  return {
    turn_id: sourceTurnId,
    run_id: state?.run_id || null,
    role: null,
    assigned_role: null,
    phase,
    status: 'completed',
    phase_transition_request: nextPhase,
    summary: `Synthetic ${gateId} transition source for operator-unblocked standing gate.`,
    verification: { status: 'pass' },
  };
}
|
|
1631
|
+
|
|
1632
|
+
/**
 * Collect the role names routed to a phase: its `entry_role` plus every
 * entry of `allowed_next_roles`. Only non-empty strings are kept.
 *
 * @param {object|null|undefined} config - config whose `routing` maps phase → routing entry
 * @param {string} phase - phase to look up
 * @returns {Set<string>} role names, entry role first
 */
function getPhaseRoles(config, phase) {
  const { entry_role: entryRole, allowed_next_roles: nextRoles } = config?.routing?.[phase] || {};
  const isRoleName = (candidate) => typeof candidate === 'string' && candidate.length > 0;
  const candidates = [entryRole, ...(Array.isArray(nextRoles) ? nextRoles : [])];
  return new Set(candidates.filter(isRoleName));
}
|
|
1645
|
+
|
|
1646
|
+
/**
 * Drop leftovers of the phase being left when a run advances.
 *
 * Every active turn whose role belongs to `fromPhase` and whose status is
 * neither 'accepted' nor 'completed' is removed from `active_turns`; its
 * budget reservation (if any) is cleared and its dispatch directory (if it
 * exists) is deleted on a best-effort basis.
 *
 * @param {string} root - project root used to resolve dispatch directories
 * @param {object} state - governed state to clean
 * @param {object} config - config used to resolve the roles of `fromPhase`
 * @param {string} fromPhase - phase being left
 * @returns {{ state: object, payload: { from_phase: string, removed_turn_ids: string[], cleared_budget_turn_ids: string[], removed_dispatch_turn_ids: string[] } }}
 */
function cleanupPhaseAdvanceArtifacts(root, state, config, fromPhase) {
  const departingRoles = getPhaseRoles(config, fromPhase);
  const isSettled = (turn) => turn?.status === 'accepted' || turn?.status === 'completed';

  // Partition active turns into the ones to drop and the ones to keep.
  const keptTurns = {};
  const removedTurnIds = [];
  for (const [id, turn] of Object.entries(getActiveTurns(state))) {
    const owner = turn?.assigned_role || turn?.role_id || turn?.role || null;
    if (departingRoles.has(owner) && !isSettled(turn)) {
      removedTurnIds.push(id);
    } else {
      keptTurns[id] = turn;
    }
  }

  // Release budget reservations held by the removed turns.
  const remainingReservations = { ...(state?.budget_reservations || {}) };
  const clearedBudgetTurnIds = removedTurnIds.filter((id) =>
    Object.prototype.hasOwnProperty.call(remainingReservations, id));
  for (const id of clearedBudgetTurnIds) {
    delete remainingReservations[id];
  }

  // Prune dispatch directories of the removed turns.
  const removedDispatchTurnIds = [];
  for (const id of removedTurnIds) {
    const dispatchDir = join(root, getDispatchTurnDir(id));
    if (!existsSync(dispatchDir)) continue;
    try {
      rmSync(dispatchDir, { recursive: true, force: true });
      removedDispatchTurnIds.push(id);
    } catch {
      // Best-effort cleanup; state correctness must not depend on filesystem pruning.
    }
  }

  return {
    state: {
      ...state,
      active_turns: keptTurns,
      budget_reservations: remainingReservations,
    },
    payload: {
      from_phase: fromPhase,
      removed_turn_ids: removedTurnIds,
      cleared_budget_turn_ids: clearedBudgetTurnIds,
      removed_dispatch_turn_ids: removedDispatchTurnIds,
    },
  };
}
|
|
1696
|
+
|
|
1603
1697
|
function buildBlockedReason({ category, recovery, turnId, blockedAt = new Date().toISOString() }) {
|
|
1604
1698
|
return {
|
|
1605
1699
|
category,
|
|
@@ -2355,7 +2449,7 @@ export function markRunBlocked(root, details) {
|
|
|
2355
2449
|
blockedAt,
|
|
2356
2450
|
});
|
|
2357
2451
|
|
|
2358
|
-
|
|
2452
|
+
let updatedState = {
|
|
2359
2453
|
...state,
|
|
2360
2454
|
status: 'blocked',
|
|
2361
2455
|
blocked_on: details.blockedOn,
|
|
@@ -2607,13 +2701,15 @@ export function reactivateGovernedRun(root, state, details = {}) {
|
|
|
2607
2701
|
};
|
|
2608
2702
|
}
|
|
2609
2703
|
|
|
2610
|
-
export function reconcilePhaseAdvanceBeforeDispatch(root, config, state = null) {
|
|
2704
|
+
export function reconcilePhaseAdvanceBeforeDispatch(root, config, state = null, opts = {}) {
|
|
2611
2705
|
const currentState = state && typeof state === 'object' ? state : readState(root);
|
|
2612
2706
|
if (!currentState) {
|
|
2613
2707
|
return { ok: false, error: 'No governed state.json found' };
|
|
2614
2708
|
}
|
|
2615
2709
|
|
|
2616
|
-
|
|
2710
|
+
const activeTurnCount = getActiveTurnCount(currentState);
|
|
2711
|
+
const allowActiveTurnCleanup = opts?.allow_active_turn_cleanup === true;
|
|
2712
|
+
if (currentState.status !== 'active' || (activeTurnCount > 0 && !allowActiveTurnCleanup)) {
|
|
2617
2713
|
return {
|
|
2618
2714
|
ok: true,
|
|
2619
2715
|
state: attachLegacyCurrentTurnAlias(currentState),
|
|
@@ -2643,12 +2739,15 @@ export function reconcilePhaseAdvanceBeforeDispatch(root, config, state = null)
|
|
|
2643
2739
|
}
|
|
2644
2740
|
|
|
2645
2741
|
const historyEntries = readJsonlEntries(root, HISTORY_PATH);
|
|
2646
|
-
|
|
2742
|
+
let phaseSource = resolvePhaseTransitionSource(
|
|
2647
2743
|
historyEntries,
|
|
2648
2744
|
gateFailure,
|
|
2649
2745
|
currentState.last_completed_turn_id || null,
|
|
2650
2746
|
currentState.queued_phase_transition || null,
|
|
2651
2747
|
);
|
|
2748
|
+
if (!phaseSource?.phase_transition_request && opts?.allow_standing_gate === true) {
|
|
2749
|
+
phaseSource = buildStandingPhaseTransitionSource(currentState, config);
|
|
2750
|
+
}
|
|
2652
2751
|
if (!phaseSource?.phase_transition_request) {
|
|
2653
2752
|
return {
|
|
2654
2753
|
ok: true,
|
|
@@ -2685,7 +2784,7 @@ export function reconcilePhaseAdvanceBeforeDispatch(root, config, state = null)
|
|
|
2685
2784
|
if (approvalResult.action === 'auto_approve') {
|
|
2686
2785
|
const now = new Date().toISOString();
|
|
2687
2786
|
const prevPhase = currentState.phase;
|
|
2688
|
-
|
|
2787
|
+
let nextState = {
|
|
2689
2788
|
...currentState,
|
|
2690
2789
|
phase: gateResult.next_phase,
|
|
2691
2790
|
phase_entered_at: now,
|
|
@@ -2699,6 +2798,8 @@ export function reconcilePhaseAdvanceBeforeDispatch(root, config, state = null)
|
|
|
2699
2798
|
[gateResult.gate_id || 'no_gate']: 'passed',
|
|
2700
2799
|
},
|
|
2701
2800
|
};
|
|
2801
|
+
const cleanup = cleanupPhaseAdvanceArtifacts(root, nextState, config, prevPhase);
|
|
2802
|
+
nextState = cleanup.state;
|
|
2702
2803
|
writeState(root, nextState);
|
|
2703
2804
|
appendJsonl(root, LEDGER_PATH, {
|
|
2704
2805
|
type: 'approval_policy',
|
|
@@ -2738,6 +2839,17 @@ export function reconcilePhaseAdvanceBeforeDispatch(root, config, state = null)
|
|
|
2738
2839
|
trigger: 'auto_approved',
|
|
2739
2840
|
},
|
|
2740
2841
|
});
|
|
2842
|
+
emitRunEvent(root, 'phase_cleanup', {
|
|
2843
|
+
run_id: nextState.run_id,
|
|
2844
|
+
phase: nextState.phase,
|
|
2845
|
+
status: nextState.status,
|
|
2846
|
+
payload: {
|
|
2847
|
+
...cleanup.payload,
|
|
2848
|
+
to_phase: gateResult.next_phase,
|
|
2849
|
+
gate_id: gateResult.gate_id || null,
|
|
2850
|
+
trigger: 'auto_approved',
|
|
2851
|
+
},
|
|
2852
|
+
});
|
|
2741
2853
|
return {
|
|
2742
2854
|
ok: true,
|
|
2743
2855
|
state: attachLegacyCurrentTurnAlias(nextState),
|
|
@@ -2789,7 +2901,7 @@ export function reconcilePhaseAdvanceBeforeDispatch(root, config, state = null)
|
|
|
2789
2901
|
|
|
2790
2902
|
const now = new Date().toISOString();
|
|
2791
2903
|
const prevPhase = currentState.phase;
|
|
2792
|
-
|
|
2904
|
+
let nextState = {
|
|
2793
2905
|
...currentState,
|
|
2794
2906
|
phase: gateResult.next_phase,
|
|
2795
2907
|
phase_entered_at: now,
|
|
@@ -2803,6 +2915,8 @@ export function reconcilePhaseAdvanceBeforeDispatch(root, config, state = null)
|
|
|
2803
2915
|
[gateResult.gate_id || 'no_gate']: 'passed',
|
|
2804
2916
|
},
|
|
2805
2917
|
};
|
|
2918
|
+
const cleanup = cleanupPhaseAdvanceArtifacts(root, nextState, config, prevPhase);
|
|
2919
|
+
nextState = cleanup.state;
|
|
2806
2920
|
|
|
2807
2921
|
writeState(root, nextState);
|
|
2808
2922
|
const retiredIntentIds = retireApprovedPhaseScopedIntents(root, nextState, config, prevPhase, now);
|
|
@@ -2832,6 +2946,18 @@ export function reconcilePhaseAdvanceBeforeDispatch(root, config, state = null)
|
|
|
2832
2946
|
trigger: 'reconciled_before_dispatch',
|
|
2833
2947
|
},
|
|
2834
2948
|
});
|
|
2949
|
+
emitRunEvent(root, 'phase_cleanup', {
|
|
2950
|
+
run_id: nextState.run_id,
|
|
2951
|
+
phase: nextState.phase,
|
|
2952
|
+
status: nextState.status,
|
|
2953
|
+
turn: phaseSource.turn_id ? { turn_id: phaseSource.turn_id, role_id: phaseSource.role || phaseSource.assigned_role || null } : undefined,
|
|
2954
|
+
payload: {
|
|
2955
|
+
...cleanup.payload,
|
|
2956
|
+
to_phase: gateResult.next_phase,
|
|
2957
|
+
gate_id: gateResult.gate_id || null,
|
|
2958
|
+
trigger: 'reconciled_before_dispatch',
|
|
2959
|
+
},
|
|
2960
|
+
});
|
|
2835
2961
|
|
|
2836
2962
|
return {
|
|
2837
2963
|
ok: true,
|
|
@@ -2876,7 +3002,7 @@ export function initializeGovernedRun(root, config, options = {}) {
|
|
|
2876
3002
|
const now = new Date().toISOString();
|
|
2877
3003
|
const provenance = buildDefaultRunProvenance(options.provenance);
|
|
2878
3004
|
const repoDecisions = getActiveRepoDecisions(root);
|
|
2879
|
-
|
|
3005
|
+
let updatedState = {
|
|
2880
3006
|
...state,
|
|
2881
3007
|
run_id: runId,
|
|
2882
3008
|
created_at: now,
|
|
@@ -4604,7 +4730,7 @@ function _acceptGovernedTurnLocked(root, config, opts) {
|
|
|
4604
4730
|
const remainingReservations = { ...(state.budget_reservations || {}) };
|
|
4605
4731
|
delete remainingReservations[currentTurn.turn_id];
|
|
4606
4732
|
const costUsd = turnResult.cost?.usd || 0;
|
|
4607
|
-
|
|
4733
|
+
let updatedState = {
|
|
4608
4734
|
...state,
|
|
4609
4735
|
turn_sequence: acceptedSequence,
|
|
4610
4736
|
last_completed_turn_id: currentTurn.turn_id,
|
|
@@ -4946,6 +5072,8 @@ function _acceptGovernedTurnLocked(root, config, opts) {
|
|
|
4946
5072
|
[gateResult.gate_id || 'no_gate']: 'passed',
|
|
4947
5073
|
};
|
|
4948
5074
|
updatedState.queued_phase_transition = null;
|
|
5075
|
+
const cleanup = cleanupPhaseAdvanceArtifacts(root, updatedState, config, prevPhase);
|
|
5076
|
+
updatedState = cleanup.state;
|
|
4949
5077
|
const retiredIntentIds = retireApprovedPhaseScopedIntents(root, updatedState, config, prevPhase, now);
|
|
4950
5078
|
if (retiredIntentIds.length > 0) {
|
|
4951
5079
|
emitRunEvent(root, 'intent_retired_by_phase_advance', {
|
|
@@ -4973,6 +5101,18 @@ function _acceptGovernedTurnLocked(root, config, opts) {
|
|
|
4973
5101
|
trigger: 'auto',
|
|
4974
5102
|
},
|
|
4975
5103
|
});
|
|
5104
|
+
emitRunEvent(root, 'phase_cleanup', {
|
|
5105
|
+
run_id: updatedState.run_id,
|
|
5106
|
+
phase: updatedState.phase,
|
|
5107
|
+
status: updatedState.status,
|
|
5108
|
+
turn: { turn_id: currentTurn.turn_id, role_id: currentTurn.assigned_role },
|
|
5109
|
+
payload: {
|
|
5110
|
+
...cleanup.payload,
|
|
5111
|
+
to_phase: gateResult.next_phase,
|
|
5112
|
+
gate_id: gateResult.gate_id || null,
|
|
5113
|
+
trigger: 'auto',
|
|
5114
|
+
},
|
|
5115
|
+
});
|
|
4976
5116
|
} else if (gateResult.action === 'awaiting_human_approval') {
|
|
4977
5117
|
// Evaluate approval policy — may auto-approve
|
|
4978
5118
|
const approvalResult = evaluateApprovalPolicy({
|
|
@@ -4992,6 +5132,8 @@ function _acceptGovernedTurnLocked(root, config, opts) {
|
|
|
4992
5132
|
[gateResult.gate_id || 'no_gate']: 'passed',
|
|
4993
5133
|
};
|
|
4994
5134
|
updatedState.queued_phase_transition = null;
|
|
5135
|
+
const cleanup = cleanupPhaseAdvanceArtifacts(root, updatedState, config, prevPhase);
|
|
5136
|
+
updatedState = cleanup.state;
|
|
4995
5137
|
ledgerEntries.push({
|
|
4996
5138
|
type: 'approval_policy',
|
|
4997
5139
|
gate_type: 'phase_transition',
|
|
@@ -5030,6 +5172,18 @@ function _acceptGovernedTurnLocked(root, config, opts) {
|
|
|
5030
5172
|
trigger: 'auto_approved',
|
|
5031
5173
|
},
|
|
5032
5174
|
});
|
|
5175
|
+
emitRunEvent(root, 'phase_cleanup', {
|
|
5176
|
+
run_id: updatedState.run_id,
|
|
5177
|
+
phase: updatedState.phase,
|
|
5178
|
+
status: updatedState.status,
|
|
5179
|
+
turn: { turn_id: currentTurn.turn_id, role_id: currentTurn.assigned_role },
|
|
5180
|
+
payload: {
|
|
5181
|
+
...cleanup.payload,
|
|
5182
|
+
to_phase: gateResult.next_phase,
|
|
5183
|
+
gate_id: gateResult.gate_id || null,
|
|
5184
|
+
trigger: 'auto_approved',
|
|
5185
|
+
},
|
|
5186
|
+
});
|
|
5033
5187
|
} else {
|
|
5034
5188
|
updatedState.status = 'paused';
|
|
5035
5189
|
updatedState.blocked_on = `human_approval:${gateResult.gate_id}`;
|
|
@@ -5975,7 +6129,7 @@ export function approvePhaseTransition(root, config, opts = {}) {
|
|
|
5975
6129
|
appendJsonl(root, LEDGER_PATH, entry);
|
|
5976
6130
|
}
|
|
5977
6131
|
|
|
5978
|
-
|
|
6132
|
+
let updatedState = {
|
|
5979
6133
|
...state,
|
|
5980
6134
|
phase: transition.to,
|
|
5981
6135
|
phase_entered_at: new Date().toISOString(),
|
|
@@ -5989,6 +6143,8 @@ export function approvePhaseTransition(root, config, opts = {}) {
|
|
|
5989
6143
|
[transition.gate]: 'passed',
|
|
5990
6144
|
},
|
|
5991
6145
|
};
|
|
6146
|
+
const cleanup = cleanupPhaseAdvanceArtifacts(root, updatedState, config, transition.from);
|
|
6147
|
+
updatedState = cleanup.state;
|
|
5992
6148
|
|
|
5993
6149
|
writeState(root, updatedState);
|
|
5994
6150
|
clearSlaReminders(root, 'pending_phase_transition');
|
|
@@ -6009,6 +6165,17 @@ export function approvePhaseTransition(root, config, opts = {}) {
|
|
|
6009
6165
|
trigger: 'human_approved',
|
|
6010
6166
|
},
|
|
6011
6167
|
});
|
|
6168
|
+
emitRunEvent(root, 'phase_cleanup', {
|
|
6169
|
+
run_id: updatedState.run_id,
|
|
6170
|
+
phase: updatedState.phase,
|
|
6171
|
+
status: 'active',
|
|
6172
|
+
payload: {
|
|
6173
|
+
...cleanup.payload,
|
|
6174
|
+
to_phase: transition.to,
|
|
6175
|
+
gate_id: transition.gate || null,
|
|
6176
|
+
trigger: 'human_approved',
|
|
6177
|
+
},
|
|
6178
|
+
});
|
|
6012
6179
|
|
|
6013
6180
|
// Session checkpoint — non-fatal
|
|
6014
6181
|
writeSessionCheckpoint(root, updatedState, 'phase_approved');
|
|
@@ -640,9 +640,38 @@ export function validateRunLoopConfig(runLoop) {
|
|
|
640
640
|
}
|
|
641
641
|
validateRunLoopPositiveInteger('run_loop.startup_watchdog_ms', runLoop.startup_watchdog_ms, errors);
|
|
642
642
|
validateRunLoopPositiveInteger('run_loop.stale_turn_threshold_ms', runLoop.stale_turn_threshold_ms, errors);
|
|
643
|
+
if (runLoop.continuous !== undefined && runLoop.continuous !== null) {
|
|
644
|
+
validateRunLoopContinuousConfig('run_loop.continuous', runLoop.continuous, errors);
|
|
645
|
+
}
|
|
643
646
|
return errors;
|
|
644
647
|
}
|
|
645
648
|
|
|
649
|
+
/**
 * Validate the `continuous` block of the run-loop config, appending any
 * problems to `errors`.
 *
 * @param {string} path - dotted config path used as the error-message prefix
 * @param {*} continuous - value to validate; the caller is expected to have filtered out null/undefined
 * @param {string[]} errors - accumulator that receives error messages
 * @returns {void}
 */
function validateRunLoopContinuousConfig(path, continuous, errors) {
  const isObjectShaped = typeof continuous === 'object' && !Array.isArray(continuous);
  if (!isObjectShaped) {
    errors.push(`${path} must be an object`);
    return;
  }

  const ghostPolicy = continuous.auto_retry_on_ghost;
  if (ghostPolicy === undefined || ghostPolicy === null) return;
  validateAutoRetryOnGhostConfig(`${path}.auto_retry_on_ghost`, ghostPolicy, errors);
}
|
|
658
|
+
|
|
659
|
+
/**
 * Validate an `auto_retry_on_ghost` config block, appending any problems to
 * `errors`. `enabled` must be a boolean when present; `max_retries_per_run`
 * and `cooldown_seconds` must be positive integers when present.
 *
 * @param {string} path - dotted config path used as the error-message prefix
 * @param {*} value - value to validate
 * @param {string[]} errors - accumulator that receives error messages
 * @returns {void}
 */
function validateAutoRetryOnGhostConfig(path, value, errors) {
  if (typeof value !== 'object' || Array.isArray(value)) {
    errors.push(`${path} must be an object`);
    return;
  }

  if ('enabled' in value && typeof value.enabled !== 'boolean') {
    errors.push(`${path}.enabled must be a boolean`);
  }

  // Integer-valued knobs paired with the unit label shown in error messages.
  const integerFields = [
    ['max_retries_per_run', 'retry count'],
    ['cooldown_seconds', 'seconds'],
  ];
  for (const [field, unitLabel] of integerFields) {
    if (field in value) {
      validatePositiveInteger(`${path}.${field}`, value[field], unitLabel, errors);
    }
  }
}
|
|
674
|
+
|
|
646
675
|
function validateRunLoopPositiveInteger(path, value, errors) {
|
|
647
676
|
if (value === undefined || value === null) {
|
|
648
677
|
return;
|
|
@@ -656,6 +685,15 @@ function validateRunLoopPositiveInteger(path, value, errors) {
|
|
|
656
685
|
}
|
|
657
686
|
}
|
|
658
687
|
|
|
688
|
+
/**
 * Append an error unless `value` is absent (null/undefined) or an integer
 * greater than or equal to 1.
 *
 * @param {string} path - dotted config path used as the error-message prefix
 * @param {*} value - value to validate
 * @param {string} unitLabel - unit shown in parentheses in the error message
 * @param {string[]} errors - accumulator that receives error messages
 * @returns {void}
 */
function validatePositiveInteger(path, value, unitLabel, errors) {
  if (value == null) return;
  // Number.isInteger is false for every non-number, so no typeof check is needed.
  const isPositiveInteger = Number.isInteger(value) && value >= 1;
  if (!isPositiveInteger) {
    errors.push(`${path} must be a positive integer (${unitLabel})`);
  }
}
|
|
696
|
+
|
|
659
697
|
function validateRuntimePositiveInteger(path, value, errors) {
|
|
660
698
|
if (value === undefined || value === null) {
|
|
661
699
|
return;
|
package/src/lib/run-events.js
CHANGED
|
@@ -38,11 +38,14 @@ export const VALID_RUN_EVENTS = [
|
|
|
38
38
|
'gate_pending',
|
|
39
39
|
'gate_approved',
|
|
40
40
|
'gate_failed',
|
|
41
|
+
'phase_cleanup',
|
|
41
42
|
'budget_exceeded_warn',
|
|
42
43
|
'human_escalation_raised',
|
|
43
44
|
'human_escalation_resolved',
|
|
44
45
|
'dispatch_progress',
|
|
45
46
|
'session_continuation',
|
|
47
|
+
'auto_retried_ghost',
|
|
48
|
+
'ghost_retry_exhausted',
|
|
46
49
|
];
|
|
47
50
|
|
|
48
51
|
/**
|
|
@@ -104,6 +104,34 @@
|
|
|
104
104
|
"type": "integer",
|
|
105
105
|
"minimum": 1,
|
|
106
106
|
"description": "Milliseconds to wait before a started turn that previously produced output is treated as stale. Default 600000 for local_cli turns and 300000 for api_proxy turns."
|
|
107
|
+
},
|
|
108
|
+
"continuous": {
|
|
109
|
+
"type": "object",
|
|
110
|
+
"description": "Continuous-run control knobs.",
|
|
111
|
+
"properties": {
|
|
112
|
+
"auto_retry_on_ghost": {
|
|
113
|
+
"type": "object",
|
|
114
|
+
"description": "Bounded ghost-turn retry policy for continuous/full-auto sessions.",
|
|
115
|
+
"properties": {
|
|
116
|
+
"enabled": {
|
|
117
|
+
"type": "boolean",
|
|
118
|
+
"description": "Enable bounded automatic reissue for startup ghost turns. Defaults false unless full-auto approval policy posture promotes it."
|
|
119
|
+
},
|
|
120
|
+
"max_retries_per_run": {
|
|
121
|
+
"type": "integer",
|
|
122
|
+
"minimum": 1,
|
|
123
|
+
"description": "Maximum ghost retries per run before leaving manual recovery visible. Default 3."
|
|
124
|
+
},
|
|
125
|
+
"cooldown_seconds": {
|
|
126
|
+
"type": "integer",
|
|
127
|
+
"minimum": 1,
|
|
128
|
+
"description": "Seconds to wait between automatic ghost retries. Default 5."
|
|
129
|
+
}
|
|
130
|
+
},
|
|
131
|
+
"additionalProperties": true
|
|
132
|
+
}
|
|
133
|
+
},
|
|
134
|
+
"additionalProperties": true
|
|
107
135
|
}
|
|
108
136
|
},
|
|
109
137
|
"additionalProperties": true
|