agentxchain 2.155.53 → 2.155.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/lib/continuous-run.js +186 -6
- package/src/lib/run-events.js +2 -0
package/package.json
CHANGED
(hunk not shown)
package/src/lib/continuous-run.js
CHANGED
|
@@ -43,7 +43,7 @@ import {
|
|
|
43
43
|
classifyGhostRetryDecision,
|
|
44
44
|
extractLatestStderrDiagnostic,
|
|
45
45
|
} from './ghost-retry.js';
|
|
46
|
-
import { getDispatchLogPath } from './turn-paths.js';
|
|
46
|
+
import { getDispatchLogPath, getTurnStagingResultPath } from './turn-paths.js';
|
|
47
47
|
import { reconcileOperatorHead } from './operator-commit-reconcile.js';
|
|
48
48
|
import { getContinuityStatus } from './continuity-status.js';
|
|
49
49
|
import {
|
|
@@ -53,6 +53,8 @@ import {
|
|
|
53
53
|
} from './intent-startup-migration.js';
|
|
54
54
|
|
|
55
55
|
const CONTINUOUS_SESSION_PATH = '.agentxchain/continuous-session.json';
|
|
56
|
+
// Cap on productive-timeout auto-retries within one run; the retry logic
// compares the session's recorded `attempts` against this value.
const PRODUCTIVE_TIMEOUT_RETRY_MAX_PER_RUN = 1;
|
|
57
|
+
// Minutes from "now" used to compute `deadline_at` for a turn that was
// reissued after a productive timeout.
const PRODUCTIVE_TIMEOUT_RETRY_DEADLINE_MINUTES = 60;
|
|
56
58
|
|
|
57
59
|
function getRoadmapReplenishmentTriageHints(root) {
|
|
58
60
|
const context = loadProjectContext(root);
|
|
@@ -310,6 +312,184 @@ function readLatestDispatchDiagnostic(root, turnId) {
|
|
|
310
312
|
}
|
|
311
313
|
}
|
|
312
314
|
|
|
315
|
+
/**
 * Read the productive-timeout retry bookkeeping from a continuous session and
 * normalize it into a complete record.
 *
 * When `session.productive_timeout_retry` is missing or is not an object, the
 * zeroed defaults are returned. Otherwise each field is copied from the stored
 * value: `attempts` falls back to 0 unless it is a non-negative integer,
 * `max_retries_per_run` falls back to the module default unless it is an
 * integer, nullish ids/timestamps become `null`, and `exhausted` is coerced
 * to a boolean.
 *
 * @param {object|null|undefined} session - Continuous session; may be nullish.
 * @returns {object} A newly built record with the keys `run_id`, `attempts`,
 *   `max_retries_per_run`, `last_old_turn_id`, `last_new_turn_id`,
 *   `last_retried_at` and `exhausted`.
 */
function readProductiveTimeoutRetryState(session) {
  const raw = session?.productive_timeout_retry;
  // Anything that is not a truthy object counts as "nothing stored"; reading
  // from an empty object then produces exactly the default record.
  const stored = raw && typeof raw === 'object' ? raw : {};

  const attemptsAreValid = Number.isInteger(stored.attempts) && stored.attempts >= 0;
  const maxIsValid = Number.isInteger(stored.max_retries_per_run);

  return {
    run_id: stored.run_id ?? null,
    attempts: attemptsAreValid ? stored.attempts : 0,
    max_retries_per_run: maxIsValid
      ? stored.max_retries_per_run
      : PRODUCTIVE_TIMEOUT_RETRY_MAX_PER_RUN,
    last_old_turn_id: stored.last_old_turn_id ?? null,
    last_new_turn_id: stored.last_new_turn_id ?? null,
    last_retried_at: stored.last_retried_at ?? null,
    exhausted: Boolean(stored.exhausted),
  };
}
|
|
340
|
+
|
|
341
|
+
/**
 * Return the retry bookkeeping that applies to `runId`.
 *
 * If the session's normalized record already belongs to `runId` (strict
 * equality on `run_id`), that record is returned as-is. Otherwise a zeroed
 * record bound to `runId` is returned. The session itself is not modified
 * here; callers decide whether to persist the result.
 *
 * @param {object|null|undefined} session - Continuous session to read from.
 * @param {*} runId - Run identifier the record should be scoped to.
 * @returns {object} Retry record for `runId`.
 */
function resetProductiveTimeoutRetryForRun(session, runId) {
  const existing = readProductiveTimeoutRetryState(session);
  const belongsToRun = existing.run_id === runId;

  return belongsToRun
    ? existing
    : {
      run_id: runId ?? null,
      attempts: 0,
      max_retries_per_run: PRODUCTIVE_TIMEOUT_RETRY_MAX_PER_RUN,
      last_old_turn_id: null,
      last_new_turn_id: null,
      last_retried_at: null,
      exhausted: false,
    };
}
|
|
354
|
+
|
|
355
|
+
/**
 * Locate the active turn that caused a `retries_exhausted` block through a
 * "productive timeout": a dispatch-stage failure whose rejection text looks
 * like a deadline kill, on a turn that had already produced output but left
 * no staged result on disk.
 *
 * The turn named by `blocked_reason.turn_id` (or `escalation.from_turn_id`)
 * is the only candidate when it is present in `active_turns`; otherwise every
 * active turn is inspected in key order and the first match wins.
 *
 * @param {string} root - Project root used to resolve the staging result path.
 * @param {object|null|undefined} state - Governed state to inspect.
 * @returns {{turn_id: string, turn: object}|null} The matching turn, or `null`.
 */
function findPrimaryProductiveTimeoutTurn(root, state) {
  if (!state || typeof state !== 'object') return null;
  if (state.blocked_reason?.category !== 'retries_exhausted') return null;

  const blamedId = state.blocked_reason?.turn_id || state.escalation?.from_turn_id || null;
  const turnsById = state.active_turns || {};
  const idsToInspect = blamedId && turnsById[blamedId] ? [blamedId] : Object.keys(turnsById);

  const isProductiveTimeout = (id) => {
    const turn = turnsById[id];
    if (!turn || turn.status !== 'failed') return false;

    const rejection = turn.last_rejection;
    if (rejection?.failed_stage !== 'dispatch') return false;

    // The reason and any validation errors are searched together as one text.
    const extraErrors = Array.isArray(rejection?.validation_errors) ? rejection.validation_errors : [];
    const rejectionText = [rejection?.reason, ...extraErrors].join('\n');
    if (!/code 143|dispatch timed out|timed out/i.test(rejectionText)) return false;

    // "Productive" means the turn emitted output before it was killed.
    if (!turn.first_output_at) return false;

    // A staged result on disk disqualifies the turn; this filesystem check is
    // last so it only runs for turns that passed every in-memory test.
    return !existsSync(join(root, getTurnStagingResultPath(id)));
  };

  const matchedId = idsToInspect.find(isProductiveTimeout);
  return matchedId === undefined ? null : { turn_id: matchedId, turn: turnsById[matchedId] };
}
|
|
378
|
+
|
|
379
|
+
/**
 * Try to recover automatically from a run that is blocked by a productive
 * timeout (see `findPrimaryProductiveTimeoutTurn`).
 *
 * Outcomes:
 * - No matching turn: returns `null` and touches nothing.
 * - Retry budget for the run already spent: marks the session's retry record
 *   as exhausted, sets the session to `paused`, persists it and returns
 *   `null`. The `productive_timeout_retry_exhausted` event is emitted only the
 *   first time a given turn is found exhausted, because callers invoke this
 *   function again on every advance while the run stays blocked.
 * - Reissue fails: logs the reason and returns `null`.
 * - Reissue succeeds: extends the new turn's deadline, records the attempt on
 *   the session, emits `auto_retried_productive_timeout`, optionally waits for
 *   the configured cooldown and returns a result describing the retry.
 *
 * @param {{root: string, config: object}} context - Project root and config.
 * @param {object} session - Continuous session; mutated in place and persisted.
 * @param {object} [contOpts] - Continuous options; only `cooldownSeconds` is read.
 * @param {object} blockedState - Governed state that reported the block.
 * @param {Function} [log=console.log] - Sink for human-readable progress lines.
 * @returns {Promise<object|null>} Retry descriptor, or `null` when no retry happened.
 */
async function maybeAutoRetryProductiveTimeoutBlocker(context, session, contOpts, blockedState, log = console.log) {
  const { root, config } = context;
  const candidate = findPrimaryProductiveTimeoutTurn(root, blockedState);
  if (!candidate) return null;

  const runId = session.current_run_id || blockedState?.run_id || null;
  const retryState = resetProductiveTimeoutRetryForRun(session, runId);
  const maxRetries = PRODUCTIVE_TIMEOUT_RETRY_MAX_PER_RUN;

  if (retryState.attempts >= maxRetries) {
    // Read the previously reported turn BEFORE the record is overwritten below.
    // `retryState.exhausted` is run-scoped (it is reset for a new run), so a
    // stale id left over from another run can never suppress the event.
    const alreadyReported = Boolean(retryState.exhausted)
      && session.productive_timeout_retry?.exhausted_turn_id === candidate.turn_id;

    Object.assign(session, {
      productive_timeout_retry: {
        ...retryState,
        max_retries_per_run: maxRetries,
        exhausted: true,
        exhausted_turn_id: candidate.turn_id,
      },
      status: 'paused',
    });
    writeContinuousSession(root, session);

    if (!alreadyReported) {
      emitRunEvent(root, 'productive_timeout_retry_exhausted', {
        run_id: runId,
        phase: blockedState?.phase || null,
        status: 'blocked',
        turn: { turn_id: candidate.turn_id, role_id: candidate.turn.assigned_role || null },
        intent_id: candidate.turn.intake_context?.intent_id || null,
        payload: {
          turn_id: candidate.turn_id,
          attempts: retryState.attempts,
          max_retries_per_run: maxRetries,
        },
      });
    }
    return null;
  }

  const reissued = reissueTurn(root, config, {
    turnId: candidate.turn_id,
    reason: 'auto_retry_productive_timeout',
  });
  if (!reissued.ok) {
    log(`Productive-timeout auto-retried skipped: ${reissued.error}`.replace('auto-retried skipped', 'auto-retry skipped'));
    return null;
  }

  // One clock reading drives both the retry timestamp and the new deadline.
  const nowMs = Date.now();
  const nowIso = new Date(nowMs).toISOString();
  const deadlineAt = new Date(nowMs + PRODUCTIVE_TIMEOUT_RETRY_DEADLINE_MINUTES * 60 * 1000).toISOString();

  let nextState = clearGhostBlockerAfterReissue(root, reissued.state);
  const newTurnId = reissued.newTurn.turn_id;
  const activeTurns = { ...(nextState.active_turns || {}) };
  if (activeTurns[newTurnId]) {
    activeTurns[newTurnId] = {
      ...activeTurns[newTurnId],
      deadline_at: deadlineAt,
      timeout_recovery_context: {
        reissued_from: candidate.turn_id,
        reason: 'productive_timeout',
        // `??` keeps a recorded attempt count of 0 instead of turning it into null.
        previous_attempts: candidate.turn.attempt ?? null,
        previous_deadline_at: candidate.turn.deadline_at || null,
        extended_deadline_minutes: PRODUCTIVE_TIMEOUT_RETRY_DEADLINE_MINUTES,
      },
    };
    nextState = { ...nextState, active_turns: activeTurns };
    writeGovernedState(root, nextState);
  }

  const attempt = retryState.attempts + 1;
  Object.assign(session, {
    productive_timeout_retry: {
      run_id: runId,
      attempts: attempt,
      max_retries_per_run: maxRetries,
      last_old_turn_id: candidate.turn_id,
      last_new_turn_id: newTurnId,
      last_retried_at: nowIso,
      exhausted: false,
    },
    status: 'running',
    current_run_id: runId,
  });
  writeContinuousSession(root, session);

  emitRunEvent(root, 'auto_retried_productive_timeout', {
    run_id: runId,
    phase: nextState.phase || blockedState?.phase || null,
    status: 'active',
    turn: { turn_id: newTurnId, role_id: reissued.newTurn.assigned_role },
    intent_id: candidate.turn.intake_context?.intent_id || null,
    payload: {
      old_turn_id: candidate.turn_id,
      new_turn_id: newTurnId,
      attempt,
      max_retries_per_run: maxRetries,
      extended_deadline_minutes: PRODUCTIVE_TIMEOUT_RETRY_DEADLINE_MINUTES,
    },
  });

  log(`Productive-timeout auto-retried (${attempt}/${maxRetries}): ${candidate.turn_id} -> ${newTurnId}`);

  // Tolerate a missing options object; no cooldown is applied in that case.
  const cooldownSeconds = contOpts?.cooldownSeconds ?? 0;
  if (cooldownSeconds > 0) {
    await new Promise((resolve) => setTimeout(resolve, cooldownSeconds * 1000));
  }

  return {
    ok: true,
    status: 'running',
    action: 'auto_retried_productive_timeout',
    run_id: runId,
    old_turn_id: candidate.turn_id,
    new_turn_id: newTurnId,
    attempt,
    max_retries_per_run: maxRetries,
  };
}
|
|
487
|
+
|
|
488
|
+
/**
 * Run the automatic blocker-recovery strategies in priority order.
 *
 * The productive-timeout recovery is attempted first; the ghost-turn recovery
 * runs only when the first one returns a falsy result.
 *
 * @param {object} context - Forwarded unchanged to both strategies.
 * @param {object} session - Forwarded unchanged to both strategies.
 * @param {object} contOpts - Forwarded unchanged to both strategies.
 * @param {object} blockedState - Forwarded unchanged to both strategies.
 * @param {Function} [log=console.log] - Forwarded unchanged to both strategies.
 * @returns {Promise<*>} The first truthy strategy result, otherwise whatever
 *   the ghost-turn strategy resolved to.
 */
async function maybeAutoRetryContinuousBlocker(context, session, contOpts, blockedState, log = console.log) {
  const timeoutRetry = await maybeAutoRetryProductiveTimeoutBlocker(context, session, contOpts, blockedState, log);
  if (timeoutRetry) {
    return timeoutRetry;
  }
  return await maybeAutoRetryGhostBlocker(context, session, contOpts, blockedState, log);
}
|
|
492
|
+
|
|
313
493
|
async function maybeAutoRetryGhostBlocker(context, session, contOpts, blockedState, log = console.log) {
|
|
314
494
|
const { root, config } = context;
|
|
315
495
|
const decision = classifyGhostRetryDecision({
|
|
@@ -1396,7 +1576,7 @@ export async function advanceContinuousRunOnce(context, session, contOpts, execu
|
|
|
1396
1576
|
|
|
1397
1577
|
const startupGovernedState = loadProjectState(root, context.config);
|
|
1398
1578
|
if (startupGovernedState?.status === 'blocked') {
|
|
1399
|
-
const retried = await
|
|
1579
|
+
const retried = await maybeAutoRetryContinuousBlocker(context, session, contOpts, startupGovernedState, log);
|
|
1400
1580
|
if (retried) return retried;
|
|
1401
1581
|
session.status = 'paused';
|
|
1402
1582
|
writeContinuousSession(root, session);
|
|
@@ -1457,7 +1637,7 @@ export async function advanceContinuousRunOnce(context, session, contOpts, execu
|
|
|
1457
1637
|
if (session.status === 'paused') {
|
|
1458
1638
|
const governedState = loadProjectState(root, context.config);
|
|
1459
1639
|
if (governedState?.status === 'blocked') {
|
|
1460
|
-
const retried = await
|
|
1640
|
+
const retried = await maybeAutoRetryContinuousBlocker(context, session, contOpts, governedState, log);
|
|
1461
1641
|
if (retried) return retried;
|
|
1462
1642
|
// Still blocked — stay paused, do not attempt new work
|
|
1463
1643
|
writeContinuousSession(root, session);
|
|
@@ -1496,7 +1676,7 @@ export async function advanceContinuousRunOnce(context, session, contOpts, execu
|
|
|
1496
1676
|
|
|
1497
1677
|
if (isBlockedContinuousExecution(execution)) {
|
|
1498
1678
|
const blockedState = execution?.result?.state || loadProjectState(root, context.config);
|
|
1499
|
-
const retried = await
|
|
1679
|
+
const retried = await maybeAutoRetryContinuousBlocker(context, session, contOpts, blockedState, log);
|
|
1500
1680
|
if (retried) return retried;
|
|
1501
1681
|
const blockedRecoveryAction = getBlockedRecoveryAction(blockedState);
|
|
1502
1682
|
session.status = 'paused';
|
|
@@ -1564,7 +1744,7 @@ export async function advanceContinuousRunOnce(context, session, contOpts, execu
|
|
|
1564
1744
|
|
|
1565
1745
|
if (isBlockedContinuousExecution(execution)) {
|
|
1566
1746
|
const blockedState = execution?.result?.state || loadProjectState(root, context.config);
|
|
1567
|
-
const retried = await
|
|
1747
|
+
const retried = await maybeAutoRetryContinuousBlocker(context, session, contOpts, blockedState, log);
|
|
1568
1748
|
if (retried) return retried;
|
|
1569
1749
|
const blockedRecoveryAction = getBlockedRecoveryAction(blockedState);
|
|
1570
1750
|
session.status = 'paused';
|
|
@@ -1756,7 +1936,7 @@ export async function advanceContinuousRunOnce(context, session, contOpts, execu
|
|
|
1756
1936
|
|
|
1757
1937
|
if (isBlockedContinuousExecution(execution)) {
|
|
1758
1938
|
const blockedState = execution?.result?.state || loadProjectState(root, context.config);
|
|
1759
|
-
const retried = await
|
|
1939
|
+
const retried = await maybeAutoRetryContinuousBlocker(context, session, contOpts, blockedState, log);
|
|
1760
1940
|
if (retried) return retried;
|
|
1761
1941
|
const blockedRecoveryAction = getBlockedRecoveryAction(blockedState);
|
|
1762
1942
|
const resolved = resolveIntent(root, targetIntentId);
|
package/src/lib/run-events.js
CHANGED
|
@@ -46,6 +46,8 @@ export const VALID_RUN_EVENTS = [
|
|
|
46
46
|
'session_continuation',
|
|
47
47
|
'auto_retried_ghost',
|
|
48
48
|
'ghost_retry_exhausted',
|
|
49
|
+
'auto_retried_productive_timeout',
|
|
50
|
+
'productive_timeout_retry_exhausted',
|
|
49
51
|
'state_reconciled_operator_commits',
|
|
50
52
|
'operator_commit_reconcile_refused',
|
|
51
53
|
'charter_materialization_required',
|