agentxchain 2.146.0 → 2.147.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,7 @@ import {
31
31
  getActiveTurnCount,
32
32
  getActiveTurns,
33
33
  getMaxConcurrentTurns,
34
+ transitionActiveTurnLifecycle,
34
35
  RUNNER_INTERFACE_VERSION,
35
36
  } from './runner-interface.js';
36
37
 
@@ -40,6 +41,18 @@ import { join, dirname } from 'path';
40
41
  import { evaluateApprovalSlaReminders } from './notification-runner.js';
41
42
  import { validatePreemptionMarker } from './intake.js';
42
43
  import { buildTimeoutBlockedReason, evaluateTimeouts } from './timeout-evaluator.js';
44
+ import { hasMinimumTurnResultShape } from './turn-result-shape.js';
45
+
46
+ // Per DEC-RUN-LOOP-MIN-SHAPE-SYMMETRY-001 (Turn 33): runLoop is the SDK boundary
47
+ // any third-party runner can wire (see website-v2/docs/build-your-own-runner.mdx).
48
+ // In-repo adapters (api_proxy, mcp, local_cli, remote_agent) already validate
49
+ // staged-result shape before write per DEC-MINIMUM-TURN-RESULT-SHAPE-001, and
50
+ // run.js's dispatch callback re-validates before returning per
51
+ // DEC-RUN-STAGED-READ-SHAPE-GUARD-001. Third-party callbacks have no such
52
+ // obligation. runLoop must therefore validate dispatchResult.turnResult shape
53
+ // before persisting it as a governed staged-result artifact.
54
+ const MIN_SHAPE_REJECTION_REASON =
55
+ 'staged result missing minimum governed envelope (schema_version + identity + lifecycle fields)';
43
56
 
44
57
  const DEFAULT_MAX_TURNS = 50;
45
58
 
@@ -182,7 +195,7 @@ async function executeSequentialTurn(root, config, state, callbacks, emit, error
182
195
  let assignState;
183
196
  const activeTurn = getActiveTurn(state);
184
197
 
185
- if (activeTurn && (activeTurn.status === 'running' || activeTurn.status === 'retrying')) {
198
+ if (activeTurn && isDispatchableActiveTurn(activeTurn)) {
186
199
  turn = activeTurn;
187
200
  assignState = state;
188
201
  } else {
@@ -224,7 +237,7 @@ async function executeParallelTurns(root, config, state, maxConcurrent, callback
224
237
  const activeTurns = getActiveTurns(state);
225
238
  const turnsToDispatch = [];
226
239
  for (const turn of Object.values(activeTurns)) {
227
- if (turn.status === 'running' || turn.status === 'retrying') {
240
+ if (isDispatchableActiveTurn(turn)) {
228
241
  turnsToDispatch.push({ turn, state });
229
242
  }
230
243
  }
@@ -317,6 +330,7 @@ async function executeParallelTurns(root, config, state, maxConcurrent, callback
317
330
  errors.push(`writeDispatchBundle(${turn.assigned_role}): ${bundleResult.error}`);
318
331
  continue;
319
332
  }
333
+ transitionActiveTurnLifecycle(root, turn.turn_id, 'dispatched');
320
334
  const stagingPath = getTurnStagingResultPath(turn.turn_id);
321
335
  contexts.push({
322
336
  turn,
@@ -362,6 +376,23 @@ async function executeParallelTurns(root, config, state, maxConcurrent, callback
362
376
  continue;
363
377
  }
364
378
 
379
+ if (dispatchResult.accept && !hasMinimumTurnResultShape(dispatchResult.turnResult)) {
380
+ // DEC-RUN-LOOP-MIN-SHAPE-SYMMETRY-001: third-party dispatch callback claimed
381
+ // accept=true but returned a payload missing the minimum envelope. Refuse to
382
+ // stage; convert to standard rejection so the run state advances cleanly.
383
+ const validationResult = { stage: 'dispatch', errors: [MIN_SHAPE_REJECTION_REASON] };
384
+ rejectTurn(root, config, validationResult, MIN_SHAPE_REJECTION_REASON, { turnId: turn.turn_id });
385
+ history.push({ role: roleId, turn_id: turn.turn_id, accepted: false });
386
+ emit({ type: 'turn_rejected', turn, role: roleId, reason: MIN_SHAPE_REJECTION_REASON });
387
+ const postRejectState = loadState(root, config);
388
+ if (postRejectState?.status === 'blocked') {
389
+ errors.push(`Turn rejected for ${roleId}, retries exhausted`);
390
+ emit({ type: 'blocked', state: postRejectState });
391
+ return { terminal: true, ok: false, stop_reason: 'reject_exhausted', history, acceptedCount };
392
+ }
393
+ continue;
394
+ }
395
+
365
396
  if (dispatchResult.accept) {
366
397
  const absStaging = join(root, ctx.stagingPath);
367
398
  mkdirSync(dirname(absStaging), { recursive: true });
@@ -409,6 +440,12 @@ async function executeParallelTurns(root, config, state, maxConcurrent, callback
409
440
  }
410
441
  emit({ type: 'turn_accepted', turn, role: roleId, state: acceptResult.state });
411
442
  } else {
443
+ if (dispatchResult?.blocked === true) {
444
+ history.push({ role: roleId, turn_id: turn.turn_id, accepted: false, blocked: true });
445
+ const blockedState = loadState(root, config);
446
+ emit({ type: 'blocked', state: blockedState });
447
+ return { terminal: true, ok: false, stop_reason: 'blocked', history, acceptedCount };
448
+ }
412
449
  const validationResult = {
413
450
  stage: 'dispatch',
414
451
  errors: [dispatchResult.reason || 'Dispatch callback rejected the turn'],
@@ -449,6 +486,10 @@ async function executeParallelTurns(root, config, state, maxConcurrent, callback
449
486
  return { terminal: false, history, acceptedCount };
450
487
  }
451
488
 
489
/**
 * Reports whether an active turn is in a lifecycle status the run loop treats
 * as dispatchable: `assigned`, `dispatched`, `starting`, `running`, or
 * `retrying`.
 *
 * @param {{ status?: string } | null | undefined} turn - active turn record
 * @returns {boolean} true when `turn.status` is one of the statuses above;
 *   false for any other status and for a null/undefined turn
 */
function isDispatchableActiveTurn(turn) {
  const dispatchableStatuses = ['assigned', 'dispatched', 'starting', 'running', 'retrying'];
  return dispatchableStatuses.includes(turn?.status);
}
492
+
452
493
  /**
453
494
  * Dispatch a single turn and process its result.
454
495
  */
@@ -463,6 +504,7 @@ async function dispatchAndProcess(root, config, turn, assignState, callbacks, em
463
504
  errors.push(`writeDispatchBundle(${roleId}): ${bundleResult.error}`);
464
505
  return { terminal: true, ok: false, stop_reason: 'blocked', history };
465
506
  }
507
+ transitionActiveTurnLifecycle(root, turn.turn_id, 'dispatched');
466
508
 
467
509
  const stagingPath = getTurnStagingResultPath(turn.turn_id);
468
510
  const context = {
@@ -488,6 +530,22 @@ async function dispatchAndProcess(root, config, turn, assignState, callbacks, em
488
530
  return { terminal: true, ok: false, stop_reason: 'blocked', history };
489
531
  }
490
532
 
533
+ if (dispatchResult.accept && !hasMinimumTurnResultShape(dispatchResult.turnResult)) {
534
+ // DEC-RUN-LOOP-MIN-SHAPE-SYMMETRY-001: same boundary as parallel branch.
535
+ // Refuse to stage; convert to a standard rejection.
536
+ const validationResult = { stage: 'dispatch', errors: [MIN_SHAPE_REJECTION_REASON] };
537
+ rejectTurn(root, config, validationResult, MIN_SHAPE_REJECTION_REASON);
538
+ history.push({ role: roleId, turn_id: turn.turn_id, accepted: false });
539
+ emit({ type: 'turn_rejected', turn, role: roleId, reason: MIN_SHAPE_REJECTION_REASON });
540
+ const postRejectState = loadState(root, config);
541
+ if (postRejectState?.status === 'blocked') {
542
+ errors.push(`Turn rejected for ${roleId}, retries exhausted`);
543
+ emit({ type: 'blocked', state: postRejectState });
544
+ return { terminal: true, ok: false, stop_reason: 'reject_exhausted', history };
545
+ }
546
+ return { terminal: false, accepted: false, history };
547
+ }
548
+
491
549
  if (dispatchResult.accept) {
492
550
  const absStaging = join(root, stagingPath);
493
551
  mkdirSync(dirname(absStaging), { recursive: true });
@@ -537,6 +595,13 @@ async function dispatchAndProcess(root, config, turn, assignState, callbacks, em
537
595
  return { terminal: false, accepted: true, history };
538
596
  }
539
597
 
598
+ if (dispatchResult?.blocked === true) {
599
+ history.push({ role: roleId, turn_id: turn.turn_id, accepted: false, blocked: true });
600
+ const blockedState = loadState(root, config);
601
+ emit({ type: 'blocked', state: blockedState });
602
+ return { terminal: true, ok: false, stop_reason: 'blocked', history };
603
+ }
604
+
540
605
  // Rejection
541
606
  const validationResult = {
542
607
  stage: 'dispatch',
@@ -41,6 +41,7 @@ export {
41
41
  releaseAcceptanceLock as releaseLock,
42
42
  refreshTurnBaselineSnapshot,
43
43
  reissueTurn,
44
+ transitionActiveTurnLifecycle,
44
45
  } from './governed-state.js';
45
46
 
46
47
  // ── Dispatch ────────────────────────────────────────────────────────────────
package/src/lib/schema.js CHANGED
@@ -35,6 +35,13 @@ export function validateGovernedStateSchema(data) {
35
35
  // but validators and read-only surfaces still tolerate reserved/manual states.
36
36
  const VALID_RUN_STATUSES = ['idle', 'active', 'paused', 'blocked', 'completed', 'failed'];
37
37
  const isV1_1 = data?.schema_version === '1.1';
38
+ // NOTE: `current_turn` is the persisted v1.0 schema field. Under v1.1 it is
39
+ // not a persisted field at all — `loadProjectState()` re-attaches it as a
40
+ // non-enumerable getter alias over `active_turns` after normalization
41
+ // (DEC-CURRENT-TURN-COMPAT-ALIAS-001). This validator runs against the
42
+ // persisted shape, so an `own` property named `current_turn` on a v1.1 doc
43
+ // means "stray persisted-shape leak from a legacy write" and is rejected
44
+ // below — it does NOT mean the runtime alias is going away.
38
45
  const hasLegacyCurrentTurn = Object.prototype.hasOwnProperty.call(data || {}, 'current_turn');
39
46
 
40
47
  function validateTurn(turn, label) {
@@ -85,7 +85,21 @@
85
85
  "type": "object"
86
86
  },
87
87
  "run_loop": {
88
- "type": "object"
88
+ "type": "object",
89
+ "description": "Runner control knobs for execution watchdogs and automation behavior.",
90
+ "properties": {
91
+ "startup_watchdog_ms": {
92
+ "type": "integer",
93
+ "minimum": 1,
94
+ "description": "Milliseconds to wait after dispatch for worker attach/first-output proof before retaining the turn as failed_start. Default 30000."
95
+ },
96
+ "stale_turn_threshold_ms": {
97
+ "type": "integer",
98
+ "minimum": 1,
99
+ "description": "Milliseconds to wait before a started turn that previously produced output is treated as stale. Default 600000 for local_cli turns and 300000 for api_proxy turns."
100
+ }
101
+ },
102
+ "additionalProperties": true
89
103
  },
90
104
  "mission_planner": {
91
105
  "type": "object"
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Staged turn-result proof helpers.
3
+ *
4
+ * Per DEC-BUG51-STAGING-PLACEHOLDER-NOT-PROOF-001: a turn-scoped staged-result
5
+ * file is proof of execution only when it contains meaningful result content.
6
+ * Adapter-authored placeholders (`{}`, blank, whitespace-only) are cleanup
7
+ * artifacts — watchdog, adapter, and recovery code must treat them as absent.
8
+ *
9
+ * This module centralizes that check so every surface (local-cli adapter,
10
+ * manual adapter, stale-turn watchdog) uses the same rule.
11
+ */
12
+
13
+ import { existsSync, readFileSync } from 'node:fs';
14
+
15
/**
 * Returns true when the staged-result file at `filePath` exists AND contains
 * content that is not a placeholder.
 *
 * Placeholders are: an empty file, a whitespace-only file, and an empty JSON
 * object with any amount of whitespace around or between the braces (`{}`,
 * `{}\n`, ` {}\n`, `{ }`, `{\n}`). Any other content counts as meaningful; it
 * is not parsed or validated here.
 *
 * A path that does not exist or cannot be read yields false instead of
 * throwing.
 *
 * @param {string} filePath - absolute path to the staged-result file
 * @returns {boolean}
 */
export function hasMeaningfulStagedResult(filePath) {
  if (!existsSync(filePath)) {
    return false;
  }

  let raw;
  try {
    raw = readFileSync(filePath, 'utf8');
  } catch {
    // The file can vanish or be unreadable between the existence check and the
    // read; treat that the same as "no staged result".
    return false;
  }

  const trimmed = raw.trim();
  if (trimmed === '') {
    return false;
  }

  // An empty object is a placeholder regardless of how it was pretty-printed.
  return !/^\{\s*\}$/.test(trimmed);
}
@@ -3,18 +3,18 @@
3
3
  *
4
4
  * Two-tier lazy idle-threshold detection:
5
5
  *
6
- * 1. **Fast startup watchdog (BUG-51):** if an active turn has been dispatched
7
- * for >30 seconds with NO dispatch-progress file, NO staged result, and NO
8
- * recent events, it is a "ghost turn" the subprocess never attached.
9
- * Transitions to `failed_start` immediately.
6
+ * 1. **Fast startup watchdog (BUG-51):** if an active turn has been
7
+ * `dispatched`/`starting`/`running` for >30 seconds with NO startup proof
8
+ * (no first-byte output recorded on the turn or in dispatch-progress) and
9
+ * NO staged result, it is a "ghost turn" — the subprocess never reached a
10
+ * healthy running state. Transitions to `failed_start` immediately.
10
11
  *
11
- * Design note: the watchdog intentionally keys on turn-scoped
12
- * dispatch-progress rather than `stdout.log` existence. Dispatch-progress is
13
- * a framework-authored signal with a stable per-turn contract across runtime
14
- * wiring; `stdout.log` is adapter-authored visibility output and is allowed
15
- * to be best-effort. Using dispatch-progress therefore gives us the same
16
- * operator-facing "no first byte / no worker heartbeat" detection without
17
- * coupling the watchdog to adapter-specific log-attachment details.
12
+ * Design note: the watchdog intentionally keys on first-output proof from
13
+ * the framework-owned dispatch-progress contract rather than `stdout.log`
14
+ * existence. `stdout.log` is adapter-authored visibility output and may be
15
+ * absent even when the adapter is wired correctly. First-output timestamps
16
+ * and output-line counters are the stable health contract across runtime
17
+ * wiring.
18
18
  *
19
19
  * 2. **Stale turn watchdog (BUG-47):** if an active turn has status "running"
20
20
  * for >N minutes with no event log activity AND no staged result file,
@@ -36,6 +36,7 @@ import { safeWriteJson } from './safe-write.js';
36
36
  import { emitRunEvent, readRunEvents } from './run-events.js';
37
37
  import { getTurnStagingResultPath } from './turn-paths.js';
38
38
  import { getDispatchProgressRelativePath } from './dispatch-progress.js';
39
+ import { hasMeaningfulStagedResult } from './staged-result-proof.js';
39
40
 
40
41
  const DEFAULT_LOCAL_CLI_THRESHOLD_MS = 10 * 60 * 1000; // 10 minutes
41
42
  const DEFAULT_API_PROXY_THRESHOLD_MS = 5 * 60 * 1000; // 5 minutes
@@ -103,12 +104,11 @@ export function detectStaleTurns(root, state, config) {
103
104
  /**
104
105
  * BUG-51: Detect ghost-dispatched turns — subprocess never started.
105
106
  *
106
- * A ghost turn is one that has been in "running" or "retrying" status for
107
- * longer than the startup watchdog threshold (default 30s) AND has:
108
- * - no dispatch-progress file (framework-observed proof that no subprocess
109
- * output or heartbeat was attached)
107
+ * A ghost turn is one that has been in `dispatched`, `starting`, `running`, or
108
+ * `retrying` longer than the startup watchdog threshold (default 30s) AND has:
109
+ * - no startup proof (no `first_output_at` on the turn or dispatch-progress,
110
+ * and no recorded output line counts)
110
111
  * - no staged result file
111
- * - no recent turn-scoped events (beyond the initial turn_dispatched)
112
112
  *
113
113
  * This is a stricter, faster check than detectStaleTurns (BUG-47).
114
114
  * Ghost turns transition to "failed_start" rather than "stalled".
@@ -125,31 +125,22 @@ export function detectGhostTurns(root, state, config) {
125
125
  const startupThreshold = resolveStartupThreshold(config);
126
126
 
127
127
  for (const [turnId, turn] of Object.entries(activeTurns)) {
128
- if (turn.status !== 'running' && turn.status !== 'retrying') continue;
129
- if (!turn.started_at) continue;
128
+ if (!['dispatched', 'starting', 'running', 'retrying'].includes(turn.status)) continue;
130
129
 
131
- const startedAt = new Date(turn.started_at).getTime();
132
- if (isNaN(startedAt)) continue;
130
+ const lifecycleStart = parseGhostLifecycleStart(turn);
131
+ if (!Number.isFinite(lifecycleStart)) continue;
133
132
 
134
- const runningMs = now - startedAt;
133
+ const runningMs = now - lifecycleStart;
135
134
  if (runningMs < startupThreshold) continue;
136
135
 
137
- // Ghost detection: NO dispatch-progress file means subprocess never attached
138
136
  const progressPath = join(root, getDispatchProgressRelativePath(turnId));
139
- const hasProgress = existsSync(progressPath);
137
+ const progress = readDispatchProgressSafe(progressPath);
140
138
 
141
- // If dispatch-progress exists, subprocess started — this is NOT a ghost turn.
142
- // The regular stale-turn watchdog (BUG-47) will handle it if it goes silent.
143
- if (hasProgress) continue;
144
-
145
- // Also check for staged result (unlikely without progress, but be safe)
146
139
  if (hasTurnScopedStagedResult(root, turnId)) continue;
147
-
148
- // Check for any turn-scoped events beyond the initial dispatch event
149
- if (hasRecentTurnEventActivity(root, turnId, startedAt, startupThreshold, now)) continue;
140
+ if (hasStartupProof(turn, progress)) continue;
150
141
 
151
142
  const runningSeconds = Math.floor(runningMs / 1000);
152
- const failureType = 'no_subprocess_output';
143
+ const failureType = classifyStartupFailureType(turn, progress);
153
144
  ghosts.push({
154
145
  turn_id: turnId,
155
146
  role: turn.assigned_role || 'unknown',
@@ -200,37 +191,11 @@ export function reconcileStaleTurns(root, state, config) {
200
191
 
201
192
  // Process ghost turns (BUG-51) — transition to failed_start
202
193
  for (const entry of ghosts) {
203
- const turn = activeTurns[entry.turn_id];
204
- if (!turn || (turn.status !== 'running' && turn.status !== 'retrying')) continue;
205
-
206
- activeTurns[entry.turn_id] = {
207
- ...turn,
208
- status: 'failed_start',
209
- failed_start_at: nowIso,
210
- failed_start_reason: entry.failure_type,
211
- failed_start_previous_status: turn.status,
212
- failed_start_threshold_ms: entry.threshold_ms,
213
- failed_start_running_ms: entry.running_ms,
214
- recovery_command: `agentxchain reissue-turn --turn ${entry.turn_id} --reason ghost`,
215
- };
216
- changed = true;
217
-
218
- // BUG-51 fix #6: Release budget reservation for ghost turns
219
- delete budgetReservations[entry.turn_id];
220
-
221
- emitRunEvent(root, 'turn_start_failed', {
222
- run_id: state?.run_id || null,
223
- phase: state?.phase || null,
224
- status: 'blocked',
225
- turn: { turn_id: entry.turn_id, role_id: entry.role },
226
- payload: {
227
- running_ms: entry.running_ms,
228
- threshold_ms: entry.threshold_ms,
229
- runtime_id: entry.runtime_id,
230
- failure_type: entry.failure_type,
231
- recommendation: entry.recommendation,
232
- },
233
- });
194
+ const applied = applyStartupFailureToActiveTurn(activeTurns, budgetReservations, entry, nowIso);
195
+ if (applied) {
196
+ emitStartupFailureEvent(root, state, entry);
197
+ changed = true;
198
+ }
234
199
  }
235
200
 
236
201
  // Process stale turns (BUG-47) — transition to stalled
@@ -271,32 +236,9 @@ export function reconcileStaleTurns(root, state, config) {
271
236
  return { stale_turns: stale, ghost_turns: ghosts, state, changed: false };
272
237
  }
273
238
 
274
- const allDetected = [...ghosts, ...stale];
275
- const primary = allDetected[0];
239
+ const nextState = buildBlockedStateFromEntries(state, activeTurns, budgetReservations, ghosts, stale, nowIso);
240
+ const primary = [...ghosts, ...stale][0];
276
241
  const category = ghosts.length > 0 ? 'ghost_turn' : 'stale_turn';
277
- const blockedOn = allDetected.length === 1
278
- ? `turn:${primary.failure_type ? 'failed_start' : 'stalled'}:${primary.turn_id}`
279
- : ghosts.length > 0 ? 'turns:failed_start' : 'turns:stalled';
280
-
281
- const nextState = {
282
- ...state,
283
- status: 'blocked',
284
- active_turns: activeTurns,
285
- budget_reservations: budgetReservations,
286
- blocked_on: blockedOn,
287
- blocked_reason: {
288
- category,
289
- blocked_at: nowIso,
290
- turn_id: primary.turn_id,
291
- recovery: {
292
- typed_reason: category,
293
- owner: 'human',
294
- recovery_action: primary.recommendation,
295
- turn_retained: true,
296
- detail: primary.recommendation,
297
- },
298
- },
299
- };
300
242
 
301
243
  safeWriteJson(join(root, '.agentxchain', 'state.json'), nextState);
302
244
  emitRunEvent(root, 'run_blocked', {
@@ -340,13 +282,63 @@ function resolveStartupThreshold(config) {
340
282
  return DEFAULT_STARTUP_WATCHDOG_MS;
341
283
  }
342
284
 
285
/**
 * Marks a single active turn as a startup failure and blocks the run.
 *
 * On success the turn is transitioned via `applyStartupFailureToActiveTurn`,
 * the blocked state built by `buildBlockedStateFromEntries` is written to
 * `.agentxchain/state.json` under `root`, and a startup-failure event followed
 * by a `run_blocked` event (category `ghost_turn`) is emitted.
 *
 * The caller's `state` object is not mutated: `active_turns` and
 * `budget_reservations` are shallow-copied before the helpers modify them.
 *
 * @param {string} root - project root directory
 * @param {object} state - current governed state
 * @param {object} config - project config, used to resolve the default threshold
 * @param {string} turnId - id of the active turn to fail
 * @param {object|null} [details] - optional overrides: `running_ms`,
 *   `threshold_ms`, `failure_type`, `recommendation`. `null` is treated as `{}`.
 * @returns {{ ok: false, error: string } | { ok: true, state: object, turn: object|null }}
 */
export function failTurnStartup(root, state, config, turnId, details = {}) {
  if (!state || typeof state !== 'object') {
    return { ok: false, error: 'No governed state found' };
  }

  const turn = state.active_turns?.[turnId];
  if (!turn) {
    return { ok: false, error: `Turn ${turnId} not found in active turns` };
  }

  // The `= {}` default only covers `undefined`; an explicit `null` would
  // otherwise throw on the property reads below.
  const overrides = details ?? {};

  const nowIso = new Date().toISOString();
  const activeTurns = { ...(state.active_turns || {}) };
  const budgetReservations = { ...(state.budget_reservations || {}) };
  const entry = {
    turn_id: turnId,
    role: turn.assigned_role || 'unknown',
    runtime_id: turn.runtime_id || 'unknown',
    running_ms: overrides.running_ms ?? computeLifecycleAgeMs(turn),
    threshold_ms: overrides.threshold_ms ?? resolveStartupThreshold(config),
    failure_type: classifyStartupFailureType(turn, null, overrides.failure_type || 'no_subprocess_output'),
    recommendation: overrides.recommendation
      || `Turn ${turnId} failed to start cleanly. Run \`agentxchain reissue-turn --turn ${turnId} --reason ghost\` to recover.`,
  };

  if (!applyStartupFailureToActiveTurn(activeTurns, budgetReservations, entry, nowIso)) {
    return { ok: false, error: `Turn ${turnId} is not eligible for startup failure transition` };
  }

  const nextState = buildBlockedStateFromEntries(state, activeTurns, budgetReservations, [entry], [], nowIso);
  safeWriteJson(join(root, '.agentxchain', 'state.json'), nextState);
  emitStartupFailureEvent(root, state, entry);
  emitRunEvent(root, 'run_blocked', {
    run_id: nextState.run_id || null,
    phase: nextState.phase || null,
    status: 'blocked',
    turn: { turn_id: entry.turn_id, role_id: entry.role },
    payload: {
      category: 'ghost_turn',
      ghost_turn_ids: [entry.turn_id],
      stalled_turn_ids: [],
    },
  });
  return { ok: true, state: nextState, turn: nextState.active_turns?.[turnId] || null };
}
329
+
343
330
  function hasRecentTurnEventActivity(root, turnId, startedAt, threshold, now) {
344
331
  try {
345
332
  const events = readRunEvents(root, { limit: 200 });
346
333
  for (let i = events.length - 1; i >= 0; i--) {
347
334
  const event = events[i];
348
335
  if (event?.turn?.turn_id !== turnId) continue;
349
- if (event.event_type === 'turn_stalled' || event.event_type === 'turn_start_failed') continue;
336
+ if (
337
+ event.event_type === 'turn_stalled'
338
+ || event.event_type === 'turn_start_failed'
339
+ || event.event_type === 'runtime_spawn_failed'
340
+ || event.event_type === 'stdout_attach_failed'
341
+ ) continue;
350
342
  const timestamp = Date.parse(event.timestamp || '');
351
343
  if (!Number.isFinite(timestamp)) continue;
352
344
  if (timestamp < startedAt) continue;
@@ -360,9 +352,145 @@ function hasRecentTurnEventActivity(root, turnId, startedAt, threshold, now) {
360
352
  return false;
361
353
  }
362
354
 
355
/**
 * Transitions one active turn to `failed_start` in place and releases its
 * budget reservation.
 *
 * Mutates both maps it is given: `activeTurns[entry.turn_id]` is replaced with
 * a new object carrying the failure bookkeeping fields, and
 * `budgetReservations[entry.turn_id]` is deleted. The previous turn object
 * itself is not modified.
 *
 * @param {Record<string, object>} activeTurns - turn id → turn record (mutated)
 * @param {Record<string, unknown>} budgetReservations - turn id → reservation (mutated)
 * @param {{ turn_id: string, failure_type: string, threshold_ms: number, running_ms: number }} entry
 * @param {string} nowIso - timestamp recorded as `failed_start_at`
 * @returns {boolean} true when the transition was applied; false when the turn
 *   is missing or its status is not `dispatched`, `starting`, `running`, or
 *   `retrying` (in which case nothing is mutated)
 */
function applyStartupFailureToActiveTurn(activeTurns, budgetReservations, entry, nowIso) {
  const turnId = entry.turn_id;
  const turn = activeTurns[turnId];
  const eligibleStatuses = ['dispatched', 'starting', 'running', 'retrying'];
  if (!turn || !eligibleStatuses.includes(turn.status)) {
    return false;
  }

  activeTurns[turnId] = {
    ...turn,
    status: 'failed_start',
    failed_start_at: nowIso,
    failed_start_reason: entry.failure_type,
    failed_start_previous_status: turn.status,
    failed_start_threshold_ms: entry.threshold_ms,
    failed_start_running_ms: entry.running_ms,
    recovery_command: `agentxchain reissue-turn --turn ${turnId} --reason ghost`,
  };
  delete budgetReservations[turnId];
  return true;
}
374
+
375
/**
 * Emits the run events describing a startup failure for one turn.
 *
 * Always emits `turn_start_failed`. When `mapStartupFailureEventType` returns
 * an event type for `entry.failure_type`, a second event of that type is
 * emitted with the same details object.
 *
 * @param {string} root - project root passed through to `emitRunEvent`
 * @param {object|null|undefined} state - source of `run_id` and `phase`
 *   (each falls back to null when absent)
 * @param {{ turn_id: string, role: string, running_ms: number, threshold_ms: number,
 *   runtime_id: string, failure_type: string, recommendation: string }} entry
 * @returns {void}
 */
function emitStartupFailureEvent(root, state, entry) {
  const details = {
    run_id: state?.run_id || null,
    phase: state?.phase || null,
    status: 'blocked',
    turn: { turn_id: entry.turn_id, role_id: entry.role },
    payload: {
      running_ms: entry.running_ms,
      threshold_ms: entry.threshold_ms,
      runtime_id: entry.runtime_id,
      failure_type: entry.failure_type,
      recommendation: entry.recommendation,
    },
  };

  emitRunEvent(root, 'turn_start_failed', details);

  const failureEventType = mapStartupFailureEventType(entry.failure_type);
  if (failureEventType) {
    emitRunEvent(root, failureEventType, details);
  }
}
396
+
397
/**
 * Builds the blocked run state for a set of detected ghost and stale turns.
 *
 * The first detected entry (ghosts before stale) is the "primary" one: its
 * `turn_id` and `recommendation` populate `blocked_reason`. The category is
 * `ghost_turn` whenever at least one ghost entry is present, otherwise
 * `stale_turn`. `blocked_on` names the single affected turn when exactly one
 * entry was detected, and a plural marker otherwise.
 *
 * Returns a new object; `state` is not mutated. `activeTurns` and
 * `budgetReservations` are stored by reference.
 *
 * NOTE(review): at least one entry is expected; with both lists empty the
 * primary entry is undefined and reading its `turn_id` throws.
 *
 * @param {object} state - current governed state
 * @param {Record<string, object>} activeTurns - updated active turns map
 * @param {Record<string, unknown>} budgetReservations - updated reservations map
 * @param {Array<object>} ghosts - startup-failure entries
 * @param {Array<object>} stale - stalled-turn entries
 * @param {string} nowIso - timestamp recorded as `blocked_at`
 * @returns {object} next state with `status: 'blocked'`
 */
function buildBlockedStateFromEntries(state, activeTurns, budgetReservations, ghosts, stale, nowIso) {
  const detected = [...ghosts, ...stale];
  const primary = detected[0];
  const hasGhosts = ghosts.length > 0;
  const category = hasGhosts ? 'ghost_turn' : 'stale_turn';

  let blockedOn;
  if (detected.length === 1) {
    // Only ghost entries carry a failure_type, which distinguishes the two kinds.
    const kind = primary.failure_type ? 'failed_start' : 'stalled';
    blockedOn = `turn:${kind}:${primary.turn_id}`;
  } else {
    blockedOn = hasGhosts ? 'turns:failed_start' : 'turns:stalled';
  }

  return {
    ...state,
    status: 'blocked',
    active_turns: activeTurns,
    budget_reservations: budgetReservations,
    blocked_on: blockedOn,
    blocked_reason: {
      category,
      blocked_at: nowIso,
      turn_id: primary.turn_id,
      recovery: {
        typed_reason: category,
        owner: 'human',
        recovery_action: primary.recommendation,
        turn_retained: true,
        detail: primary.recommendation,
      },
    },
  };
}
425
+
426
/**
 * Resolves the timestamp from which a turn's startup age is measured.
 *
 * A `dispatched` turn is measured from `dispatched_at`, falling back to
 * `assigned_at`. Any other status is measured from `started_at`, then
 * `dispatched_at`, then `assigned_at`.
 *
 * @param {object|null|undefined} turn - active turn record
 * @returns {number} epoch milliseconds, or NaN when no usable timestamp is
 *   present (including a null/undefined turn)
 */
function parseGhostLifecycleStart(turn) {
  const fallbackStamp = turn?.dispatched_at || turn?.assigned_at || '';
  if (turn?.status === 'dispatched') {
    return Date.parse(fallbackStamp);
  }
  return Date.parse(turn?.started_at || fallbackStamp);
}
432
+
433
/**
 * Computes how long, in milliseconds, a turn has been in its current startup
 * lifecycle, measured from `parseGhostLifecycleStart(turn)` to now.
 *
 * @param {object} turn - active turn record
 * @returns {number} non-negative age in milliseconds; 0 when the start
 *   timestamp is missing/unparseable or lies in the future
 */
function computeLifecycleAgeMs(turn) {
  const startedMs = parseGhostLifecycleStart(turn);
  if (!Number.isFinite(startedMs)) {
    return 0;
  }
  // Clamp so a start time ahead of the local clock never yields a negative age.
  return Math.max(0, Date.now() - startedMs);
}
438
+
439
/**
 * Reads and JSON-parses the dispatch-progress file without ever throwing.
 *
 * @param {string} progressPath - path to the dispatch-progress file
 * @returns {unknown} the parsed JSON value, or null when the file does not
 *   exist, cannot be read, or does not contain valid JSON
 */
function readDispatchProgressSafe(progressPath) {
  if (!existsSync(progressPath)) {
    return null;
  }
  try {
    const raw = readFileSync(progressPath, 'utf8');
    return JSON.parse(raw);
  } catch {
    // Unreadable or malformed progress is treated as "no progress recorded".
    return null;
  }
}
449
+
450
/**
 * Classifies why a turn failed to start.
 *
 * Resolution order:
 *   1. A `fallback` of `runtime_spawn_failed` or `stdout_attach_failed` is
 *      returned as-is (the caller already knows the specific failure).
 *   2. A turn still in `dispatched` status → `runtime_spawn_failed`.
 *   3. A turn in `starting` status, or any worker-attach evidence
 *      (`turn.worker_attached_at`, a non-null `turn.worker_pid`, or a non-null
 *      `progress.pid`) → `stdout_attach_failed`.
 *   4. Otherwise `fallback`.
 *
 * @param {object|null|undefined} turn - active turn record
 * @param {object|null|undefined} progress - parsed dispatch-progress, if any
 * @param {string} [fallback='no_subprocess_output'] - type used when nothing
 *   more specific applies
 * @returns {string} the failure type
 */
function classifyStartupFailureType(turn, progress, fallback = 'no_subprocess_output') {
  const callerAlreadySpecific = fallback === 'runtime_spawn_failed' || fallback === 'stdout_attach_failed';
  if (callerAlreadySpecific) {
    return fallback;
  }

  const status = turn?.status;
  if (status === 'dispatched') {
    return 'runtime_spawn_failed';
  }

  // `!= null` on purpose: a pid of 0 still counts as attach evidence.
  const workerAttached = Boolean(
    turn?.worker_attached_at
    || turn?.worker_pid != null
    || progress?.pid != null,
  );
  if (status === 'starting' || workerAttached) {
    return 'stdout_attach_failed';
  }

  return fallback;
}
467
+
468
/**
 * Maps a startup failure type to the dedicated run-event type emitted for it.
 *
 * Only `runtime_spawn_failed` and `stdout_attach_failed` have a dedicated
 * event, named identically to the failure type.
 *
 * @param {string} failureType - classified startup failure type
 * @returns {string|null} the event type, or null when no dedicated event exists
 */
function mapStartupFailureEventType(failureType) {
  switch (failureType) {
    case 'runtime_spawn_failed':
      return 'runtime_spawn_failed';
    case 'stdout_attach_failed':
      return 'stdout_attach_failed';
    default:
      return null;
  }
}
477
+
478
/**
 * Reports whether there is evidence that a turn's worker produced output.
 *
 * Proof is any of: a truthy `first_output_at` on the turn, a truthy
 * `first_output_at` on the dispatch-progress object, or a positive
 * `output_lines` / `stderr_lines` counter on the dispatch-progress object.
 *
 * @param {object|null|undefined} turn - active turn record
 * @param {unknown} progress - parsed dispatch-progress; anything that is not a
 *   non-null object contributes no proof
 * @returns {boolean}
 */
function hasStartupProof(turn, progress) {
  if (turn?.first_output_at) {
    return true;
  }
  if (!progress || typeof progress !== 'object') {
    return false;
  }
  if (progress.first_output_at) {
    return true;
  }
  // Counters may be missing or non-numeric; those coerce to 0/NaN and fail `> 0`.
  const stdoutLines = Number(progress.output_lines || 0);
  const stderrLines = Number(progress.stderr_lines || 0);
  return stdoutLines > 0 || stderrLines > 0;
}
490
+
363
491
  function hasTurnScopedStagedResult(root, turnId) {
364
492
  const turnScopedPath = join(root, getTurnStagingResultPath(turnId));
365
- if (existsSync(turnScopedPath)) {
493
+ if (hasMeaningfulStagedResult(turnScopedPath)) {
366
494
  return true;
367
495
  }
368
496
 
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Lightweight staged turn-result shape guard.
3
+ *
4
+ * This is intentionally weaker than full acceptance validation. It exists for
5
+ * adapter pre-stage checks so obviously incomplete payloads (`{}`,
6
+ * `{"turn_id":"t1"}`, etc.) are rejected before they can be written into the
7
+ * governed staging path and mistaken for meaningful execution output.
8
+ */
9
+
10
/**
 * Reports whether `value` is a string containing at least one
 * non-whitespace character.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isNonEmptyString(value) {
  if (typeof value !== 'string') {
    return false;
  }
  return value.trim() !== '';
}
13
+
14
/**
 * Returns true when `value` has the minimum governed turn-result envelope:
 *   - `schema_version`
 *   - at least one identity field (`run_id` or `turn_id`)
 *   - at least one lifecycle field (`status`, `role`, or `runtime_id`)
 *
 * Each field only counts when it is a string with non-whitespace content.
 * Non-objects, null, and arrays are rejected outright.
 *
 * Full schema validation still happens later via `validateStagedTurnResult`.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
export function hasMinimumTurnResultShape(value) {
  if (!value || typeof value !== 'object' || Array.isArray(value)) {
    return false;
  }

  const candidate = /** @type {Record<string, unknown>} */ (value);

  if (!isNonEmptyString(candidate.schema_version)) {
    return false;
  }

  const hasIdentity = isNonEmptyString(candidate.run_id) || isNonEmptyString(candidate.turn_id);
  if (!hasIdentity) {
    return false;
  }

  return isNonEmptyString(candidate.status)
    || isNonEmptyString(candidate.role)
    || isNonEmptyString(candidate.runtime_id);
}
@@ -75,7 +75,10 @@ export function validateStagedTurnResult(root, state, config, opts = {}) {
75
75
  const normContext = {};
76
76
  if (state) {
77
77
  normContext.phase = state.phase;
78
- // Support both active_turns (v2+) and legacy current_turn formats
78
+ // Prefer active_turns (the persisted schema field); fall back to the
79
+ // current_turn compatibility alias for callers that pass a state shape
80
+ // built outside loadProjectState() (e.g. raw fixtures). Both surfaces are
81
+ // live per DEC-CURRENT-TURN-COMPAT-ALIAS-001 — current_turn is not legacy.
79
82
  const activeTurn = getActiveTurn(state) || state.current_turn;
80
83
  if (activeTurn) {
81
84
  const roleKey = activeTurn.assigned_role || activeTurn.role;