agentxchain 2.146.0 → 2.148.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/package.json +1 -1
  2. package/scripts/publish-npm.sh +16 -0
  3. package/scripts/sync-homebrew.sh +14 -1
  4. package/scripts/verify-post-publish.sh +55 -4
  5. package/src/commands/reissue-turn.js +16 -0
  6. package/src/commands/reject-turn.js +14 -1
  7. package/src/commands/restart.js +15 -0
  8. package/src/commands/resume.js +61 -66
  9. package/src/commands/run.js +67 -10
  10. package/src/commands/schedule.js +34 -7
  11. package/src/commands/status.js +20 -0
  12. package/src/commands/step.js +100 -34
  13. package/src/lib/adapters/api-proxy-adapter.js +8 -0
  14. package/src/lib/adapters/local-cli-adapter.js +271 -16
  15. package/src/lib/adapters/manual-adapter.js +9 -10
  16. package/src/lib/adapters/mcp-adapter.js +3 -5
  17. package/src/lib/adapters/remote-agent-adapter.js +3 -5
  18. package/src/lib/continuous-run.js +71 -6
  19. package/src/lib/dispatch-bundle.js +1 -1
  20. package/src/lib/dispatch-progress.js +5 -3
  21. package/src/lib/governed-state.js +258 -17
  22. package/src/lib/intake.js +10 -1
  23. package/src/lib/normalized-config.js +51 -1
  24. package/src/lib/recent-event-summary.js +11 -0
  25. package/src/lib/run-events.js +4 -0
  26. package/src/lib/run-loop.js +67 -2
  27. package/src/lib/runner-interface.js +1 -0
  28. package/src/lib/schema.js +7 -0
  29. package/src/lib/schemas/agentxchain-config.schema.json +15 -1
  30. package/src/lib/schemas/turn-result.schema.json +8 -2
  31. package/src/lib/staged-result-proof.js +43 -0
  32. package/src/lib/stale-turn-watchdog.js +218 -90
  33. package/src/lib/turn-checkpoint.js +65 -1
  34. package/src/lib/turn-result-shape.js +38 -0
  35. package/src/lib/turn-result-validator.js +15 -3
@@ -382,6 +382,16 @@ function renderGovernedStatus(context, opts) {
382
382
  console.log(` ${chalk.dim('Recover:')} ${chalk.cyan(`agentxchain reject-turn --turn ${turn.turn_id}`)} — reject and retry`);
383
383
  console.log(` ${chalk.dim(' or:')} ${chalk.cyan(`agentxchain accept-turn --turn ${turn.turn_id}`)} — re-attempt acceptance`);
384
384
  }
385
+ if (turn.status === 'failed_start') {
386
+ console.log(` ${chalk.dim('Reason:')} ${turn.failed_start_reason || 'no_subprocess_output'}`);
387
+ const recover = turn.recovery_command || `agentxchain reissue-turn --turn ${turn.turn_id} --reason ghost`;
388
+ console.log(` ${chalk.dim('Recover:')} ${chalk.cyan(recover)}`);
389
+ }
390
+ if (turn.status === 'stalled') {
391
+ console.log(` ${chalk.dim('Reason:')} ${turn.stalled_reason || 'no_output_within_threshold'}`);
392
+ const recover = turn.recovery_command || `agentxchain reissue-turn --turn ${turn.turn_id} --reason stale`;
393
+ console.log(` ${chalk.dim('Recover:')} ${chalk.cyan(recover)}`);
394
+ }
385
395
  }
386
396
  } else if (singleActiveTurn) {
387
397
  console.log(` ${chalk.dim('Turn:')} ${singleActiveTurn.turn_id}`);
@@ -432,6 +442,16 @@ function renderGovernedStatus(context, opts) {
432
442
  console.log(` ${chalk.dim('Resolve:')} ${chalk.cyan(reassignAction.command)}`);
433
443
  console.log(` ${chalk.dim(' or:')} ${chalk.cyan(mergeAction.command)}`);
434
444
  }
445
+ if (singleActiveTurn.status === 'failed_start') {
446
+ console.log(` ${chalk.dim('Reason:')} ${singleActiveTurn.failed_start_reason || 'no_subprocess_output'}`);
447
+ const recover = singleActiveTurn.recovery_command || `agentxchain reissue-turn --turn ${singleActiveTurn.turn_id} --reason ghost`;
448
+ console.log(` ${chalk.dim('Recover:')} ${chalk.cyan(recover)}`);
449
+ }
450
+ if (singleActiveTurn.status === 'stalled') {
451
+ console.log(` ${chalk.dim('Reason:')} ${singleActiveTurn.stalled_reason || 'no_output_within_threshold'}`);
452
+ const recover = singleActiveTurn.recovery_command || `agentxchain reissue-turn --turn ${singleActiveTurn.turn_id} --reason stale`;
453
+ console.log(` ${chalk.dim('Recover:')} ${chalk.cyan(recover)}`);
454
+ }
435
455
  } else {
436
456
  console.log(` ${chalk.dim('Turn:')} ${chalk.yellow('No active turn')}`);
437
457
  }
@@ -35,7 +35,9 @@ import {
35
35
  getActiveTurnCount,
36
36
  getActiveTurns,
37
37
  reactivateGovernedRun,
38
+ reconcilePhaseAdvanceBeforeDispatch,
38
39
  refreshTurnBaselineSnapshot,
40
+ transitionActiveTurnLifecycle,
39
41
  STATE_PATH,
40
42
  } from '../lib/governed-state.js';
41
43
  import { getMaxConcurrentTurns } from '../lib/normalized-config.js';
@@ -70,7 +72,7 @@ import { resolveGovernedRole } from '../lib/role-resolution.js';
70
72
  import { shouldSuggestManualQaFallback } from '../lib/manual-qa-fallback.js';
71
73
  import { evaluateApprovalSlaReminders } from '../lib/notification-runner.js';
72
74
  import { consumeNextApprovedIntent } from '../lib/intake.js';
73
- import { reconcileStaleTurns } from '../lib/stale-turn-watchdog.js';
75
+ import { failTurnStartup, reconcileStaleTurns } from '../lib/stale-turn-watchdog.js';
74
76
 
75
77
  export async function stepCommand(opts) {
76
78
  const context = loadProjectContext();
@@ -260,39 +262,14 @@ export async function stepCommand(opts) {
260
262
  printDispatchBundleWarnings(bundleResult);
261
263
  }
262
264
 
263
- // Handle paused + failed/retrying turn → re-dispatch
264
- if (!skipAssignment && state.status === 'paused' && activeCount > 0) {
265
- const pausedTurn = targetTurn || Object.values(activeTurns)[0];
266
- const turnStatus = pausedTurn?.status;
267
- if (turnStatus === 'failed' || turnStatus === 'retrying') {
268
- console.log(chalk.yellow(`Re-dispatching failed turn: ${pausedTurn.turn_id}`));
269
- const reactivated = reactivateGovernedRun(root, state, { via: 'step --resume', notificationConfig: config });
270
- if (!reactivated.ok) {
271
- console.log(chalk.red(`Failed to reactivate run: ${reactivated.error}`));
272
- process.exit(1);
273
- }
274
- state = reactivated.state;
275
- if (reactivated.migration_notice) {
276
- console.log(chalk.yellow(reactivated.migration_notice));
277
- }
278
- if (reactivated.phantom_notice) {
279
- console.log(chalk.yellow(reactivated.phantom_notice));
280
- }
281
- skipAssignment = true;
282
-
283
- // BUG-1 fix: refresh baseline snapshot to capture files dirtied between assignment and dispatch
284
- refreshTurnBaselineSnapshot(root, pausedTurn.turn_id);
285
- state = JSON.parse(readFileSync(join(root, '.agentxchain/state.json'), 'utf8'));
286
-
287
- const bundleResult = writeDispatchBundle(root, state, config);
288
- if (!bundleResult.ok) {
289
- console.log(chalk.red(`Failed to write dispatch bundle: ${bundleResult.error}`));
290
- process.exit(1);
291
- }
292
- bundleWritten = true;
293
- printDispatchBundleWarnings(bundleResult);
294
- }
295
- }
265
+ // Removed (Turn 25): the `paused + failed/retrying retained turn → re-dispatch`
266
+ // branch is unreachable under the current schema. See the matching deletion in
267
+ // `cli/src/commands/resume.js` for the full citation chain (schema.js:184 +
268
+ // governed-state.js:2191-2204 + the line-187 short-circuit above). The reachable
269
+ // retained-turn re-dispatch path for `step --resume` is the `state.status ===
270
+ // 'blocked' && activeCount > 0` branch at line 193 above. Per
271
+ // `DEC-UNREACHABLE-BRANCH-COVERAGE-001`, dead branches are removed once the
272
+ // schema citation + migration citation are documented.
296
273
 
297
274
  // idle → initialize run
298
275
  if (!skipAssignment && state.status === 'idle' && !state.run_id) {
@@ -344,6 +321,27 @@ export async function stepCommand(opts) {
344
321
  }
345
322
  }
346
323
 
324
+ if (!skipAssignment) {
325
+ const phaseReconciliation = reconcilePhaseAdvanceBeforeDispatch(root, config, state);
326
+ if (!phaseReconciliation.ok && !phaseReconciliation.state) {
327
+ console.log(chalk.red(`Failed to reconcile phase gate before dispatch: ${phaseReconciliation.error}`));
328
+ process.exit(1);
329
+ }
330
+ state = phaseReconciliation.state || state;
331
+ if (phaseReconciliation.advanced) {
332
+ console.log(chalk.green(`Advanced phase before dispatch: ${phaseReconciliation.from_phase} → ${phaseReconciliation.to_phase}`));
333
+ }
334
+ if (state.pending_phase_transition || state.pending_run_completion) {
335
+ evaluateApprovalSlaReminders(root, config, state);
336
+ printRecoverySummary(state, 'This run is awaiting approval.', config);
337
+ process.exit(1);
338
+ }
339
+ if (state.status === 'blocked') {
340
+ printRecoverySummary(state, 'This run is blocked.', config);
341
+ process.exit(1);
342
+ }
343
+ }
344
+
347
345
  // Assign the turn
348
346
  if (!skipAssignment) {
349
347
  const roleId = resolveTargetRole(opts, state, config);
@@ -448,6 +446,10 @@ export async function stepCommand(opts) {
448
446
  console.log(chalk.red(`Failed to finalize dispatch manifest: ${manifestResult.error}`));
449
447
  process.exit(1);
450
448
  }
449
+ const dispatched = transitionActiveTurnLifecycle(root, turn.turn_id, 'dispatched');
450
+ if (dispatched.ok) {
451
+ state = dispatched.state;
452
+ }
451
453
  }
452
454
 
453
455
  const controller = new AbortController();
@@ -456,6 +458,13 @@ export async function stepCommand(opts) {
456
458
  });
457
459
 
458
460
  if (runtimeType === 'api_proxy') {
461
+ const running = transitionActiveTurnLifecycle(root, turn.turn_id, 'running', {
462
+ stream: 'request',
463
+ at: new Date().toISOString(),
464
+ });
465
+ if (running.ok) {
466
+ state = running.state;
467
+ }
459
468
  console.log(chalk.cyan(`Dispatching to API proxy: ${runtime?.provider || '(unknown)'} / ${runtime?.model || '(unknown)'}`));
460
469
  console.log(chalk.dim(`Turn: ${turn.turn_id} Role: ${roleId} Phase: ${state.phase}`));
461
470
 
@@ -535,6 +544,13 @@ export async function stepCommand(opts) {
535
544
  }
536
545
  console.log('');
537
546
  } else if (runtimeType === 'mcp') {
547
+ const running = transitionActiveTurnLifecycle(root, turn.turn_id, 'running', {
548
+ stream: 'request',
549
+ at: new Date().toISOString(),
550
+ });
551
+ if (running.ok) {
552
+ state = running.state;
553
+ }
538
554
  const mcpTransport = resolveMcpTransport(runtime);
539
555
  console.log(chalk.cyan(`Dispatching to MCP ${mcpTransport}: ${describeMcpRuntimeTarget(runtime)}`));
540
556
  console.log(chalk.dim(`Turn: ${turn.turn_id} Role: ${roleId} Phase: ${state.phase} Tool: ${runtime?.tool_name || 'agentxchain_turn'}`));
@@ -589,6 +605,13 @@ export async function stepCommand(opts) {
589
605
  console.log(chalk.green(`MCP tool completed${mcpResult.toolName ? ` (${mcpResult.toolName})` : ''}. Staged result detected.`));
590
606
  console.log('');
591
607
  } else if (runtimeType === 'remote_agent') {
608
+ const running = transitionActiveTurnLifecycle(root, turn.turn_id, 'running', {
609
+ stream: 'request',
610
+ at: new Date().toISOString(),
611
+ });
612
+ if (running.ok) {
613
+ state = running.state;
614
+ }
592
615
  console.log(chalk.cyan(`Dispatching to remote agent: ${describeRemoteAgentTarget(runtime)}`));
593
616
  console.log(chalk.dim(`Turn: ${turn.turn_id} Role: ${roleId} Phase: ${state.phase}`));
594
617
 
@@ -667,8 +690,25 @@ export async function stepCommand(opts) {
667
690
 
668
691
  // BUG-6: stream subprocess output by default (--stream or --verbose), suppress with --quiet
669
692
  const shouldStream = opts.stream || opts.verbose || false;
693
+ let runningMarked = false;
694
+ const ensureStartingState = (pid = null, at = new Date().toISOString()) => {
695
+ const starting = transitionActiveTurnLifecycle(root, turn.turn_id, 'starting', { pid, at });
696
+ if (starting.ok) {
697
+ state = starting.state;
698
+ }
699
+ };
700
+ const ensureRunningState = (stream = 'stdout', at = new Date().toISOString()) => {
701
+ if (runningMarked) return;
702
+ runningMarked = true;
703
+ const running = transitionActiveTurnLifecycle(root, turn.turn_id, 'running', { stream, at });
704
+ if (running.ok) {
705
+ state = running.state;
706
+ }
707
+ };
670
708
  const cliResult = await dispatchLocalCli(root, state, config, {
671
709
  signal: controller.signal,
710
+ onSpawnAttached: ({ pid, at }) => ensureStartingState(pid, at),
711
+ onFirstOutput: ({ at, stream }) => ensureRunningState(stream, at),
672
712
  onStdout: shouldStream ? (text) => process.stdout.write(chalk.dim(text)) : undefined,
673
713
  onStderr: shouldStream ? (text) => process.stderr.write(chalk.yellow(text)) : undefined,
674
714
  verifyManifest: true,
@@ -714,6 +754,28 @@ export async function stepCommand(opts) {
714
754
  process.exit(1);
715
755
  }
716
756
 
757
+ if (cliResult.startupFailure) {
758
+ const freshState = loadProjectState(root, config) || state;
759
+ const failed = failTurnStartup(root, freshState, config, turn.turn_id, {
760
+ failure_type: cliResult.startupFailureType || 'no_subprocess_output',
761
+ threshold_ms: config?.run_loop?.startup_watchdog_ms ?? 30_000,
762
+ running_ms: freshState?.active_turns?.[turn.turn_id]?.started_at
763
+ ? Math.max(0, Date.now() - new Date(freshState.active_turns[turn.turn_id].started_at).getTime())
764
+ : 0,
765
+ recommendation: `Turn ${turn.turn_id} failed to start within the startup watchdog window. Run \`agentxchain reissue-turn --turn ${turn.turn_id} --reason ghost\` to recover.`,
766
+ });
767
+ if (failed.ok) {
768
+ state = failed.state;
769
+ }
770
+
771
+ console.log('');
772
+ console.log(chalk.red(`Turn startup failed: ${cliResult.error}`));
773
+ console.log(chalk.dim('The turn was retained as failed_start. You can:'));
774
+ console.log(chalk.dim(` - Reissue immediately: agentxchain reissue-turn --turn ${turn.turn_id} --reason ghost`));
775
+ console.log(chalk.dim(' - Inspect status: agentxchain status'));
776
+ process.exit(1);
777
+ }
778
+
717
779
  if (!cliResult.ok) {
718
780
  const blocked = markRunBlocked(root, {
719
781
  blockedOn: `dispatch:${cliResult.exitCode != null ? `exit-${cliResult.exitCode}` : 'subprocess_failed'}`,
@@ -744,6 +806,10 @@ export async function stepCommand(opts) {
744
806
  process.exit(1);
745
807
  }
746
808
 
809
+ if (!runningMarked) {
810
+ ensureRunningState('staged_result', cliResult.firstOutputAt || new Date().toISOString());
811
+ }
812
+
747
813
  console.log(chalk.green('Subprocess completed. Staged result detected.'));
748
814
  console.log('');
749
815
  } else {
@@ -29,6 +29,7 @@
29
29
  import { readFileSync, writeFileSync, existsSync, mkdirSync, rmSync } from 'fs';
30
30
  import { join } from 'path';
31
31
  import { evaluateTokenBudget, SYSTEM_PROMPT, SEPARATOR } from '../token-budget.js';
32
+ import { hasMinimumTurnResultShape } from '../turn-result-shape.js';
32
33
  import {
33
34
  getDispatchApiRequestPath,
34
35
  getDispatchContextPath,
@@ -1072,6 +1073,13 @@ export async function dispatchApiProxy(root, state, config, options = {}) {
1072
1073
  turnResult.cost = { ...aggregateUsage };
1073
1074
  }
1074
1075
 
1076
+ if (!hasMinimumTurnResultShape(turnResult)) {
1077
+ return {
1078
+ ok: false,
1079
+ error: 'API response did not contain a valid turn result with the minimum governed turn-result fields',
1080
+ };
1081
+ }
1082
+
1075
1083
  // Stage the turn result
1076
1084
  try {
1077
1085
  writeFileSync(
@@ -18,7 +18,7 @@
18
18
  */
19
19
 
20
20
  import { spawn } from 'child_process';
21
- import { existsSync, readFileSync, statSync, mkdirSync, writeFileSync } from 'fs';
21
+ import { existsSync, readFileSync, mkdirSync, writeFileSync } from 'fs';
22
22
  import { join } from 'path';
23
23
  import {
24
24
  getDispatchContextPath,
@@ -29,6 +29,16 @@ import {
29
29
  getTurnStagingResultPath,
30
30
  } from '../turn-paths.js';
31
31
  import { verifyDispatchManifestForAdapter } from '../dispatch-manifest.js';
32
+ import { hasMeaningfulStagedResult } from '../staged-result-proof.js';
33
+
34
+ const DIAGNOSTIC_ENV_KEYS = [
35
+ 'PATH',
36
+ 'HOME',
37
+ 'PWD',
38
+ 'SHELL',
39
+ 'TMPDIR',
40
+ 'AGENTXCHAIN_TURN_ID',
41
+ ];
32
42
 
33
43
  /**
34
44
  * Launch a local CLI subprocess for a governed turn.
@@ -37,7 +47,7 @@ import { verifyDispatchManifestForAdapter } from '../dispatch-manifest.js';
37
47
  * passes them as the prompt to the configured CLI command.
38
48
  *
39
49
  * @param {string} root - project root directory
40
- * @param {object} state - current governed state (must have current_turn)
50
+ * @param {object} state - current governed state (must expose an active turn via active_turns; current_turn is a non-enumerable compatibility alias re-attached on load, not a persisted schema field)
41
51
  * @param {object} config - normalized config
42
52
  * @param {object} [options]
43
53
  * @param {AbortSignal} [options.signal] - abort signal for cancellation
@@ -48,7 +58,15 @@ import { verifyDispatchManifestForAdapter } from '../dispatch-manifest.js';
48
58
  * @returns {Promise<{ ok: boolean, exitCode?: number, timedOut?: boolean, aborted?: boolean, error?: string, logs?: string[] }>}
49
59
  */
50
60
  export async function dispatchLocalCli(root, state, config, options = {}) {
51
- const { signal, onStdout, onStderr, turnId } = options;
61
+ const {
62
+ signal,
63
+ onStdout,
64
+ onStderr,
65
+ onSpawnAttached,
66
+ onFirstOutput,
67
+ startupWatchdogMs = config?.run_loop?.startup_watchdog_ms ?? 30_000,
68
+ turnId,
69
+ } = options;
52
70
 
53
71
  const turn = resolveTargetTurn(state, turnId);
54
72
  if (!turn) {
@@ -103,6 +121,10 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
103
121
 
104
122
  // Capture logs for dispatch record
105
123
  const logs = [];
124
+ const runtimeCwd = runtime.cwd ? join(root, runtime.cwd) : root;
125
+ const spawnEnv = { ...process.env, AGENTXCHAIN_TURN_ID: turn.turn_id };
126
+ const stdinBytes = transport === 'stdin' ? Buffer.byteLength(fullPrompt, 'utf8') : 0;
127
+ const diagnosticArgs = redactPromptArgs(args, fullPrompt, transport);
106
128
 
107
129
  return new Promise((resolve) => {
108
130
  if (signal?.aborted) {
@@ -112,37 +134,143 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
112
134
 
113
135
  let child;
114
136
  try {
137
+ appendDiagnostic(logs, 'spawn_prepare', {
138
+ runtime_id: runtimeId,
139
+ turn_id: turn.turn_id,
140
+ command,
141
+ args: diagnosticArgs,
142
+ cwd: runtimeCwd,
143
+ prompt_transport: transport,
144
+ stdin_bytes: stdinBytes,
145
+ env: pickDiagnosticEnv(spawnEnv),
146
+ });
115
147
  child = spawn(command, args, {
116
- cwd: runtime.cwd ? join(root, runtime.cwd) : root,
148
+ cwd: runtimeCwd,
117
149
  stdio: ['pipe', 'pipe', 'pipe'],
118
- env: { ...process.env, AGENTXCHAIN_TURN_ID: turn.turn_id },
150
+ env: spawnEnv,
119
151
  });
120
152
  } catch (err) {
121
- resolve({ ok: false, error: `Failed to spawn "${command}": ${err.message}`, logs });
153
+ appendDiagnostic(logs, 'spawn_error', normalizeDiagnosticError(err));
154
+ resolve({
155
+ ok: false,
156
+ startupFailure: true,
157
+ startupFailureType: 'runtime_spawn_failed',
158
+ error: `Failed to spawn "${command}": ${err.message}`,
159
+ logs,
160
+ });
122
161
  return;
123
162
  }
124
163
 
125
164
  let settled = false;
165
+ let firstOutputAt = null;
166
+ let spawnConfirmedAt = null;
167
+ let spawnConfirmedAtMs = null;
168
+ let firstOutputLatencyMs = null;
169
+ let startupWatchdog = null;
170
+ let startupTimedOut = false;
171
+ let startupFailureType = null;
172
+ let stdoutBytes = 0;
173
+ let stderrBytes = 0;
174
+
126
175
  const settle = (result) => {
127
176
  if (settled) return;
128
177
  settled = true;
129
178
  resolve(result);
130
179
  };
131
180
 
181
+ const clearStartupWatchdog = () => {
182
+ if (startupWatchdog) {
183
+ clearTimeout(startupWatchdog);
184
+ startupWatchdog = null;
185
+ }
186
+ };
187
+
188
+ const armStartupWatchdog = () => {
189
+ if (startupWatchdog || !(startupWatchdogMs > 0 && Number.isFinite(startupWatchdogMs))) {
190
+ return;
191
+ }
192
+ startupWatchdog = setTimeout(() => {
193
+ if (firstOutputAt || isStagedResultReady(join(root, getTurnStagingResultPath(turn.turn_id)))) {
194
+ return;
195
+ }
196
+ startupTimedOut = true;
197
+ startupFailureType = 'no_subprocess_output';
198
+ logs.push(`[adapter] Startup watchdog fired after ${Math.round(startupWatchdogMs / 1000)}s with no output.`);
199
+ appendDiagnostic(logs, 'startup_watchdog_fired', {
200
+ startup_watchdog_ms: startupWatchdogMs,
201
+ pid: child.pid ?? null,
202
+ spawn_confirmed_at: spawnConfirmedAt,
203
+ elapsed_since_spawn_ms: spawnConfirmedAtMs == null ? null : Math.max(0, Date.now() - spawnConfirmedAtMs),
204
+ });
205
+ try {
206
+ child.kill('SIGTERM');
207
+ } catch {}
208
+ }, startupWatchdogMs);
209
+ };
210
+
211
+ const recordFirstOutput = (stream) => {
212
+ if (firstOutputAt) return;
213
+ firstOutputAt = new Date().toISOString();
214
+ firstOutputLatencyMs = spawnConfirmedAtMs == null ? null : Math.max(0, Date.now() - spawnConfirmedAtMs);
215
+ clearStartupWatchdog();
216
+ appendDiagnostic(logs, 'first_output', {
217
+ at: firstOutputAt,
218
+ stream,
219
+ pid: child.pid ?? null,
220
+ startup_latency_ms: firstOutputLatencyMs,
221
+ });
222
+ if (onFirstOutput) {
223
+ try {
224
+ onFirstOutput({ pid: child.pid ?? null, at: firstOutputAt, stream });
225
+ } catch {}
226
+ }
227
+ };
228
+
229
+ child.once('spawn', () => {
230
+ spawnConfirmedAtMs = Date.now();
231
+ spawnConfirmedAt = new Date().toISOString();
232
+ appendDiagnostic(logs, 'spawn_attached', {
233
+ pid: child.pid ?? null,
234
+ at: spawnConfirmedAt,
235
+ startup_watchdog_ms: startupWatchdogMs,
236
+ });
237
+ if (onSpawnAttached) {
238
+ try {
239
+ onSpawnAttached({ pid: child.pid ?? null, at: spawnConfirmedAt });
240
+ } catch {}
241
+ }
242
+ armStartupWatchdog();
243
+ });
244
+
132
245
  // Deliver prompt via stdin if transport is "stdin"; otherwise close immediately
133
246
  if (child.stdin) {
247
+ child.stdin.on('error', (err) => {
248
+ appendDiagnostic(logs, 'stdin_error', {
249
+ at: new Date().toISOString(),
250
+ stdin_bytes: stdinBytes,
251
+ ...normalizeDiagnosticError(err),
252
+ });
253
+ });
134
254
  try {
135
255
  if (transport === 'stdin') {
136
256
  child.stdin.write(fullPrompt);
137
257
  }
138
258
  child.stdin.end();
139
- } catch {}
259
+ } catch (err) {
260
+ appendDiagnostic(logs, 'stdin_error', {
261
+ at: new Date().toISOString(),
262
+ stdin_bytes: stdinBytes,
263
+ ...normalizeDiagnosticError(err),
264
+ });
265
+ }
140
266
  }
141
267
 
142
268
  // Collect stdout/stderr
143
269
  if (child.stdout) {
144
270
  child.stdout.on('data', (chunk) => {
145
271
  const text = chunk.toString();
272
+ stdoutBytes += Buffer.byteLength(text);
273
+ recordFirstOutput('stdout');
146
274
  logs.push(text);
147
275
  if (onStdout) onStdout(text);
148
276
  });
@@ -151,6 +279,8 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
151
279
  if (child.stderr) {
152
280
  child.stderr.on('data', (chunk) => {
153
281
  const text = chunk.toString();
282
+ stderrBytes += Buffer.byteLength(text);
283
+ recordFirstOutput('stderr');
154
284
  logs.push('[stderr] ' + text);
155
285
  if (onStderr) onStderr(text);
156
286
  });
@@ -180,6 +310,7 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
180
310
  // Abort signal handling
181
311
  const onAbort = () => {
182
312
  logs.push('[adapter] Abort signal received. Sending SIGTERM.');
313
+ clearStartupWatchdog();
183
314
  clearTimeout(timeoutHandle);
184
315
  clearTimeout(sigkillHandle);
185
316
  try {
@@ -197,6 +328,7 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
197
328
 
198
329
  // Process exit
199
330
  child.on('close', (exitCode, killSignal) => {
331
+ clearStartupWatchdog();
200
332
  clearTimeout(timeoutHandle);
201
333
  clearTimeout(sigkillHandle);
202
334
  if (signal) signal.removeEventListener('abort', onAbort);
@@ -210,17 +342,81 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
210
342
 
211
343
  // Check if staged result was written (regardless of exit code)
212
344
  const hasResult = isStagedResultReady(join(root, getTurnStagingResultPath(turn.turn_id)));
345
+ if (hasResult && !firstOutputAt) {
346
+ recordFirstOutput('staged_result');
347
+ }
348
+ const exitDiagnostic = {
349
+ pid: child.pid ?? null,
350
+ exit_code: exitCode,
351
+ signal: killSignal,
352
+ spawn_confirmed_at: spawnConfirmedAt,
353
+ elapsed_since_spawn_ms: spawnConfirmedAtMs == null ? null : Math.max(0, Date.now() - spawnConfirmedAtMs),
354
+ first_output_at: firstOutputAt,
355
+ startup_latency_ms: firstOutputLatencyMs,
356
+ stdout_bytes: stdoutBytes,
357
+ stderr_bytes: stderrBytes,
358
+ staged_result_ready: hasResult,
359
+ };
360
+ if (startupTimedOut) {
361
+ exitDiagnostic.startup_failure_type = startupFailureType || 'no_subprocess_output';
362
+ } else if (!spawnConfirmedAt) {
363
+ exitDiagnostic.startup_failure_type = 'runtime_spawn_failed';
364
+ } else if (timedOut) {
365
+ exitDiagnostic.timed_out = true;
366
+ } else if (!firstOutputAt) {
367
+ exitDiagnostic.startup_failure_type = 'no_subprocess_output';
368
+ }
369
+ appendDiagnostic(logs, 'process_exit', exitDiagnostic);
213
370
 
214
371
  if (hasResult) {
215
- settle({ ok: true, exitCode, timedOut: false, aborted: false, logs });
372
+ settle({ ok: true, exitCode, timedOut: false, aborted: false, logs, firstOutputAt });
373
+ } else if (startupTimedOut) {
374
+ settle({
375
+ ok: false,
376
+ exitCode,
377
+ timedOut: false,
378
+ aborted: false,
379
+ startupFailure: true,
380
+ startupFailureType: startupFailureType || 'no_subprocess_output',
381
+ startupWatchdogMs,
382
+ firstOutputAt,
383
+ error: `Subprocess produced no output within ${Math.round(startupWatchdogMs / 1000)}s and did not stage a turn result.`,
384
+ logs,
385
+ });
386
+ } else if (!spawnConfirmedAt) {
387
+ settle({
388
+ ok: false,
389
+ exitCode,
390
+ timedOut: false,
391
+ aborted: false,
392
+ startupFailure: true,
393
+ startupFailureType: 'runtime_spawn_failed',
394
+ firstOutputAt,
395
+ error: `Subprocess exited (code ${exitCode}) before reporting a successful spawn or staging a turn result.`,
396
+ logs,
397
+ });
216
398
  } else if (timedOut) {
217
399
  settle({ ok: false, exitCode, timedOut: true, aborted: false, error: 'Turn timed out without producing a staged result.', logs });
400
+ } else if (!firstOutputAt) {
401
+ settle({
402
+ ok: false,
403
+ exitCode,
404
+ timedOut: false,
405
+ aborted: false,
406
+ startupFailure: true,
407
+ startupFailureType: 'no_subprocess_output',
408
+ startupWatchdogMs,
409
+ firstOutputAt,
410
+ error: `Subprocess exited (code ${exitCode}) before producing output or staging a turn result.`,
411
+ logs,
412
+ });
218
413
  } else {
219
414
  settle({
220
415
  ok: false,
221
416
  exitCode,
222
417
  timedOut: false,
223
418
  aborted: false,
419
+ firstOutputAt,
224
420
  error: `Subprocess exited (code ${exitCode}) without writing a staged turn result to ${getTurnStagingResultPath(turn.turn_id)}.`,
225
421
  logs,
226
422
  });
@@ -228,10 +424,37 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
228
424
  });
229
425
 
230
426
  child.on('error', (err) => {
427
+ clearStartupWatchdog();
231
428
  clearTimeout(timeoutHandle);
232
429
  clearTimeout(sigkillHandle);
233
430
  if (signal) signal.removeEventListener('abort', onAbort);
234
- settle({ ok: false, error: `Subprocess error: ${err.message}`, logs });
431
+ // BUG-54 hypothesis #1 fix: explicitly release stdio streams on the
432
+ // error path so Node reclaims pipe handles immediately instead of
433
+ // waiting for GC. Without this, repeated `runtime_spawn_failed` turns
434
+ // leak ~4 handles per failure until the next GC sweep, which in a
435
+ // long-running `run --continuous` session can push the parent process
436
+ // toward its fd limit and cascade additional spawn failures.
437
+ try { child.stdin?.destroy(); } catch {}
438
+ try { child.stdout?.destroy(); } catch {}
439
+ try { child.stderr?.destroy(); } catch {}
440
+ appendDiagnostic(logs, 'spawn_error', {
441
+ pid: child.pid ?? null,
442
+ spawn_confirmed_at: spawnConfirmedAt,
443
+ elapsed_since_spawn_ms: spawnConfirmedAtMs == null ? null : Math.max(0, Date.now() - spawnConfirmedAtMs),
444
+ first_output_at: firstOutputAt,
445
+ startup_latency_ms: firstOutputLatencyMs,
446
+ stdout_bytes: stdoutBytes,
447
+ stderr_bytes: stderrBytes,
448
+ ...normalizeDiagnosticError(err),
449
+ });
450
+ settle({
451
+ ok: false,
452
+ startupFailure: !firstOutputAt,
453
+ startupFailureType: !firstOutputAt ? 'runtime_spawn_failed' : null,
454
+ firstOutputAt,
455
+ error: `Subprocess error: ${err.message}`,
456
+ logs,
457
+ });
235
458
  });
236
459
  });
237
460
  }
@@ -322,15 +545,13 @@ function resolvePromptTransport(runtime) {
322
545
 
323
546
  /**
324
547
  * Check if the staged result file exists and has meaningful content.
548
+ * Delegates to the shared `hasMeaningfulStagedResult` helper so watchdog,
549
+ * manual adapter, and local-cli adapter all agree on what counts as proof.
550
+ * Per DEC-BUG51-STAGING-PLACEHOLDER-NOT-PROOF-001, placeholders (`{}`, blank,
551
+ * whitespace-only, or `{}\n`) are cleanup artifacts, not evidence.
325
552
  */
326
553
  function isStagedResultReady(filePath) {
327
- try {
328
- if (!existsSync(filePath)) return false;
329
- const stat = statSync(filePath);
330
- return stat.size > 2; // Must be more than just "{}" or empty
331
- } catch {
332
- return false;
333
- }
554
+ return hasMeaningfulStagedResult(filePath);
334
555
  }
335
556
 
336
557
  function resolveTargetTurn(state, turnId) {
@@ -340,4 +561,38 @@ function resolveTargetTurn(state, turnId) {
340
561
  return state?.current_turn || Object.values(state?.active_turns || {})[0];
341
562
  }
342
563
 
564
/**
 * Append one structured diagnostic line to the adapter log buffer.
 *
 * The line has the form `[adapter:diag] <label> <json>\n`. Serialization is
 * fail-safe: diagnostics are best-effort and must never abort the dispatch
 * that is being diagnosed, so a payload that cannot be serialized (circular
 * reference, BigInt, ...) is replaced by a small JSON object describing the
 * serialization failure instead of letting the exception propagate.
 *
 * @param {string[]} logs - mutable log buffer; one entry is pushed per call
 * @param {string} label - short event name (e.g. `spawn_prepare`)
 * @param {*} payload - value to serialize as the JSON part of the line
 * @returns {void}
 */
function appendDiagnostic(logs, label, payload) {
  let serialized;
  try {
    // JSON.stringify returns undefined (not a string) for undefined/function
    // payloads; emit `null` so the payload part of the line stays valid JSON.
    serialized = JSON.stringify(payload) ?? 'null';
  } catch (err) {
    serialized = JSON.stringify({
      diagnostic_serialization_error: err?.message || String(err),
    });
  }
  logs.push(`[adapter:diag] ${label} ${serialized}\n`);
}
567
+
568
/**
 * Select the allow-listed environment variables (DIAGNOSTIC_ENV_KEYS) that
 * are set to a non-empty string, for inclusion in spawn diagnostics.
 *
 * On Windows, `process.env` resolves names case-insensitively, but a
 * spread copy of it is a plain object with case-sensitive keys, so a
 * variable stored as `Path` would not be found under `PATH`. When `platform`
 * is `win32`, a key with no exact match is therefore looked up
 * case-insensitively. Other platforms keep exact, case-sensitive matching
 * because differently-cased names are distinct variables there.
 *
 * @param {Record<string, string | undefined> | null | undefined} env - environment to sample
 * @param {string} [platform=process.platform] - platform id used to decide on the case-insensitive fallback
 * @returns {Record<string, string>} allow-listed variables, reported under their canonical names
 */
function pickDiagnosticEnv(env, platform = process.platform) {
  const source = env ?? {};
  const isUsable = (value) => typeof value === 'string' && value.length > 0;

  // Built only on Windows: upper-cased name -> value.
  const foldedLookup = platform === 'win32'
    ? new Map(Object.entries(source).map(([name, value]) => [name.toUpperCase(), value]))
    : null;

  const picked = {};
  for (const key of DIAGNOSTIC_ENV_KEYS) {
    const exact = source[key];
    // An exact match always wins; the folded lookup is a fallback only.
    const value = isUsable(exact) ? exact : foldedLookup?.get(key.toUpperCase());
    if (isUsable(value)) {
      picked[key] = value;
    }
  }
  return picked;
}
575
+
576
/**
 * Produce a copy of the spawn argument list that is safe to write into
 * diagnostics: the prompt text is replaced by a `<prompt:N bytes>` placeholder.
 *
 * Redaction applies only when the prompt travels via argv. Besides an
 * argument that is exactly the prompt, an argument that merely contains the
 * prompt (for example `--prompt=<text>`) is redacted as well, so the prompt
 * body does not end up in the diagnostic log through a composite argument.
 *
 * @param {Array<*>} args - argument vector handed to the subprocess
 * @param {string} fullPrompt - prompt text to hide
 * @param {string} transport - prompt transport; only `'argv'` triggers redaction
 * @returns {Array<*>} new array; non-string entries are passed through untouched
 */
function redactPromptArgs(args, fullPrompt, transport) {
  const promptPlaceholder = `<prompt:${Buffer.byteLength(fullPrompt, 'utf8')} bytes>`;
  if (transport !== 'argv') {
    return args.map((arg) => arg);
  }
  return args.map((arg) => {
    if (typeof arg !== 'string') {
      return arg;
    }
    if (arg === fullPrompt) {
      return promptPlaceholder;
    }
    // Guard against the empty prompt: every string "contains" '' and
    // splitting on it would shred the argument into single characters.
    if (fullPrompt.length > 0 && arg.includes(fullPrompt)) {
      // split/join instead of replaceAll so no `$` replacement patterns apply.
      return arg.split(fullPrompt).join(promptPlaceholder);
    }
    return arg;
  });
}
588
+
589
/**
 * Flatten an error-like value into the fixed set of fields recorded in
 * diagnostics.
 *
 * Missing or falsy `code`, `errno` and `syscall` become `null`; a missing or
 * empty `message` falls back to the string form of the value itself.
 *
 * @param {*} err - thrown value or error event payload
 * @returns {{ code: *, errno: *, syscall: *, message: string }}
 */
function normalizeDiagnosticError(err) {
  const { code, errno, syscall, message } = err ?? {};
  const normalized = {
    code: code || null,
    errno: errno || null,
    syscall: syscall || null,
    message: message || String(err),
  };
  return normalized;
}
597
+
343
598
  export { resolvePromptTransport };