agentxchain 2.147.0 → 2.149.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ import {
4
4
  PROVIDER_ENDPOINTS,
5
5
  } from './adapters/api-proxy-adapter.js';
6
6
  import { probeRuntimeSpawnContext } from './runtime-spawn-context.js';
7
+ import { getClaudeSubprocessAuthIssue, normalizeCommandTokens } from './claude-local-auth.js';
7
8
 
8
9
  const PROBEABLE_RUNTIME_TYPES = new Set(['local_cli', 'api_proxy', 'mcp', 'remote_agent']);
9
10
  const DEFAULT_TIMEOUT_MS = 8_000;
@@ -165,6 +166,38 @@ async function probeLocalCommand(runtimeId, runtime, probeKindLabel, options = {
165
166
  }
166
167
 
167
168
  const spawnProbe = probeRuntimeSpawnContext(options.root || process.cwd(), runtime, { runtimeId });
169
+ const claudeAuthIssue = getClaudeSubprocessAuthIssue(runtime);
170
+
171
+ // DEC-BUG54-CLAUDE-AUTH-PREFLIGHT-001 / DEC-BUG54-VALIDATE-AUTH-PREFLIGHT-001
172
+ // Auth-preflight is a config-shape defect that must fire regardless of whether
173
+ // the binary currently resolves on PATH. Matches connector-validate.js:108-138
174
+ // ordering: a Claude local_cli runtime with no env auth and no --bare is a
175
+ // deterministic hang-on-spawn shape the operator must fix before anything
176
+ // else. If they fix auth (or add --bare) but still do not have claude
177
+ // installed, the next connector check surfaces command_presence after they
178
+ // fix the config — that is the correct operator progression.
179
+ if (claudeAuthIssue) {
180
+ return {
181
+ ...base,
182
+ level: 'fail',
183
+ probe_kind: 'auth_preflight',
184
+ command: spawnProbe.command || head,
185
+ error_code: 'claude_auth_preflight_failed',
186
+ detail: claudeAuthIssue.detail,
187
+ fix: claudeAuthIssue.fix,
188
+ auth_env_present: claudeAuthIssue.auth_env_present,
189
+ };
190
+ }
191
+
192
+ if (!spawnProbe.ok) {
193
+ return {
194
+ ...base,
195
+ level: 'fail',
196
+ command: spawnProbe.command || head,
197
+ detail: spawnProbe.detail,
198
+ };
199
+ }
200
+
168
201
  if (spawnProbe.ok) {
169
202
  return {
170
203
  ...base,
@@ -173,13 +206,6 @@ async function probeLocalCommand(runtimeId, runtime, probeKindLabel, options = {
173
206
  detail: spawnProbe.detail,
174
207
  };
175
208
  }
176
-
177
- return {
178
- ...base,
179
- level: 'fail',
180
- command: spawnProbe.command || head,
181
- detail: spawnProbe.detail,
182
- };
183
209
  }
184
210
 
185
211
  async function probeApiProxy(runtimeId, runtime, timeoutMs) {
@@ -375,6 +401,7 @@ function analyzeLocalCliAuthorityIntent(runtimeId, runtime, roles) {
375
401
  // Prompt transport validation
376
402
  const transport = runtime.prompt_transport || 'dispatch_bundle_only';
377
403
  const knownTransports = KNOWN_CLI_TRANSPORTS[binaryName];
404
+ const claudeAuthIssue = getClaudeSubprocessAuthIssue(runtime);
378
405
 
379
406
  if (transport === 'argv' && !commandTokens.some((token) => token.includes('{prompt}'))) {
380
407
  warnings.push({
@@ -395,24 +422,21 @@ function analyzeLocalCliAuthorityIntent(runtimeId, runtime, roles) {
395
422
  });
396
423
  }
397
424
 
425
+ if (claudeAuthIssue) {
426
+ warnings.push({
427
+ probe_kind: 'auth_preflight',
428
+ level: 'warn',
429
+ detail: claudeAuthIssue.detail,
430
+ fix: claudeAuthIssue.fix,
431
+ });
432
+ }
433
+
398
434
  return { warnings };
399
435
  }
400
436
 
401
437
  /**
402
438
  * Normalize a runtime's command field into an array of tokens.
403
439
  */
404
- function normalizeCommandTokens(runtime) {
405
- if (Array.isArray(runtime?.command)) {
406
- return runtime.command.flatMap((element) =>
407
- typeof element === 'string' ? element.trim().split(/\s+/).filter(Boolean) : []
408
- );
409
- }
410
- if (typeof runtime?.command === 'string' && runtime.command.trim()) {
411
- return runtime.command.trim().split(/\s+/).filter(Boolean);
412
- }
413
- return [];
414
- }
415
-
416
440
  export async function probeConnectorRuntime(runtimeId, runtime, options = {}) {
417
441
  const timeoutMs = Number.isFinite(options.timeoutMs) ? options.timeoutMs : DEFAULT_TIMEOUT_MS;
418
442
  const roles = options.roles || null;
@@ -433,8 +457,11 @@ export async function probeConnectorRuntime(runtimeId, runtime, options = {}) {
433
457
  // Add authority-intent and transport analysis when roles are available
434
458
  if (roles) {
435
459
  const { warnings } = analyzeLocalCliAuthorityIntent(runtimeId, runtime, roles);
436
- if (warnings.length > 0) {
437
- result.authority_warnings = warnings;
460
+ const visibleWarnings = result.error_code === 'claude_auth_preflight_failed'
461
+ ? warnings.filter((warning) => warning.probe_kind !== 'auth_preflight')
462
+ : warnings;
463
+ if (visibleWarnings.length > 0) {
464
+ result.authority_warnings = visibleWarnings;
438
465
  // Promote result level to 'warn' if binary is present but authority intent is wrong
439
466
  if (result.level === 'pass') {
440
467
  result.level = 'warn';
@@ -24,6 +24,7 @@ import { getDispatchPromptPath, getTurnStagingResultPath } from './turn-paths.js
24
24
  import { validateStagedTurnResult } from './turn-result-validator.js';
25
25
  import { probeRuntimeSpawnContext } from './runtime-spawn-context.js';
26
26
  import { buildConnectorSchemaContract } from './connector-schema-contract.js';
27
+ import { getClaudeSubprocessAuthIssue } from './claude-local-auth.js';
27
28
 
28
29
  const VALIDATABLE_RUNTIME_TYPES = new Set(['local_cli', 'api_proxy', 'mcp', 'remote_agent']);
29
30
  const DEFAULT_VALIDATE_TIMEOUT_MS = 120_000;
@@ -104,6 +105,39 @@ export async function validateConfiguredConnector(sourceRoot, options = {}) {
104
105
  };
105
106
  }
106
107
 
108
+ // DEC-BUG54-CLAUDE-AUTH-PREFLIGHT-001 — refuse the known-hanging Claude
109
+ // local_cli shape before burning the scratch-workspace + synthetic-dispatch
110
+ // ceremony. The adapter also refuses this shape via `claude_auth_preflight_failed`,
111
+ // but the operator gets a faster, identical-fix message if we catch it here.
112
+ const claudeAuthIssue = getClaudeSubprocessAuthIssue(runtime);
113
+ if (claudeAuthIssue) {
114
+ return {
115
+ ok: false,
116
+ exitCode: 1,
117
+ overall: 'fail',
118
+ runtime_id: runtimeId,
119
+ runtime_type: runtime.type,
120
+ role_id: roleSelection.roleId,
121
+ timeout_ms: timeoutMs,
122
+ warnings: [
123
+ ...roleSelection.warnings,
124
+ {
125
+ probe_kind: 'auth_preflight',
126
+ level: 'fail',
127
+ detail: claudeAuthIssue.detail,
128
+ fix: claudeAuthIssue.fix,
129
+ },
130
+ ],
131
+ error_code: 'claude_auth_preflight_failed',
132
+ error: claudeAuthIssue.detail,
133
+ auth_env_present: claudeAuthIssue.auth_env_present,
134
+ fix: claudeAuthIssue.fix,
135
+ dispatch: null,
136
+ validation: null,
137
+ scratch_root: null,
138
+ };
139
+ }
140
+
107
141
  const tempBase = mkdtempSync(join(tmpdir(), 'axc-connector-validate-'));
108
142
  const scratchRoot = join(tempBase, 'workspace');
109
143
  const warnings = [...roleSelection.warnings];
@@ -11,6 +11,10 @@
11
11
 
12
12
  import { writeFileSync, unlinkSync, readFileSync, existsSync, mkdirSync, readdirSync } from 'node:fs';
13
13
  import { join, dirname, basename } from 'node:path';
14
+ import {
15
+ isDispatchProgressDiagnosticStream,
16
+ isDispatchProgressProofOutputStream,
17
+ } from './dispatch-streams.js';
14
18
 
15
19
  export const LEGACY_DISPATCH_PROGRESS_PATH = '.agentxchain/dispatch-progress.json';
16
20
  export const DISPATCH_PROGRESS_FILE_PREFIX = '.agentxchain/dispatch-progress-';
@@ -138,15 +142,37 @@ export function createDispatchProgressTracker(root, turn, options = {}) {
138
142
  const now = new Date().toISOString();
139
143
  const wasSilent = state.activity_type === 'silent';
140
144
  state.last_activity_at = now;
141
- state.first_output_at = state.first_output_at || now;
142
- state.activity_type = 'output';
143
- state.silent_since = null;
144
- if (stream === 'stderr') {
145
+ // DEC-BUG54-STDERR-IS-NOT-STARTUP-PROOF-002 (Turn 88) extended to the
146
+ // progress tracker in Turn 89: stderr is diagnostic evidence, not usable
147
+ // startup proof. Only stdout may set `first_output_at`. stderr still
148
+ // increments `stderr_lines` for silence detection and diagnostics.
149
+ let recognizedActivity = false;
150
+ if (isDispatchProgressDiagnosticStream(stream)) {
145
151
  state.stderr_lines += lineCount;
146
- } else {
152
+ recognizedActivity = true;
153
+ } else if (isDispatchProgressProofOutputStream(stream)) {
154
+ state.first_output_at = state.first_output_at || now;
147
155
  state.output_lines += lineCount;
156
+ recognizedActivity = true;
157
+ }
158
+ // DEC-BUG54-DIAGNOSTIC-ACTIVITY-TYPE-001 (Turn 91): activity_type and
159
+ // activity_summary must reflect whether operator-usable stdout proof has
160
+ // arrived. A stderr-only subprocess that never attached stdout must NOT
161
+ // be rendered as "Producing output" on the operator status surface —
162
+ // that is a false live-progress signal for a failing startup. Only when
163
+ // `output_lines > 0` may we claim 'output'; otherwise recognized stderr
164
+ // activity is surfaced as 'diagnostic_only'. Unknown stream labels do
165
+ // not mutate activity_type (Turn 90 closed-vocabulary contract).
166
+ if (recognizedActivity) {
167
+ if (state.output_lines > 0) {
168
+ state.activity_type = 'output';
169
+ state.activity_summary = `Producing output (${state.output_lines} lines)`;
170
+ } else {
171
+ state.activity_type = 'diagnostic_only';
172
+ state.activity_summary = `Diagnostic output only (${state.stderr_lines} stderr lines)`;
173
+ }
174
+ state.silent_since = null;
148
175
  }
149
- state.activity_summary = `Producing output (${state.output_lines} lines)`;
150
176
  dirty = true;
151
177
  maybeWrite();
152
178
  if (adapter_type === 'local_cli') {
@@ -0,0 +1,21 @@
1
/**
 * Stream labels accepted as proof that a turn is running.
 * Membership is exact: any label outside this set is not proof.
 */
const TURN_RUNNING_PROOF_STREAMS = new Set(['stdout', 'request', 'staged_result']);

/**
 * Report whether `stream` is one of the recognized turn-running proof labels.
 *
 * @param {unknown} stream - stream label to classify.
 * @returns {boolean} true only for a string contained in TURN_RUNNING_PROOF_STREAMS.
 */
export function isKnownTurnRunningProofStream(stream) {
  if (typeof stream !== 'string') {
    return false;
  }
  return TURN_RUNNING_PROOF_STREAMS.has(stream);
}

/**
 * Report whether a persisted stream tag may be treated as startup proof.
 *
 * @param {unknown} stream - persisted stream tag; may be absent.
 * @returns {boolean} true when the tag is missing (`null`/`undefined`) or is a
 *   recognized turn-running proof label.
 */
export function isPersistedTurnStartupProofStream(stream) {
  // Legacy states may have first_output_at without a tagged stream.
  const isUntagged = stream === null || stream === undefined;
  return isUntagged || isKnownTurnRunningProofStream(stream);
}

/**
 * Report whether `stream` is the dispatch-progress proof-output stream.
 *
 * @param {unknown} stream - stream label to classify.
 * @returns {boolean} true only for the exact label 'stdout'.
 */
export function isDispatchProgressProofOutputStream(stream) {
  const PROOF_OUTPUT_LABEL = 'stdout';
  return stream === PROOF_OUTPUT_LABEL;
}

/**
 * Report whether `stream` is the dispatch-progress diagnostic stream.
 *
 * @param {unknown} stream - stream label to classify.
 * @returns {boolean} true only for the exact label 'stderr'.
 */
export function isDispatchProgressDiagnosticStream(stream) {
  const DIAGNOSTIC_LABEL = 'stderr';
  return stream === DIAGNOSTIC_LABEL;
}
@@ -77,6 +77,7 @@ import {
77
77
  derivePhaseScopeFromIntentMetadata,
78
78
  evaluateAcceptanceItemLifecycle,
79
79
  } from './intent-phase-scope.js';
80
+ import { isKnownTurnRunningProofStream } from './dispatch-streams.js';
80
81
 
81
82
  // ── Constants ────────────────────────────────────────────────────────────────
82
83
 
@@ -995,8 +996,10 @@ export function transitionActiveTurnLifecycle(root, turnId, nextStatus, options
995
996
  } else if (nextStatus === 'running') {
996
997
  nextTurn.status = 'running';
997
998
  nextTurn.started_at = nextTurn.started_at || nowIso;
998
- nextTurn.first_output_at = nextTurn.first_output_at || nowIso;
999
- if (options.stream) {
999
+ if (options.stream == null) {
1000
+ nextTurn.first_output_at = nextTurn.first_output_at || nowIso;
1001
+ } else if (isKnownTurnRunningProofStream(options.stream)) {
1002
+ nextTurn.first_output_at = nextTurn.first_output_at || nowIso;
1000
1003
  nextTurn.first_output_stream = nextTurn.first_output_stream || options.stream;
1001
1004
  }
1002
1005
  } else {
@@ -1531,6 +1534,68 @@ function findHistoryTurnRequest(historyEntries, turnId, kind) {
1531
1534
  return entry;
1532
1535
  }
1533
1536
 
1537
/**
 * Find the last history entry (scanning from the end of the array) that
 * declared a phase transition compatible with a gate-failure descriptor.
 *
 * An entry matches when all of the following hold:
 *   - it has a truthy `phase_transition_request`;
 *   - if `gateFailure.to_phase` is a non-empty string, the request equals it;
 *   - if `gateFailure.from_phase` is a non-empty string AND the entry records
 *     a truthy `phase`, the two are equal (entries without `phase` are not
 *     rejected by this filter).
 *
 * The input array is never copied or mutated.
 *
 * @param {Array<object>} historyEntries - history entries; presumably in
 *   chronological order, so the last match is the most recent one.
 * @param {{ from_phase?: string, to_phase?: string } | null | undefined} gateFailure
 *   descriptor whose phases constrain the search; a missing descriptor leaves
 *   both filters disabled.
 * @returns {object | null} the matching entry, or null when none matches or
 *   `historyEntries` is not a non-empty array.
 */
function findMatchingPhaseTransitionDeclarer(historyEntries, gateFailure) {
  if (!Array.isArray(historyEntries) || historyEntries.length === 0) {
    return null;
  }

  // Only non-empty strings act as filters; anything else disables the filter.
  const asPhaseFilter = (value) => (typeof value === 'string' && value.length > 0 ? value : null);
  const targetPhase = asPhaseFilter(gateFailure?.to_phase);
  const sourcePhase = asPhaseFilter(gateFailure?.from_phase);

  // Walk backwards instead of copying and reversing the whole history.
  for (let index = historyEntries.length - 1; index >= 0; index -= 1) {
    const entry = historyEntries[index];
    if (!entry?.phase_transition_request) {
      continue;
    }
    if (targetPhase && entry.phase_transition_request !== targetPhase) {
      continue;
    }
    if (sourcePhase && entry.phase && entry.phase !== sourcePhase) {
      continue;
    }
    return entry;
  }

  return null;
}
1562
+
1563
/**
 * Resolve the history entry whose `phase_transition_request` should be used
 * for a pending phase advance.
 *
 * Resolution order:
 *   1. Exact lookup via `findHistoryTurnRequest` for the first truthy turn id
 *      among `gateFailure.requested_by_turn`,
 *      `queuedPhaseTransition.requested_by_turn` and `fallbackTurnId`.
 *   2. Only when a gate failure or queued transition descriptor exists: the
 *      last history entry matching that descriptor's phases, via
 *      `findMatchingPhaseTransitionDeclarer`.
 *   3. Otherwise the (request-less) result of step 1 is returned unchanged.
 *
 * @param {Array<object>} historyEntries - history entries to search.
 * @param {object | null | undefined} gateFailure - gate failure descriptor;
 *   `requested_by_turn`, `from_phase` and `to_phase` are read.
 * @param {string | null | undefined} fallbackTurnId - turn id used when no
 *   descriptor names a requesting turn.
 * @param {object | null} [queuedPhaseTransition=null] - queued transition;
 *   `requested_by_turn`, `from` and `to` are read.
 * @returns {object | null | undefined} an entry carrying a
 *   `phase_transition_request`, or whatever the exact lookup produced when no
 *   such entry can be resolved.
 */
function resolvePhaseTransitionSource(historyEntries, gateFailure, fallbackTurnId, queuedPhaseTransition = null) {
  const requestedTurnId = gateFailure?.requested_by_turn
    || queuedPhaseTransition?.requested_by_turn
    || fallbackTurnId
    || null;
  const requestedSource = findHistoryTurnRequest(historyEntries, requestedTurnId, 'phase_transition');
  if (requestedSource?.phase_transition_request) {
    return requestedSource;
  }

  // Turn 94: a bare null-failure path only gets the exact last_completed_turn
  // lookup. Without a surviving gate_failure or queued_phase_transition
  // descriptor, mining "the latest request anywhere in history" can replay an
  // unrelated older phase request on resume.
  if (!gateFailure && !queuedPhaseTransition) {
    return requestedSource;
  }

  // At least one descriptor is truthy here, so when gateFailure is absent the
  // queued transition is guaranteed to exist.
  const descriptor = gateFailure || {
    from_phase: queuedPhaseTransition.from || null,
    to_phase: queuedPhaseTransition.to || null,
  };
  const fallbackSource = findMatchingPhaseTransitionDeclarer(historyEntries, descriptor);
  if (fallbackSource?.phase_transition_request) {
    // Return a shallow copy so callers never receive the history entry itself.
    return { ...fallbackSource };
  }

  return requestedSource;
}
1598
+
1534
1599
  function buildBlockedReason({ category, recovery, turnId, blockedAt = new Date().toISOString() }) {
1535
1600
  return {
1536
1601
  category,
@@ -2535,7 +2600,19 @@ export function reconcilePhaseAdvanceBeforeDispatch(root, config, state = null)
2535
2600
  }
2536
2601
 
2537
2602
  const gateFailure = currentState.last_gate_failure;
2538
- if (gateFailure?.gate_type !== 'phase_transition') {
2603
+ // BUG-52 Turn 93 (DEC-BUG52-NEEDS-HUMAN-PHASE-ADVANCE-001): accept two entry
2604
+ // shapes. (A) gate_failed left `last_gate_failure.gate_type === 'phase_transition'`
2605
+ // (existing Turn 57-60 coverage). (B) The accepted turn emitted
2606
+ // `status: 'needs_human'`, which short-circuits gate evaluation inside
2607
+ // `applyAcceptedTurn` (see needs_human guard at line 4657) — so
2608
+ // `last_gate_failure` stays null and `queued_phase_transition` stays null, but
2609
+ // the turn's `phase_transition_request` is preserved in history. After unblock
2610
+ // clears the human block, we must still attempt to advance using the
2611
+ // history-declared request; otherwise the dispatcher re-dispatches the current
2612
+ // phase's entry role and the tester reproduces the planning_signoff false-loop.
2613
+ // A non-`phase_transition` gate failure (e.g. run_completion) is still a hard
2614
+ // skip — this expansion only opens the null-failure path.
2615
+ if (gateFailure && gateFailure.gate_type !== 'phase_transition') {
2539
2616
  return {
2540
2617
  ok: true,
2541
2618
  state: attachLegacyCurrentTurnAlias(currentState),
@@ -2544,10 +2621,11 @@ export function reconcilePhaseAdvanceBeforeDispatch(root, config, state = null)
2544
2621
  }
2545
2622
 
2546
2623
  const historyEntries = readJsonlEntries(root, HISTORY_PATH);
2547
- const phaseSource = findHistoryTurnRequest(
2624
+ const phaseSource = resolvePhaseTransitionSource(
2548
2625
  historyEntries,
2549
- gateFailure.requested_by_turn || currentState.last_completed_turn_id || null,
2550
- 'phase_transition',
2626
+ gateFailure,
2627
+ currentState.last_completed_turn_id || null,
2628
+ currentState.queued_phase_transition || null,
2551
2629
  );
2552
2630
  if (!phaseSource?.phase_transition_request) {
2553
2631
  return {
@@ -3729,8 +3807,37 @@ function _acceptGovernedTurnLocked(root, config, opts) {
3729
3807
  ],
3730
3808
  );
3731
3809
  if (!dirtyParity.clean) {
3732
- transitionToFailedAcceptance(root, state, currentTurn, dirtyParity.reason, {
3733
- error_code: 'artifact_dirty_tree_mismatch',
3810
+ // BUG-55 sub-defect B: when the turn declared verification commands or
3811
+ // machine evidence, undeclared dirty files are most likely verification
3812
+ // outputs that need classification under verification.produced_files
3813
+ // (disposition 'ignore' to clean up, or 'artifact' to include in the
3814
+ // checkpoint). Surface a dedicated error class + message so the agent
3815
+ // knows the correct remediation surface, instead of the generic
3816
+ // files_changed-or-produced_files-or-clean advice that the non-
3817
+ // verification path emits.
3818
+ const verification = turnResult.verification && typeof turnResult.verification === 'object'
3819
+ ? turnResult.verification
3820
+ : {};
3821
+ const declaredVerificationCommands = Array.isArray(verification.commands)
3822
+ && verification.commands.some((c) => typeof c === 'string' && c.trim().length > 0);
3823
+ const declaredMachineEvidence = Array.isArray(verification.machine_evidence)
3824
+ && verification.machine_evidence.some((e) => e && typeof e === 'object' && typeof e.command === 'string' && e.command.trim().length > 0);
3825
+ const verificationWasDeclared = declaredVerificationCommands || declaredMachineEvidence;
3826
+
3827
+ let failureReason = dirtyParity.reason;
3828
+ let failureErrorCode = 'artifact_dirty_tree_mismatch';
3829
+ if (verificationWasDeclared) {
3830
+ failureErrorCode = 'undeclared_verification_outputs';
3831
+ const undeclared = Array.isArray(dirtyParity.unexpected_dirty_files)
3832
+ ? dirtyParity.unexpected_dirty_files
3833
+ : [];
3834
+ const listForMessage = undeclared.slice(0, 5).join(', ')
3835
+ + (undeclared.length > 5 ? '...' : '');
3836
+ failureReason = `Verification was declared (commands or machine_evidence), but these files are dirty and not classified: ${listForMessage}. Classify each under verification.produced_files with disposition "ignore" (the file should be cleaned up after replay) or "artifact" (the file should be checkpointed as part of the turn), OR add it to files_changed if it is a core turn mutation. Acceptance cannot proceed until the declared contract matches the working tree.`;
3837
+ }
3838
+
3839
+ transitionToFailedAcceptance(root, state, currentTurn, failureReason, {
3840
+ error_code: failureErrorCode,
3734
3841
  stage: 'artifact_observation',
3735
3842
  extra: {
3736
3843
  unexpected_dirty_files: dirtyParity.unexpected_dirty_files,
@@ -3739,13 +3846,14 @@ function _acceptGovernedTurnLocked(root, config, opts) {
3739
3846
  });
3740
3847
  return {
3741
3848
  ok: false,
3742
- error: dirtyParity.reason,
3849
+ error: failureReason,
3850
+ error_code: failureErrorCode,
3743
3851
  validation: {
3744
3852
  ...validation,
3745
3853
  ok: false,
3746
3854
  stage: 'artifact_observation',
3747
3855
  error_class: 'artifact_error',
3748
- errors: [dirtyParity.reason],
3856
+ errors: [failureReason],
3749
3857
  warnings: validation.warnings,
3750
3858
  },
3751
3859
  };
@@ -442,6 +442,9 @@ export function validateV4Config(data, projectRoot) {
442
442
  if (!VALID_RUNTIME_TYPES.includes(rt.type)) {
443
443
  errors.push(`Runtime "${id}": type must be one of: ${VALID_RUNTIME_TYPES.join(', ')}`);
444
444
  }
445
+ if (rt.type === 'local_cli') {
446
+ validateRuntimePositiveInteger(`Runtime "${id}": startup_watchdog_ms`, rt.startup_watchdog_ms, errors);
447
+ }
445
448
  // Validate prompt_transport for local_cli runtimes
446
449
  if (rt.type === 'local_cli' && rt.prompt_transport) {
447
450
  if (!VALID_PROMPT_TRANSPORTS.includes(rt.prompt_transport)) {
@@ -652,6 +655,15 @@ function validateRunLoopPositiveInteger(path, value, errors) {
652
655
  }
653
656
  }
654
657
 
658
/**
 * Validate an optional runtime setting that must be a positive integer
 * number of milliseconds.
 *
 * `undefined` and `null` mean "not configured" and are accepted silently.
 * Any other value must be an integer >= 1; otherwise one message is appended
 * to `errors`.
 *
 * @param {string} path - human-readable label used as the message prefix.
 * @param {unknown} value - configured value to check.
 * @param {string[]} errors - accumulator that receives the validation message.
 * @returns {void}
 */
function validateRuntimePositiveInteger(path, value, errors) {
  if (value == null) {
    return;
  }
  // Number.isInteger is already false for every non-number (strings, booleans,
  // objects, NaN, ±Infinity), so no separate typeof guard is needed.
  if (!Number.isInteger(value) || value < 1) {
    errors.push(`${path} must be a positive integer (milliseconds)`);
  }
}
666
+
655
667
  export function validateBudgetConfig(budget) {
656
668
  const errors = [];
657
669
 
@@ -253,6 +253,11 @@
253
253
  "cwd": {
254
254
  "$ref": "#/$defs/non_empty_string"
255
255
  },
256
+ "startup_watchdog_ms": {
257
+ "type": "integer",
258
+ "minimum": 1,
259
+ "description": "Optional local_cli-specific override for the startup watchdog. When set, this runtime uses the declared threshold before falling back to run_loop.startup_watchdog_ms."
260
+ },
256
261
  "prompt_transport": {
257
262
  "enum": ["argv", "stdin", "dispatch_bundle_only"]
258
263
  },
@@ -148,7 +148,10 @@
148
148
  },
149
149
  "commands": {
150
150
  "type": "array",
151
- "items": { "type": "string" },
151
+ "items": {
152
+ "type": "string",
153
+ "pattern": "\\S"
154
+ },
152
155
  "description": "Verification commands that were run."
153
156
  },
154
157
  "evidence_summary": {
@@ -161,7 +164,10 @@
161
164
  "type": "object",
162
165
  "required": ["command", "exit_code"],
163
166
  "properties": {
164
- "command": { "type": "string" },
167
+ "command": {
168
+ "type": "string",
169
+ "pattern": "\\S"
170
+ },
165
171
  "exit_code": { "type": "integer" }
166
172
  }
167
173
  }
@@ -24,7 +24,8 @@
24
24
  * requiring a background daemon.
25
25
  *
26
26
  * Default thresholds:
27
- * - Startup watchdog: 30 seconds (configurable via run_loop.startup_watchdog_ms)
27
+ * - Startup watchdog: 30 seconds (configurable via run_loop.startup_watchdog_ms
28
+ * or runtimes.<id>.startup_watchdog_ms for local_cli runtimes)
28
29
  * - local_cli stale turns: 10 minutes
29
30
  * - api_proxy stale turns: 5 minutes
30
31
  * - Configurable via run_loop.stale_turn_threshold_ms in agentxchain.json
@@ -36,6 +37,7 @@ import { safeWriteJson } from './safe-write.js';
36
37
  import { emitRunEvent, readRunEvents } from './run-events.js';
37
38
  import { getTurnStagingResultPath } from './turn-paths.js';
38
39
  import { getDispatchProgressRelativePath } from './dispatch-progress.js';
40
+ import { isPersistedTurnStartupProofStream } from './dispatch-streams.js';
39
41
  import { hasMeaningfulStagedResult } from './staged-result-proof.js';
40
42
 
41
43
  const DEFAULT_LOCAL_CLI_THRESHOLD_MS = 10 * 60 * 1000; // 10 minutes
@@ -122,7 +124,6 @@ export function detectGhostTurns(root, state, config) {
122
124
  const activeTurns = state?.active_turns || {};
123
125
  const ghosts = [];
124
126
  const now = Date.now();
125
- const startupThreshold = resolveStartupThreshold(config);
126
127
 
127
128
  for (const [turnId, turn] of Object.entries(activeTurns)) {
128
129
  if (!['dispatched', 'starting', 'running', 'retrying'].includes(turn.status)) continue;
@@ -130,6 +131,13 @@ export function detectGhostTurns(root, state, config) {
130
131
  const lifecycleStart = parseGhostLifecycleStart(turn);
131
132
  if (!Number.isFinite(lifecycleStart)) continue;
132
133
 
134
+ // BUG-54 follow-up: per-turn threshold honors per-runtime startup override.
135
+ // Without this, an operator who sets `runtimes.<id>.startup_watchdog_ms`
136
+ // higher than the global to accommodate a slow QA/Claude runtime would still
137
+ // have ghost detection fire at the global threshold, defeating the override.
138
+ const runtime = config?.runtimes?.[turn.runtime_id];
139
+ const startupThreshold = resolveStartupThreshold(config, runtime);
140
+
133
141
  const runningMs = now - lifecycleStart;
134
142
  if (runningMs < startupThreshold) continue;
135
143
 
@@ -274,7 +282,16 @@ function resolveThreshold(turn, config) {
274
282
  return DEFAULT_LOCAL_CLI_THRESHOLD_MS;
275
283
  }
276
284
 
277
- function resolveStartupThreshold(config) {
285
+ function resolveStartupThreshold(config, runtime) {
286
+ // BUG-54 follow-up: per-runtime override beats the global.
287
+ // Mirrors `resolveStartupWatchdogMs()` in local-cli-adapter.js so the
288
+ // ghost-detection scanner uses the same threshold the in-flight adapter
289
+ // watchdog uses; otherwise the scanner pre-empts the override.
290
+ if (runtime && runtime.type === 'local_cli'
291
+ && Number.isInteger(runtime.startup_watchdog_ms)
292
+ && runtime.startup_watchdog_ms > 0) {
293
+ return runtime.startup_watchdog_ms;
294
+ }
278
295
  const configThreshold = config?.run_loop?.startup_watchdog_ms;
279
296
  if (typeof configThreshold === 'number' && configThreshold > 0) {
280
297
  return configThreshold;
@@ -291,6 +308,7 @@ export function failTurnStartup(root, state, config, turnId, details = {}) {
291
308
  if (!turn) {
292
309
  return { ok: false, error: `Turn ${turnId} not found in active turns` };
293
310
  }
311
+ const runtime = config?.runtimes?.[turn.runtime_id];
294
312
 
295
313
  const nowIso = new Date().toISOString();
296
314
  const activeTurns = { ...(state.active_turns || {}) };
@@ -300,7 +318,7 @@ export function failTurnStartup(root, state, config, turnId, details = {}) {
300
318
  role: turn.assigned_role || 'unknown',
301
319
  runtime_id: turn.runtime_id || 'unknown',
302
320
  running_ms: details.running_ms ?? computeLifecycleAgeMs(turn),
303
- threshold_ms: details.threshold_ms ?? resolveStartupThreshold(config),
321
+ threshold_ms: details.threshold_ms ?? resolveStartupThreshold(config, runtime),
304
322
  failure_type: classifyStartupFailureType(turn, null, details.failure_type || 'no_subprocess_output'),
305
323
  recommendation: details.recommendation
306
324
  || `Turn ${turnId} failed to start cleanly. Run \`agentxchain reissue-turn --turn ${turnId} --reason ghost\` to recover.`,
@@ -476,7 +494,14 @@ function mapStartupFailureEventType(failureType) {
476
494
  }
477
495
 
478
496
  function hasStartupProof(turn, progress) {
479
- if (turn.first_output_at) {
497
+ // DEC-BUG54-STDERR-IS-NOT-STARTUP-PROOF-002 (Turn 88) extended to the
498
+ // fast-startup watchdog in Turn 89: stderr activity is not startup proof.
499
+ // A subprocess that spawns and emits stderr-only text must still be caught
500
+ // by the fast watchdog as stdout_attach_failed. Only stdout-derived signals
501
+ // (stream-tagged `turn.first_output_at`, `progress.first_output_at`, or
502
+ // `progress.output_lines`) satisfy startup proof. `progress.stderr_lines`
503
+ // deliberately does NOT.
504
+ if (turn.first_output_at && isPersistedTurnStartupProofStream(turn.first_output_stream)) {
480
505
  return true;
481
506
  }
482
507
  if (!progress || typeof progress !== 'object') {
@@ -485,7 +510,7 @@ function hasStartupProof(turn, progress) {
485
510
  if (progress.first_output_at) {
486
511
  return true;
487
512
  }
488
- return Number(progress.output_lines || 0) > 0 || Number(progress.stderr_lines || 0) > 0;
513
+ return Number(progress.output_lines || 0) > 0;
489
514
  }
490
515
 
491
516
  function hasTurnScopedStagedResult(root, turnId) {