agentxchain 2.147.0 → 2.149.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/components/timeline.js +15 -2
- package/package.json +1 -1
- package/scripts/reproduce-bug-54.mjs +623 -0
- package/src/commands/connector.js +23 -4
- package/src/commands/doctor.js +11 -0
- package/src/commands/run.js +18 -3
- package/src/commands/status.js +30 -3
- package/src/commands/step.js +8 -2
- package/src/lib/adapters/local-cli-adapter.js +191 -7
- package/src/lib/claude-local-auth.js +61 -0
- package/src/lib/connector-probe.js +48 -21
- package/src/lib/connector-validate.js +34 -0
- package/src/lib/dispatch-progress.js +32 -6
- package/src/lib/dispatch-streams.js +21 -0
- package/src/lib/governed-state.js +118 -10
- package/src/lib/normalized-config.js +12 -0
- package/src/lib/schemas/agentxchain-config.schema.json +5 -0
- package/src/lib/schemas/turn-result.schema.json +8 -2
- package/src/lib/stale-turn-watchdog.js +31 -6
- package/src/lib/turn-checkpoint.js +112 -1
- package/src/lib/turn-result-validator.js +11 -2
|
@@ -5,6 +5,17 @@ import { DEFAULT_VALIDATE_TIMEOUT_MS, validateConfiguredConnector } from '../lib
|
|
|
5
5
|
import { DEFAULT_TIMEOUT_MS, probeConfiguredConnectors } from '../lib/connector-probe.js';
|
|
6
6
|
import { buildRuntimeCapabilityReport } from '../lib/runtime-capabilities.js';
|
|
7
7
|
|
|
8
|
+
/**
 * Render a connector warning as a printable value.
 *
 * Warnings may be plain strings or structured objects that carry a `detail`
 * field (and optionally a `fix` field, see `warningFix`).
 *
 * @param {*} warning - String warning or structured warning object.
 * @returns {*} The string itself, the truthy `detail` of a structured
 *   warning, or otherwise a JSON/String rendering of the whole value.
 */
function warningDetail(warning) {
  if (typeof warning === 'string') {
    return warning;
  }
  if (warning?.detail) {
    return warning.detail;
  }
  try {
    // JSON.stringify returns undefined for undefined/function/symbol inputs;
    // fall back to String() so callers always get something printable.
    return JSON.stringify(warning) ?? String(warning);
  } catch {
    // Circular structures and BigInt values make JSON.stringify throw. A
    // warning printer must never crash the command, so degrade to String().
    return String(warning);
  }
}
|
|
14
|
+
|
|
15
|
+
/**
 * Extract the remediation hint from a structured warning.
 *
 * @param {*} warning - String warning or structured warning object.
 * @returns {*} The warning's truthy `fix` value, or `null` when the warning
 *   is not an object or carries no `fix`.
 */
function warningFix(warning) {
  // `typeof null === 'object'`, so the optional chain also covers null.
  if (typeof warning !== 'object' || !warning?.fix) {
    return null;
  }
  return warning.fix;
}
|
|
18
|
+
|
|
8
19
|
function printJson(result, exitCode) {
|
|
9
20
|
console.log(JSON.stringify(result, null, 2));
|
|
10
21
|
process.exit(exitCode);
|
|
@@ -49,11 +60,15 @@ function printText(result, exitCode) {
|
|
|
49
60
|
console.log(` ${chalk.dim('Time:')} ${connector.latency_ms}ms`);
|
|
50
61
|
}
|
|
51
62
|
console.log(` ${chalk.dim('Detail:')} ${connector.detail}`);
|
|
63
|
+
if (connector.fix) {
|
|
64
|
+
console.log(` ${chalk.dim('Fix:')} ${connector.fix}`);
|
|
65
|
+
}
|
|
52
66
|
if (Array.isArray(connector.authority_warnings) && connector.authority_warnings.length > 0) {
|
|
53
67
|
for (const warning of connector.authority_warnings) {
|
|
54
|
-
console.log(` ${chalk.yellow('⚠')} ${warning
|
|
55
|
-
|
|
56
|
-
|
|
68
|
+
console.log(` ${chalk.yellow('⚠')} ${warningDetail(warning)}`);
|
|
69
|
+
const fix = warningFix(warning);
|
|
70
|
+
if (fix) {
|
|
71
|
+
console.log(` ${chalk.dim('Fix:')} ${fix}`);
|
|
57
72
|
}
|
|
58
73
|
}
|
|
59
74
|
}
|
|
@@ -161,7 +176,11 @@ function printValidateText(result, exitCode) {
|
|
|
161
176
|
if (Array.isArray(result.warnings) && result.warnings.length > 0) {
|
|
162
177
|
console.log('');
|
|
163
178
|
for (const warning of result.warnings) {
|
|
164
|
-
console.log(` ${chalk.yellow('!')} ${warning}`);
|
|
179
|
+
console.log(` ${chalk.yellow('!')} ${warningDetail(warning)}`);
|
|
180
|
+
const fix = warningFix(warning);
|
|
181
|
+
if (fix) {
|
|
182
|
+
console.log(` ${chalk.dim('Fix:')} ${fix}`);
|
|
183
|
+
}
|
|
165
184
|
}
|
|
166
185
|
}
|
|
167
186
|
|
package/src/commands/doctor.js
CHANGED
|
@@ -21,6 +21,7 @@ import { detectActiveTurnBindingDrift, detectStateBundleDesync } from '../lib/go
|
|
|
21
21
|
import { findPendingApprovedIntents } from '../lib/intake.js';
|
|
22
22
|
import { checkCleanBaseline } from '../lib/repo-observer.js';
|
|
23
23
|
import { probeRuntimeSpawnContext } from '../lib/runtime-spawn-context.js';
|
|
24
|
+
import { getClaudeSubprocessAuthIssue } from '../lib/claude-local-auth.js';
|
|
24
25
|
|
|
25
26
|
export async function doctorCommand(opts = {}) {
|
|
26
27
|
const root = findProjectRoot(process.cwd());
|
|
@@ -500,6 +501,16 @@ function checkRuntimeReachable(root, rtId, rt, boundRoleEntries = []) {
|
|
|
500
501
|
|
|
501
502
|
case 'local_cli': {
|
|
502
503
|
const probe = probeRuntimeSpawnContext(root, rt, { runtimeId: rtId });
|
|
504
|
+
if (probe.ok) {
|
|
505
|
+
const claudeAuthIssue = getClaudeSubprocessAuthIssue(rt);
|
|
506
|
+
if (claudeAuthIssue) {
|
|
507
|
+
return attachRuntimeContract({
|
|
508
|
+
...base,
|
|
509
|
+
level: 'warn',
|
|
510
|
+
detail: `${probe.detail} ${claudeAuthIssue.detail} ${claudeAuthIssue.fix}`,
|
|
511
|
+
}, rtId, rt, boundRoleEntries);
|
|
512
|
+
}
|
|
513
|
+
}
|
|
503
514
|
return attachRuntimeContract({ ...base, level: probe.ok ? 'pass' : 'fail', detail: probe.detail }, rtId, rt, boundRoleEntries);
|
|
504
515
|
}
|
|
505
516
|
|
package/src/commands/run.js
CHANGED
|
@@ -25,6 +25,7 @@ import { validateParentRun } from '../lib/run-history.js';
|
|
|
25
25
|
import { dispatchApiProxy } from '../lib/adapters/api-proxy-adapter.js';
|
|
26
26
|
import {
|
|
27
27
|
dispatchLocalCli,
|
|
28
|
+
resolveStartupWatchdogMs,
|
|
28
29
|
saveDispatchLogs,
|
|
29
30
|
resolvePromptTransport,
|
|
30
31
|
} from '../lib/adapters/local-cli-adapter.js';
|
|
@@ -52,6 +53,7 @@ import { emitRunEvent } from '../lib/run-events.js';
|
|
|
52
53
|
import { checkpointAcceptedTurn } from '../lib/turn-checkpoint.js';
|
|
53
54
|
import { failTurnStartup } from '../lib/stale-turn-watchdog.js';
|
|
54
55
|
import { hasMinimumTurnResultShape } from '../lib/turn-result-shape.js';
|
|
56
|
+
import { isKnownTurnRunningProofStream } from '../lib/dispatch-streams.js';
|
|
55
57
|
|
|
56
58
|
export async function runCommand(opts) {
|
|
57
59
|
const context = loadProjectContext();
|
|
@@ -343,7 +345,10 @@ export async function executeGovernedRun(context, opts = {}) {
|
|
|
343
345
|
});
|
|
344
346
|
};
|
|
345
347
|
|
|
346
|
-
const ensureRunningState = (stream =
|
|
348
|
+
const ensureRunningState = (stream = null, at = new Date().toISOString()) => {
|
|
349
|
+
if (stream != null && !isKnownTurnRunningProofStream(stream)) {
|
|
350
|
+
return;
|
|
351
|
+
}
|
|
347
352
|
if (runningMarked) return;
|
|
348
353
|
runningMarked = true;
|
|
349
354
|
transitionActiveTurnLifecycle(projectRoot, turn.turn_id, 'running', { stream, at });
|
|
@@ -359,7 +364,16 @@ export async function executeGovernedRun(context, opts = {}) {
|
|
|
359
364
|
};
|
|
360
365
|
|
|
361
366
|
const recordOutputActivity = (stream, text) => {
|
|
362
|
-
|
|
367
|
+
// DEC-BUG54-STDERR-IS-NOT-STARTUP-PROOF-002 (Turn 88) extended to the
|
|
368
|
+
// run-command lifecycle in Turn 89: stderr activity must NOT promote a
|
|
369
|
+
// turn from `starting` to `running`. stdout (or the adapter's
|
|
370
|
+
// onFirstOutput callback, which is stdout/staged_result only post-Turn
|
|
371
|
+
// 88) is the only signal that satisfies the lifecycle transition.
|
|
372
|
+
// stderr is still tracked by the progress tracker for silence detection
|
|
373
|
+
// and operator diagnostics.
|
|
374
|
+
if (stream != null && isKnownTurnRunningProofStream(stream)) {
|
|
375
|
+
ensureRunningState(stream);
|
|
376
|
+
}
|
|
363
377
|
const lines = text.split('\n').length - 1 || 1;
|
|
364
378
|
const wasSilent = tracker.onOutput(stream, lines);
|
|
365
379
|
if (wasSilent) {
|
|
@@ -473,9 +487,10 @@ export async function executeGovernedRun(context, opts = {}) {
|
|
|
473
487
|
|
|
474
488
|
if (adapterResult.startupFailure) {
|
|
475
489
|
const freshState = loadProjectState(projectRoot, cfg) || state;
|
|
490
|
+
const startupThresholdMs = resolveStartupWatchdogMs(cfg, runtime);
|
|
476
491
|
failTurnStartup(projectRoot, freshState, cfg, turn.turn_id, {
|
|
477
492
|
failure_type: adapterResult.startupFailureType || 'no_subprocess_output',
|
|
478
|
-
threshold_ms:
|
|
493
|
+
threshold_ms: startupThresholdMs,
|
|
479
494
|
running_ms: freshState?.active_turns?.[turn.turn_id]?.started_at
|
|
480
495
|
? Math.max(0, Date.now() - new Date(freshState.active_turns[turn.turn_id].started_at).getTime())
|
|
481
496
|
: 0,
|
package/src/commands/status.js
CHANGED
|
@@ -383,7 +383,7 @@ function renderGovernedStatus(context, opts) {
|
|
|
383
383
|
console.log(` ${chalk.dim(' or:')} ${chalk.cyan(`agentxchain accept-turn --turn ${turn.turn_id}`)} — re-attempt acceptance`);
|
|
384
384
|
}
|
|
385
385
|
if (turn.status === 'failed_start') {
|
|
386
|
-
console.log(` ${chalk.dim('Reason:')} ${turn.failed_start_reason
|
|
386
|
+
console.log(` ${chalk.dim('Reason:')} ${normalizeStartupFailureReasonForDisplay(turn.failed_start_reason)}`);
|
|
387
387
|
const recover = turn.recovery_command || `agentxchain reissue-turn --turn ${turn.turn_id} --reason ghost`;
|
|
388
388
|
console.log(` ${chalk.dim('Recover:')} ${chalk.cyan(recover)}`);
|
|
389
389
|
}
|
|
@@ -443,7 +443,7 @@ function renderGovernedStatus(context, opts) {
|
|
|
443
443
|
console.log(` ${chalk.dim(' or:')} ${chalk.cyan(mergeAction.command)}`);
|
|
444
444
|
}
|
|
445
445
|
if (singleActiveTurn.status === 'failed_start') {
|
|
446
|
-
console.log(` ${chalk.dim('Reason:')} ${singleActiveTurn.failed_start_reason
|
|
446
|
+
console.log(` ${chalk.dim('Reason:')} ${normalizeStartupFailureReasonForDisplay(singleActiveTurn.failed_start_reason)}`);
|
|
447
447
|
const recover = singleActiveTurn.recovery_command || `agentxchain reissue-turn --turn ${singleActiveTurn.turn_id} --reason ghost`;
|
|
448
448
|
console.log(` ${chalk.dim('Recover:')} ${chalk.cyan(recover)}`);
|
|
449
449
|
}
|
|
@@ -883,6 +883,24 @@ function pluralizeRepoDecisionCount(count, singular, plural) {
|
|
|
883
883
|
return `${count} ${count === 1 ? singular : plural}`;
|
|
884
884
|
}
|
|
885
885
|
|
|
886
|
+
// BUG-54 vocabulary discipline (`DEC-BUG54-OPERATOR-SUBTYPE-DISPLAY-001`).
|
|
887
|
+
// Operator-facing status surfaces must render a typed startup-failure subtype,
|
|
888
|
+
// not the raw adapter signal `no_subprocess_output`. Public docs
|
|
889
|
+
// (website-v2/docs/cli.mdx) only document `runtime_spawn_failed` and
|
|
890
|
+
// `stdout_attach_failed` as the operator-visible subtypes; the `no_subprocess_output`
|
|
891
|
+
// label is an internal adapter/classification fallback and must not leak to the
|
|
892
|
+
// CLI status display. The adapter semantics for `no_subprocess_output` ("we
|
|
893
|
+
// watched for stdout and saw none inside the startup watchdog window") are
|
|
894
|
+
// identical to the operator subtype `stdout_attach_failed`, so that is the
|
|
895
|
+
// correct display normalization.
|
|
896
|
+
// Startup-failure subtypes that may be shown verbatim on the status display.
const TYPED_STARTUP_FAILURE_SUBTYPES = new Set(['runtime_spawn_failed', 'stdout_attach_failed']);

/**
 * Map a stored `failed_start_reason` to the label printed by the status display.
 *
 * @param {*} rawReason - Reason recorded on the turn; may be missing or an
 *   internal label such as `no_subprocess_output`.
 * @returns {string} `rawReason` when it is one of the typed subtypes;
 *   otherwise `'stdout_attach_failed'` (this includes missing, non-string and
 *   unrecognized values).
 */
function normalizeStartupFailureReasonForDisplay(rawReason) {
  const isTypedSubtype = typeof rawReason === 'string' && TYPED_STARTUP_FAILURE_SUBTYPES.has(rawReason);
  return isTypedSubtype ? rawReason : 'stdout_attach_failed';
}
|
|
903
|
+
|
|
886
904
|
function filterDispatchProgressForActiveTurns(progressByTurn, activeTurns) {
|
|
887
905
|
const filtered = {};
|
|
888
906
|
if (!progressByTurn || typeof progressByTurn !== 'object') {
|
|
@@ -897,7 +915,7 @@ function filterDispatchProgressForActiveTurns(progressByTurn, activeTurns) {
|
|
|
897
915
|
return filtered;
|
|
898
916
|
}
|
|
899
917
|
|
|
900
|
-
function formatDispatchActivityLine(progress) {
|
|
918
|
+
export function formatDispatchActivityLine(progress) {
|
|
901
919
|
if (!progress || typeof progress !== 'object') return null;
|
|
902
920
|
const lastAct = progress.last_activity_at ? new Date(progress.last_activity_at) : null;
|
|
903
921
|
const agoSec = lastAct && !Number.isNaN(lastAct.getTime())
|
|
@@ -918,6 +936,15 @@ function formatDispatchActivityLine(progress) {
|
|
|
918
936
|
if (progress.activity_type === 'response') {
|
|
919
937
|
return chalk.green('API response received');
|
|
920
938
|
}
|
|
939
|
+
// DEC-BUG54-DIAGNOSTIC-ACTIVITY-TYPE-001 (Turn 91): stderr-only activity
|
|
940
|
+
// must be rendered as yellow "Diagnostic output only" — never as the green
|
|
941
|
+
// "Producing output" signal that previously leaked onto the operator
|
|
942
|
+
// surface for failing-startup subprocesses whose stdout never attached.
|
|
943
|
+
if (progress.activity_type === 'diagnostic_only') {
|
|
944
|
+
const agoLabel = agoSec != null && agoSec > 0 ? `, last ${agoSec}s ago` : '';
|
|
945
|
+
return chalk.yellow('Diagnostic output only') +
|
|
946
|
+
` (${progress.stderr_lines || 0} stderr lines, no stdout yet${agoLabel})`;
|
|
947
|
+
}
|
|
921
948
|
const agoLabel = agoSec != null && agoSec > 0 ? `, last ${agoSec}s ago` : '';
|
|
922
949
|
return chalk.green('Producing output') + ` (${progress.output_lines || 0} lines${agoLabel})`;
|
|
923
950
|
}
|
package/src/commands/step.js
CHANGED
|
@@ -49,6 +49,7 @@ import {
|
|
|
49
49
|
} from '../lib/adapters/manual-adapter.js';
|
|
50
50
|
import {
|
|
51
51
|
dispatchLocalCli,
|
|
52
|
+
resolveStartupWatchdogMs,
|
|
52
53
|
saveDispatchLogs,
|
|
53
54
|
resolvePromptTransport,
|
|
54
55
|
} from '../lib/adapters/local-cli-adapter.js';
|
|
@@ -73,6 +74,7 @@ import { shouldSuggestManualQaFallback } from '../lib/manual-qa-fallback.js';
|
|
|
73
74
|
import { evaluateApprovalSlaReminders } from '../lib/notification-runner.js';
|
|
74
75
|
import { consumeNextApprovedIntent } from '../lib/intake.js';
|
|
75
76
|
import { failTurnStartup, reconcileStaleTurns } from '../lib/stale-turn-watchdog.js';
|
|
77
|
+
import { isKnownTurnRunningProofStream } from '../lib/dispatch-streams.js';
|
|
76
78
|
|
|
77
79
|
export async function stepCommand(opts) {
|
|
78
80
|
const context = loadProjectContext();
|
|
@@ -697,7 +699,10 @@ export async function stepCommand(opts) {
|
|
|
697
699
|
state = starting.state;
|
|
698
700
|
}
|
|
699
701
|
};
|
|
700
|
-
const ensureRunningState = (stream =
|
|
702
|
+
const ensureRunningState = (stream = null, at = new Date().toISOString()) => {
|
|
703
|
+
if (stream != null && !isKnownTurnRunningProofStream(stream)) {
|
|
704
|
+
return;
|
|
705
|
+
}
|
|
701
706
|
if (runningMarked) return;
|
|
702
707
|
runningMarked = true;
|
|
703
708
|
const running = transitionActiveTurnLifecycle(root, turn.turn_id, 'running', { stream, at });
|
|
@@ -756,9 +761,10 @@ export async function stepCommand(opts) {
|
|
|
756
761
|
|
|
757
762
|
if (cliResult.startupFailure) {
|
|
758
763
|
const freshState = loadProjectState(root, config) || state;
|
|
764
|
+
const startupThresholdMs = resolveStartupWatchdogMs(config, runtime);
|
|
759
765
|
const failed = failTurnStartup(root, freshState, config, turn.turn_id, {
|
|
760
766
|
failure_type: cliResult.startupFailureType || 'no_subprocess_output',
|
|
761
|
-
threshold_ms:
|
|
767
|
+
threshold_ms: startupThresholdMs,
|
|
762
768
|
running_ms: freshState?.active_turns?.[turn.turn_id]?.started_at
|
|
763
769
|
? Math.max(0, Date.now() - new Date(freshState.active_turns[turn.turn_id].started_at).getTime())
|
|
764
770
|
: 0,
|
|
@@ -30,6 +30,17 @@ import {
|
|
|
30
30
|
} from '../turn-paths.js';
|
|
31
31
|
import { verifyDispatchManifestForAdapter } from '../dispatch-manifest.js';
|
|
32
32
|
import { hasMeaningfulStagedResult } from '../staged-result-proof.js';
|
|
33
|
+
import { getClaudeSubprocessAuthIssue } from '../claude-local-auth.js';
|
|
34
|
+
|
|
35
|
+
// Allow-list of environment variables copied into the `spawn_prepare`
// diagnostic record via pickDiagnosticEnv(); no other variable from the
// spawn environment is written to that record.
const DIAGNOSTIC_ENV_KEYS = [
  'PATH',
  'HOME',
  'PWD',
  'SHELL',
  'TMPDIR',
  'AGENTXCHAIN_TURN_ID',
];

// Maximum length (in string characters) of the rolling stderr tail kept for
// the `process_exit` diagnostic.
const DIAGNOSTIC_STDERR_EXCERPT_LIMIT = 800;
|
|
33
44
|
|
|
34
45
|
/**
|
|
35
46
|
* Launch a local CLI subprocess for a governed turn.
|
|
@@ -55,7 +66,7 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
55
66
|
onStderr,
|
|
56
67
|
onSpawnAttached,
|
|
57
68
|
onFirstOutput,
|
|
58
|
-
startupWatchdogMs
|
|
69
|
+
startupWatchdogMs: startupWatchdogOverrideMs,
|
|
59
70
|
turnId,
|
|
60
71
|
} = options;
|
|
61
72
|
|
|
@@ -76,6 +87,7 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
76
87
|
if (!runtime) {
|
|
77
88
|
return { ok: false, error: `Runtime "${runtimeId}" not found in config` };
|
|
78
89
|
}
|
|
90
|
+
const startupWatchdogMs = startupWatchdogOverrideMs ?? resolveStartupWatchdogMs(config, runtime);
|
|
79
91
|
|
|
80
92
|
// Read the dispatch bundle prompt
|
|
81
93
|
const promptPath = join(root, getDispatchPromptPath(turn.turn_id));
|
|
@@ -112,6 +124,25 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
112
124
|
|
|
113
125
|
// Capture logs for dispatch record
|
|
114
126
|
const logs = [];
|
|
127
|
+
const runtimeCwd = runtime.cwd ? join(root, runtime.cwd) : root;
|
|
128
|
+
const spawnEnv = { ...process.env, AGENTXCHAIN_TURN_ID: turn.turn_id };
|
|
129
|
+
const stdinBytes = transport === 'stdin' ? Buffer.byteLength(fullPrompt, 'utf8') : 0;
|
|
130
|
+
const diagnosticArgs = redactPromptArgs(args, fullPrompt, transport);
|
|
131
|
+
const claudeAuthIssue = getClaudeSubprocessAuthIssue(runtime, spawnEnv);
|
|
132
|
+
|
|
133
|
+
if (claudeAuthIssue) {
|
|
134
|
+
appendDiagnostic(logs, 'claude_auth_preflight_failed', {
|
|
135
|
+
runtime_id: runtimeId,
|
|
136
|
+
turn_id: turn.turn_id,
|
|
137
|
+
auth_env_present: claudeAuthIssue.auth_env_present,
|
|
138
|
+
recommendation: claudeAuthIssue.fix,
|
|
139
|
+
});
|
|
140
|
+
return {
|
|
141
|
+
ok: false,
|
|
142
|
+
error: `${claudeAuthIssue.detail} ${claudeAuthIssue.fix}`,
|
|
143
|
+
logs,
|
|
144
|
+
};
|
|
145
|
+
}
|
|
115
146
|
|
|
116
147
|
return new Promise((resolve) => {
|
|
117
148
|
if (signal?.aborted) {
|
|
@@ -121,12 +152,23 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
121
152
|
|
|
122
153
|
let child;
|
|
123
154
|
try {
|
|
155
|
+
appendDiagnostic(logs, 'spawn_prepare', {
|
|
156
|
+
runtime_id: runtimeId,
|
|
157
|
+
turn_id: turn.turn_id,
|
|
158
|
+
command,
|
|
159
|
+
args: diagnosticArgs,
|
|
160
|
+
cwd: runtimeCwd,
|
|
161
|
+
prompt_transport: transport,
|
|
162
|
+
stdin_bytes: stdinBytes,
|
|
163
|
+
env: pickDiagnosticEnv(spawnEnv),
|
|
164
|
+
});
|
|
124
165
|
child = spawn(command, args, {
|
|
125
|
-
cwd:
|
|
166
|
+
cwd: runtimeCwd,
|
|
126
167
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
127
|
-
env:
|
|
168
|
+
env: spawnEnv,
|
|
128
169
|
});
|
|
129
170
|
} catch (err) {
|
|
171
|
+
appendDiagnostic(logs, 'spawn_error', normalizeDiagnosticError(err));
|
|
130
172
|
resolve({
|
|
131
173
|
ok: false,
|
|
132
174
|
startupFailure: true,
|
|
@@ -139,10 +181,16 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
139
181
|
|
|
140
182
|
let settled = false;
|
|
141
183
|
let firstOutputAt = null;
|
|
184
|
+
let firstOutputStream = null;
|
|
142
185
|
let spawnConfirmedAt = null;
|
|
186
|
+
let spawnConfirmedAtMs = null;
|
|
187
|
+
let firstOutputLatencyMs = null;
|
|
143
188
|
let startupWatchdog = null;
|
|
144
189
|
let startupTimedOut = false;
|
|
145
190
|
let startupFailureType = null;
|
|
191
|
+
let stdoutBytes = 0;
|
|
192
|
+
let stderrBytes = 0;
|
|
193
|
+
let stderrExcerpt = '';
|
|
146
194
|
|
|
147
195
|
const settle = (result) => {
|
|
148
196
|
if (settled) return;
|
|
@@ -168,8 +216,14 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
168
216
|
startupTimedOut = true;
|
|
169
217
|
startupFailureType = 'no_subprocess_output';
|
|
170
218
|
logs.push(`[adapter] Startup watchdog fired after ${Math.round(startupWatchdogMs / 1000)}s with no output.`);
|
|
171
|
-
|
|
172
|
-
|
|
219
|
+
appendDiagnostic(logs, 'startup_watchdog_fired', {
|
|
220
|
+
startup_watchdog_ms: startupWatchdogMs,
|
|
221
|
+
pid: child.pid ?? null,
|
|
222
|
+
spawn_confirmed_at: spawnConfirmedAt,
|
|
223
|
+
elapsed_since_spawn_ms: spawnConfirmedAtMs == null ? null : Math.max(0, Date.now() - spawnConfirmedAtMs),
|
|
224
|
+
});
|
|
225
|
+
try {
|
|
226
|
+
child.kill('SIGTERM');
|
|
173
227
|
} catch {}
|
|
174
228
|
}, startupWatchdogMs);
|
|
175
229
|
};
|
|
@@ -177,7 +231,15 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
177
231
|
const recordFirstOutput = (stream) => {
|
|
178
232
|
if (firstOutputAt) return;
|
|
179
233
|
firstOutputAt = new Date().toISOString();
|
|
234
|
+
firstOutputStream = stream;
|
|
235
|
+
firstOutputLatencyMs = spawnConfirmedAtMs == null ? null : Math.max(0, Date.now() - spawnConfirmedAtMs);
|
|
180
236
|
clearStartupWatchdog();
|
|
237
|
+
appendDiagnostic(logs, 'first_output', {
|
|
238
|
+
at: firstOutputAt,
|
|
239
|
+
stream,
|
|
240
|
+
pid: child.pid ?? null,
|
|
241
|
+
startup_latency_ms: firstOutputLatencyMs,
|
|
242
|
+
});
|
|
181
243
|
if (onFirstOutput) {
|
|
182
244
|
try {
|
|
183
245
|
onFirstOutput({ pid: child.pid ?? null, at: firstOutputAt, stream });
|
|
@@ -186,7 +248,13 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
186
248
|
};
|
|
187
249
|
|
|
188
250
|
child.once('spawn', () => {
|
|
251
|
+
spawnConfirmedAtMs = Date.now();
|
|
189
252
|
spawnConfirmedAt = new Date().toISOString();
|
|
253
|
+
appendDiagnostic(logs, 'spawn_attached', {
|
|
254
|
+
pid: child.pid ?? null,
|
|
255
|
+
at: spawnConfirmedAt,
|
|
256
|
+
startup_watchdog_ms: startupWatchdogMs,
|
|
257
|
+
});
|
|
190
258
|
if (onSpawnAttached) {
|
|
191
259
|
try {
|
|
192
260
|
onSpawnAttached({ pid: child.pid ?? null, at: spawnConfirmedAt });
|
|
@@ -197,18 +265,32 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
197
265
|
|
|
198
266
|
// Deliver prompt via stdin if transport is "stdin"; otherwise close immediately
|
|
199
267
|
if (child.stdin) {
|
|
268
|
+
child.stdin.on('error', (err) => {
|
|
269
|
+
appendDiagnostic(logs, 'stdin_error', {
|
|
270
|
+
at: new Date().toISOString(),
|
|
271
|
+
stdin_bytes: stdinBytes,
|
|
272
|
+
...normalizeDiagnosticError(err),
|
|
273
|
+
});
|
|
274
|
+
});
|
|
200
275
|
try {
|
|
201
276
|
if (transport === 'stdin') {
|
|
202
277
|
child.stdin.write(fullPrompt);
|
|
203
278
|
}
|
|
204
279
|
child.stdin.end();
|
|
205
|
-
} catch {
|
|
280
|
+
} catch (err) {
|
|
281
|
+
appendDiagnostic(logs, 'stdin_error', {
|
|
282
|
+
at: new Date().toISOString(),
|
|
283
|
+
stdin_bytes: stdinBytes,
|
|
284
|
+
...normalizeDiagnosticError(err),
|
|
285
|
+
});
|
|
286
|
+
}
|
|
206
287
|
}
|
|
207
288
|
|
|
208
289
|
// Collect stdout/stderr
|
|
209
290
|
if (child.stdout) {
|
|
210
291
|
child.stdout.on('data', (chunk) => {
|
|
211
292
|
const text = chunk.toString();
|
|
293
|
+
stdoutBytes += Buffer.byteLength(text);
|
|
212
294
|
recordFirstOutput('stdout');
|
|
213
295
|
logs.push(text);
|
|
214
296
|
if (onStdout) onStdout(text);
|
|
@@ -218,7 +300,8 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
218
300
|
if (child.stderr) {
|
|
219
301
|
child.stderr.on('data', (chunk) => {
|
|
220
302
|
const text = chunk.toString();
|
|
221
|
-
|
|
303
|
+
stderrBytes += Buffer.byteLength(text);
|
|
304
|
+
stderrExcerpt = appendDiagnosticExcerpt(stderrExcerpt, text, DIAGNOSTIC_STDERR_EXCERPT_LIMIT);
|
|
222
305
|
logs.push('[stderr] ' + text);
|
|
223
306
|
if (onStderr) onStderr(text);
|
|
224
307
|
});
|
|
@@ -283,6 +366,34 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
283
366
|
if (hasResult && !firstOutputAt) {
|
|
284
367
|
recordFirstOutput('staged_result');
|
|
285
368
|
}
|
|
369
|
+
const exitDiagnostic = {
|
|
370
|
+
pid: child.pid ?? null,
|
|
371
|
+
exit_code: exitCode,
|
|
372
|
+
signal: killSignal,
|
|
373
|
+
exit_signal: killSignal,
|
|
374
|
+
spawn_confirmed_at: spawnConfirmedAt,
|
|
375
|
+
elapsed_since_spawn_ms: spawnConfirmedAtMs == null ? null : Math.max(0, Date.now() - spawnConfirmedAtMs),
|
|
376
|
+
first_output_at: firstOutputAt,
|
|
377
|
+
first_output_stream: firstOutputStream,
|
|
378
|
+
startup_latency_ms: firstOutputLatencyMs,
|
|
379
|
+
stdout_bytes: stdoutBytes,
|
|
380
|
+
stderr_bytes: stderrBytes,
|
|
381
|
+
staged_result_ready: hasResult,
|
|
382
|
+
watchdog_fired: startupTimedOut,
|
|
383
|
+
};
|
|
384
|
+
if (stderrExcerpt) {
|
|
385
|
+
exitDiagnostic.stderr_excerpt = stderrExcerpt;
|
|
386
|
+
}
|
|
387
|
+
if (startupTimedOut) {
|
|
388
|
+
exitDiagnostic.startup_failure_type = startupFailureType || 'no_subprocess_output';
|
|
389
|
+
} else if (!spawnConfirmedAt) {
|
|
390
|
+
exitDiagnostic.startup_failure_type = 'runtime_spawn_failed';
|
|
391
|
+
} else if (timedOut) {
|
|
392
|
+
exitDiagnostic.timed_out = true;
|
|
393
|
+
} else if (!firstOutputAt) {
|
|
394
|
+
exitDiagnostic.startup_failure_type = 'no_subprocess_output';
|
|
395
|
+
}
|
|
396
|
+
appendDiagnostic(logs, 'process_exit', exitDiagnostic);
|
|
286
397
|
|
|
287
398
|
if (hasResult) {
|
|
288
399
|
settle({ ok: true, exitCode, timedOut: false, aborted: false, logs, firstOutputAt });
|
|
@@ -344,6 +455,25 @@ export async function dispatchLocalCli(root, state, config, options = {}) {
|
|
|
344
455
|
clearTimeout(timeoutHandle);
|
|
345
456
|
clearTimeout(sigkillHandle);
|
|
346
457
|
if (signal) signal.removeEventListener('abort', onAbort);
|
|
458
|
+
// BUG-54 hypothesis #1 fix: explicitly release stdio streams on the
|
|
459
|
+
// error path so Node reclaims pipe handles immediately instead of
|
|
460
|
+
// waiting for GC. Without this, repeated `runtime_spawn_failed` turns
|
|
461
|
+
// leak ~4 handles per failure until the next GC sweep, which in a
|
|
462
|
+
// long-running `run --continuous` session can push the parent process
|
|
463
|
+
// toward its fd limit and cascade additional spawn failures.
|
|
464
|
+
try { child.stdin?.destroy(); } catch {}
|
|
465
|
+
try { child.stdout?.destroy(); } catch {}
|
|
466
|
+
try { child.stderr?.destroy(); } catch {}
|
|
467
|
+
appendDiagnostic(logs, 'spawn_error', {
|
|
468
|
+
pid: child.pid ?? null,
|
|
469
|
+
spawn_confirmed_at: spawnConfirmedAt,
|
|
470
|
+
elapsed_since_spawn_ms: spawnConfirmedAtMs == null ? null : Math.max(0, Date.now() - spawnConfirmedAtMs),
|
|
471
|
+
first_output_at: firstOutputAt,
|
|
472
|
+
startup_latency_ms: firstOutputLatencyMs,
|
|
473
|
+
stdout_bytes: stdoutBytes,
|
|
474
|
+
stderr_bytes: stderrBytes,
|
|
475
|
+
...normalizeDiagnosticError(err),
|
|
476
|
+
});
|
|
347
477
|
settle({
|
|
348
478
|
ok: false,
|
|
349
479
|
startupFailure: !firstOutputAt,
|
|
@@ -440,6 +570,16 @@ function resolvePromptTransport(runtime) {
|
|
|
440
570
|
return hasPlaceholder ? 'argv' : 'dispatch_bundle_only';
|
|
441
571
|
}
|
|
442
572
|
|
|
573
|
+
/**
 * Resolve the startup watchdog window, in milliseconds, for a runtime.
 *
 * Precedence:
 *   1. `runtime.startup_watchdog_ms` (only for `local_cli` runtimes),
 *   2. `config.run_loop.startup_watchdog_ms`,
 *   3. the built-in default of 30 seconds.
 * A candidate is used only when it is a positive integer.
 *
 * @param {object|null|undefined} config - Project configuration.
 * @param {object|null|undefined} runtime - Runtime definition.
 * @returns {number} Watchdog window in milliseconds.
 */
function resolveStartupWatchdogMs(config, runtime) {
  const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;

  const runtimeOverride = runtime?.type === 'local_cli' ? runtime.startup_watchdog_ms : undefined;
  if (isPositiveInteger(runtimeOverride)) {
    return runtimeOverride;
  }

  const configured = config?.run_loop?.startup_watchdog_ms;
  if (isPositiveInteger(configured)) {
    return configured;
  }

  return 30_000;
}
|
|
582
|
+
|
|
443
583
|
/**
|
|
444
584
|
* Check if the staged result file exists and has meaningful content.
|
|
445
585
|
* Delegates to the shared `hasMeaningfulStagedResult` helper so watchdog,
|
|
@@ -458,4 +598,48 @@ function resolveTargetTurn(state, turnId) {
|
|
|
458
598
|
return state?.current_turn || Object.values(state?.active_turns || {})[0];
|
|
459
599
|
}
|
|
460
600
|
|
|
601
|
+
/**
 * Append one structured diagnostic line to the dispatch log.
 *
 * The line has the form `[adapter:diag] <label> <json>\n`.
 *
 * @param {string[]} logs - Log buffer; mutated in place.
 * @param {string} label - Diagnostic event name (e.g. `spawn_prepare`).
 * @param {*} payload - JSON-serializable details for the event.
 * @returns {void}
 */
function appendDiagnostic(logs, label, payload) {
  let serialized;
  try {
    serialized = JSON.stringify(payload);
  } catch (err) {
    // Diagnostics are best-effort: a payload that cannot be serialized
    // (circular reference, BigInt) must not throw out of the caller.
    // Record why the payload is missing instead.
    serialized = JSON.stringify({ diagnostic_serialization_error: err?.message || String(err) });
  }
  logs.push(`[adapter:diag] ${label} ${serialized}\n`);
}
|
|
604
|
+
|
|
605
|
+
/**
 * Build the subset of an environment that is recorded in diagnostics.
 *
 * Only keys listed in `DIAGNOSTIC_ENV_KEYS` are considered, and a key is
 * included only when its value is a non-empty string.
 *
 * @param {object|null|undefined} env - Environment map (e.g. the spawn env).
 * @returns {Object<string, string>} New object with the selected entries.
 */
function pickDiagnosticEnv(env) {
  const picked = {};
  for (const key of DIAGNOSTIC_ENV_KEYS) {
    const value = env?.[key];
    if (typeof value === 'string' && value.length > 0) {
      picked[key] = value;
    }
  }
  return picked;
}
|
|
612
|
+
|
|
613
|
+
/**
 * Produce a copy of the spawn arguments that is safe to write to diagnostics.
 *
 * When the prompt is delivered on the command line (`transport === 'argv'`),
 * every occurrence of the prompt text is replaced by a
 * `<prompt:N bytes>` placeholder, where N is the prompt's UTF-8 byte length.
 * This covers both an argument that is exactly the prompt and an argument
 * that embeds it (for example `--prompt=<text>`), so the prompt body is not
 * copied into the diagnostic log. For any other transport the arguments are
 * returned unchanged.
 *
 * @param {Array<*>} args - Arguments passed to the subprocess.
 * @param {string} fullPrompt - Prompt text.
 * @param {string} transport - Prompt transport (`argv`, `stdin`, ...).
 * @returns {Array<*>} New array; non-string entries are passed through.
 */
function redactPromptArgs(args, fullPrompt, transport) {
  const promptPlaceholder = `<prompt:${Buffer.byteLength(fullPrompt, 'utf8')} bytes>`;
  return args.map((arg) => {
    if (typeof arg !== 'string' || transport !== 'argv') {
      return arg;
    }
    if (arg === fullPrompt) {
      return promptPlaceholder;
    }
    // An empty prompt is a substring of every argument; only the exact-match
    // case above applies to it.
    if (fullPrompt.length > 0 && arg.includes(fullPrompt)) {
      // split/join replaces every occurrence literally (no regex or `$`
      // pattern interpretation).
      return arg.split(fullPrompt).join(promptPlaceholder);
    }
    return arg;
  });
}
|
|
625
|
+
|
|
626
|
+
/**
 * Reduce an error (or any thrown value) to a small JSON-friendly record.
 *
 * @param {*} err - Error object or arbitrary thrown value.
 * @returns {{code: *, errno: *, syscall: *, message: *}} `code`, `errno` and
 *   `syscall` are copied from `err` when truthy and are `null` otherwise;
 *   `message` is `err.message` when truthy, else `String(err)`.
 */
function normalizeDiagnosticError(err) {
  const code = err?.code || null;
  const errno = err?.errno || null;
  const syscall = err?.syscall || null;
  const message = err?.message || String(err);
  return { code, errno, syscall, message };
}
|
|
634
|
+
|
|
635
|
+
/**
 * Append a chunk to a rolling excerpt, keeping only the most recent text.
 *
 * @param {string} existing - Excerpt accumulated so far.
 * @param {string} chunk - Newly received text.
 * @param {number} limit - Maximum excerpt length, measured in string
 *   characters (UTF-16 code units), not bytes.
 * @returns {string} `existing + chunk`, truncated from the front so that at
 *   most `limit` trailing characters remain.
 */
function appendDiagnosticExcerpt(existing, chunk, limit) {
  const combined = `${existing}${chunk}`;
  const overflow = combined.length - limit;
  // Drop the oldest characters so the tail (most recent output) survives.
  return overflow > 0 ? combined.slice(overflow) : combined;
}
|
|
642
|
+
|
|
643
|
+
export { resolveCommand };
|
|
461
644
|
export { resolvePromptTransport };
|
|
645
|
+
export { resolveStartupWatchdogMs };
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
// Environment variables whose presence counts as env-based Claude auth for
// the checks in this module.
const CLAUDE_ENV_AUTH_KEYS = [
  'ANTHROPIC_API_KEY',
  'CLAUDE_API_KEY',
  'CLAUDE_CODE_OAUTH_TOKEN',
  'CLAUDE_CODE_USE_VERTEX',
  'CLAUDE_CODE_USE_BEDROCK',
];

/**
 * Split a runtime's `command` into whitespace-separated tokens.
 *
 * @param {object|null|undefined} runtime - Runtime definition; `command` may
 *   be a string or an array.
 * @returns {string[]} Tokens. Array elements that are strings are themselves
 *   split on whitespace; non-string elements are dropped. Returns `[]` when
 *   there is no usable command.
 */
function normalizeCommandTokens(runtime) {
  const splitOnWhitespace = (text) => text.trim().split(/\s+/).filter(Boolean);
  const command = runtime?.command;

  if (Array.isArray(command)) {
    return command.flatMap((element) => (typeof element === 'string' ? splitOnWhitespace(element) : []));
  }
  if (typeof command === 'string' && command.trim()) {
    return splitOnWhitespace(command);
  }
  return [];
}

/**
 * Tell whether a runtime's command invokes the `claude` executable.
 *
 * @param {object|null|undefined} runtime - Runtime definition.
 * @returns {boolean} True when the first command token, lower-cased, is
 *   `claude` or ends with `/claude`.
 */
export function isClaudeLocalCliRuntime(runtime) {
  const [firstToken] = normalizeCommandTokens(runtime);
  if (firstToken === undefined) {
    return false;
  }
  const head = firstToken.toLowerCase();
  return head === 'claude' || head.endsWith('/claude');
}

/**
 * Tell whether the runtime's command contains the `--bare` token.
 *
 * @param {object|null|undefined} runtime - Runtime definition.
 * @returns {boolean} True when `--bare` is one of the command tokens.
 */
export function hasClaudeBareFlag(runtime) {
  return normalizeCommandTokens(runtime).includes('--bare');
}

/**
 * Report which Claude auth environment variables are set.
 *
 * @param {object} [env=process.env] - Environment map to inspect.
 * @returns {Object<string, boolean>} One entry per key in
 *   `CLAUDE_ENV_AUTH_KEYS`; the value is the truthiness of `env[key]`, so an
 *   empty string counts as absent. Variable values are never returned.
 */
export function getClaudeEnvAuthPresence(env = process.env) {
  const presence = {};
  for (const key of CLAUDE_ENV_AUTH_KEYS) {
    presence[key] = Boolean(env?.[key]);
  }
  return presence;
}

/**
 * Tell whether any Claude auth environment variable is set.
 *
 * @param {object} [env=process.env] - Environment map to inspect.
 * @returns {boolean} True when at least one key in `CLAUDE_ENV_AUTH_KEYS` is
 *   truthy in `env`.
 */
export function hasClaudeEnvAuth(env = process.env) {
  return Object.values(getClaudeEnvAuthPresence(env)).some(Boolean);
}

/**
 * Detect a Claude runtime that has neither env-based auth nor `--bare`.
 *
 * @param {object|null|undefined} runtime - Runtime definition.
 * @param {object} [env=process.env] - Environment the subprocess will see.
 * @returns {{auth_env_present: Object<string, boolean>, detail: string, fix: string}|null}
 *   `null` when the runtime is not a Claude command, when `--bare` is
 *   present, or when any auth environment variable is set; otherwise a
 *   description of the issue and a suggested fix.
 */
export function getClaudeSubprocessAuthIssue(runtime, env = process.env) {
  if (!isClaudeLocalCliRuntime(runtime)) {
    return null;
  }

  // Computed once and reused for both the decision and the report.
  const authEnvPresent = getClaudeEnvAuthPresence(env);
  const hasEnvAuth = Object.values(authEnvPresent).some(Boolean);
  if (hasClaudeBareFlag(runtime) || hasEnvAuth) {
    return null;
  }

  return {
    auth_env_present: authEnvPresent,
    detail: 'Claude local_cli runtime has no env-based auth and is missing "--bare"; non-interactive subprocesses can hang on macOS keychain reads.',
    fix: 'Export ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN before running AgentXchain, or add "--bare" to the Claude command if you intentionally want env-only auth.',
  };
}

export { CLAUDE_ENV_AUTH_KEYS, normalizeCommandTokens };
|