agentxchain 2.149.2 → 2.151.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentxchain",
3
- "version": "2.149.2",
3
+ "version": "2.151.0",
4
4
  "description": "CLI for AgentXchain — governed multi-agent software delivery",
5
5
  "type": "module",
6
6
  "bin": {
@@ -28,7 +28,7 @@
28
28
  "test": "npm run test:vitest && npm run test:node",
29
29
  "test:vitest": "vitest run --reporter=verbose",
30
30
  "test:beta": "node --test test/beta-tester-scenarios/*.test.js",
31
- "test:node": "node --test test/*.test.js test/beta-tester-scenarios/*.test.js",
31
+ "test:node": "node --test --test-timeout=60000 --test-concurrency=4 test/*.test.js test/beta-tester-scenarios/*.test.js",
32
32
  "preflight:release": "bash scripts/release-preflight.sh",
33
33
  "preflight:release:strict": "bash scripts/release-preflight.sh --strict",
34
34
  "check:release-alignment": "node scripts/check-release-alignment.mjs",
@@ -37,6 +37,7 @@
37
37
  "bump:release": "bash scripts/release-bump.sh",
38
38
  "sync:homebrew": "bash scripts/sync-homebrew.sh",
39
39
  "verify:post-publish": "bash scripts/verify-post-publish.sh",
40
+ "collect:pack-sha-diagnostic": "node scripts/collect-pack-sha-diagnostic.mjs",
40
41
  "build:macos": "bun build bin/agentxchain.js --compile --target=bun-darwin-arm64 --outfile=dist/agentxchain-macos-arm64",
41
42
  "build:linux": "bun build bin/agentxchain.js --compile --target=bun-linux-x64 --outfile=dist/agentxchain-linux-x64",
42
43
  "publish:npm": "bash scripts/publish-npm.sh"
@@ -0,0 +1,344 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Collect pack-SHA diagnostic evidence from `publish-npm-on-tag.yml` runs.
4
+ *
5
+ * Purpose:
6
+ * Turn 129 (`DEC-PUBLISH-WORKFLOW-PACK-SHA-DIAGNOSTIC-ONLY-001`) added
7
+ * runner-local `npm pack` SHA capture + registry `dist.shasum`/`dist.integrity`
8
+ * comparison to the publish workflow as diagnostic-only evidence. Each
9
+ * published tag now emits `PACK_SHA_DIAGNOSTIC:` and `PACK_INTEGRITY_DIAGNOSTIC:`
10
+ * log lines with MATCH/MISMATCH verdicts.
11
+ *
12
+ * A real reproducible-publish gate cannot be designed until we have ≥3 release
13
+ * cycles of evidence. This script turns the per-run log lines into a
14
+ * multi-release evidence view so the threshold can be evaluated at a glance.
15
+ *
16
+ * Behavior:
17
+ * Default: uses `gh run list` to fetch the last N `publish-npm-on-tag.yml`
18
+ * runs, then `gh run view <id> --log` to scrape the two diagnostic tags from
19
+ * each run's logs, and prints a table summary plus aggregate MATCH/MISMATCH
20
+ * counts.
21
+ *
22
+ * Test / offline mode: `--log-file <path>` parses a single saved log instead
23
+ * of calling `gh`. Useful for unit tests and local debugging without GH auth.
24
+ *
25
+ * Usage:
26
+ * cd cli && npm run collect:pack-sha-diagnostic -- # last 10 runs
27
+ * cd cli && npm run collect:pack-sha-diagnostic -- --limit 20
28
+ * node cli/scripts/collect-pack-sha-diagnostic.mjs # direct path
29
+ * node cli/scripts/collect-pack-sha-diagnostic.mjs --limit 20
30
+ * node cli/scripts/collect-pack-sha-diagnostic.mjs --format json
31
+ * node cli/scripts/collect-pack-sha-diagnostic.mjs --workflow publish-npm-on-tag.yml
32
+ * node cli/scripts/collect-pack-sha-diagnostic.mjs --log-file /tmp/run.log
33
+ *
34
+ * How to read the output:
35
+ * - `MATCH` means the workflow's runner-local pack value matched the npm
36
+ * registry value for that release run.
37
+ * - `MISMATCH` means the runner-local pack value differed from registry
38
+ * truth. Treat it as investigation evidence, not an automatic release
39
+ * failure.
40
+ * - `unavailable` means the diagnostic ran but could not form a comparison
41
+ * (for example, registry metadata was not ready).
42
+ * - `missing` means the diagnostic tag was absent, usually because the run
43
+ * was an already-published rerun and skipped local packing.
44
+ * - Only non-rerun `MATCH` verdicts count toward the "≥3 MATCH" evidence
45
+ * threshold from `DEC-PUBLISH-WORKFLOW-PACK-SHA-DIAGNOSTIC-ONLY-001`.
46
+ * That threshold only permits designing a future gate; it is not a gate
47
+ * by itself.
48
+ *
49
+ * Diagnostic-only. This script does not gate releases, mutate state, or fail
50
+ * on MISMATCH. It prints evidence; a gate is a future decision.
51
+ */
52
+
53
import { execFileSync } from 'node:child_process';
import { readFileSync, realpathSync } from 'node:fs';
import { pathToFileURL } from 'node:url';
55
+
56
+ const DEFAULT_WORKFLOW = 'publish-npm-on-tag.yml';
57
+ const DEFAULT_LIMIT = 10;
58
+
59
/**
 * Parse a publish workflow log for the Turn 129 diagnostic tags.
 *
 * Only the first occurrence of each tag in the log is considered.
 *
 * @param {string} logText - Raw text of one workflow run log.
 * @returns {{
 *   shaVerdict: 'MATCH' | 'MISMATCH' | 'unavailable' | 'missing',
 *   shaDetail: string | null,
 *   integrityVerdict: 'MATCH' | 'MISMATCH' | 'unavailable' | 'missing',
 *   integrityDetail: string | null,
 *   version: string | null,
 * }}
 *   - `*Verdict`: 'MATCH' / 'MISMATCH' when the tag body starts with that
 *     word, 'unavailable' when the tag has any other body, 'missing' when the
 *     tag is absent or has an empty body.
 *   - `*Detail`: the trimmed text after the tag's ':' on the same line, or
 *     null when there is none.
 *   - `version`: the `X.Y.Z` part of the first `agentxchain@X.Y.Z` found in
 *     either detail (SHA detail searched first), or null.
 */
export function parseDiagnosticLines(logText) {
  // `[ \t]*` instead of `\s*`: `\s` also matches newlines, which would let a
  // tag with an empty body swallow the following log line as its verdict.
  const shaRegex = /PACK_SHA_DIAGNOSTIC:[ \t]*([^\n]*)/;
  const integrityRegex = /PACK_INTEGRITY_DIAGNOSTIC:[ \t]*([^\n]*)/;

  // Same-line tag body, trimmed (this also drops a trailing `\r` from CRLF
  // logs); null when the tag is absent or its body is empty.
  const extractDetail = (regex) => {
    const match = logText.match(regex);
    const body = match ? match[1].trim() : '';
    return body === '' ? null : body;
  };

  const classifyVerdict = (detail) => {
    if (!detail) return 'missing';
    const head = detail.split(/\s+/)[0];
    if (head === 'MATCH') return 'MATCH';
    if (head === 'MISMATCH') return 'MISMATCH';
    return 'unavailable';
  };

  const shaDetail = extractDetail(shaRegex);
  const integrityDetail = extractDetail(integrityRegex);

  // Try to pull `agentxchain@X.Y.Z` from either diagnostic line.
  const versionSource = `${shaDetail ?? ''} ${integrityDetail ?? ''}`;
  const versionMatch = versionSource.match(/agentxchain@(\d+\.\d+\.\d+)/);

  return {
    shaVerdict: classifyVerdict(shaDetail),
    shaDetail,
    integrityVerdict: classifyVerdict(integrityDetail),
    integrityDetail,
    version: versionMatch ? versionMatch[1] : null,
  };
}
107
+
108
/**
 * Render an array of run records as a fixed-width text table.
 * Pure function, no side effects — safe to call from tests.
 *
 * Columns: version, run_id, sha, integrity, created_at, url. Every cell is
 * coerced to a string and a null/undefined field renders as '-', so partial
 * records (for example a row without verdict fields) cannot throw.
 *
 * @param {Array<object>} rows - Run records; reads `version`, `runId`,
 *   `shaVerdict`, `integrityVerdict`, `createdAt` and `url`.
 * @returns {string} Header line, dashed separator line and one line per row,
 *   joined with '\n'; a fixed notice string when `rows` is empty.
 */
export function renderTable(rows) {
  if (rows.length === 0) {
    return 'No publish-npm-on-tag.yml runs found.';
  }
  const header = ['version', 'run_id', 'sha', 'integrity', 'created_at', 'url'];
  const toCell = (value) => String(value ?? '-');
  const body = rows.map((r) => [
    toCell(r.version),
    toCell(r.runId),
    toCell(r.shaVerdict),
    toCell(r.integrityVerdict),
    toCell(r.createdAt),
    toCell(r.url),
  ]);
  // Each column is as wide as its longest cell, header included.
  const widths = header.map((h, i) =>
    Math.max(h.length, ...body.map((row) => row[i].length)),
  );
  const pad = (cells) => cells.map((c, i) => c.padEnd(widths[i])).join(' ');
  const lines = [pad(header), pad(widths.map((w) => '-'.repeat(w)))];
  for (const row of body) lines.push(pad(row));
  return lines.join('\n');
}
134
+
135
/**
 * Summarize MATCH/MISMATCH/unavailable/missing counts across rows.
 * Used by `renderTable` callers to emit the "≥3 releases of MATCH" threshold
 * status described in DEC-PUBLISH-WORKFLOW-PACK-SHA-DIAGNOSTIC-ONLY-001.
 *
 * @param {Array<object>} rows - Run records; reads `shaVerdict` and
 *   `integrityVerdict`. Values outside the four known verdicts are ignored.
 * @returns {{ totalRuns: number, sha: object, integrity: object }} Row count
 *   plus one `{ MATCH, MISMATCH, unavailable, missing }` tally per field.
 */
export function summarize(rows) {
  const count = (field) => {
    const tally = { MATCH: 0, MISMATCH: 0, unavailable: 0, missing: 0 };
    for (const r of rows) {
      const verdict = r[field];
      // Own-property check: `in` would also accept inherited keys such as
      // `toString` and overwrite them on the tally object.
      if (Object.hasOwn(tally, verdict)) tally[verdict] += 1;
    }
    return tally;
  };
  return {
    totalRuns: rows.length,
    sha: count('shaVerdict'),
    integrity: count('integrityVerdict'),
  };
}
153
+
154
/**
 * Parse the script's command-line arguments.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{ limit: number, workflow: string, format: string,
 *   logFile: string | null, repo: string | null, help: boolean }}
 * @throws {Error} On an unknown argument, a value-taking option with no
 *   (or an empty) value, a `--limit` that is not a positive integer, or a
 *   `--format` other than "table" / "json".
 */
function parseArgs(argv) {
  const args = {
    limit: DEFAULT_LIMIT,
    workflow: DEFAULT_WORKFLOW,
    format: 'table',
    logFile: null,
    repo: null,
    help: false,
  };
  // Options that consume the following argument as their value.
  const valueOptions = new Map([
    ['--limit', (value) => { args.limit = Number(value); }],
    ['--workflow', (value) => { args.workflow = value; }],
    ['--format', (value) => { args.format = value; }],
    ['--log-file', (value) => { args.logFile = value; }],
    ['--repo', (value) => { args.repo = value; }],
  ]);

  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i];
    if (arg === '--help' || arg === '-h') {
      args.help = true;
      continue;
    }
    const assign = valueOptions.get(arg);
    if (!assign) {
      throw new Error(`unknown argument: ${arg}`);
    }
    const value = argv[i + 1];
    // Without this check a trailing `--log-file` would leave the option
    // undefined and the caller would silently fall back to live `gh` mode.
    if (value === undefined || value === '') {
      throw new Error(`${arg} requires a value`);
    }
    assign(value);
    i += 1;
  }

  if (!Number.isInteger(args.limit) || args.limit <= 0) {
    throw new Error(`--limit must be a positive integer, got: ${args.limit}`);
  }
  if (!['table', 'json'].includes(args.format)) {
    throw new Error(`--format must be "table" or "json", got: ${args.format}`);
  }
  return args;
}
193
+
194
/**
 * Write the usage text (synopsis, option list and a closing note) to stdout,
 * terminated by a single newline.
 */
function printHelp() {
  const helpLines = [
    'Usage: node cli/scripts/collect-pack-sha-diagnostic.mjs [options]',
    '',
    'Options:',
    ' --limit <N> Number of recent runs to inspect (default: 10)',
    ' --workflow <name> Workflow filename (default: publish-npm-on-tag.yml)',
    ' --format table|json Output format (default: table)',
    ' --log-file <path> Parse a single saved log file instead of calling gh',
    ' --repo <owner/name> Override repo (defaults to gh current repo)',
    ' -h, --help Show this help',
    '',
    'Emits MATCH/MISMATCH/unavailable/missing counts for PACK_SHA_DIAGNOSTIC',
    'and PACK_INTEGRITY_DIAGNOSTIC tags. Diagnostic-only; never fails.',
  ];
  const helpText = `${helpLines.join('\n')}\n`;
  process.stdout.write(helpText);
}
213
+
214
// `execFileSync` caps captured output at 1 MiB by default and throws ENOBUFS
// beyond that. `gh run view --log` output can exceed that cap, and the caller
// would then record the run as "missing", so the cap is raised explicitly.
const GH_EXEC_OPTIONS = Object.freeze({
  encoding: 'utf8',
  stdio: ['ignore', 'pipe', 'pipe'],
  maxBuffer: 256 * 1024 * 1024,
});

/**
 * Run `gh` with the given arguments and parse its stdout as JSON.
 *
 * @param {string[]} args - Arguments passed to the `gh` executable.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When `gh` cannot be run or fails (as thrown by
 *   `execFileSync`), or when stdout is not valid JSON.
 */
function ghJson(args) {
  return JSON.parse(ghText(args));
}

/**
 * Run `gh` with the given arguments and return its stdout as UTF-8 text.
 *
 * @param {string[]} args - Arguments passed to the `gh` executable.
 * @returns {string} Captured stdout.
 * @throws {Error} When `gh` cannot be run or fails (as thrown by
 *   `execFileSync`).
 */
function ghText(args) {
  return execFileSync('gh', args, GH_EXEC_OPTIONS);
}
222
+
223
/**
 * Build one evidence row per recent workflow run by listing runs with
 * `gh run list` and scraping each run's log via `gh run view --log`.
 *
 * @param {{ limit: number, workflow: string, repo: (string|null) }} options
 *   `repo`, when truthy, is forwarded to both gh calls as `--repo`.
 * @returns {Array<object>} Rows carrying the run metadata plus the parsed
 *   diagnostic fields; a run whose log cannot be fetched gets 'missing'
 *   verdicts and a `logError` message instead.
 * @throws {Error} When the run listing itself fails.
 */
function collectFromGh({ limit, workflow, repo }) {
  const repoFlag = repo ? ['--repo', repo] : [];

  let runs;
  try {
    runs = ghJson([
      'run', 'list',
      '--workflow', workflow,
      '--limit', String(limit),
      '--json', 'databaseId,displayTitle,conclusion,createdAt,url,headBranch,headSha',
      ...repoFlag,
    ]);
  } catch (err) {
    throw new Error(
      `Failed to list workflow runs via gh. Is the GitHub CLI installed and authenticated? (${err.message})`,
    );
  }

  // Metadata columns shared by every row, whether or not its log was readable.
  const describeRun = (run) => ({
    runId: run.databaseId,
    displayTitle: run.displayTitle,
    conclusion: run.conclusion,
    createdAt: run.createdAt,
    url: run.url,
    headBranch: run.headBranch,
    headSha: run.headSha,
  });

  const rows = [];
  for (const run of runs) {
    let log;
    let fetchError = null;
    try {
      log = ghText(['run', 'view', String(run.databaseId), '--log', ...repoFlag]);
    } catch (err) {
      // gh run view --log fails when logs are expired (>90d) or mid-run.
      // Keep the run in the sweep with missing verdicts rather than aborting.
      fetchError = err;
    }

    if (fetchError) {
      rows.push({
        ...describeRun(run),
        shaVerdict: 'missing',
        integrityVerdict: 'missing',
        shaDetail: null,
        integrityDetail: null,
        version: null,
        logError: fetchError.message,
      });
    } else {
      rows.push({ ...describeRun(run), ...parseDiagnosticLines(log) });
    }
  }
  return rows;
}
282
+
283
/**
 * CLI entry point: parse arguments, gather rows (from a saved log file or
 * from `gh`), then print either a JSON document or a table plus count summary.
 *
 * Exits with status 2 after printing help when the arguments are invalid.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {Promise<void>}
 */
async function main(argv) {
  let args;
  try {
    args = parseArgs(argv);
  } catch (err) {
    process.stderr.write(`collect-pack-sha-diagnostic: ${err.message}\n`);
    printHelp();
    process.exit(2);
  }
  if (args.help) {
    printHelp();
    return;
  }

  // Offline mode yields exactly one row, labelled by the log file's path.
  const rows = args.logFile
    ? [
        {
          runId: null,
          createdAt: null,
          url: args.logFile,
          ...parseDiagnosticLines(readFileSync(args.logFile, 'utf8')),
        },
      ]
    : collectFromGh({ limit: args.limit, workflow: args.workflow, repo: args.repo });

  if (args.format === 'json') {
    const payload = JSON.stringify({ rows, summary: summarize(rows) }, null, 2);
    process.stdout.write(payload);
    process.stdout.write('\n');
    return;
  }

  const table = renderTable(rows);
  const { totalRuns, sha, integrity } = summarize(rows);
  const report = [
    `Runs inspected: ${totalRuns}`,
    `SHA MATCH: ${sha.MATCH}`,
    `SHA MISMATCH: ${sha.MISMATCH}`,
    `SHA unavailable: ${sha.unavailable}`,
    `SHA missing: ${sha.missing} (rerun / no diagnostic)`,
    `INTEGRITY MATCH: ${integrity.MATCH}`,
    `INTEGRITY MISMATCH: ${integrity.MISMATCH}`,
    `INTEGRITY unavailable: ${integrity.unavailable}`,
    `INTEGRITY missing: ${integrity.missing}`,
    '',
    'Diagnostic-only. ≥3 MATCH on both SHA + INTEGRITY is the threshold',
    'named in DEC-PUBLISH-WORKFLOW-PACK-SHA-DIAGNOSTIC-ONLY-001 before any',
    'reproducible-publish gate can be designed.',
  ];
  process.stdout.write(`${table}\n\n`);
  process.stdout.write(`${report.join('\n')}\n`);
}
333
+
334
/**
 * Report whether this module is the process entry point (as opposed to being
 * imported, e.g. by tests).
 *
 * `process.argv[1]` is converted to a file URL with `pathToFileURL`, so paths
 * that need URL-encoding (spaces, non-ASCII) and Windows paths compare
 * correctly against `import.meta.url`. Both the path as given and its
 * symlink-resolved form are tried.
 *
 * @returns {boolean}
 */
function isInvokedDirectly() {
  const entry = process.argv[1];
  if (!entry) return false;
  const candidates = [pathToFileURL(entry).href];
  try {
    candidates.push(pathToFileURL(realpathSync(entry)).href);
  } catch {
    // The entry path could not be resolved on disk; fall back to the
    // unresolved comparison above.
  }
  return candidates.includes(import.meta.url);
}

// Only run main when invoked directly (not when imported by tests).
const invokedDirectly = isInvokedDirectly();

if (invokedDirectly) {
  main(process.argv.slice(2)).catch((err) => {
    process.stderr.write(`collect-pack-sha-diagnostic: ${err.stack || err.message}\n`);
    process.exit(1);
  });
}
@@ -0,0 +1,132 @@
1
#!/usr/bin/env bash
# Prepublish gate — local-first quality floor before `git tag`.
#
# Replaces per-commit remote CI coverage that `.github/workflows/ci.yml` will
# drop when CICD-SHRINK lands. Runs the same checks the GitHub-hosted runners
# ran, in-process on the agent's box, before any tag or publish-workflow is
# triggered. See .planning/CICD_REDUCTION_PLAN.md §7.
#
# Usage:
#   bash cli/scripts/prepublish-gate.sh <target-version>
#
# Exit 0 + prints "PREPUBLISH GATE PASSED for <version>" → safe to tag/push.
# Exit non-zero → do NOT tag, do NOT push. Fix the failure locally first.
#
# Discipline rule (CICD-SHRINK acceptance, new in DEC-RELEASE-CUT-AND-PUSH-AS-ATOMIC-001):
# the release-cut turn MUST include this script's "PREPUBLISH GATE PASSED" line
# in the turn's Evidence block before `git tag` is created.

# No `-e` here: each step records its failure through step_fail and the
# remaining steps still run, so one invocation reports every failing step.
set -uo pipefail

# Because `-e` is off, a failed directory lookup or `cd` would otherwise let
# every step below run from the wrong directory; abort explicitly instead.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" || {
  echo "Error: cannot resolve the script directory" >&2
  exit 1
}
CLI_DIR="${SCRIPT_DIR}/.."
cd "$CLI_DIR" || {
  echo "Error: cannot cd into ${CLI_DIR}" >&2
  exit 1
}

usage() {
  echo "Usage: bash cli/scripts/prepublish-gate.sh <target-version>" >&2
  echo " <target-version> Semver string (e.g., 2.150.0)." >&2
}

if [[ $# -ne 1 ]]; then
  usage
  exit 1
fi

TARGET_VERSION="$1"
if ! [[ "$TARGET_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
  echo "Error: invalid semver '${TARGET_VERSION}'" >&2
  usage
  exit 1
fi

echo "Prepublish gate — target version ${TARGET_VERSION}"
echo "================================================="
echo "cwd: ${CLI_DIR}"
echo ""

# Overall gate status; flipped to 1 by step_fail and checked after all steps.
STEP_STATUS=0
step_fail() {
  echo ""
  echo " FAIL: $1"
  STEP_STATUS=1
}

# ---------------------------------------------------------------------------
# Step 1 — Full test suite (replaces per-push CI).
#
# Runs via `npm test` so Vitest + Node test phases are both exercised, and the
# Node phase inherits the `--test-timeout=60000 --test-concurrency=4` caps from
# package.json (DEC-BUG57-FAILFAST-NODE-TEST-001). We pass `--test-timeout`
# explicitly too so the gate cannot be silently weakened by a future
# package.json change. The intent is that, when `--test-timeout` is given more
# than once, the node runner honors the last occurrence.
#
# NOTE(review): the `test` script is `npm run test:vitest && npm run test:node`,
# and npm appends the extra argument to the END of that command line, i.e.
# after the inner `npm run test:node` rather than after `node --test`. The flag
# is then presumably consumed by the inner `npm run` instead of reaching node —
# confirm that this override actually takes effect.
# ---------------------------------------------------------------------------
echo "[1/4] Full test suite — cd cli && npm test -- --test-timeout=60000"
if npm test -- --test-timeout=60000; then
  echo " PASS: full test suite green"
else
  step_fail "full test suite failed"
fi
echo ""

# ---------------------------------------------------------------------------
# Step 2 — Release preflight in publish-gate mode.
#
# `release-preflight.sh --publish-gate` enforces strict mode (clean tree,
# bumped package.json, CHANGELOG entry present, release-alignment manifest
# aligned, pack dry-run succeeds) and runs only the release-gate-critical
# test subset. Step 1 already ran the full suite; step 2 enforces the
# release-specific invariants.
# ---------------------------------------------------------------------------
echo "[2/4] Release preflight — bash scripts/release-preflight.sh --publish-gate"
if bash "${SCRIPT_DIR}/release-preflight.sh" --publish-gate --target-version "${TARGET_VERSION}"; then
  echo " PASS: release-preflight gate green"
else
  step_fail "release-preflight gate failed"
fi
echo ""

# ---------------------------------------------------------------------------
# Step 3 — npm pack --dry-run (claim-reality coverage).
#
# Proves the tarball the publish workflow will upload is reproducible from
# HEAD. The publish workflow itself re-runs this; we catch pack failures here
# before the tag is even created, so a broken `files:` glob or missing dist
# artifact never reaches remote CI.
# ---------------------------------------------------------------------------
echo "[3/4] Pack dry-run — npm pack --dry-run"
if npm pack --dry-run >/dev/null 2>&1; then
  echo " PASS: npm pack --dry-run succeeded"
else
  step_fail "npm pack --dry-run failed (rerun with streamed output for details)"
  # Second run exists only to show the tail of the failure output; its own
  # exit status is deliberately ignored.
  npm pack --dry-run 2>&1 | tail -20 || true
fi
echo ""

# ---------------------------------------------------------------------------
# Step 4 — Release-alignment manifest (17-surface drift gate).
#
# `check-release-alignment.mjs` owns the shared manifest of every surface that
# must reference the target version (CHANGELOG, website release pages,
# capabilities.json, implementor guide, launch evidence, onboarding docs,
# marketing drafts, llms.txt, homebrew mirror, package.json). Step 2 runs the
# same check, but we invoke it directly so the final status line is visible
# in the gate's own output — agents reading this log get an explicit
# alignment-pass signal without having to scrape the preflight's mid-run
# block.
# ---------------------------------------------------------------------------
echo "[4/4] Release alignment — node scripts/check-release-alignment.mjs --scope current"
if node "${SCRIPT_DIR}/check-release-alignment.mjs" --scope current --target-version "${TARGET_VERSION}"; then
  echo " PASS: release alignment green"
else
  step_fail "release alignment failed"
fi
echo ""

echo "================================================="
if [[ "${STEP_STATUS}" -ne 0 ]]; then
  echo "PREPUBLISH GATE FAILED for ${TARGET_VERSION} — do NOT tag, do NOT push."
  exit 1
fi

echo "PREPUBLISH GATE PASSED for ${TARGET_VERSION} — safe to tag and push."
exit 0
@@ -342,12 +342,12 @@ else
342
342
  PREFLIGHT_FAILED=0
343
343
 
344
344
  # 8.5a. Full test suite with release env vars
345
- if env AGENTXCHAIN_RELEASE_TARGET_VERSION="${TARGET_VERSION}" AGENTXCHAIN_RELEASE_PREFLIGHT=1 npm test >/dev/null 2>&1; then
345
+ if env AGENTXCHAIN_RELEASE_TARGET_VERSION="${TARGET_VERSION}" AGENTXCHAIN_RELEASE_PREFLIGHT=1 npm test -- --test-timeout=60000 >/dev/null 2>&1; then
346
346
  echo " OK: test suite passed"
347
347
  else
348
348
  echo " FAIL: test suite failed" >&2
349
349
  echo " Re-running with output for diagnostics..." >&2
350
- env AGENTXCHAIN_RELEASE_TARGET_VERSION="${TARGET_VERSION}" AGENTXCHAIN_RELEASE_PREFLIGHT=1 npm test 2>&1 | tail -30 >&2
350
+ env AGENTXCHAIN_RELEASE_TARGET_VERSION="${TARGET_VERSION}" AGENTXCHAIN_RELEASE_PREFLIGHT=1 npm test -- --test-timeout=60000 2>&1 | tail -30 >&2
351
351
  PREFLIGHT_FAILED=1
352
352
  fi
353
353
 
@@ -57,20 +57,27 @@ function extractSummaryParagraph(text, version) {
57
57
  }
58
58
 
59
59
  function extractAggregateEvidenceLine(text) {
60
- const matches = [...text.matchAll(/^-\s+.*\b(\d+)\s+tests\b.*\b0 failures\b.*$/gm)];
60
+ const matches = [...text.matchAll(/^-\s+(.*\b(\d+)\s+tests\b.*\b0 failures\b.*)$/gm)];
61
61
  if (matches.length === 0) {
62
62
  throw new Error('Concrete aggregate evidence line missing from governed release page');
63
63
  }
64
64
 
65
65
  const aggregate = matches.reduce((best, match) => {
66
- const count = Number(match[1]);
66
+ const count = Number(match[2]);
67
67
  if (!best || count > best.count) {
68
- return { count, line: match[0] };
68
+ return { count, line: match[1] };
69
69
  }
70
70
  return best;
71
71
  }, null);
72
72
 
73
- return aggregate.line.replace(/\*\*/g, '').replace(/`/g, '').replace(/,/g, '').trim();
73
+ const line = aggregate.line.replace(/\*\*/g, '').replace(/`/g, '').replace(/,/g, '').trim();
74
+ const evidenceMatch = line.match(/\b\d+\s+tests\b.*\b0 failures\b.*/);
75
+ if (!evidenceMatch) {
76
+ throw new Error('Concrete aggregate evidence line missing from governed release page');
77
+ }
78
+ const prefix = line.slice(0, evidenceMatch.index).trim();
79
+ const evidence = evidenceMatch[0].trim();
80
+ return `- ${evidence}${prefix ? ` — ${prefix.replace(/[→-]\s*$/, '').trim()}` : ''}`;
74
81
  }
75
82
 
76
83
  function getPreviousVersionTag(repoRoot, version) {
@@ -58,7 +58,7 @@
58
58
  * purpose), and does NOT require the governed dispatcher to be running.
59
59
  */
60
60
 
61
- import { spawn } from 'child_process';
61
+ import { spawn, spawnSync } from 'child_process';
62
62
  import { existsSync, readFileSync, readdirSync, statSync, writeFileSync } from 'fs';
63
63
  import { join, resolve } from 'path';
64
64
  import { fileURLToPath } from 'url';
@@ -315,20 +315,6 @@ async function runOneAttempt({
315
315
  attempt.spawn_attached_elapsed_ms = now - t0;
316
316
  });
317
317
 
318
- if (child.stdin) {
319
- child.stdin.on('error', (err) => {
320
- // Capture but do not fail — adapter behavior matches: stdin EPIPE is
321
- // logged and the spawn continues to play out via close/error events.
322
- attempt.stderr += `[repro:stdin_error] ${err?.code || ''} ${err?.message || ''}\n`;
323
- });
324
- try {
325
- if (transport === 'stdin') child.stdin.write(fullPrompt);
326
- child.stdin.end();
327
- } catch (err) {
328
- attempt.stderr += `[repro:stdin_throw] ${err?.code || ''} ${err?.message || ''}\n`;
329
- }
330
- }
331
-
332
318
  if (child.stdout) {
333
319
  child.stdout.on('data', (chunk) => {
334
320
  const text = chunk.toString();
@@ -343,6 +329,20 @@ async function runOneAttempt({
343
329
  });
344
330
  }
345
331
 
332
+ if (child.stdin) {
333
+ child.stdin.on('error', (err) => {
334
+ // Capture but do not fail — adapter behavior matches: stdin EPIPE is
335
+ // logged and the spawn continues to play out via close/error events.
336
+ attempt.stderr += `[repro:stdin_error] ${err?.code || ''} ${err?.message || ''}\n`;
337
+ });
338
+ try {
339
+ if (transport === 'stdin') child.stdin.write(fullPrompt);
340
+ child.stdin.end();
341
+ } catch (err) {
342
+ attempt.stderr += `[repro:stdin_throw] ${err?.code || ''} ${err?.message || ''}\n`;
343
+ }
344
+ }
345
+
346
346
  if (child.stderr) {
347
347
  child.stderr.on('data', (chunk) => {
348
348
  const text = chunk.toString();
@@ -496,6 +496,7 @@ async function main() {
496
496
  const stdinBytes = transport === 'stdin' ? Buffer.byteLength(fullPrompt) : 0;
497
497
  const diagnosticArgs = redactArgs(args, fullPrompt, transport);
498
498
  const envSnapshot = snapshotEnv(spawnEnv);
499
+ const commandProbe = probeCommand(command, runtimeCwd, spawnEnv);
499
500
 
500
501
  const header = {
501
502
  repro_version: 1,
@@ -510,6 +511,7 @@ async function main() {
510
511
  stdin_bytes: stdinBytes,
511
512
  prompt_source: promptSource,
512
513
  env_snapshot: envSnapshot,
514
+ command_probe: commandProbe,
513
515
  watchdog_ms: opts.noWatchdog ? null : watchdogMs,
514
516
  no_watchdog: opts.noWatchdog,
515
517
  attempts_planned: opts.attempts,
@@ -529,6 +531,11 @@ async function main() {
529
531
  console.error(`[repro] prompt : ${promptSource.kind} (${promptSource.length_bytes} bytes)`);
530
532
  console.error(`[repro] watchdog_ms : ${header.watchdog_ms ?? 'disabled'}`);
531
533
  console.error(`[repro] auth env : ${JSON.stringify(envSnapshot.auth_env_present)}`);
534
+ if (commandProbe.kind === 'claude_version') {
535
+ console.error(`[repro] claude probe : status=${commandProbe.status ?? '-'} signal=${commandProbe.signal ?? '-'} stdout=${JSON.stringify(commandProbe.stdout || '')}`);
536
+ } else {
537
+ console.error(`[repro] command probe: ${commandProbe.kind} (${commandProbe.reason})`);
538
+ }
532
539
  console.error(`[repro] attempts : ${header.attempts_planned}`);
533
540
  console.error('');
534
541
 
@@ -617,6 +624,65 @@ function summarize(attempts) {
617
624
  };
618
625
  }
619
626
 
627
/**
 * Run `<command> --version` synchronously (10 s timeout, 1 MiB output cap)
 * and describe the outcome for the repro header.
 *
 * @param {string} command - Executable to probe.
 * @param {string} cwd - Working directory for the probe.
 * @param {object} env - Environment passed to the probe.
 * @returns {object} `{ kind: 'skipped', reason }` when `isClaudeCommand`
 *   rejects the command; otherwise a `kind: 'claude_version'` record with
 *   `command`, `args`, `timeout_ms`, `status`, `signal`, `stdout`, `stderr`,
 *   `error` (null, or `{ code, errno, syscall, message }`) and `timed_out`.
 */
function probeCommand(command, cwd, env) {
  if (!isClaudeCommand(command)) {
    return {
      kind: 'skipped',
      reason: 'not a claude command',
    };
  }

  // Defaults describe a probe that threw before producing any result.
  let outcome = { status: null, signal: null, stdout: '', stderr: '' };
  let failure = null;
  let failed = false;
  try {
    const result = spawnSync(command, ['--version'], {
      cwd,
      env,
      encoding: 'utf8',
      timeout: 10_000,
      maxBuffer: 1024 * 1024,
    });
    outcome = {
      status: result.status,
      signal: result.signal,
      stdout: result.stdout || '',
      stderr: result.stderr || '',
    };
    failure = result.error;
    failed = Boolean(result.error);
  } catch (err) {
    failure = err;
    failed = true;
  }

  const errorInfo = failed
    ? {
        code: failure?.code ?? null,
        errno: failure?.errno ?? null,
        syscall: failure?.syscall ?? null,
        message: failure?.message || String(failure),
      }
    : null;

  return {
    kind: 'claude_version',
    command,
    args: ['--version'],
    timeout_ms: 10_000,
    ...outcome,
    error: errorInfo,
    timed_out: failure?.code === 'ETIMEDOUT',
  };
}
679
+
680
/**
 * Tell whether a command string names the `claude` executable: either the
 * bare word `claude` or a path whose final segment is `claude`. Backslashes
 * are treated as path separators.
 *
 * @param {*} command - Candidate command; non-strings are never a match.
 * @returns {boolean}
 */
function isClaudeCommand(command) {
  return (
    typeof command === 'string' &&
    /(?:^|\/)claude$/.test(command.replaceAll('\\', '/'))
  );
}
685
+
620
686
  main().catch((err) => {
621
687
  console.error(`[repro] fatal: ${err?.stack || err?.message || err}`);
622
688
  process.exit(99);