npm - agentxchain - Versions diffs - 2.149.2 → 2.151.0 - Mend

agentxchain 2.149.2 → 2.151.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/package.json +3 -2
package/scripts/collect-pack-sha-diagnostic.mjs +344 -0
package/scripts/prepublish-gate.sh +132 -0
package/scripts/release-bump.sh +2 -2
package/scripts/render-github-release-body.mjs +11 -4
package/scripts/reproduce-bug-54.mjs +81 -15
package/src/commands/init.js +36 -4
package/src/commands/mission.js +6 -2
package/src/commands/restart.js +1 -1
package/src/commands/turn.js +14 -5
package/src/lib/adapters/local-cli-adapter.js +25 -23
package/src/lib/approval-policy.js +44 -0
package/src/lib/governed-state.js +118 -1
package/src/lib/governed-templates.js +1 -0
package/src/lib/mission-plans.js +14 -2
package/src/lib/normalized-config.js +23 -0
package/src/lib/schemas/agentxchain-config.schema.json +90 -2
package/src/lib/stale-turn-watchdog.js +3 -3
package/src/templates/governed/enterprise-app.json +35 -5

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agentxchain",
-  "version": "2.149.2",
+  "version": "2.151.0",
   "description": "CLI for AgentXchain — governed multi-agent software delivery",
   "type": "module",
   "bin": {
@@ -28,7 +28,7 @@
     "test": "npm run test:vitest && npm run test:node",
     "test:vitest": "vitest run --reporter=verbose",
     "test:beta": "node --test test/beta-tester-scenarios/*.test.js",
-    "test:node": "node --test test/*.test.js test/beta-tester-scenarios/*.test.js",
+    "test:node": "node --test --test-timeout=60000 --test-concurrency=4 test/*.test.js test/beta-tester-scenarios/*.test.js",
     "preflight:release": "bash scripts/release-preflight.sh",
     "preflight:release:strict": "bash scripts/release-preflight.sh --strict",
     "check:release-alignment": "node scripts/check-release-alignment.mjs",
@@ -37,6 +37,7 @@
     "bump:release": "bash scripts/release-bump.sh",
     "sync:homebrew": "bash scripts/sync-homebrew.sh",
     "verify:post-publish": "bash scripts/verify-post-publish.sh",
+    "collect:pack-sha-diagnostic": "node scripts/collect-pack-sha-diagnostic.mjs",
     "build:macos": "bun build bin/agentxchain.js --compile --target=bun-darwin-arm64 --outfile=dist/agentxchain-macos-arm64",
     "build:linux": "bun build bin/agentxchain.js --compile --target=bun-linux-x64 --outfile=dist/agentxchain-linux-x64",
     "publish:npm": "bash scripts/publish-npm.sh"

package/scripts/collect-pack-sha-diagnostic.mjs ADDED Viewed

@@ -0,0 +1,344 @@
+#!/usr/bin/env node
+/**
+ * Collect pack-SHA diagnostic evidence from `publish-npm-on-tag.yml` runs.
+ *
+ * Purpose:
+ *   Turn 129 (`DEC-PUBLISH-WORKFLOW-PACK-SHA-DIAGNOSTIC-ONLY-001`) added
+ *   runner-local `npm pack` SHA capture + registry `dist.shasum`/`dist.integrity`
+ *   comparison to the publish workflow as diagnostic-only evidence. Each
+ *   published tag now emits `PACK_SHA_DIAGNOSTIC:` and `PACK_INTEGRITY_DIAGNOSTIC:`
+ *   log lines with MATCH/MISMATCH verdicts.
+ *
+ *   A real reproducible-publish gate cannot be designed until we have ≥3 release
+ *   cycles of evidence. This script turns the per-run log lines into a
+ *   multi-release evidence view so the threshold can be evaluated at a glance.
+ *
+ * Behavior:
+ *   Default: uses `gh run list` to fetch the last N `publish-npm-on-tag.yml`
+ *   runs, then `gh run view <id> --log` to scrape the two diagnostic tags from
+ *   each run's logs, and prints a table summary plus aggregate MATCH/MISMATCH
+ *   counts.
+ *
+ *   Test / offline mode: `--log-file <path>` parses a single saved log instead
+ *   of calling `gh`. Useful for unit tests and local debugging without GH auth.
+ *
+ * Usage:
+ *   cd cli && npm run collect:pack-sha-diagnostic --                  # last 10 runs
+ *   cd cli && npm run collect:pack-sha-diagnostic -- --limit 20
+ *   node cli/scripts/collect-pack-sha-diagnostic.mjs                    # direct path
+ *   node cli/scripts/collect-pack-sha-diagnostic.mjs --limit 20
+ *   node cli/scripts/collect-pack-sha-diagnostic.mjs --format json
+ *   node cli/scripts/collect-pack-sha-diagnostic.mjs --workflow publish-npm-on-tag.yml
+ *   node cli/scripts/collect-pack-sha-diagnostic.mjs --log-file /tmp/run.log
+ *
+ * How to read the output:
+ *   - `MATCH` means the workflow's runner-local pack value matched the npm
+ *     registry value for that release run.
+ *   - `MISMATCH` means the runner-local pack value differed from registry
+ *     truth. Treat it as investigation evidence, not an automatic release
+ *     failure.
+ *   - `unavailable` means the diagnostic ran but could not form a comparison
+ *     (for example, registry metadata was not ready).
+ *   - `missing` means the diagnostic tag was absent, usually because the run
+ *     was an already-published rerun and skipped local packing.
+ *   - Only non-rerun `MATCH` verdicts count toward the "≥3 MATCH" evidence
+ *     threshold from `DEC-PUBLISH-WORKFLOW-PACK-SHA-DIAGNOSTIC-ONLY-001`.
+ *     That threshold only permits designing a future gate; it is not a gate
+ *     by itself.
+ *
+ * Diagnostic-only. This script does not gate releases, mutate state, or fail
+ * on MISMATCH. It prints evidence; a gate is a future decision.
+ */
+import { execFileSync } from 'node:child_process';
+import { readFileSync, realpathSync } from 'node:fs';
+import { pathToFileURL } from 'node:url';
+const DEFAULT_WORKFLOW = 'publish-npm-on-tag.yml';
+const DEFAULT_LIMIT = 10;
+/**
+ * Parse a publish workflow log for the Turn 129 diagnostic tags.
+ *
+ * For each tag, the first occurrence that carries text on its own line is
+ * used; the detail is the trimmed remainder of that same line.
+ *
+ * Returns a plain object with:
+ *   - shaVerdict:       'MATCH' | 'MISMATCH' | 'unavailable' | 'missing'
+ *   - shaDetail:        the line body after the ':' (MATCH/MISMATCH reason) or null
+ *   - integrityVerdict: 'MATCH' | 'MISMATCH' | 'unavailable' | 'missing'
+ *   - integrityDetail:  the line body after the ':' or null
+ *   - version:          the `agentxchain@X.Y.Z` version found in either tag's detail, or null
+ *
+ * A log with no `PACK_SHA_DIAGNOSTIC:` tag (or only blank ones) returns
+ * shaVerdict = 'missing' (the diagnostic step did not run — e.g.
+ * `already_published` rerun).
+ *
+ * A log whose SHA tag says "unavailable" (registry dist missing, runner pack
+ * failed) returns shaVerdict = 'unavailable' — distinct from MATCH/MISMATCH
+ * because the diagnostic could not form a verdict. Any detail whose first
+ * word is neither MATCH nor MISMATCH is classified this way.
+ *
+ * @param {string} logText - Raw workflow log text.
+ * @returns {{shaVerdict: string, shaDetail: (string|null), integrityVerdict: string, integrityDetail: (string|null), version: (string|null)}}
+ */
+export function parseDiagnosticLines(logText) {
+  // `[ \t]*` and `[^\r\n]+` keep the capture on the tag's own line. With `\s*`
+  // a blank tag would swallow the newline and report the NEXT log line as its
+  // detail (and possibly as a MATCH verdict).
+  const shaRegex = /PACK_SHA_DIAGNOSTIC:[ \t]*([^\r\n]+)/;
+  const integrityRegex = /PACK_INTEGRITY_DIAGNOSTIC:[ \t]*([^\r\n]+)/;
+  // Trimmed detail text, or null when the tag is absent or whitespace-only.
+  const extractDetail = (regex) => logText.match(regex)?.[1].trim() || null;
+  // The verdict is the first whitespace-delimited word of the detail.
+  const classifyVerdict = (detail) => {
+    if (!detail) return 'missing';
+    const head = detail.split(/\s+/)[0];
+    if (head === 'MATCH' || head === 'MISMATCH') return head;
+    return 'unavailable';
+  };
+  const shaDetail = extractDetail(shaRegex);
+  const integrityDetail = extractDetail(integrityRegex);
+  // Try to pull `agentxchain@X.Y.Z` from either diagnostic line.
+  const versionSource = `${shaDetail ?? ''} ${integrityDetail ?? ''}`;
+  const version = versionSource.match(/agentxchain@(\d+\.\d+\.\d+)/)?.[1] ?? null;
+  return {
+    shaVerdict: classifyVerdict(shaDetail),
+    shaDetail,
+    integrityVerdict: classifyVerdict(integrityDetail),
+    integrityDetail,
+    version,
+  };
+}
+/**
+ * Format run records as a fixed-width, two-space-separated text table with a
+ * header row and a dashed rule. Returns a placeholder sentence for an empty
+ * list. Pure function, no side effects — safe to call from tests.
+ *
+ * @param {Array<object>} rows - Run records; `shaVerdict` and
+ *   `integrityVerdict` are read as-is, other columns fall back to '-'.
+ * @returns {string} The rendered table (no trailing newline).
+ */
+export function renderTable(rows) {
+  if (rows.length === 0) return 'No publish-npm-on-tag.yml runs found.';
+  const header = ['version', 'run_id', 'sha', 'integrity', 'created_at', 'url'];
+  const body = [];
+  for (const record of rows) {
+    body.push([
+      record.version ?? '-',
+      String(record.runId ?? '-'),
+      record.shaVerdict,
+      record.integrityVerdict,
+      record.createdAt ?? '-',
+      record.url ?? '-',
+    ]);
+  }
+  // Each column is as wide as its longest cell, header included.
+  const widths = header.map((title, col) =>
+    body.reduce((widest, cells) => Math.max(widest, cells[col].length), title.length),
+  );
+  const formatRow = (cells) =>
+    cells.map((cell, col) => cell.padEnd(widths[col])).join('  ');
+  const rule = widths.map((width) => '-'.repeat(width));
+  return [header, rule, ...body].map(formatRow).join('\n');
+}
+/**
+ * Summarize MATCH/MISMATCH/unavailable/missing counts across rows.
+ * Used by `renderTable` callers to emit the "≥3 releases of MATCH" threshold
+ * status described in DEC-PUBLISH-WORKFLOW-PACK-SHA-DIAGNOSTIC-ONLY-001.
+ *
+ * Verdict values outside the four known names are ignored (they still count
+ * toward `totalRuns`).
+ *
+ * @param {Array<object>} rows - Run records carrying `shaVerdict` and `integrityVerdict`.
+ * @returns {{totalRuns: number, sha: object, integrity: object}} Per-field tallies.
+ */
+export function summarize(rows) {
+  const count = (field) => {
+    const tally = { MATCH: 0, MISMATCH: 0, unavailable: 0, missing: 0 };
+    for (const r of rows) {
+      const verdict = r[field];
+      // Own-property check: `in` would also accept inherited names such as
+      // `constructor` or `toString` and write garbage keys onto the tally.
+      if (Object.hasOwn(tally, verdict)) tally[verdict] += 1;
+    }
+    return tally;
+  };
+  return {
+    totalRuns: rows.length,
+    sha: count('shaVerdict'),
+    integrity: count('integrityVerdict'),
+  };
+}
+/**
+ * Parse CLI arguments into an options object.
+ *
+ * @param {string[]} argv - Arguments after the script path.
+ * @returns {{limit: number, workflow: string, format: string, logFile: (string|null), repo: (string|null), help: (boolean|undefined)}}
+ * @throws {Error} On an unknown argument, a value-taking flag with no value,
+ *   a non-positive or non-integer `--limit`, or a `--format` other than
+ *   "table"/"json".
+ */
+function parseArgs(argv) {
+  const args = {
+    limit: DEFAULT_LIMIT,
+    workflow: DEFAULT_WORKFLOW,
+    format: 'table',
+    logFile: null,
+    repo: null,
+  };
+  // Flags that consume the following argv entry, mapped to the key they set.
+  const valueFlags = {
+    '--limit': 'limit',
+    '--workflow': 'workflow',
+    '--format': 'format',
+    '--log-file': 'logFile',
+    '--repo': 'repo',
+  };
+  for (let i = 0; i < argv.length; i += 1) {
+    const arg = argv[i];
+    if (arg === '--help' || arg === '-h') {
+      args.help = true;
+      continue;
+    }
+    if (!Object.hasOwn(valueFlags, arg)) {
+      throw new Error(`unknown argument: ${arg}`);
+    }
+    const value = argv[i + 1];
+    // Without this check a trailing flag stored `undefined`, and a flag
+    // followed by another flag silently swallowed it as its value.
+    if (value === undefined || value.startsWith('--')) {
+      throw new Error(`${arg} requires a value`);
+    }
+    args[valueFlags[arg]] = arg === '--limit' ? Number(value) : value;
+    i += 1;
+  }
+  if (!Number.isInteger(args.limit) || args.limit <= 0) {
+    throw new Error(`--limit must be a positive integer, got: ${args.limit}`);
+  }
+  if (!['table', 'json'].includes(args.format)) {
+    throw new Error(`--format must be "table" or "json", got: ${args.format}`);
+  }
+  return args;
+}
+/** Write the usage/options text to stdout, one line at a time. */
+function printHelp() {
+  const usageLines = [
+    'Usage: node cli/scripts/collect-pack-sha-diagnostic.mjs [options]',
+    '',
+    'Options:',
+    '  --limit <N>         Number of recent runs to inspect (default: 10)',
+    '  --workflow <name>   Workflow filename (default: publish-npm-on-tag.yml)',
+    '  --format table|json Output format (default: table)',
+    '  --log-file <path>   Parse a single saved log file instead of calling gh',
+    '  --repo <owner/name> Override repo (defaults to gh current repo)',
+    '  -h, --help          Show this help',
+    '',
+    'Emits MATCH/MISMATCH/unavailable/missing counts for PACK_SHA_DIAGNOSTIC',
+    'and PACK_INTEGRITY_DIAGNOSTIC tags. Diagnostic-only; never fails.',
+  ];
+  // Every line, including the last, is newline-terminated.
+  const helpText = usageLines.map((line) => `${line}\n`).join('');
+  process.stdout.write(helpText);
+}
+/**
+ * Run the `gh` CLI with the given arguments and parse its stdout as JSON.
+ * Throws if `gh` cannot be run or exits non-zero (via `execFileSync`), or if
+ * the output is not valid JSON.
+ */
+function ghJson(args) {
+  return JSON.parse(ghText(args));
+}
+/**
+ * Run the `gh` CLI with the given arguments and return its stdout as a UTF-8
+ * string. stdin is ignored; stdout and stderr are piped.
+ */
+function ghText(args) {
+  const spawnOptions = { encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] };
+  const stdout = execFileSync('gh', args, spawnOptions);
+  return stdout;
+}
+/**
+ * List recent runs of a workflow through `gh run list`, fetch each run's log
+ * with `gh run view --log`, and return one record per run: the run metadata
+ * plus the parsed diagnostic verdicts.
+ *
+ * A run whose log cannot be fetched is still returned, with both verdicts set
+ * to 'missing' and the failure message in `logError`.
+ *
+ * @param {{limit: number, workflow: string, repo: (string|null)}} options
+ * @returns {Array<object>} Run records in the order `gh run list` returned them.
+ * @throws {Error} When the run listing itself fails.
+ */
+function collectFromGh({ limit, workflow, repo }) {
+  // Appended to every gh invocation only when a repo override was given.
+  const repoFlag = repo ? ['--repo', repo] : [];
+  let runs;
+  try {
+    runs = ghJson([
+      'run', 'list',
+      '--workflow', workflow,
+      '--limit', String(limit),
+      '--json', 'databaseId,displayTitle,conclusion,createdAt,url,headBranch,headSha',
+      ...repoFlag,
+    ]);
+  } catch (err) {
+    throw new Error(
+      `Failed to list workflow runs via gh. Is the GitHub CLI installed and authenticated? (${err.message})`,
+    );
+  }
+  const rows = [];
+  for (const run of runs) {
+    const meta = {
+      runId: run.databaseId,
+      displayTitle: run.displayTitle,
+      conclusion: run.conclusion,
+      createdAt: run.createdAt,
+      url: run.url,
+      headBranch: run.headBranch,
+      headSha: run.headSha,
+    };
+    let log;
+    try {
+      log = ghText(['run', 'view', String(run.databaseId), '--log', ...repoFlag]);
+    } catch (err) {
+      // gh run view --log fails when logs are expired (>90d) or mid-run.
+      // Keep the run in the sweep with missing verdicts instead of aborting.
+      rows.push({
+        ...meta,
+        shaVerdict: 'missing',
+        integrityVerdict: 'missing',
+        shaDetail: null,
+        integrityDetail: null,
+        version: null,
+        logError: err.message,
+      });
+      continue;
+    }
+    rows.push({ ...meta, ...parseDiagnosticLines(log) });
+  }
+  return rows;
+}
+/**
+ * CLI entry point. Parses arguments, gathers run records (from a saved log
+ * file when `--log-file` is given, otherwise through `gh`), and prints them as
+ * JSON or as a table followed by verdict counts.
+ *
+ * Argument errors are reported on stderr with the help text and exit code 2.
+ *
+ * @param {string[]} argv - Arguments after the script path.
+ */
+async function main(argv) {
+  let args;
+  try {
+    args = parseArgs(argv);
+  } catch (err) {
+    process.stderr.write(`collect-pack-sha-diagnostic: ${err.message}\n`);
+    printHelp();
+    process.exit(2);
+  }
+  if (args.help) {
+    printHelp();
+    return;
+  }
+  // Offline mode yields a single pseudo-run whose `url` is the log file path.
+  const rows = args.logFile
+    ? [{ runId: null, createdAt: null, url: args.logFile, ...parseDiagnosticLines(readFileSync(args.logFile, 'utf8')) }]
+    : collectFromGh({ limit: args.limit, workflow: args.workflow, repo: args.repo });
+  const summary = summarize(rows);
+  if (args.format === 'json') {
+    process.stdout.write(`${JSON.stringify({ rows, summary }, null, 2)}\n`);
+    return;
+  }
+  const { totalRuns, sha, integrity } = summary;
+  process.stdout.write(`${renderTable(rows)}\n\n`);
+  const report = [
+    `Runs inspected:        ${totalRuns}`,
+    `SHA MATCH:             ${sha.MATCH}`,
+    `SHA MISMATCH:          ${sha.MISMATCH}`,
+    `SHA unavailable:       ${sha.unavailable}`,
+    `SHA missing:           ${sha.missing}  (rerun / no diagnostic)`,
+    `INTEGRITY MATCH:       ${integrity.MATCH}`,
+    `INTEGRITY MISMATCH:    ${integrity.MISMATCH}`,
+    `INTEGRITY unavailable: ${integrity.unavailable}`,
+    `INTEGRITY missing:     ${integrity.missing}`,
+    '',
+    'Diagnostic-only. ≥3 MATCH on both SHA + INTEGRITY is the threshold',
+    'named in DEC-PUBLISH-WORKFLOW-PACK-SHA-DIAGNOSTIC-ONLY-001 before any',
+    'reproducible-publish gate can be designed.',
+    '',
+  ];
+  process.stdout.write(report.join('\n'));
+}
+// Only run main when invoked directly (not when imported by tests).
+// Compare this module's URL with the file URL of the resolved entry script.
+// `pathToFileURL` handles URL encoding and Windows drive/backslash paths that
+// a hand-built `file://${path}` string gets wrong, and an exact match avoids
+// treating any entry script that merely shares this file's basename as a
+// direct invocation.
+const invokedDirectly = (() => {
+  const entry = process.argv[1];
+  if (!entry) return false;
+  try {
+    // realpath so a symlinked entry path still matches the module's own URL.
+    return import.meta.url === pathToFileURL(realpathSync(entry)).href;
+  } catch {
+    // Entry path could not be resolved on disk; treat the module as imported.
+    return false;
+  }
+})();
+if (invokedDirectly) {
+  main(process.argv.slice(2)).catch((err) => {
+    process.stderr.write(`collect-pack-sha-diagnostic: ${err.stack || err.message}\n`);
+    process.exit(1);
+  });
+}

package/scripts/prepublish-gate.sh ADDED Viewed

@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+# Prepublish gate — local-first quality floor before `git tag`.
+#
+# Replaces per-commit remote CI coverage that `.github/workflows/ci.yml` will
+# drop when CICD-SHRINK lands. Runs the same checks the GitHub-hosted runners
+# ran, in-process on the agent's box, before any tag or publish-workflow is
+# triggered. See .planning/CICD_REDUCTION_PLAN.md §7.
+#
+# Usage:
+#   bash cli/scripts/prepublish-gate.sh <target-version>
+#
+# Exit 0 + prints "PREPUBLISH GATE PASSED for <version>" → safe to tag/push.
+# Exit non-zero → do NOT tag, do NOT push. Fix the failure locally first.
+#
+# Discipline rule (CICD-SHRINK acceptance, new in DEC-RELEASE-CUT-AND-PUSH-AS-ATOMIC-001):
+# the release-cut turn MUST include this script's "PREPUBLISH GATE PASSED" line
+# in the turn's Evidence block before `git tag` is created.
+set -uo pipefail
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+CLI_DIR="${SCRIPT_DIR}/.."
+# `set -e` is not enabled (each step records its failure in STEP_STATUS and the
+# script continues), so guard the cd explicitly: every later step assumes it
+# runs from the CLI package root.
+cd "$CLI_DIR" || { echo "Error: cannot cd to ${CLI_DIR}" >&2; exit 1; }
+usage() {
+  echo "Usage: bash cli/scripts/prepublish-gate.sh <target-version>" >&2
+  echo "  <target-version>  Semver string (e.g., 2.150.0)." >&2
+}
+if [[ $# -ne 1 ]]; then
+  usage
+  exit 1
+fi
+TARGET_VERSION="$1"
+if ! [[ "$TARGET_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+  echo "Error: invalid semver '${TARGET_VERSION}'" >&2
+  usage
+  exit 1
+fi
+echo "Prepublish gate — target version ${TARGET_VERSION}"
+echo "================================================="
+echo "cwd: ${CLI_DIR}"
+echo ""
+STEP_STATUS=0
+# Print a failure line and mark the gate as failed without stopping the run.
+step_fail() {
+  echo ""
+  echo "  FAIL: $1"
+  STEP_STATUS=1
+}
+# ---------------------------------------------------------------------------
+# Step 1 — Full test suite (replaces per-push CI).
+#
+# Runs via `npm test` so Vitest + Node test phases are both exercised, and the
+# Node phase inherits the `--test-timeout=60000 --test-concurrency=4` caps from
+# package.json (DEC-BUG57-FAILFAST-NODE-TEST-001). We pass `--test-timeout`
+# explicitly too so the gate cannot be silently weakened by a future
+# package.json change; when the flag is repeated, the node runner is expected
+# to honor the last `--test-timeout` it receives.
+# NOTE(review): `npm test -- <args>` appends <args> to the end of the `test`
+# script, which ends in a nested `npm run test:node`; confirm the flag is
+# actually forwarded to `node --test` and not consumed by that inner npm call.
+# ---------------------------------------------------------------------------
+echo "[1/4] Full test suite — cd cli && npm test -- --test-timeout=60000"
+if npm test -- --test-timeout=60000; then
+  echo "  PASS: full test suite green"
+else
+  step_fail "full test suite failed"
+fi
+echo ""
+# ---------------------------------------------------------------------------
+# Step 2 — Release preflight in publish-gate mode.
+#
+# `release-preflight.sh --publish-gate` enforces strict mode (clean tree,
+# bumped package.json, CHANGELOG entry present, release-alignment manifest
+# aligned, pack dry-run succeeds) and runs only the release-gate-critical
+# test subset. Step 1 already ran the full suite; step 2 enforces the
+# release-specific invariants.
+# ---------------------------------------------------------------------------
+echo "[2/4] Release preflight — bash scripts/release-preflight.sh --publish-gate"
+if bash "${SCRIPT_DIR}/release-preflight.sh" --publish-gate --target-version "${TARGET_VERSION}"; then
+  echo "  PASS: release-preflight gate green"
+else
+  step_fail "release-preflight gate failed"
+fi
+echo ""
+# ---------------------------------------------------------------------------
+# Step 3 — npm pack --dry-run (claim-reality coverage).
+#
+# Proves the package can be packed from HEAD (only the exit status is checked;
+# output is discarded unless the step fails). The publish workflow itself
+# re-runs this; we catch pack failures here before the tag is even created, so
+# a broken `files:` glob or missing dist artifact never reaches remote CI.
+# ---------------------------------------------------------------------------
+echo "[3/4] Pack dry-run — npm pack --dry-run"
+if npm pack --dry-run >/dev/null 2>&1; then
+  echo "  PASS: npm pack --dry-run succeeded"
+else
+  step_fail "npm pack --dry-run failed (rerun with streamed output for details)"
+  # Re-run once with output visible so the failure reason lands in the log.
+  npm pack --dry-run 2>&1 | tail -20 || true
+fi
+echo ""
+# ---------------------------------------------------------------------------
+# Step 4 — Release-alignment manifest (17-surface drift gate).
+#
+# `check-release-alignment.mjs` owns the shared manifest of every surface that
+# must reference the target version (CHANGELOG, website release pages,
+# capabilities.json, implementor guide, launch evidence, onboarding docs,
+# marketing drafts, llms.txt, homebrew mirror, package.json). Step 2 runs the
+# same check, but we invoke it directly so the final status line is visible
+# in the gate's own output — agents reading this log get an explicit
+# alignment-pass signal without having to scrape the preflight's mid-run
+# block.
+# ---------------------------------------------------------------------------
+echo "[4/4] Release alignment — node scripts/check-release-alignment.mjs --scope current"
+if node "${SCRIPT_DIR}/check-release-alignment.mjs" --scope current --target-version "${TARGET_VERSION}"; then
+  echo "  PASS: release alignment green"
+else
+  step_fail "release alignment failed"
+fi
+echo ""
+echo "================================================="
+if [[ "${STEP_STATUS}" -ne 0 ]]; then
+  echo "PREPUBLISH GATE FAILED for ${TARGET_VERSION} — do NOT tag, do NOT push."
+  exit 1
+fi
+echo "PREPUBLISH GATE PASSED for ${TARGET_VERSION} — safe to tag and push."
+exit 0

package/scripts/release-bump.sh CHANGED Viewed

@@ -342,12 +342,12 @@ else
   PREFLIGHT_FAILED=0
   # 8.5a. Full test suite with release env vars
-  if env AGENTXCHAIN_RELEASE_TARGET_VERSION="${TARGET_VERSION}" AGENTXCHAIN_RELEASE_PREFLIGHT=1 npm test >/dev/null 2>&1; then
+  if env AGENTXCHAIN_RELEASE_TARGET_VERSION="${TARGET_VERSION}" AGENTXCHAIN_RELEASE_PREFLIGHT=1 npm test -- --test-timeout=60000 >/dev/null 2>&1; then
     echo "  OK: test suite passed"
   else
     echo "  FAIL: test suite failed" >&2
     echo "  Re-running with output for diagnostics..." >&2
-    env AGENTXCHAIN_RELEASE_TARGET_VERSION="${TARGET_VERSION}" AGENTXCHAIN_RELEASE_PREFLIGHT=1 npm test 2>&1 | tail -30 >&2
+    env AGENTXCHAIN_RELEASE_TARGET_VERSION="${TARGET_VERSION}" AGENTXCHAIN_RELEASE_PREFLIGHT=1 npm test -- --test-timeout=60000 2>&1 | tail -30 >&2
     PREFLIGHT_FAILED=1
   fi

package/scripts/render-github-release-body.mjs CHANGED Viewed

@@ -57,20 +57,27 @@ function extractSummaryParagraph(text, version) {
 }
 function extractAggregateEvidenceLine(text) {
-  const matches = [...text.matchAll(/^-\s+.*\b(\d+)\s+tests\b.*\b0 failures\b.*$/gm)];
+  const matches = [...text.matchAll(/^-\s+(.*\b(\d+)\s+tests\b.*\b0 failures\b.*)$/gm)];
   if (matches.length === 0) {
     throw new Error('Concrete aggregate evidence line missing from governed release page');
   }
   const aggregate = matches.reduce((best, match) => {
-    const count = Number(match[1]);
+    const count = Number(match[2]);
     if (!best || count > best.count) {
-      return { count, line: match[0] };
+      return { count, line: match[1] };
     }
     return best;
   }, null);
-  return aggregate.line.replace(/\*\*/g, '').replace(/`/g, '').replace(/,/g, '').trim();
+  const line = aggregate.line.replace(/\*\*/g, '').replace(/`/g, '').replace(/,/g, '').trim();
+  const evidenceMatch = line.match(/\b\d+\s+tests\b.*\b0 failures\b.*/);
+  if (!evidenceMatch) {
+    throw new Error('Concrete aggregate evidence line missing from governed release page');
+  }
+  const prefix = line.slice(0, evidenceMatch.index).trim();
+  const evidence = evidenceMatch[0].trim();
+  return `- ${evidence}${prefix ? ` — ${prefix.replace(/[→-]\s*$/, '').trim()}` : ''}`;
 }
 function getPreviousVersionTag(repoRoot, version) {

package/scripts/reproduce-bug-54.mjs CHANGED Viewed

@@ -58,7 +58,7 @@
  * purpose), and does NOT require the governed dispatcher to be running.
  */
-import { spawn } from 'child_process';
+import { spawn, spawnSync } from 'child_process';
 import { existsSync, readFileSync, readdirSync, statSync, writeFileSync } from 'fs';
 import { join, resolve } from 'path';
 import { fileURLToPath } from 'url';
@@ -315,20 +315,6 @@ async function runOneAttempt({
       attempt.spawn_attached_elapsed_ms = now - t0;
     });
-    if (child.stdin) {
-      child.stdin.on('error', (err) => {
-        // Capture but do not fail — adapter behavior matches: stdin EPIPE is
-        // logged and the spawn continues to play out via close/error events.
-        attempt.stderr += `[repro:stdin_error] ${err?.code || ''} ${err?.message || ''}\n`;
-      });
-      try {
-        if (transport === 'stdin') child.stdin.write(fullPrompt);
-        child.stdin.end();
-      } catch (err) {
-        attempt.stderr += `[repro:stdin_throw] ${err?.code || ''} ${err?.message || ''}\n`;
-      }
-    }
     if (child.stdout) {
       child.stdout.on('data', (chunk) => {
         const text = chunk.toString();
@@ -343,6 +329,20 @@ async function runOneAttempt({
       });
     }
+    if (child.stdin) {
+      child.stdin.on('error', (err) => {
+        // Capture but do not fail — adapter behavior matches: stdin EPIPE is
+        // logged and the spawn continues to play out via close/error events.
+        attempt.stderr += `[repro:stdin_error] ${err?.code || ''} ${err?.message || ''}\n`;
+      });
+      try {
+        if (transport === 'stdin') child.stdin.write(fullPrompt);
+        child.stdin.end();
+      } catch (err) {
+        attempt.stderr += `[repro:stdin_throw] ${err?.code || ''} ${err?.message || ''}\n`;
+      }
+    }
     if (child.stderr) {
       child.stderr.on('data', (chunk) => {
         const text = chunk.toString();
@@ -496,6 +496,7 @@ async function main() {
   const stdinBytes = transport === 'stdin' ? Buffer.byteLength(fullPrompt) : 0;
   const diagnosticArgs = redactArgs(args, fullPrompt, transport);
   const envSnapshot = snapshotEnv(spawnEnv);
+  const commandProbe = probeCommand(command, runtimeCwd, spawnEnv);
   const header = {
     repro_version: 1,
@@ -510,6 +511,7 @@ async function main() {
     stdin_bytes: stdinBytes,
     prompt_source: promptSource,
     env_snapshot: envSnapshot,
+    command_probe: commandProbe,
     watchdog_ms: opts.noWatchdog ? null : watchdogMs,
     no_watchdog: opts.noWatchdog,
     attempts_planned: opts.attempts,
@@ -529,6 +531,11 @@ async function main() {
   console.error(`[repro] prompt       : ${promptSource.kind} (${promptSource.length_bytes} bytes)`);
   console.error(`[repro] watchdog_ms  : ${header.watchdog_ms ?? 'disabled'}`);
   console.error(`[repro] auth env     : ${JSON.stringify(envSnapshot.auth_env_present)}`);
+  if (commandProbe.kind === 'claude_version') {
+    console.error(`[repro] claude probe : status=${commandProbe.status ?? '-'} signal=${commandProbe.signal ?? '-'} stdout=${JSON.stringify(commandProbe.stdout || '')}`);
+  } else {
+    console.error(`[repro] command probe: ${commandProbe.kind} (${commandProbe.reason})`);
+  }
   console.error(`[repro] attempts     : ${header.attempts_planned}`);
   console.error('');
@@ -617,6 +624,65 @@ function summarize(attempts) {
   };
 }
+/**
+ * Run `<command> --version` synchronously (10 s timeout, 1 MiB output cap) and
+ * return a plain record of the outcome for the repro header.
+ *
+ * Commands that `isClaudeCommand` rejects are not executed; a
+ * `{ kind: 'skipped' }` record is returned instead. Spawn failures are
+ * captured in the record's `error` field rather than thrown.
+ *
+ * @param {string} command - Executable to probe.
+ * @param {string} cwd - Working directory for the probe.
+ * @param {object} env - Environment passed to the child process.
+ * @returns {object} Either the skipped record or a `claude_version` record.
+ */
+function probeCommand(command, cwd, env) {
+  if (!isClaudeCommand(command)) {
+    return { kind: 'skipped', reason: 'not a claude command' };
+  }
+  const probeArgs = ['--version'];
+  const timeoutMs = 10_000;
+  // Reduce an error-like value to the serializable fields worth logging.
+  const describeError = (failure) => ({
+    code: failure?.code ?? null,
+    errno: failure?.errno ?? null,
+    syscall: failure?.syscall ?? null,
+    message: failure?.message || String(failure),
+  });
+  // Shared record prefix; `outcome` supplies status..timed_out in order.
+  const buildReport = (outcome) => ({
+    kind: 'claude_version',
+    command,
+    args: [...probeArgs],
+    timeout_ms: timeoutMs,
+    ...outcome,
+  });
+  try {
+    const result = spawnSync(command, probeArgs, {
+      cwd,
+      env,
+      encoding: 'utf8',
+      timeout: timeoutMs,
+      maxBuffer: 1024 * 1024,
+    });
+    return buildReport({
+      status: result.status,
+      signal: result.signal,
+      stdout: result.stdout || '',
+      stderr: result.stderr || '',
+      error: result.error ? describeError(result.error) : null,
+      timed_out: result.error?.code === 'ETIMEDOUT',
+    });
+  } catch (err) {
+    return buildReport({
+      status: null,
+      signal: null,
+      stdout: '',
+      stderr: '',
+      error: describeError(err),
+      timed_out: err?.code === 'ETIMEDOUT',
+    });
+  }
+}
+/**
+ * True when `command` is exactly `claude` or a path whose final segment is
+ * `claude`, with either `/` or `\` as the separator. Non-strings are false.
+ */
+function isClaudeCommand(command) {
+  if (typeof command !== 'string') return false;
+  const lastSegment = command.split(/[\\/]/).at(-1);
+  return lastSegment === 'claude';
+}
 main().catch((err) => {
   console.error(`[repro] fatal: ${err?.stack || err?.message || err}`);
   process.exit(99);