@bookedsolid/rea 0.27.0 → 0.28.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,9 +39,34 @@ import { loadPolicyAsync } from '../policy/loader.js';
39
39
  import { CodexNotInstalledError, CodexProtocolError, CodexSubprocessError, CodexTimeoutError, IRON_GATE_DEFAULT_MODEL, IRON_GATE_DEFAULT_REASONING, createRealGitExecutor, runCodexReview, } from '../hooks/push-gate/codex-runner.js';
40
40
  import { resolvePushGatePolicy } from '../hooks/push-gate/policy.js';
41
41
  import { resolveBaseRef } from '../hooks/push-gate/base.js';
42
- import { summarizeReview } from '../hooks/push-gate/findings.js';
42
+ import { summarizeReview, } from '../hooks/push-gate/findings.js';
43
+ import { writeLastReview } from '../hooks/push-gate/report.js';
43
44
  import { computeTreeToken, EMPTY_TREE_SHA } from '../audit/content-token.js';
45
+ import { compileDefaultSecretPatterns, redactSecrets, } from '../gateway/middleware/redact.js';
44
46
  import { err, log } from './utils.js';
47
+ /** Relative path to the last-review snapshot, surfaced in JSON output. */
48
+ const LAST_REVIEW_RELATIVE = '.rea/last-review.json';
49
/**
 * Produce redacted copies of in-memory findings for the case where
 * `writeLastReview` threw and therefore returned no redacted findings.
 *
 * `runReview` falls back to this helper so that finding text is still passed
 * through the secret redactor before it is emitted via `--with-findings`
 * (plain or `--json`). Codex prose can quote secrets from the diff, so the
 * fallback keeps the redaction guarantee even when the on-disk write failed.
 *
 * Only `title` and `body` are run through `redactSecrets`; `severity`, `file`
 * and `line` are copied as-is, and `file` / `line` are only present on the
 * copy when they are defined on the input finding.
 *
 * @param findings Findings produced by the codex review.
 * @returns A new array of new finding objects; the inputs are not mutated.
 */
function redactFindingsInMemory(findings) {
    const secretPatterns = compileDefaultSecretPatterns({ source: 'default' });
    const scrub = (text) => redactSecrets(text, secretPatterns).output;
    return findings.map((finding) => {
        // Key insertion order (severity, title, body, file, line) is kept so
        // the serialized JSON is laid out exactly as before.
        const cleaned = {
            severity: finding.severity,
            title: scrub(finding.title),
            body: scrub(finding.body),
        };
        if (finding.file !== undefined) {
            cleaned.file = finding.file;
        }
        if (finding.line !== undefined) {
            cleaned.line = finding.line;
        }
        return cleaned;
    });
}
45
70
  const PROVIDER_CODEX = 'codex';
46
71
  /**
47
72
  * Probe `codex --version` synchronously. Same shape as the push-gate's
@@ -84,7 +109,7 @@ async function resolveLocalReviewMode(baseDir) {
84
109
  * the commander binding can stay thin. Throws via `process.exit` (CLI
85
110
  * convention across `src/cli/`).
86
111
  */
87
- export async function runReview(options) {
112
+ export async function runReview(options, deps = {}) {
88
113
  const baseDir = process.cwd();
89
114
  const strictFailOn = options.strictFailOn ?? 'blocking';
90
115
  const { mode, policy } = await resolveLocalReviewMode(baseDir);
@@ -131,7 +156,8 @@ export async function runReview(options) {
131
156
  // Codex available — run the review.
132
157
  let outcome;
133
158
  try {
134
- outcome = await executeCodexReview(baseDir, options);
159
+ const exec = deps.executeCodexReview ?? executeCodexReview;
160
+ outcome = await exec(baseDir, options);
135
161
  }
136
162
  catch (e) {
137
163
  const msg = e instanceof Error ? e.message : String(e);
@@ -168,6 +194,49 @@ export async function runReview(options) {
168
194
  if (probe.version !== undefined)
169
195
  metadata.provider_version = probe.version;
170
196
  await safeAudit(baseDir, LOCAL_REVIEW_TOOL_NAME, outcome.verdict === 'blocking' ? InvocationStatus.Denied : InvocationStatus.Allowed, metadata, policy);
197
+ // 0.28.1 defect-V: persist `.rea/last-review.json` on EVERY successful
198
+ // codex run (pass / concerns / blocking) BEFORE the exit so agents can
199
+ // read structured findings to remediate. Pre-fix only the push-gate
200
+ // wrote this file; `rea review` discarded the bodies after counting,
201
+ // so consumers saw stale snapshots from days-old push-gate runs (Ava
202
+ // reported a 2026-05-08 file surviving across new 2026-05-09 runs).
203
+ //
204
+ // Reuses the push-gate's writer — the canonical atomic-write path with
205
+ // redaction. We do NOT inline a second implementation: any divergence
206
+ // between the two writers would silently desynchronize the schema for
207
+ // `rea preflight` and any tooling that reads last-review.json.
208
+ //
209
+ // Skipped/error paths (codex unavailable, codex error) do NOT call this
210
+ // — there are no findings to serialize.
211
+ let lastReviewWritten;
212
+ try {
213
+ // `LocalReviewVerdict` permits `'error'` for the audit-record schema
214
+ // (transport / subprocess failures) but the codex success path can
215
+ // only produce pass | concerns | blocking — we caught throw above.
216
+ // Narrow here so the report writer's stricter `Verdict` type accepts
217
+ // it without losing the audit shape elsewhere in this file.
218
+ const verdict = outcome.verdict;
219
+ lastReviewWritten = writeLastReview({
220
+ baseDir,
221
+ summary: {
222
+ verdict,
223
+ findings: outcome.findings,
224
+ reviewText: outcome.reviewText,
225
+ },
226
+ baseRef: outcome.baseRef,
227
+ headSha: outcome.headSha,
228
+ eventCount: outcome.eventCount,
229
+ durationSeconds: outcome.durationSeconds,
230
+ });
231
+ }
232
+ catch (e) {
233
+ // last-review.json is a remediation surface, not a gate. A write
234
+ // failure (read-only fs, ENOSPC, race with another run) must not
235
+ // change the verdict-driven exit code. Surface the error to stderr
236
+ // so operators can correlate, then continue.
237
+ const msg = e instanceof Error ? e.message : String(e);
238
+ process.stderr.write(`rea: last-review.json write failed: ${msg}\n`);
239
+ }
171
240
  // Decide exit code based on strictFailOn.
172
241
  let exitCode;
173
242
  if (outcome.verdict === 'blocking') {
@@ -179,8 +248,17 @@ export async function runReview(options) {
179
248
  else {
180
249
  exitCode = 0;
181
250
  }
251
+ // 0.28.1 defect-V: redacted findings come from the writer when it
252
+ // succeeded (so `--with-findings` shows the same bodies that landed on
253
+ // disk). When the write FAILED we re-redact the in-memory findings
254
+ // inline (round-1 P2-1) — without this fallback, secrets that codex
255
+ // copied from the diff into a finding body would escape via stdout/
256
+ // JSON in the exact failure mode where the on-disk surface is gone.
257
+ const findingsForOutput = lastReviewWritten !== undefined
258
+ ? lastReviewWritten.findings
259
+ : redactFindingsInMemory(outcome.findings);
182
260
  if (options.json === true) {
183
- process.stdout.write(JSON.stringify({
261
+ const payload = {
184
262
  status: outcome.verdict,
185
263
  finding_count: outcome.findingCount,
186
264
  head_sha: outcome.headSha,
@@ -190,14 +268,89 @@ export async function runReview(options) {
190
268
  reasoning_effort: outcome.reasoningEffort,
191
269
  duration_seconds: outcome.durationSeconds,
192
270
  exit_code: exitCode,
193
- }) + '\n');
271
+ // 0.28.1 defect-V round-1 P2-2: only advertise `last_review_path`
272
+ // when the writer actually produced a current snapshot. If the
273
+ // write threw, the file on disk is either missing or a stale
274
+ // snapshot from an older run — pointing JSON consumers at it
275
+ // would let agents remediate against the wrong findings while
276
+ // the current run still exits successfully. Emit `null` and an
277
+ // explicit `last_review_error` so consumers can branch
278
+ // deterministically.
279
+ last_review_path: lastReviewWritten !== undefined ? LAST_REVIEW_RELATIVE : null,
280
+ };
281
+ if (lastReviewWritten === undefined) {
282
+ payload.last_review_error = 'write_failed';
283
+ }
284
+ if (options.withFindings === true) {
285
+ // Mirror last-review.json's Finding shape so JSON consumers see one
286
+ // schema. Findings are pre-redacted (writer-redacted on success,
287
+ // re-redacted inline on writer failure — see findingsForOutput).
288
+ payload.findings = findingsForOutput;
289
+ }
290
+ process.stdout.write(JSON.stringify(payload) + '\n');
194
291
  }
195
292
  else {
196
293
  log(`local review: ${outcome.verdict} (${outcome.findingCount} finding(s)) — head=${outcome.headSha.slice(0, 12)} base=${outcome.baseRef}`);
197
294
  log(`audit entry written: tool_name=${LOCAL_REVIEW_TOOL_NAME}`);
295
+ if (options.withFindings === true) {
296
+ printFindingsBySeverity(findingsForOutput, lastReviewWritten !== undefined);
297
+ }
198
298
  }
199
299
  process.exit(exitCode);
200
300
  }
301
/**
 * Print findings through `log()`, grouped by severity in the fixed order
 * P1 → P2 → P3. Each finding renders as one line:
 *
 *   - [P1] <title> — <file>:<line>
 *
 * The location suffix is omitted when `file` is undefined, and `:<line>` is
 * omitted when `line` is undefined. Findings whose `severity` is not one of
 * `P1`, `P2`, `P3` are not printed. Nothing is printed for an empty list.
 *
 * Body handling depends on whether `.rea/last-review.json` was written:
 *
 * - `lastReviewWritten` truthy: the banner points at the snapshot file and
 *   bodies are NOT printed, keeping stdout scannable.
 * - `lastReviewWritten` falsy: the banner states that the write failed and
 *   that any file on disk is stale, and every non-empty line of each body is
 *   printed beneath its finding, because there is no current file to read
 *   the bodies from. Callers are expected to pass already-redacted findings.
 *
 * @param findings Findings to render (already redacted by the caller).
 * @param lastReviewWritten Whether the current run's snapshot was written.
 */
function printFindingsBySeverity(findings, lastReviewWritten) {
    if (findings.length === 0) {
        return;
    }
    log('');
    log(lastReviewWritten
        ? `findings (see ${LAST_REVIEW_RELATIVE} for full bodies):`
        : 'findings (last-review.json write FAILED — bodies shown inline below; stale file may exist on disk and should be ignored):');
    for (const severity of ['P1', 'P2', 'P3']) {
        for (const finding of findings) {
            if (finding.severity !== severity) {
                continue;
            }
            let location = '';
            if (finding.file !== undefined) {
                const lineSuffix = finding.line !== undefined ? `:${finding.line}` : '';
                location = ` — ${finding.file}${lineSuffix}`;
            }
            log(` - [${severity}] ${finding.title}${location}`);
            // Bodies are only rendered inline when the snapshot write failed;
            // blank body lines are dropped.
            const showBody = !lastReviewWritten && finding.body.length > 0;
            if (showBody) {
                finding.body
                    .split(/\r?\n/)
                    .filter((bodyLine) => bodyLine.length !== 0)
                    .forEach((bodyLine) => {
                        log(` ${bodyLine}`);
                    });
            }
        }
    }
}
201
354
  /**
202
355
  * Execute the codex review subprocess and translate the output to a
203
356
  * verdict. Reuses the push-gate's resolved policy so `codex_model` /
@@ -260,6 +413,13 @@ async function executeCodexReview(baseDir, options) {
260
413
  durationSeconds: codexResult.durationSeconds,
261
414
  model: resolved.codex_model ?? IRON_GATE_DEFAULT_MODEL,
262
415
  reasoningEffort: resolved.codex_reasoning_effort ?? IRON_GATE_DEFAULT_REASONING,
416
+ // 0.28.1 defect-V: thread the structured findings + reviewText + event
417
+ // count through to the caller so `runReview` can persist last-review.json
418
+ // and (optionally) print bodies. Pre-fix these were dropped on the floor
419
+ // after `summary.findings.length` was computed.
420
+ findings: summary.findings,
421
+ reviewText: codexResult.reviewText,
422
+ eventCount: codexResult.eventCount,
263
423
  };
264
424
  }
265
425
  function classifyCodexError(e) {
@@ -313,11 +473,13 @@ export function registerReviewCommand(program) {
313
473
  return raw;
314
474
  })
315
475
  .option('--json', 'emit a single-line JSON result instead of human-readable output')
476
+ .option('--with-findings', 'after the summary, print findings grouped by severity (P1/P2/P3); when combined with --json, the JSON payload gains a `findings` array')
316
477
  .action(async (opts) => {
317
478
  await runReview({
318
479
  ...(opts.base !== undefined ? { base: opts.base } : {}),
319
480
  ...(opts.strictFailOn !== undefined ? { strictFailOn: opts.strictFailOn } : {}),
320
481
  ...(opts.json === true ? { json: true } : {}),
482
+ ...(opts.withFindings === true ? { withFindings: true } : {}),
321
483
  });
322
484
  });
323
485
  }
@@ -75,6 +75,12 @@ export interface LiveDownstreamSnapshot {
75
75
  circuit_state: 'closed' | 'open' | 'half-open';
76
76
  retry_at: string | null;
77
77
  last_error: string | null;
78
+ /**
79
+ * 0.28.0 helix-025 F1 — `'never' | 'ok' | 'errored'` tri-state.
80
+ * `null` for snapshots written by pre-0.28.0 gateways that did not
81
+ * include the field (back-compat).
82
+ */
83
+ connection_state: 'never' | 'ok' | 'errored' | null;
78
84
  tools_count: number | null;
79
85
  open_transitions: number;
80
86
  session_blocker_emitted: boolean;
@@ -129,6 +129,12 @@ function parseDownstreamEntry(raw) {
129
129
  const circuit = r.circuit_state === 'open' || r.circuit_state === 'half-open' || r.circuit_state === 'closed'
130
130
  ? r.circuit_state
131
131
  : 'closed';
132
+ // 0.28.0 helix-025 F1: tri-state. `null` when the snapshot was written
133
+ // by a pre-0.28.0 gateway (back-compat) — the pretty-printer renders
134
+ // that as "—" rather than fabricating a value.
135
+ const connectionState = r.connection_state === 'never' || r.connection_state === 'ok' || r.connection_state === 'errored'
136
+ ? r.connection_state
137
+ : null;
132
138
  return {
133
139
  name: r.name,
134
140
  connected: typeof r.connected === 'boolean' ? r.connected : false,
@@ -136,6 +142,7 @@ function parseDownstreamEntry(raw) {
136
142
  circuit_state: circuit,
137
143
  retry_at: typeof r.retry_at === 'string' ? r.retry_at : null,
138
144
  last_error: typeof r.last_error === 'string' ? r.last_error : null,
145
+ connection_state: connectionState,
139
146
  tools_count: typeof r.tools_count === 'number' && Number.isInteger(r.tools_count) ? r.tools_count : null,
140
147
  open_transitions: typeof r.open_transitions === 'number' && Number.isInteger(r.open_transitions)
141
148
  ? r.open_transitions
@@ -0,0 +1,149 @@
1
+ /**
2
+ * `rea verify-claim <claim-id>` — replay a recorded security-claim PoC
3
+ * battery against the currently-installed (or in-tree dogfood) rea CLI.
4
+ *
5
+ * The centerpiece of 0.28.0 (4th structural pivot — claims as
6
+ * machine-verifiable artifacts rather than prose-only release notes).
7
+ *
8
+ * Each claim lives at `data/claims/<id>.json` and lists 1..N PoCs.
9
+ * Every PoC has a `type` that names the executor:
10
+ *
11
+ * - `scan-bash` (primary): pipes `input` into
12
+ * `dist/cli/index.js hook scan-bash --mode <protected|blocked>` and
13
+ * compares the resulting verdict to `expected_verdict`.
14
+ * - `shellcheck` (helix-031 case): runs shellcheck on `target` and
15
+ * asserts the run is clean (no SC<code> warnings).
16
+ *
17
+ * Resolution order for the rea CLI under test:
18
+ *
19
+ * - `--installed` → resolves to `<cwd>/node_modules/@bookedsolid/rea/dist/cli/index.js`.
20
+ * This is the canonical "verify against MY pinned rea" mode for
21
+ * consumers — tells them whether the version they actually have
22
+ * installed still rejects the PoCs the claim targets.
23
+ * - default → uses the same `dist/cli/index.js` that ships with the
24
+ * CLI itself (i.e. the rea repo's own dogfood). Resolved relative
25
+ * to the running script.
26
+ *
27
+ * Exit codes:
28
+ *
29
+ * - 0 — every PoC matched the recorded `expected_verdict`.
30
+ * - 1 — at least one PoC mismatched (regression — investigate).
31
+ * - 2 — claim id is unknown / no JSON file at `data/claims/<id>.json`.
32
+ */
33
+ import { type SpawnSyncReturns } from 'node:child_process';
34
+ import type { Command } from 'commander';
35
/**
 * A PoC handled by the `scan-bash` executor. Per the module header, `input`
 * is piped into `hook scan-bash --mode <protected|blocked>` and the resulting
 * verdict is compared to `expected_verdict`.
 */
export interface ScanBashPoC {
    /** Identifier of this PoC (presumably what `PoCResult.poc_id` reports — confirm in the implementation). */
    id: string;
    /** Discriminant that selects the `scan-bash` executor. */
    type: 'scan-bash';
    /** Text piped into the scan-bash hook. */
    input: string;
    /** Value supplied to the hook's `--mode` flag. */
    mode: 'protected' | 'blocked';
    /** Verdict the hook is expected to return for `input`. */
    expected_verdict: 'allow' | 'block';
}
42
/**
 * A PoC handled by the `shellcheck` executor. Per the module header,
 * shellcheck is run on `target` and the run is asserted to be clean
 * (no SC<code> warnings).
 */
export interface ShellcheckPoC {
    /** Identifier of this PoC (presumably what `PoCResult.poc_id` reports — confirm in the implementation). */
    id: string;
    /** Discriminant that selects the `shellcheck` executor. */
    type: 'shellcheck';
    /** What shellcheck is run on (presumably a file path; how it is resolved is not visible here). */
    target: string;
    /** The only recorded expectation for this executor: a clean run. */
    expected_verdict: 'clean';
}
48
/** Any PoC a claim can list; discriminated by the `type` field, which names the executor. */
export type ClaimPoC = ScanBashPoC | ShellcheckPoC;
49
/**
 * A recorded security claim. Per the module header, each claim lives at
 * `data/claims/<id>.json` and lists 1..N PoCs.
 */
export interface Claim {
    /** Claim identifier; matches the `<id>` of the claim's JSON file name. */
    id: string;
    /** Human-readable title of the claim. */
    title: string;
    /** NOTE(review): presumably the release in which the issue was introduced — confirm. */
    introduced_in: string;
    /** NOTE(review): presumably the release in which the issue was closed — confirm. */
    closed_in: string;
    /** Optional free-text summary. */
    summary?: string;
    /** PoCs replayed when the claim is verified. */
    pocs: ClaimPoC[];
}
57
/**
 * Options accepted by `runVerifyClaim` and `resolveCli`. Every field is
 * optional; `claimsDir`, `cliOverride` and `cwd` are described below as
 * overrides intended for tests.
 */
export interface VerifyClaimOptions {
    /** Resolve the CLI to `<cwd>/node_modules/@bookedsolid/rea/dist/cli/index.js`. */
    installed?: boolean;
    /** Emit a single JSON document on stdout. */
    json?: boolean;
    /**
     * Override the claim-file root. Production resolves this internally
     * (ships at `data/claims/` next to the package). Tests pass an
     * absolute path so they can stage fixtures.
     */
    claimsDir?: string;
    /**
     * Override the rea CLI under test. Wins over `installed`. Used by
     * tests to point at a stub binary. Production callers leave this
     * unset.
     */
    cliOverride?: string;
    /**
     * Override the working directory the `--installed` resolver uses.
     * Defaults to `process.cwd()`; tests pass a tmp dir.
     */
    cwd?: string;
}
80
/** Outcome of running a single PoC; returned by `runPoC`. */
export interface PoCResult {
    /** Identifier of the PoC this result belongs to. */
    poc_id: string;
    /** Executor type of the PoC (`'scan-bash'` or `'shellcheck'`). */
    type: ClaimPoC['type'];
    /** The expected verdict, as a string. */
    expected: string;
    /** The observed verdict, as a string. */
    actual: string;
    /** Whether the PoC matched its expectation. */
    match: boolean;
    /** Empty on match; populated on mismatch with a one-line diagnostic. */
    detail: string;
}
89
/** Aggregate outcome of replaying every PoC of one claim; returned by `runVerifyClaimSync`. */
export interface VerifyClaimResult {
    /** Identifier of the verified claim. */
    claim_id: string;
    /** NOTE(review): presumably the path of the CLI under test (`cliPath`) — confirm. */
    cli: string;
    /** NOTE(review): presumably the number of PoCs run — confirm. */
    total: number;
    /** NOTE(review): presumably the count of results with `match === true` — confirm. */
    matched: number;
    /** NOTE(review): presumably the count of results with `match === false` — confirm. */
    mismatched: number;
    /** Per-PoC results. */
    results: PoCResult[];
    /**
     * Exit code per the module header: 0 — every PoC matched; 1 — at least
     * one PoC mismatched; 2 — claim id unknown / no claim file.
     */
    exit_code: 0 | 1 | 2;
}
98
/**
 * Resolve the directory holding the bundled claim JSON files. Walks up
 * from the running script (or from this file at dev time) looking for
 * a `data/claims/` sibling. Returns null when the directory cannot be
 * located — the caller falls back to whatever `claimsDir` override was
 * passed.
 *
 * @returns The claims directory path, or `null` when none was found.
 */
export declare function resolveDefaultClaimsDir(): string | null;
106
/**
 * Load and validate a claim file. Throws on malformed JSON or shape
 * mismatch — `runVerifyClaim` translates the throw into exit-code 2 +
 * a stderr message.
 *
 * @param claimsDir Directory that holds the claim JSON files.
 * @param claimId Identifier of the claim to load (the `<id>` in `<id>.json`).
 * @returns The parsed and validated claim.
 * @throws When the file's JSON is malformed or does not match the `Claim` shape.
 */
export declare function loadClaim(claimsDir: string, claimId: string): Claim;
112
/**
 * Resolve the rea CLI to invoke for `scan-bash` PoCs.
 *
 * Precedence: cliOverride > --installed > sibling dogfood dist/cli/index.js.
 *
 * Returns an object `{ cmd, args, path }` (not a tuple) so the caller can do
 * `spawnSync(cmd, [...args, 'hook', 'scan-bash', ...])`. The shape
 * keeps node-vs-direct-binary differences localized to this resolver.
 *
 * @param opts Options; `cliOverride`, `installed` and `cwd` drive the resolution.
 * @returns `cmd` — the executable to spawn; `args` — arguments that must
 *   precede the hook arguments; `path` — NOTE(review): presumably the resolved
 *   CLI script path used for reporting — confirm in the implementation.
 */
export declare function resolveCli(opts: VerifyClaimOptions): {
    cmd: string;
    args: string[];
    path: string;
};
126
/**
 * Call signature of the synchronous spawn function that `runPoC` and
 * `runVerifyClaimSync` accept, so tests can substitute a fake. It returns
 * `SpawnSyncReturns<string>` from `node:child_process`; the options are
 * limited to the three fields listed here.
 */
interface SpawnImpl {
    (cmd: string, args: string[], options: {
        /** Optional text for the child (presumably written to its stdin — confirm). */
        input?: string;
        /** Always `'utf8'`, which is why the return type is parameterized with `string`. */
        encoding: 'utf8';
        /** Timeout for the child process (presumably milliseconds, as in `spawnSync` — confirm). */
        timeout: number;
    }): SpawnSyncReturns<string>;
}
133
/**
 * Run a single PoC against the resolved CLI. Pure function — no global
 * state, all dependencies threaded through `cliCmd` / `cliArgs` / `spawn`.
 * Tests substitute `spawn` with a fake.
 *
 * @param poc The PoC to run.
 * @param cliCmd Command to spawn (the `cmd` returned by `resolveCli`).
 * @param cliArgs Leading arguments for the command (the `args` returned by `resolveCli`).
 * @param spawn Optional spawn implementation; the default is not visible in this declaration.
 * @param cwd Optional working directory; how it is used is not visible in this declaration.
 * @returns The result for this PoC.
 */
export declare function runPoC(poc: ClaimPoC, cliCmd: string, cliArgs: string[], spawn?: SpawnImpl, cwd?: string): PoCResult;
139
/**
 * Run all PoCs in a claim. Pure — exposed so tests can drive without
 * spawning processes if they substitute `spawn`.
 *
 * @param claim The claim whose PoCs are run.
 * @param cliCmd Command to spawn (the `cmd` returned by `resolveCli`).
 * @param cliArgs Leading arguments for the command (the `args` returned by `resolveCli`).
 * @param cliPath Path of the CLI under test (presumably reported as `VerifyClaimResult.cli` — confirm).
 * @param spawn Optional spawn implementation; the default is not visible in this declaration.
 * @param cwd Optional working directory; how it is used is not visible in this declaration.
 * @returns The aggregate result for the claim.
 */
export declare function runVerifyClaimSync(claim: Claim, cliCmd: string, cliArgs: string[], cliPath: string, spawn?: SpawnImpl, cwd?: string): VerifyClaimResult;
144
/**
 * Asynchronous entry point behind `rea verify-claim <claim-id>`.
 *
 * Per `loadClaim`'s documentation, a claim that fails to load is translated
 * here into exit code 2 plus a stderr message. The module header lists the
 * remaining exit codes: 0 when every PoC matched, 1 when at least one
 * mismatched. NOTE(review): whether this function calls `process.exit`
 * itself is not visible from the declaration — confirm.
 *
 * @param claimId Identifier of the claim to verify.
 * @param opts Verification options.
 */
export declare function runVerifyClaim(claimId: string, opts: VerifyClaimOptions): Promise<void>;
145
/**
 * Attach `rea verify-claim <claim-id>` to the commander program.
 *
 * @param program The commander `Command` the subcommand is registered on.
 */
export declare function registerVerifyClaimCommand(program: Command): void;
149
+ export {};