@bookedsolid/rea 0.18.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.husky/commit-msg CHANGED
@@ -86,9 +86,9 @@ MATCHES=""
86
86
  # Pattern 2 below catches Co-Authored-By with named tools regardless of
87
87
  # email, so dropping users.noreply.github.com from this branch only
88
88
  # relaxes the check for human collaborators — never for AI.
89
- if grep -qiE 'Co-Authored-By:.*noreply@(anthropic\.com|openai\.com|github-copilot|github\.com|claude\.ai|chatgpt\.com|googlemail\.com|google\.com|cursor\.com|codeium\.com|tabnine\.com|amazon\.com|amazonaws\.com|amazon-q\.amazonaws\.com|cody\.dev|sourcegraph\.com)' "$COMMIT_MSG_FILE" 2>/dev/null; then
89
+ if grep -qiE 'Co-Authored-By:.*noreply@(anthropic\.com|openai\.com|github-copilot|github\.com|claude\.ai|chatgpt\.com|googlemail\.com|google\.com|cursor\.com|codeium\.com|tabnine\.com|amazon\.com|amazonaws\.com|amazon-q\.amazonaws\.com|cody\.dev|sourcegraph\.com|mistral\.ai|xai-org|x\.ai|inflection\.ai|perplexity\.ai|replit\.com|jetbrains\.com|bito\.ai|pieces\.app|phind\.com|you\.com)' "$COMMIT_MSG_FILE" 2>/dev/null; then
90
90
  BLOCKED=1
91
- MATCHES="${MATCHES}$(grep -niE 'Co-Authored-By:.*noreply@(anthropic\.com|openai\.com|github-copilot|github\.com|claude\.ai|chatgpt\.com|googlemail\.com|google\.com|cursor\.com|codeium\.com|tabnine\.com|amazon\.com|amazonaws\.com|amazon-q\.amazonaws\.com|cody\.dev|sourcegraph\.com)' "$COMMIT_MSG_FILE" 2>/dev/null)
91
+ MATCHES="${MATCHES}$(grep -niE 'Co-Authored-By:.*noreply@(anthropic\.com|openai\.com|github-copilot|github\.com|claude\.ai|chatgpt\.com|googlemail\.com|google\.com|cursor\.com|codeium\.com|tabnine\.com|amazon\.com|amazonaws\.com|amazon-q\.amazonaws\.com|cody\.dev|sourcegraph\.com|mistral\.ai|xai-org|x\.ai|inflection\.ai|perplexity\.ai|replit\.com|jetbrains\.com|bito\.ai|pieces\.app|phind\.com|you\.com)' "$COMMIT_MSG_FILE" 2>/dev/null)
92
92
  "
93
93
  fi
94
94
 
package/dist/cli/init.js CHANGED
@@ -297,6 +297,23 @@ function writePolicyYaml(targetDir, config, layered) {
297
297
  lines.push(` max_bash_output_lines: ${cp.max_bash_output_lines}`);
298
298
  }
299
299
  }
300
+ // 0.18.1+ helixir #9: emit audit.rotation when the layered profile
301
+ // declared it. Empty `rotation: {}` opts in to documented defaults
302
+ // (50 MiB / 30 days); explicit values override.
303
+ if (layered.audit !== undefined) {
304
+ lines.push(`audit:`);
305
+ if (layered.audit.rotation !== undefined) {
306
+ const rot = layered.audit.rotation;
307
+ const hasFields = rot.max_bytes !== undefined || rot.max_age_days !== undefined;
308
+ lines.push(hasFields ? ` rotation:` : ` rotation: {}`);
309
+ if (rot.max_bytes !== undefined) {
310
+ lines.push(` max_bytes: ${rot.max_bytes}`);
311
+ }
312
+ if (rot.max_age_days !== undefined) {
313
+ lines.push(` max_age_days: ${rot.max_age_days}`);
314
+ }
315
+ }
316
+ }
300
317
  // G11.4: always emit the review block explicitly. Making the value
301
318
  // visible in the generated file helps the operator notice what was
302
319
  // chosen at init time and simplifies switching modes later (edit a
@@ -19,6 +19,23 @@
19
19
  * and so the one git dependency surface is in one place.
20
20
  */
21
21
  import type { ChildProcessWithoutNullStreams } from 'node:child_process';
22
+ /**
23
+ * Default codex model when policy doesn't pin one. Always passed via
24
+ * `-c model="<name>"` so codex's own default (`codex-auto-review` at
25
+ * medium reasoning) is unreachable through the rea push-gate.
26
+ *
27
+ * 0.19.0 code-reviewer P3-4: exported as a single source of truth.
28
+ * `src/hooks/push-gate/index.ts` imports this for the verdict-cache
29
+ * write so the cached `model` field reflects the same constant the
30
+ * runner actually used. Bump here to bump everywhere.
31
+ */
32
+ export declare const IRON_GATE_DEFAULT_MODEL = "gpt-5.4";
33
+ /**
34
+ * Default reasoning effort when policy doesn't pin one. `high` for
35
+ * verdict stability — the helixir 2026-04-26 thrashing came from the
36
+ * lower-reasoning default.
37
+ */
38
+ export declare const IRON_GATE_DEFAULT_REASONING: 'low' | 'medium' | 'high';
22
39
  export declare class CodexNotInstalledError extends Error {
23
40
  readonly kind: "not-installed";
24
41
  constructor();
@@ -20,6 +20,26 @@
20
20
  */
21
21
  import { spawn, spawnSync } from 'node:child_process';
22
22
  // ---------------------------------------------------------------------------
23
+ // Iron-gate runtime defaults (0.18.0+)
24
+ // ---------------------------------------------------------------------------
25
+ /**
26
+ * Default codex model when policy doesn't pin one. Always passed via
27
+ * `-c model="<name>"` so codex's own default (`codex-auto-review` at
28
+ * medium reasoning) is unreachable through the rea push-gate.
29
+ *
30
+ * 0.19.0 code-reviewer P3-4: exported as a single source of truth.
31
+ * `src/hooks/push-gate/index.ts` imports this for the verdict-cache
32
+ * write so the cached `model` field reflects the same constant the
33
+ * runner actually used. Bump here to bump everywhere.
34
+ */
35
+ export const IRON_GATE_DEFAULT_MODEL = 'gpt-5.4';
36
+ /**
37
+ * Default reasoning effort when policy doesn't pin one. `high` for
38
+ * verdict stability — the helixir 2026-04-26 thrashing came from the
39
+ * lower-reasoning default.
40
+ */
41
+ export const IRON_GATE_DEFAULT_REASONING = 'high';
42
+ // ---------------------------------------------------------------------------
23
43
  // Errors
24
44
  // ---------------------------------------------------------------------------
25
45
  export class CodexNotInstalledError extends Error {
@@ -151,8 +171,10 @@ export async function runCodexReview(options) {
151
171
  // Codex's TOML parser interprets the value, so we wrap strings in TOML
152
172
  // quotes — `-c model="gpt-5.4"` not `-c model=gpt-5.4` — to ensure the
153
173
  // value lands as a string regardless of upstream parsing changes.
154
- const effectiveModel = options.model !== undefined && options.model.length > 0 ? options.model : 'gpt-5.4';
155
- const effectiveReasoning = options.reasoningEffort ?? 'high';
174
+ const effectiveModel = options.model !== undefined && options.model.length > 0
175
+ ? options.model
176
+ : IRON_GATE_DEFAULT_MODEL;
177
+ const effectiveReasoning = options.reasoningEffort ?? IRON_GATE_DEFAULT_REASONING;
156
178
  const overrideArgs = [
157
179
  '-c',
158
180
  `model="${escapeTomlString(effectiveModel)}"`,
@@ -23,13 +23,15 @@
23
23
  */
24
24
  import path from 'node:path';
25
25
  import { appendAuditRecord } from '../../audit/append.js';
26
+ import { loadPolicyAsync } from '../../policy/loader.js';
26
27
  import { Tier, InvocationStatus } from '../../policy/types.js';
27
28
  import { resolvePushGatePolicy, PUSH_GATE_DEFAULT_LAST_N_COMMITS_FALLBACK, } from './policy.js';
28
29
  import { readHalt } from './halt.js';
29
30
  import { resolveBaseRef } from './base.js';
30
- import { createRealGitExecutor, runCodexReview, CodexNotInstalledError, CodexProtocolError, CodexSubprocessError, CodexTimeoutError, } from './codex-runner.js';
31
+ import { createRealGitExecutor, runCodexReview, CodexNotInstalledError, CodexProtocolError, CodexSubprocessError, CodexTimeoutError, IRON_GATE_DEFAULT_MODEL, IRON_GATE_DEFAULT_REASONING, } from './codex-runner.js';
31
32
  import { summarizeReview } from './findings.js';
32
33
  import { renderBanner, writeLastReview } from './report.js';
34
+ import { isFlip, lookupVerdict, writeVerdict, } from './verdict-cache.js';
33
35
  /**
34
36
  * Parse the raw pre-push stdin text into refspecs. Each line is four
35
37
  * whitespace-separated fields. Blank lines and malformed lines are
@@ -72,6 +74,8 @@ const EVT_DISABLED = 'rea.push_gate.disabled';
72
74
  const EVT_SKIPPED = 'rea.push_gate.skipped';
73
75
  const EVT_EMPTY = 'rea.push_gate.empty_diff';
74
76
  const EVT_ERROR = 'rea.push_gate.error';
77
+ const EVT_CACHE_HIT = 'rea.push_gate.cache_hit';
78
+ const EVT_VERDICT_FLIP = 'rea.push_gate.verdict_flip';
75
79
  // ---------------------------------------------------------------------------
76
80
  // Composer
77
81
  // ---------------------------------------------------------------------------
@@ -84,13 +88,27 @@ export async function runPushGate(deps) {
84
88
  const runCodexFn = deps.runCodex ?? runCodexReview;
85
89
  const appendAuditFn = deps.appendAudit ?? appendAuditRecord;
86
90
  const git = deps.git ?? createRealGitExecutor(deps.baseDir);
91
+ // 0.19.0 backend-engineer review P1-1: load the full Policy once and
92
+ // thread it to every safeAppend so audit rotation actually fires.
93
+ // Pre-fix the rotator short-circuited because policy was never passed
94
+ // through, silently disabling the `audit.rotation: {}` opt-in shipped
95
+ // in 0.18.1 for the bst-internal profile. A failure to load policy
96
+ // here is non-fatal — the gate continues; audit rotation just stays
97
+ // disabled for this run (back-compat).
98
+ let fullPolicy;
99
+ try {
100
+ fullPolicy = await loadPolicyAsync(deps.baseDir);
101
+ }
102
+ catch {
103
+ fullPolicy = undefined;
104
+ }
87
105
  // 1. HALT wins over everything, including `review.codex_required: false`.
88
106
  // Reading it before policy also means a corrupted policy.yaml doesn't
89
107
  // prevent the kill-switch from firing.
90
108
  const halt = readHaltFn(deps.baseDir);
91
109
  if (halt.halted) {
92
110
  stderr(`REA HALT: ${halt.reason ?? 'unknown'}\nAll push operations suspended. Run: rea unfreeze\n`);
93
- await safeAppend(appendAuditFn, deps.baseDir, EVT_HALTED, {
111
+ await safeAppend(appendAuditFn, deps.baseDir, EVT_HALTED, fullPolicy, {
94
112
  reason: halt.reason ?? 'unknown',
95
113
  });
96
114
  return {
@@ -108,14 +126,14 @@ export async function runPushGate(deps) {
108
126
  catch (e) {
109
127
  const msg = e instanceof Error ? e.message : String(e);
110
128
  stderr(`PUSH BLOCKED: failed to load .rea/policy.yaml — ${msg}\n`);
111
- await safeAppend(appendAuditFn, deps.baseDir, EVT_ERROR, {
129
+ await safeAppend(appendAuditFn, deps.baseDir, EVT_ERROR, fullPolicy, {
112
130
  kind: 'policy-load',
113
131
  error: msg,
114
132
  });
115
133
  return { status: 'error', exitCode: 2, summary: `policy-load error: ${msg}` };
116
134
  }
117
135
  if (!policy.codex_required) {
118
- await safeAppend(appendAuditFn, deps.baseDir, EVT_DISABLED, {
136
+ await safeAppend(appendAuditFn, deps.baseDir, EVT_DISABLED, fullPolicy, {
119
137
  policy_missing: policy.policyMissing,
120
138
  });
121
139
  return {
@@ -153,7 +171,7 @@ export async function runPushGate(deps) {
153
171
  const skipVar = skipPush.length > 0 ? 'REA_SKIP_PUSH_GATE' : 'REA_SKIP_CODEX_REVIEW';
154
172
  const skipReason = skipVar === 'REA_SKIP_PUSH_GATE' ? skipPush : skipCodex;
155
173
  stderr(`rea: ${skipVar}=${skipReason} — push-gate skipped (audited).\n`);
156
- await safeAppend(appendAuditFn, deps.baseDir, EVT_SKIPPED, {
174
+ await safeAppend(appendAuditFn, deps.baseDir, EVT_SKIPPED, fullPolicy, {
157
175
  reason: skipReason,
158
176
  skip_var: skipVar,
159
177
  });
@@ -248,7 +266,7 @@ export async function runPushGate(deps) {
248
266
  }
249
267
  if (headSha.length === 0) {
250
268
  stderr('PUSH BLOCKED: could not resolve HEAD SHA. Is this a valid git repo?\n');
251
- await safeAppend(appendAuditFn, deps.baseDir, EVT_ERROR, { kind: 'head-sha-missing' });
269
+ await safeAppend(appendAuditFn, deps.baseDir, EVT_ERROR, fullPolicy, { kind: 'head-sha-missing' });
252
270
  return { status: 'error', exitCode: 2, summary: 'head-sha-missing' };
253
271
  }
254
272
  // 4b. Auto-narrow probe (J / 0.13.0). When the resolved base is far
@@ -318,7 +336,7 @@ export async function runPushGate(deps) {
318
336
  // no-op relative to base.
319
337
  const diff = git.diffNames(base.ref, headSha);
320
338
  if (diff.length === 0) {
321
- await safeAppend(appendAuditFn, deps.baseDir, EVT_EMPTY, {
339
+ await safeAppend(appendAuditFn, deps.baseDir, EVT_EMPTY, fullPolicy, {
322
340
  base_ref: base.ref,
323
341
  base_source: base.source,
324
342
  head_sha: headSha,
@@ -335,7 +353,46 @@ export async function runPushGate(deps) {
335
353
  headSha,
336
354
  };
337
355
  }
338
- // 6. Run Codex. Typed errors translate to exit 2 with distinct stderr.
356
+ // 6a. Verdict cache lookup (0.18.1 helixir #1, #4, #7, #8). Same-SHA
357
+ // pushes within the configured TTL skip the codex invocation and
358
+ // reuse the cached verdict — durable PASS. Cache is bypassed when
359
+ // policy.review.cache_ttl_ms is 0. Cache miss / expired falls
360
+ // through to the codex call below.
361
+ const cacheLookup = policy.cache_ttl_ms > 0 ? lookupVerdict(deps.baseDir, headSha) : { hit: false };
362
+ if (cacheLookup.hit && cacheLookup.entry !== undefined) {
363
+ const cached = cacheLookup.entry;
364
+ const cachedBlocked = cached.verdict === 'blocking'
365
+ || (cached.verdict === 'concerns' && policy.concerns_blocks && !isConcernsOverrideSet(env));
366
+ await safeAppend(appendAuditFn, deps.baseDir, EVT_CACHE_HIT, fullPolicy, {
367
+ verdict: cached.verdict,
368
+ finding_count: cached.finding_count,
369
+ base_ref: base.ref,
370
+ base_source: base.source,
371
+ head_sha: headSha,
372
+ cached_reviewed_at: cached.reviewed_at,
373
+ cached_model: cached.model,
374
+ cached_reasoning_effort: cached.reasoning_effort,
375
+ blocked: cachedBlocked,
376
+ });
377
+ return {
378
+ status: cachedBlocked
379
+ ? cached.verdict === 'blocking'
380
+ ? 'blocking'
381
+ : 'concerns'
382
+ : cached.verdict === 'blocking'
383
+ ? 'blocking'
384
+ : cached.verdict === 'concerns'
385
+ ? 'concerns'
386
+ : 'pass',
387
+ exitCode: cachedBlocked ? 2 : 0,
388
+ summary: `${cached.verdict}: ${cached.finding_count} finding(s) (cached)`,
389
+ verdict: cached.verdict,
390
+ findingCount: cached.finding_count,
391
+ baseRef: base.ref,
392
+ headSha,
393
+ };
394
+ }
395
+ // 6b. Run Codex. Typed errors translate to exit 2 with distinct stderr.
339
396
  try {
340
397
  const codexResult = await runCodexFn({
341
398
  baseRef: base.ref,
@@ -372,7 +429,43 @@ export async function runPushGate(deps) {
372
429
  blocked,
373
430
  lastReviewPath,
374
431
  }));
375
- await safeAppend(appendAuditFn, deps.baseDir, EVT_REVIEWED, {
432
+ // 0.18.1 verdict cache write + flip detection. The lookup at step
433
+ // 6a already returned miss/expired; if `cacheLookup.entry` is set,
434
+ // a stale entry existed — compare its verdict to the fresh one and
435
+ // emit a flip event when they differ. Operators can grep
436
+ // `rea.push_gate.verdict_flip` in the audit log to detect codex
437
+ // non-determinism (helixir #8).
438
+ if (policy.cache_ttl_ms > 0) {
439
+ const flipped = isFlip(cacheLookup.entry, summary.verdict);
440
+ if (flipped && cacheLookup.entry !== undefined) {
441
+ await safeAppend(appendAuditFn, deps.baseDir, EVT_VERDICT_FLIP, fullPolicy, {
442
+ head_sha: headSha,
443
+ prior_verdict: cacheLookup.entry.verdict,
444
+ fresh_verdict: summary.verdict,
445
+ prior_reviewed_at: cacheLookup.entry.reviewed_at,
446
+ base_ref: base.ref,
447
+ });
448
+ }
449
+ const entry = {
450
+ verdict: summary.verdict,
451
+ finding_count: summary.findings.length,
452
+ reviewed_at: deps.now !== undefined ? deps.now().toISOString() : new Date().toISOString(),
453
+ model: policy.codex_model ?? IRON_GATE_DEFAULT_MODEL,
454
+ reasoning_effort: policy.codex_reasoning_effort ?? IRON_GATE_DEFAULT_REASONING,
455
+ ttl_ms: policy.cache_ttl_ms,
456
+ };
457
+ try {
458
+ await writeVerdict(deps.baseDir, headSha, entry);
459
+ }
460
+ catch {
461
+ // Cache writes are best-effort. A failure here must NOT
462
+ // affect the verdict — log to stderr (already done by the
463
+ // caller via banner) and proceed. Foreign-schema (v3+ cache
464
+ // from a future rea version) lands here and is correctly
465
+ // declined — overwriting would lose forward-compat data.
466
+ }
467
+ }
468
+ await safeAppend(appendAuditFn, deps.baseDir, EVT_REVIEWED, fullPolicy, {
376
469
  verdict: summary.verdict,
377
470
  finding_count: summary.findings.length,
378
471
  base_ref: base.ref,
@@ -386,6 +479,9 @@ export async function runPushGate(deps) {
386
479
  last_n_commits_requested: base.lastNCommitsRequested,
387
480
  auto_narrowed: autoNarrowed ? true : undefined,
388
481
  original_commit_count: originalCommitCount !== null ? originalCommitCount : undefined,
482
+ flipped: cacheLookup.entry !== undefined && isFlip(cacheLookup.entry, summary.verdict)
483
+ ? true
484
+ : undefined,
389
485
  });
390
486
  if (blocked) {
391
487
  return {
@@ -413,7 +509,7 @@ export async function runPushGate(deps) {
413
509
  };
414
510
  }
415
511
  catch (e) {
416
- return handleCodexError(e, deps, base, headSha, appendAuditFn);
512
+ return handleCodexError(e, deps, base, headSha, appendAuditFn, fullPolicy);
417
513
  }
418
514
  }
419
515
  function isConcernsOverrideSet(env) {
@@ -423,7 +519,7 @@ function isConcernsOverrideSet(env) {
423
519
  const normalized = raw.trim().toLowerCase();
424
520
  return normalized === '1' || normalized === 'true' || normalized === 'yes';
425
521
  }
426
- async function handleCodexError(e, deps, base, headSha, appendAuditFn) {
522
+ async function handleCodexError(e, deps, base, headSha, appendAuditFn, policy) {
427
523
  const stderr = deps.stderr;
428
524
  const runError = classifyCodexError(e);
429
525
  const metadata = {
@@ -435,7 +531,7 @@ async function handleCodexError(e, deps, base, headSha, appendAuditFn) {
435
531
  if (runError.message.length > 0)
436
532
  metadata.error = runError.message;
437
533
  stderr(`PUSH BLOCKED: ${runError.message}\n`);
438
- await safeAppend(appendAuditFn, deps.baseDir, EVT_ERROR, metadata);
534
+ await safeAppend(appendAuditFn, deps.baseDir, EVT_ERROR, policy, metadata);
439
535
  return {
440
536
  status: 'error',
441
537
  exitCode: 2,
@@ -463,7 +559,7 @@ function classifyCodexError(e) {
463
559
  * its primary result. The hash chain remains intact if this succeeds; on
464
560
  * failure we've already made the gate decision based on the actual review.
465
561
  */
466
- async function safeAppend(appendFn, baseDir, toolName, metadata) {
562
+ async function safeAppend(appendFn, baseDir, toolName, policy, metadata) {
467
563
  try {
468
564
  // Prune undefined values — the audit record schema's `metadata` is an
469
565
  // arbitrary map, but `undefined` values cause JSON.stringify to emit
@@ -473,12 +569,19 @@ async function safeAppend(appendFn, baseDir, toolName, metadata) {
473
569
  if (v !== undefined)
474
570
  cleanMeta[k] = v;
475
571
  }
572
+ // 0.19.0 P1-1 fix (backend-engineer review): pass the loaded Policy
573
+ // through so `appendAuditRecord` → `maybeRotate` actually fires.
574
+ // Pre-fix the policy was never threaded; rotation short-circuited
575
+ // to `{ rotated: false }` on the entire push-gate audit-emission
576
+ // path, silently disabling the `audit.rotation: {}` opt-in shipped
577
+ // in 0.18.1 for the bst-internal profile.
476
578
  await appendFn(baseDir, {
477
579
  tool_name: toolName,
478
580
  server_name: AUDIT_SERVER_NAME,
479
581
  tier: Tier.Read,
480
582
  status: InvocationStatus.Allowed,
481
583
  ...(Object.keys(cleanMeta).length > 0 ? { metadata: cleanMeta } : {}),
584
+ ...(policy !== undefined ? { policy } : {}),
482
585
  });
483
586
  }
484
587
  catch (e) {
@@ -56,6 +56,12 @@ export interface ResolvedReviewPolicy {
56
56
  * codex's own default (currently `medium`).
57
57
  */
58
58
  codex_reasoning_effort: 'low' | 'medium' | 'high' | undefined;
59
+ /**
60
+ * Verdict cache TTL in milliseconds (0.18.1+). `0` disables caching;
61
+ * positive values enable the same-SHA short-circuit. Default 86_400_000
62
+ * (24 hours) when policy.review.cache_ttl_ms is unset.
63
+ */
64
+ cache_ttl_ms: number;
59
65
  /** `true` when `.rea/policy.yaml` was absent; defaults apply. */
60
66
  policyMissing: boolean;
61
67
  }
@@ -97,6 +103,17 @@ export declare const PUSH_GATE_DEFAULT_CODEX_MODEL = "gpt-5.4";
97
103
  * `.rea/policy.yaml` for cost-bounded environments.
98
104
  */
99
105
  export declare const PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT: 'low' | 'medium' | 'high';
106
+ /**
107
+ * Default verdict-cache TTL in milliseconds (0.18.1+). 24 hours: long
108
+ * enough to amortize multi-push iteration of the same SHA (push, push
109
+ * --force-with-lease after a quick fixup, push again post-rebase),
110
+ * short enough that a stale cache from yesterday doesn't suppress
111
+ * review of code whose context (env, dependencies, .rea/policy.yaml)
112
+ * has changed. Operators can shorten to a few minutes for tighter
113
+ * loops or extend via `policy.review.cache_ttl_ms`. `0` disables
114
+ * caching — every push re-invokes codex (pre-0.18.1 behavior).
115
+ */
116
+ export declare const PUSH_GATE_DEFAULT_CACHE_TTL_MS: number;
100
117
  /**
101
118
  * Resolve the push-gate policy for `baseDir`. Never throws — a malformed
102
119
  * policy file surfaces as a typed error via the underlying zod validator,
@@ -66,6 +66,17 @@ export const PUSH_GATE_DEFAULT_CODEX_MODEL = 'gpt-5.4';
66
66
  * `.rea/policy.yaml` for cost-bounded environments.
67
67
  */
68
68
  export const PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT = 'high';
69
+ /**
70
+ * Default verdict-cache TTL in milliseconds (0.18.1+). 24 hours: long
71
+ * enough to amortize multi-push iteration of the same SHA (push, push
72
+ * --force-with-lease after a quick fixup, push again post-rebase),
73
+ * short enough that a stale cache from yesterday doesn't suppress
74
+ * review of code whose context (env, dependencies, .rea/policy.yaml)
75
+ * has changed. Operators can shorten to a few minutes for tighter
76
+ * loops or extend via `policy.review.cache_ttl_ms`. `0` disables
77
+ * caching — every push re-invokes codex (pre-0.18.1 behavior).
78
+ */
79
+ export const PUSH_GATE_DEFAULT_CACHE_TTL_MS = 24 * 60 * 60 * 1_000;
69
80
  /**
70
81
  * Resolve the push-gate policy for `baseDir`. Never throws — a malformed
71
82
  * policy file surfaces as a typed error via the underlying zod validator,
@@ -87,6 +98,7 @@ export async function resolvePushGatePolicy(baseDir) {
87
98
  auto_narrow_threshold: PUSH_GATE_DEFAULT_AUTO_NARROW_THRESHOLD,
88
99
  codex_model: PUSH_GATE_DEFAULT_CODEX_MODEL,
89
100
  codex_reasoning_effort: PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT,
101
+ cache_ttl_ms: PUSH_GATE_DEFAULT_CACHE_TTL_MS,
90
102
  policyMissing: true,
91
103
  };
92
104
  }
@@ -100,6 +112,7 @@ export async function resolvePushGatePolicy(baseDir) {
100
112
  auto_narrow_threshold: review.auto_narrow_threshold ?? PUSH_GATE_DEFAULT_AUTO_NARROW_THRESHOLD,
101
113
  codex_model: review.codex_model ?? PUSH_GATE_DEFAULT_CODEX_MODEL,
102
114
  codex_reasoning_effort: review.codex_reasoning_effort ?? PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT,
115
+ cache_ttl_ms: review.cache_ttl_ms ?? PUSH_GATE_DEFAULT_CACHE_TTL_MS,
103
116
  policyMissing: false,
104
117
  };
105
118
  }
@@ -0,0 +1,126 @@
1
+ /**
2
+ * Durable verdict cache for the push-gate (helixir #1, #4, #7, #8 / 0.18.1).
3
+ *
4
+ * Pre-0.18.1 the push-gate was strictly stateless: every push of the same
5
+ * `head_sha` invoked `codex exec review` afresh. helixir round 82 reproduced
6
+ * the failure mode — push #1 of `9fbdfb63` returned PASS, push #2 of the
7
+ * IDENTICAL commit returned CONCERNS — 1 P2. The verdict instability is
8
+ * a property of codex's stochastic decoding at `reasoning_effort: high`;
9
+ * rea cannot eliminate it, but rea CAN make a clean PASS DURABLE so the
10
+ * second push of the same SHA doesn't roll the dice again.
11
+ *
12
+ * Design:
13
+ *
14
+ * .rea/last-review.cache.json
15
+ * {
16
+ * schema_version: 2,
17
+ * entries: {
18
+ * "<head_sha>": {
19
+ * verdict: "pass" | "concerns" | "blocking",
20
+ * finding_count: number,
21
+ * reviewed_at: ISO8601,
22
+ * model: string,
23
+ * reasoning_effort: "low" | "medium" | "high",
24
+ * ttl_ms: number, // policy.review.cache_ttl_ms at write time
25
+ * },
26
+ * ...
27
+ * }
28
+ * }
29
+ *
30
+ * - Hit (within TTL): emit `rea.push_gate.cache_hit` audit event, exit
31
+ * with the cached verdict + finding count; codex is NOT invoked.
32
+ * - Miss or expired: invoke codex; on success, write the new entry.
33
+ * - Flip detection: if a new codex result on the same SHA produces a
34
+ * verdict different from the cached one, set `last-review.json.flip_flag = true`,
35
+ * emit `rea.push_gate.verdict_flip`, and overwrite the cache with
36
+ * the fresh result. Operators can detect non-determinism from the
37
+ * audit log alone (helixir #8).
38
+ * - REA_SKIP_CODEX_REVIEW short-circuits BEFORE cache lookup (unchanged).
39
+ *
40
+ * 0.19.0 review fixes:
41
+ * - Concurrent writes are now serialized via `withAuditLock` on the
42
+ * `.rea/` directory (backend-engineer P1-2; security M3). Two
43
+ * concurrent push-gate runs no longer race read-modify-write.
44
+ * - Tmp filenames carry a high-entropy suffix (PID + millis + random)
45
+ * and are unlinked in finally so a crash mid-write doesn't leave
46
+ * stale state (backend-engineer P1-3; code-reviewer P2-1).
47
+ * - All four writers (writeVerdict, clearVerdict, pruneOlderThan,
48
+ * clearAll) route through one `_atomicWrite` helper — no asymmetry
49
+ * between paths (code-reviewer P2-2).
50
+ * - On unrecognized schema_version, reads return undefined AND
51
+ * writes refuse to overwrite — the v3 cache stays intact for a
52
+ * future rea version that knows how to read it (code-reviewer P3-5;
53
+ * backend-engineer P2-2).
54
+ *
55
+ * The cache is OPTIONAL by design: existing callers that don't pass a
56
+ * `cacheImpl` get the legacy stateless path. Tests inject a fake.
57
+ */
58
+ import type { Verdict as ReviewVerdict } from './findings.js';
59
+ export declare const VERDICT_CACHE_FILE = "last-review.cache.json";
60
+ export declare const VERDICT_CACHE_SCHEMA_VERSION: 2;
61
+ export declare const DEFAULT_CACHE_TTL_MS: number;
62
+ export interface VerdictCacheEntry {
63
+ verdict: ReviewVerdict;
64
+ finding_count: number;
65
+ reviewed_at: string;
66
+ model: string;
67
+ reasoning_effort: 'low' | 'medium' | 'high';
68
+ ttl_ms: number;
69
+ }
70
+ export interface VerdictCacheLookupResult {
71
+ /** True if a non-expired entry exists for this SHA. */
72
+ hit: boolean;
73
+ /** The entry, present on both hit and miss-of-stale-entry. Used for flip detection. */
74
+ entry?: VerdictCacheEntry;
75
+ /** True if the entry exists but is past TTL. */
76
+ expired?: boolean;
77
+ }
78
+ /**
79
+ * Read the cache file and look up `head_sha`. Missing file, malformed
80
+ * JSON, missing entry, and unsupported schema_version all resolve to a
81
+ * miss with `entry: undefined` — the caller proceeds to codex.
82
+ */
83
+ export declare function lookupVerdict(baseDir: string, headSha: string, now?: Date): VerdictCacheLookupResult;
84
+ /**
85
+ * Detect whether a new verdict contradicts a previously-cached verdict
86
+ * on the same SHA. Used by `runPushGate` to set the flip-flag on
87
+ * last-review.json and emit the `verdict_flip` audit event.
88
+ */
89
+ export declare function isFlip(prior: VerdictCacheEntry | undefined, fresh: ReviewVerdict): boolean;
90
+ /**
91
+ * Write a fresh verdict entry. Atomic via tmp-file + rename, serialized
92
+ * via `withAuditLock` on `.rea/`. Refuses to overwrite when the existing
93
+ * cache has an unrecognized schema_version (forward-compat — a v3 cache
94
+ * from a future rea version stays intact for that version to read).
95
+ */
96
+ export declare function writeVerdict(baseDir: string, headSha: string, entry: VerdictCacheEntry): Promise<void>;
97
+ /**
98
+ * Remove a single SHA from the cache. Returns true if the entry existed.
99
+ */
100
+ export declare function clearVerdict(baseDir: string, headSha: string): Promise<boolean>;
101
+ /**
102
+ * Remove ALL entries from the cache. Returns the count of removed entries.
103
+ */
104
+ export declare function clearAll(baseDir: string): Promise<number>;
105
+ /**
106
+ * Remove entries whose `reviewed_at` is older than `olderThanMs` from `now`.
107
+ * Returns the count of removed entries.
108
+ */
109
+ export declare function pruneOlderThan(baseDir: string, olderThanMs: number, now?: Date): Promise<number>;
110
+ /**
111
+ * Read all entries (used by `rea cache stats` / `rea cache show`).
112
+ * Returns empty object on any read error (missing file, malformed JSON,
113
+ * unsupported schema_version).
114
+ */
115
+ export declare function listEntries(baseDir: string): Record<string, VerdictCacheEntry>;
116
+ /**
117
+ * Thrown by writeVerdict when the existing cache file has an
118
+ * unrecognized schema_version. The caller (push-gate) catches this
119
+ * and treats the write as best-effort failure (log to stderr,
120
+ * continue) rather than overwriting forward-compat data.
121
+ */
122
+ export declare class VerdictCacheForeignSchemaError extends Error {
123
+ readonly cachePath: string;
124
+ readonly kind: "foreign-schema";
125
+ constructor(cachePath: string);
126
+ }
@@ -0,0 +1,276 @@
1
+ /**
2
+ * Durable verdict cache for the push-gate (helixir #1, #4, #7, #8 / 0.18.1).
3
+ *
4
+ * Pre-0.18.1 the push-gate was strictly stateless: every push of the same
5
+ * `head_sha` invoked `codex exec review` afresh. helixir round 82 reproduced
6
+ * the failure mode — push #1 of `9fbdfb63` returned PASS, push #2 of the
7
+ * IDENTICAL commit returned CONCERNS — 1 P2. The verdict instability is
8
+ * a property of codex's stochastic decoding at `reasoning_effort: high`;
9
+ * rea cannot eliminate it, but rea CAN make a clean PASS DURABLE so the
10
+ * second push of the same SHA doesn't roll the dice again.
11
+ *
12
+ * Design:
13
+ *
14
+ * .rea/last-review.cache.json
15
+ * {
16
+ * schema_version: 2,
17
+ * entries: {
18
+ * "<head_sha>": {
19
+ * verdict: "pass" | "concerns" | "blocking",
20
+ * finding_count: number,
21
+ * reviewed_at: ISO8601,
22
+ * model: string,
23
+ * reasoning_effort: "low" | "medium" | "high",
24
+ * ttl_ms: number, // policy.review.cache_ttl_ms at write time
25
+ * },
26
+ * ...
27
+ * }
28
+ * }
29
+ *
30
+ * - Hit (within TTL): emit `rea.push_gate.cache_hit` audit event, exit
31
+ * with the cached verdict + finding count; codex is NOT invoked.
32
+ * - Miss or expired: invoke codex; on success, write the new entry.
33
+ * - Flip detection: if a new codex result on the same SHA produces a
34
+ * verdict different from the cached one, set `last-review.json.flip_flag = true`,
35
+ * emit `rea.push_gate.verdict_flip`, and overwrite the cache with
36
+ * the fresh result. Operators can detect non-determinism from the
37
+ * audit log alone (helixir #8).
38
+ * - REA_SKIP_CODEX_REVIEW short-circuits BEFORE cache lookup (unchanged).
39
+ *
40
+ * 0.19.0 review fixes:
41
+ * - Concurrent writes are now serialized via `withAuditLock` on the
42
+ * `.rea/` directory (backend-engineer P1-2; security M3). Two
43
+ * concurrent push-gate runs no longer race read-modify-write.
44
+ * - Tmp filenames carry a high-entropy suffix (PID + millis + random)
45
+ * and are unlinked in finally so a crash mid-write doesn't leave
46
+ * stale state (backend-engineer P1-3; code-reviewer P2-1).
47
+ * - All three writers (writeVerdict, clearVerdict, pruneOlderThan,
48
+ * clearAll) route through one `_atomicWrite` helper — no asymmetry
49
+ * between paths (code-reviewer P2-2).
50
+ * - On unrecognized schema_version, reads return undefined AND
51
+ * writes refuse to overwrite — the v3 cache stays intact for a
52
+ * future rea version that knows how to read it (code-reviewer P3-5;
53
+ * backend-engineer P2-2).
54
+ *
55
+ * The cache is OPTIONAL by design: existing callers that don't pass a
56
+ * `cacheImpl` get the legacy stateless path. Tests inject a fake.
57
+ */
58
+ import crypto from 'node:crypto';
59
+ import fs from 'node:fs';
60
+ import path from 'node:path';
61
+ import { withAuditLock } from '../../audit/fs.js';
62
+ export const VERDICT_CACHE_FILE = 'last-review.cache.json';
63
+ export const VERDICT_CACHE_SCHEMA_VERSION = 2;
64
+ export const DEFAULT_CACHE_TTL_MS = 24 * 60 * 60 * 1_000; // 24h
65
+ /**
66
+ * Read the cache file and look up `head_sha`. Missing file, malformed
67
+ * JSON, missing entry, and unsupported schema_version all resolve to a
68
+ * miss with `entry: undefined` — the caller proceeds to codex.
69
+ */
70
+ export function lookupVerdict(baseDir, headSha, now = new Date()) {
71
+ const file = readCacheFile(baseDir);
72
+ if (file === undefined)
73
+ return { hit: false };
74
+ const entry = file.entries[headSha];
75
+ if (entry === undefined)
76
+ return { hit: false };
77
+ const reviewedAtMs = Date.parse(entry.reviewed_at);
78
+ if (Number.isNaN(reviewedAtMs))
79
+ return { hit: false, entry };
80
+ const ageMs = now.getTime() - reviewedAtMs;
81
+ if (ageMs >= entry.ttl_ms) {
82
+ return { hit: false, entry, expired: true };
83
+ }
84
+ return { hit: true, entry };
85
+ }
86
+ /**
87
+ * Detect whether a new verdict contradicts a previously-cached verdict
88
+ * on the same SHA. Used by `runPushGate` to set the flip-flag on
89
+ * last-review.json and emit the `verdict_flip` audit event.
90
+ */
91
+ export function isFlip(prior, fresh) {
92
+ if (prior === undefined)
93
+ return false;
94
+ return prior.verdict !== fresh;
95
+ }
96
+ /**
97
+ * Write a fresh verdict entry. Atomic via tmp-file + rename, serialized
98
+ * via `withAuditLock` on `.rea/`. Refuses to overwrite when the existing
99
+ * cache has an unrecognized schema_version (forward-compat — a v3 cache
100
+ * from a future rea version stays intact for that version to read).
101
+ */
102
+ export async function writeVerdict(baseDir, headSha, entry) {
103
+ const reaDir = path.join(baseDir, '.rea');
104
+ if (!fs.existsSync(reaDir)) {
105
+ fs.mkdirSync(reaDir, { recursive: true });
106
+ }
107
+ const cachePath = path.join(reaDir, VERDICT_CACHE_FILE);
108
+ await withAuditLock(cachePath, async () => {
109
+ if (foreignSchemaPresent(baseDir)) {
110
+ throw new VerdictCacheForeignSchemaError(cachePath);
111
+ }
112
+ const existing = readCacheFile(baseDir);
113
+ const next = {
114
+ schema_version: VERDICT_CACHE_SCHEMA_VERSION,
115
+ entries: { ...(existing?.entries ?? {}), [headSha]: entry },
116
+ };
117
+ _atomicWriteJson(cachePath, next);
118
+ });
119
+ }
120
+ /**
121
+ * Remove a single SHA from the cache. Returns true if the entry existed.
122
+ */
123
+ export async function clearVerdict(baseDir, headSha) {
124
+ const cachePath = path.join(baseDir, '.rea', VERDICT_CACHE_FILE);
125
+ return withAuditLock(cachePath, async () => {
126
+ const file = readCacheFile(baseDir);
127
+ if (file === undefined || file.entries[headSha] === undefined)
128
+ return false;
129
+ const next = {
130
+ schema_version: VERDICT_CACHE_SCHEMA_VERSION,
131
+ entries: { ...file.entries },
132
+ };
133
+ delete next.entries[headSha];
134
+ _atomicWriteJson(cachePath, next);
135
+ return true;
136
+ });
137
+ }
138
+ /**
139
+ * Remove ALL entries from the cache. Returns the count of removed entries.
140
+ */
141
+ export async function clearAll(baseDir) {
142
+ const reaDir = path.join(baseDir, '.rea');
143
+ const cachePath = path.join(reaDir, VERDICT_CACHE_FILE);
144
+ if (!fs.existsSync(reaDir)) {
145
+ fs.mkdirSync(reaDir, { recursive: true });
146
+ }
147
+ return withAuditLock(cachePath, async () => {
148
+ const file = readCacheFile(baseDir);
149
+ const count = file === undefined ? 0 : Object.keys(file.entries).length;
150
+ const empty = {
151
+ schema_version: VERDICT_CACHE_SCHEMA_VERSION,
152
+ entries: {},
153
+ };
154
+ _atomicWriteJson(cachePath, empty);
155
+ return count;
156
+ });
157
+ }
158
+ /**
159
+ * Remove entries whose `reviewed_at` is older than `olderThanMs` from `now`.
160
+ * Returns the count of removed entries.
161
+ */
162
+ export async function pruneOlderThan(baseDir, olderThanMs, now = new Date()) {
163
+ const cachePath = path.join(baseDir, '.rea', VERDICT_CACHE_FILE);
164
+ return withAuditLock(cachePath, async () => {
165
+ const file = readCacheFile(baseDir);
166
+ if (file === undefined)
167
+ return 0;
168
+ const cutoff = now.getTime() - olderThanMs;
169
+ const surviving = {};
170
+ let removed = 0;
171
+ for (const [sha, entry] of Object.entries(file.entries)) {
172
+ const reviewedAtMs = Date.parse(entry.reviewed_at);
173
+ if (Number.isNaN(reviewedAtMs) || reviewedAtMs >= cutoff) {
174
+ surviving[sha] = entry;
175
+ }
176
+ else {
177
+ removed += 1;
178
+ }
179
+ }
180
+ if (removed === 0)
181
+ return 0;
182
+ const next = {
183
+ schema_version: VERDICT_CACHE_SCHEMA_VERSION,
184
+ entries: surviving,
185
+ };
186
+ _atomicWriteJson(cachePath, next);
187
+ return removed;
188
+ });
189
+ }
190
+ /**
191
+ * Read all entries (used by `rea cache stats` / `rea cache show`).
192
+ * Returns empty object on any read error (missing file, malformed JSON,
193
+ * unsupported schema_version).
194
+ */
195
+ export function listEntries(baseDir) {
196
+ const file = readCacheFile(baseDir);
197
+ return file?.entries ?? {};
198
+ }
199
+ /**
200
+ * Thrown by writeVerdict when the existing cache file has an
201
+ * unrecognized schema_version. The caller (push-gate) catches this
202
+ * and treats the write as best-effort failure (log to stderr,
203
+ * continue) rather than overwriting forward-compat data.
204
+ */
205
+ export class VerdictCacheForeignSchemaError extends Error {
206
+ cachePath;
207
+ kind = 'foreign-schema';
208
+ constructor(cachePath) {
209
+ super(`Refused to overwrite ${cachePath}: existing cache has unrecognized schema_version. ` +
210
+ `Either delete the file or run with a newer rea that supports it.`);
211
+ this.cachePath = cachePath;
212
+ this.name = 'VerdictCacheForeignSchemaError';
213
+ }
214
+ }
215
+ function readCacheFile(baseDir) {
216
+ const parsed = readForeignCacheFile(baseDir);
217
+ if (parsed === undefined)
218
+ return undefined;
219
+ if (parsed.schema_version !== VERDICT_CACHE_SCHEMA_VERSION)
220
+ return undefined;
221
+ // We checked schema_version exactly; entries shape is the v2 contract.
222
+ return parsed;
223
+ }
224
+ function readForeignCacheFile(baseDir) {
225
+ const cachePath = path.join(baseDir, '.rea', VERDICT_CACHE_FILE);
226
+ if (!fs.existsSync(cachePath))
227
+ return undefined;
228
+ try {
229
+ const raw = fs.readFileSync(cachePath, 'utf8');
230
+ const parsed = JSON.parse(raw);
231
+ if (typeof parsed !== 'object' || parsed === null)
232
+ return undefined;
233
+ const sv = parsed.schema_version;
234
+ if (typeof sv !== 'number')
235
+ return undefined;
236
+ const entries = parsed.entries;
237
+ if (typeof entries !== 'object' || entries === null)
238
+ return undefined;
239
+ return parsed;
240
+ }
241
+ catch {
242
+ return undefined;
243
+ }
244
+ }
245
+ function foreignSchemaPresent(baseDir) {
246
+ const parsed = readForeignCacheFile(baseDir);
247
+ if (parsed === undefined)
248
+ return false;
249
+ return parsed.schema_version !== VERDICT_CACHE_SCHEMA_VERSION;
250
+ }
251
+ /**
252
+ * Atomic JSON write: stringify → write tmp → fsync → rename.
253
+ *
254
+ * Tmp filename: `${target}.tmp.${pid}.${ms}.${random8}` — collision-
255
+ * resistant under concurrent writes, PID reuse, and same-process
256
+ * parallel calls. On any failure, the tmp file is unlinked so a crash
257
+ * mid-write doesn't leave stale state.
258
+ */
259
+ function _atomicWriteJson(targetPath, payload) {
260
+ const tmp = `${targetPath}.tmp.${process.pid}.${Date.now()}.${crypto.randomBytes(4).toString('hex')}`;
261
+ try {
262
+ fs.writeFileSync(tmp, `${JSON.stringify(payload, null, 2)}\n`, 'utf8');
263
+ fs.renameSync(tmp, targetPath);
264
+ }
265
+ catch (e) {
266
+ try {
267
+ if (fs.existsSync(tmp))
268
+ fs.unlinkSync(tmp);
269
+ }
270
+ catch {
271
+ // Tmp already gone or unlink failed — caller's error is the
272
+ // important signal.
273
+ }
274
+ throw e;
275
+ }
276
+ }
@@ -81,6 +81,15 @@ declare const PolicySchema: z.ZodObject<{
81
81
  * matters less than throughput.
82
82
  */
83
83
  codex_reasoning_effort: z.ZodOptional<z.ZodEnum<["low", "medium", "high"]>>;
84
+ /**
85
+ * Verdict cache TTL in milliseconds (0.18.1+ helixir #1, #4, #7, #8).
86
+ * Default 86_400_000 (24 hours). When a push of `head_sha` produces
87
+ * a non-blocking verdict, the result is written to
88
+ * `.rea/last-review.cache.json`. Subsequent pushes of the same SHA
89
+ * within the TTL skip the codex invocation and reuse the cached
90
+ * verdict. Set to 0 to disable caching (every push re-invokes codex).
91
+ */
92
+ cache_ttl_ms: z.ZodOptional<z.ZodNumber>;
84
93
  }, "strict", z.ZodTypeAny, {
85
94
  codex_required?: boolean | undefined;
86
95
  concerns_blocks?: boolean | undefined;
@@ -89,6 +98,7 @@ declare const PolicySchema: z.ZodObject<{
89
98
  auto_narrow_threshold?: number | undefined;
90
99
  codex_model?: string | undefined;
91
100
  codex_reasoning_effort?: "low" | "medium" | "high" | undefined;
101
+ cache_ttl_ms?: number | undefined;
92
102
  }, {
93
103
  codex_required?: boolean | undefined;
94
104
  concerns_blocks?: boolean | undefined;
@@ -97,6 +107,7 @@ declare const PolicySchema: z.ZodObject<{
97
107
  auto_narrow_threshold?: number | undefined;
98
108
  codex_model?: string | undefined;
99
109
  codex_reasoning_effort?: "low" | "medium" | "high" | undefined;
110
+ cache_ttl_ms?: number | undefined;
100
111
  }>>;
101
112
  redact: z.ZodOptional<z.ZodObject<{
102
113
  match_timeout_ms: z.ZodOptional<z.ZodNumber>;
@@ -196,6 +207,7 @@ declare const PolicySchema: z.ZodObject<{
196
207
  auto_narrow_threshold?: number | undefined;
197
208
  codex_model?: string | undefined;
198
209
  codex_reasoning_effort?: "low" | "medium" | "high" | undefined;
210
+ cache_ttl_ms?: number | undefined;
199
211
  } | undefined;
200
212
  redact?: {
201
213
  match_timeout_ms?: number | undefined;
@@ -245,6 +257,7 @@ declare const PolicySchema: z.ZodObject<{
245
257
  auto_narrow_threshold?: number | undefined;
246
258
  codex_model?: string | undefined;
247
259
  codex_reasoning_effort?: "low" | "medium" | "high" | undefined;
260
+ cache_ttl_ms?: number | undefined;
248
261
  } | undefined;
249
262
  redact?: {
250
263
  match_timeout_ms?: number | undefined;
@@ -58,7 +58,12 @@ const ReviewPolicySchema = z
58
58
  * NOT want to lock consumers to a hardcoded enum that drifts behind
59
59
  * upstream. Codex itself validates the model name at exec time.
60
60
  */
61
- codex_model: z.string().min(1).optional(),
61
+ // 0.19.0 security review M4: restrict to a safe character class so
62
+ // a typo or malicious value can't smuggle TOML control characters
63
+ // (NUL, NL, CR, escape sequences) through the `-c model="<value>"`
64
+ // injection point. Accepts published codex model names; rejects
65
+ // re-quote / TOML-escape edge cases.
66
+ codex_model: z.string().regex(/^[a-zA-Z0-9._-]{1,64}$/).optional(),
62
67
  /**
63
68
  * Codex reasoning effort knob (0.13.4+). Pinned via
64
69
  * `-c model_reasoning_effort="<level>"` on every invocation. Only
@@ -72,6 +77,15 @@ const ReviewPolicySchema = z
72
77
  * matters less than throughput.
73
78
  */
74
79
  codex_reasoning_effort: z.enum(['low', 'medium', 'high']).optional(),
80
+ /**
81
+ * Verdict cache TTL in milliseconds (0.18.1+ helixir #1, #4, #7, #8).
82
+ * Default 86_400_000 (24 hours). When a push of `head_sha` produces
83
+ * a non-blocking verdict, the result is written to
84
+ * `.rea/last-review.cache.json`. Subsequent pushes of the same SHA
85
+ * within the TTL skip the codex invocation and reuse the cached
86
+ * verdict. Set to 0 to disable caching (every push re-invokes codex).
87
+ */
88
+ cache_ttl_ms: z.number().int().nonnegative().optional(),
75
89
  })
76
90
  .strict();
77
91
  /**
@@ -47,6 +47,28 @@ export declare const ProfileSchema: z.ZodObject<{
47
47
  delegate_to_subagent?: string[] | undefined;
48
48
  max_bash_output_lines?: number | undefined;
49
49
  }>>;
50
+ audit: z.ZodOptional<z.ZodObject<{
51
+ rotation: z.ZodOptional<z.ZodObject<{
52
+ max_bytes: z.ZodOptional<z.ZodNumber>;
53
+ max_age_days: z.ZodOptional<z.ZodNumber>;
54
+ }, "strip", z.ZodTypeAny, {
55
+ max_bytes?: number | undefined;
56
+ max_age_days?: number | undefined;
57
+ }, {
58
+ max_bytes?: number | undefined;
59
+ max_age_days?: number | undefined;
60
+ }>>;
61
+ }, "strip", z.ZodTypeAny, {
62
+ rotation?: {
63
+ max_bytes?: number | undefined;
64
+ max_age_days?: number | undefined;
65
+ } | undefined;
66
+ }, {
67
+ rotation?: {
68
+ max_bytes?: number | undefined;
69
+ max_age_days?: number | undefined;
70
+ } | undefined;
71
+ }>>;
50
72
  }, "strict", z.ZodTypeAny, {
51
73
  autonomy_level?: AutonomyLevel | undefined;
52
74
  max_autonomy_level?: AutonomyLevel | undefined;
@@ -64,6 +86,12 @@ export declare const ProfileSchema: z.ZodObject<{
64
86
  delegate_to_subagent?: string[] | undefined;
65
87
  max_bash_output_lines?: number | undefined;
66
88
  } | undefined;
89
+ audit?: {
90
+ rotation?: {
91
+ max_bytes?: number | undefined;
92
+ max_age_days?: number | undefined;
93
+ } | undefined;
94
+ } | undefined;
67
95
  }, {
68
96
  autonomy_level?: AutonomyLevel | undefined;
69
97
  max_autonomy_level?: AutonomyLevel | undefined;
@@ -81,6 +109,12 @@ export declare const ProfileSchema: z.ZodObject<{
81
109
  delegate_to_subagent?: string[] | undefined;
82
110
  max_bash_output_lines?: number | undefined;
83
111
  } | undefined;
112
+ audit?: {
113
+ rotation?: {
114
+ max_bytes?: number | undefined;
115
+ max_age_days?: number | undefined;
116
+ } | undefined;
117
+ } | undefined;
84
118
  }>;
85
119
  export type Profile = z.infer<typeof ProfileSchema>;
86
120
  /** Hard defaults applied before any profile or wizard answer. */
@@ -54,6 +54,21 @@ export const ProfileSchema = z
54
54
  injection_detection: z.enum(['block', 'warn']).optional(),
55
55
  injection: InjectionProfileSchema.optional(),
56
56
  context_protection: ContextProtectionProfileSchema.optional(),
57
+ // 0.18.1+ helixir #9: profiles can ship audit-rotation defaults.
58
+ // The full audit policy block validates at load time via
59
+ // `AuditPolicySchema` in loader.ts; profiles only need to declare
60
+ // the rotation knob (most consumer profiles will leave this empty
61
+ // — the default 50 MiB / 30 days are sane).
62
+ audit: z
63
+ .object({
64
+ rotation: z
65
+ .object({
66
+ max_bytes: z.number().int().positive().optional(),
67
+ max_age_days: z.number().int().positive().optional(),
68
+ })
69
+ .optional(),
70
+ })
71
+ .optional(),
57
72
  })
58
73
  .strict();
59
74
  /** Hard defaults applied before any profile or wizard answer. */
@@ -158,6 +158,17 @@ export interface ReviewPolicy {
158
158
  * throughput.
159
159
  */
160
160
  codex_reasoning_effort?: 'low' | 'medium' | 'high';
161
+ /**
162
+ * Verdict cache TTL in milliseconds (0.18.1+ helixir #1, #4, #7, #8).
163
+ * Default 86_400_000 (24 hours). When a push of `head_sha` produces a
164
+ * non-blocking verdict, the result is written to
165
+ * `.rea/last-review.cache.json`. Subsequent pushes of the same SHA
166
+ * within the TTL skip the codex invocation and reuse the cached
167
+ * verdict. Set to `0` to disable caching (every push re-invokes
168
+ * codex — pre-0.18.1 behavior). Verdict flips on the same SHA emit
169
+ * a `rea.push_gate.verdict_flip` audit event and overwrite the cache.
170
+ */
171
+ cache_ttl_ms?: number;
161
172
  }
162
173
  /**
163
174
  * User-supplied redaction pattern entry. Each pattern has a stable `name` used
@@ -181,7 +181,14 @@ _rea_unwrap_nested_shells() {
181
181
  # alternation `(^|[[:space:]&|;])` therefore cannot anchor on a
182
182
  # masked separator, and the shell-name token itself can no longer
183
183
  # appear adjacent to a masked quote-introducer.
184
- WRAP = "(^|[[:space:]&|;])(bash|sh|zsh|dash|ksh)([[:space:]]+-[a-zA-Z]+)*[[:space:]]+-(c|lc|lic|ic|cl|cli|li|il)[[:space:]]+"
184
+ # 0.19.0 security review M1: extend the shell-name set to cover
185
+ # every commonly-installed POSIX-style shell. mksh / oksh / yash /
186
+ # posh ship on minimal containers, csh/tcsh on legacy macOS,
187
+ # fish on dev workstations. Each accepts -c with a quoted body.
188
+ # NOTE: pwsh (PowerShell) uses -Command / -EncodedCommand and is
189
+ # NOT covered here. Adding pwsh requires a separate code path
190
+ # because EncodedCommand base64-decodes at runtime.
191
+ WRAP = "(^|[[:space:]&|;])(bash|sh|zsh|dash|ksh|mksh|oksh|posh|yash|csh|tcsh|fish)([[:space:]]+-[a-zA-Z]+)*[[:space:]]+-(c|lc|lic|ic|cl|cli|li|il)[[:space:]]+"
185
192
  # Track the cursor in BOTH raw and masked. Because the mask is
186
193
  # byte-for-byte width-preserving, the same RSTART/RLENGTH applies
187
194
  # to both — but each iteration of the loop must SLICE both strings
@@ -45,6 +45,10 @@ REA_PROTECTED_PATTERNS_FULL=(
45
45
  '.husky/'
46
46
  '.rea/policy.yaml'
47
47
  '.rea/HALT'
48
+ # 0.19.0 security review C1: the verdict cache is a security boundary
49
+ # since 0.18.1. A forged entry would skip codex on next push of that
50
+ # SHA. Protect it like the kill-switch.
51
+ '.rea/last-review.cache.json'
48
52
  )
49
53
 
50
54
  # Kill-switch invariants — never relaxable. Subset of FULL.
@@ -52,6 +56,7 @@ REA_KILL_SWITCH_INVARIANTS=(
52
56
  '.claude/settings.json'
53
57
  '.rea/policy.yaml'
54
58
  '.rea/HALT'
59
+ '.rea/last-review.cache.json'
55
60
  )
56
61
 
57
62
  # Effective patterns after applying the relax list. Computed lazily on
@@ -102,7 +102,7 @@ FOUND=0
102
102
  # below catches Co-Authored-By with named tools regardless of the email
103
103
  # domain, so dropping `users.noreply.github.com` from the noreply
104
104
  # pattern only relaxes the check for human collaborators — never for AI.
105
- if any_segment_matches "$CMD" 'Co-Authored-By:.*noreply@(anthropic\.com|openai\.com|github-copilot|github\.com|claude\.ai|chatgpt\.com|googlemail\.com|google\.com|cursor\.com|codeium\.com|tabnine\.com|amazon\.com|amazonaws\.com|amazon-q\.amazonaws\.com|cody\.dev|sourcegraph\.com)'; then
105
+ if any_segment_matches "$CMD" 'Co-Authored-By:.*noreply@(anthropic\.com|openai\.com|github-copilot|github\.com|claude\.ai|chatgpt\.com|googlemail\.com|google\.com|cursor\.com|codeium\.com|tabnine\.com|amazon\.com|amazonaws\.com|amazon-q\.amazonaws\.com|cody\.dev|sourcegraph\.com|mistral\.ai|xai-org|x\.ai|inflection\.ai|perplexity\.ai|replit\.com|jetbrains\.com|bito\.ai|pieces\.app|phind\.com|you\.com)'; then
106
106
  FOUND=1
107
107
  fi
108
108
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bookedsolid/rea",
3
- "version": "0.18.0",
3
+ "version": "0.20.0",
4
4
  "description": "Agentic governance layer for Claude Code — policy enforcement, hook-based safety gates, audit logging, and Codex-integrated adversarial review for AI-assisted projects",
5
5
  "license": "MIT",
6
6
  "author": "Booked Solid Technology <oss@bookedsolid.tech> (https://bookedsolid.tech)",
@@ -84,6 +84,7 @@
84
84
  "@typescript-eslint/eslint-plugin": "^8.0.0",
85
85
  "@typescript-eslint/parser": "^8.0.0",
86
86
  "@vitest/coverage-v8": "^3.2.4",
87
+ "ajv": "^8.17.1",
87
88
  "eslint": "^10.2.0",
88
89
  "prettier": "^3.8.1",
89
90
  "typescript": "^5.8.0",
@@ -28,3 +28,11 @@ context_protection:
28
28
  - pnpm run test
29
29
  - pnpm run lint
30
30
  max_bash_output_lines: 100
31
+ # 0.18.1+ helixir #9: enable audit log rotation by default for
32
+ # bst-internal. Long sessions accumulate 100s of push_gate.reviewed
33
+ # entries; without rotation the audit file grows unbounded. The empty
34
+ # `rotation: {}` block opts in to the documented defaults — 50 MiB
35
+ # OR 30 days, whichever arrives first. Rotation marker preserves the
36
+ # hash chain across the boundary.
37
+ audit:
38
+ rotation: {}
@@ -116,13 +116,58 @@ try {
116
116
 
117
117
  if (manifestVersion === installedVersion) process.exit(0);
118
118
 
119
- // Package-manager-agnostic message. Any of `npx rea upgrade`,
119
+ // 0.18.1+ helixir #3: opt-in auto-upgrade. Pre-fix the drift was
120
+ // detected and a "run rea upgrade" nudge printed, but consumers had
121
+ // to run the upgrade by hand on every install. With
122
+ // `REA_AUTO_UPGRADE=1` (or `--yes` semantics inferred from a
123
+ // package.json field), the postinstall runs `rea upgrade --yes`
124
+ // for them. Defaults to PRINT-ONLY for back-compat — silent
125
+ // mutation of the consumer's `.claude/` / `.husky/` on every
126
+ // install would surprise existing users.
127
+ const autoUpgrade =
128
+ process.env.REA_AUTO_UPGRADE === '1' ||
129
+ process.env.REA_AUTO_UPGRADE === 'true';
130
+
131
+ if (autoUpgrade) {
132
+ // Best-effort: invoke `rea upgrade --yes`. Failures fall through to
133
+ // the print path so the consumer still sees the drift advisory.
134
+ try {
135
+ const reaCli = path.join(consumerRoot, 'node_modules', '.bin', 'rea');
136
+ if (fs.existsSync(reaCli)) {
137
+ const { spawnSync } = await import('node:child_process');
138
+ // 0.19.0 backend-engineer P2-1: 5-min wall-clock cap so a hung
139
+ // upgrade falls through to print-only instead of hanging the
140
+ // consumer's `npm install`. 0.19.0 code-reviewer P3-6:
141
+ // Windows shim (.bin/rea.cmd) requires `shell: true` —
142
+ // detect via process.platform.
143
+ const res = spawnSync(reaCli, ['upgrade', '--yes'], {
144
+ cwd: consumerRoot,
145
+ stdio: 'inherit',
146
+ env: process.env,
147
+ timeout: 5 * 60 * 1000,
148
+ shell: process.platform === 'win32',
149
+ });
150
+ if (res.status === 0) {
151
+ NOTE([
152
+ `@bookedsolid/rea: auto-upgraded from v${manifestVersion} to v${installedVersion}.`,
153
+ `(REA_AUTO_UPGRADE=1; set REA_AUTO_UPGRADE=0 to opt out.)`,
154
+ ]);
155
+ process.exit(0);
156
+ }
157
+ }
158
+ } catch {
159
+ // Fall through to the manual-nudge path below.
160
+ }
161
+ }
162
+
163
+ // Package-manager-agnostic nudge. Any of `npx rea upgrade`,
120
164
  // `pnpm exec rea upgrade`, or `yarn rea upgrade` works; recommending `npx`
121
165
  // covers the widest audience without privileging pnpm in error output.
122
166
  NOTE([
123
167
  `@bookedsolid/rea v${installedVersion} installed; manifest at v${manifestVersion}.`,
124
168
  `Run \`npx rea upgrade\` to sync .claude/, .husky/, and managed fragments.`,
125
169
  `(Or \`npx rea doctor --drift\` to preview without changes.)`,
170
+ `(Set \`REA_AUTO_UPGRADE=1\` to auto-run upgrade on future installs.)`,
126
171
  ]);
127
172
  } catch {
128
173
  // Any uncaught failure → silent success. Never break the consumer's install.