@bookedsolid/rea 0.17.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.husky/commit-msg +10 -2
- package/agents/codex-adversarial.md +7 -2
- package/commands/codex-review.md +8 -4
- package/dist/cli/init.js +17 -0
- package/dist/cli/upgrade.js +16 -1
- package/dist/hooks/push-gate/codex-runner.js +18 -7
- package/dist/hooks/push-gate/index.js +80 -1
- package/dist/hooks/push-gate/policy.d.ts +17 -0
- package/dist/hooks/push-gate/policy.js +13 -0
- package/dist/hooks/push-gate/verdict-cache.d.ts +98 -0
- package/dist/hooks/push-gate/verdict-cache.js +190 -0
- package/dist/policy/loader.d.ts +21 -4
- package/dist/policy/loader.js +17 -4
- package/dist/policy/profiles.d.ts +34 -0
- package/dist/policy/profiles.js +15 -0
- package/dist/policy/types.d.ts +11 -0
- package/hooks/_lib/cmd-segments.sh +144 -20
- package/hooks/_lib/policy-read.sh +91 -3
- package/hooks/_lib/protected-paths.sh +78 -7
- package/hooks/attribution-advisory.sh +28 -3
- package/hooks/security-disclosure-gate.sh +17 -0
- package/package.json +1 -1
- package/profiles/bst-internal.yaml +8 -0
- package/scripts/postinstall.mjs +39 -1
package/.husky/commit-msg
CHANGED
|
@@ -78,9 +78,17 @@ BLOCKED=0
|
|
|
78
78
|
MATCHES=""
|
|
79
79
|
|
|
80
80
|
# Pattern 1: Co-Authored-By with noreply@ email
|
|
81
|
-
|
|
81
|
+
# 0.18.0 helix-020 / discord-ops Round 10 #3 fix (G4.B):
|
|
82
|
+
# the pre-fix pattern `Co-Authored-By:.*noreply@` matched both AI-tool
|
|
83
|
+
# noreply addresses AND legitimate `<user>@users.noreply.github.com`
|
|
84
|
+
# collaborator credits — blocking honest co-author footers from human
|
|
85
|
+
# contributors. Refined to enumerate AI-tool noreply domains explicitly;
|
|
86
|
+
# Pattern 2 below catches Co-Authored-By with named tools regardless of
|
|
87
|
+
# email, so dropping users.noreply.github.com from this branch only
|
|
88
|
+
# relaxes the check for human collaborators — never for AI.
|
|
89
|
+
if grep -qiE 'Co-Authored-By:.*noreply@(anthropic\.com|openai\.com|github-copilot|github\.com|claude\.ai|chatgpt\.com|googlemail\.com|google\.com|cursor\.com|codeium\.com|tabnine\.com|amazon\.com|amazonaws\.com|amazon-q\.amazonaws\.com|cody\.dev|sourcegraph\.com)' "$COMMIT_MSG_FILE" 2>/dev/null; then
|
|
82
90
|
BLOCKED=1
|
|
83
|
-
MATCHES="${MATCHES}$(grep -niE 'Co-Authored-By:.*noreply@' "$COMMIT_MSG_FILE" 2>/dev/null)
|
|
91
|
+
MATCHES="${MATCHES}$(grep -niE 'Co-Authored-By:.*noreply@(anthropic\.com|openai\.com|github-copilot|github\.com|claude\.ai|chatgpt\.com|googlemail\.com|google\.com|cursor\.com|codeium\.com|tabnine\.com|amazon\.com|amazonaws\.com|amazon-q\.amazonaws\.com|cody\.dev|sourcegraph\.com)' "$COMMIT_MSG_FILE" 2>/dev/null)
|
|
84
92
|
"
|
|
85
93
|
fi
|
|
86
94
|
|
|
@@ -32,13 +32,18 @@ You may read additional files in the repo if needed for context, but do so read-
|
|
|
32
32
|
1. **Check HALT and policy** — read `.rea/policy.yaml`, check `.rea/HALT`. If frozen, stop immediately.
|
|
33
33
|
2. **Validate Codex availability** — if `/codex` is not installed, report and stop. Do not silently fall back to another reviewer.
|
|
34
34
|
3. **Prepare the Codex invocation** — construct the adversarial-review prompt with the diff, commit log, and any relevant context files.
|
|
35
|
-
4. **Invoke `/codex:adversarial-review`** —
|
|
35
|
+
4. **Invoke `/codex:adversarial-review --model gpt-5.4`** — pass the `--model` flag explicitly to pin the iron-gate model regardless of plugin defaults or `~/.codex/config.toml` resolution. The codex-companion script accepts `--model` (see `codex-companion.mjs:684`). This call flows through the REA middleware chain (audit → kill-switch → tier → policy → redact → injection → execute → result-size-cap).
|
|
36
36
|
|
|
37
37
|
**Model pinning (0.16.1+):** when the codex plugin's adversarial-review supports model overrides, request `gpt-5.4` with `model_reasoning_effort: high` to match the push-gate's iron-gate defaults. Pre-0.16.1, in-session adversarial reviews ran on whatever the plugin defaulted to (likely `codex-auto-review` at medium reasoning) — meaningfully WEAKER than the push-gate's `gpt-5.4` + `high`. This caused an "in-session review passes, push-gate review fails" pattern reported by helix across 014 / 015 / 016. If the plugin call accepts model parameters, pass them. If it does not, fall back to invoking `codex exec review --base <ref> --json --ephemeral -c model="gpt-5.4" -c model_reasoning_effort="high"` directly via `Bash` — same shape the push-gate uses (see `src/hooks/push-gate/codex-runner.ts::runCodexReview`). The cost of the stronger model is small relative to the cost of shipping a release with a P1 bypass that gets caught at consumer push time.
|
|
38
38
|
5. **Parse the Codex output** — extract structured findings.
|
|
39
39
|
6. **Classify findings** by category: security, correctness, edge cases, test gaps, API design, performance.
|
|
40
40
|
7. **Assign verdict**: `pass` (no material findings), `concerns` (findings worth addressing but not blocking), `blocking` (findings that must be fixed before merge).
|
|
41
|
-
8. **Emit an audit entry — REQUIRED** for every `/codex-review` invocation.
|
|
41
|
+
8. **Emit an audit entry — REQUIRED** for every `/codex-review` invocation. This is one of three identical contract checkpoints:
|
|
42
|
+
- The runtime always emits (`src/hooks/push-gate/index.ts` calls `appendAuditRecord` via `safeAppend` on every completed review — see `EVT_REVIEWED`).
|
|
43
|
+
- This agent always emits (this step).
|
|
44
|
+
- The `/codex-review` slash command's Step 3 verifies the entry exists and surfaces "review never happened" as a failure if it does not.
|
|
45
|
+
|
|
46
|
+
The pre-push gate does not consult audit records to decide pass/fail (post-0.11.0 the gate is stateless), but the audit record is still the operator's only forensic trail for an interactive review. Without it, "did this review actually happen" becomes unanswerable. Reconciled in 0.18.0 (helixir Finding #6 across cycles 1–7) so the three documents — `commands/codex-review.md`, `agents/codex-adversarial.md`, `src/hooks/push-gate/index.ts` — describe the same contract in identical wording. Append via the public `@bookedsolid/rea/audit` helper:
|
|
42
47
|
|
|
43
48
|
```ts
|
|
44
49
|
import { appendAuditRecord, CODEX_REVIEW_TOOL_NAME, CODEX_REVIEW_SERVER_NAME, Tier, InvocationStatus } from '@bookedsolid/rea/audit';
|
package/commands/codex-review.md
CHANGED
|
@@ -55,17 +55,21 @@ Invoke the `codex-adversarial` agent with:
|
|
|
55
55
|
|
|
56
56
|
The agent wraps `/codex:adversarial-review` and returns structured findings.
|
|
57
57
|
|
|
58
|
-
## Step 3 —
|
|
58
|
+
## Step 3 — Verify audit entry — REQUIRED
|
|
59
59
|
|
|
60
|
-
|
|
60
|
+
The `codex-adversarial` agent **MUST** emit an audit entry for every invocation. This is the same contract documented in `agents/codex-adversarial.md` step 8 ("Emit an audit entry") and matches the runtime behavior of `rea hook push-gate` (which always calls `appendAuditRecord` on a completed review — see `src/hooks/push-gate/index.ts`'s `EVT_REVIEWED` path).
|
|
61
61
|
|
|
62
|
-
|
|
62
|
+
Verify the entry was written:
|
|
63
63
|
|
|
64
64
|
```bash
|
|
65
65
|
tail -n 1 .rea/audit.jsonl
|
|
66
66
|
```
|
|
67
67
|
|
|
68
|
-
|
|
68
|
+
The expected entry has `tool_name: "codex.review"`, `server_name: "codex"`, and `metadata` containing `head_sha`, `target`, `finding_count`, and `verdict`. If the entry is missing, the review **did not complete its contract** — surface that to the user as a failure.
|
|
69
|
+
|
|
70
|
+
**Why audit emission is required even though the pre-push gate is stateless:** the 0.11.0 push-gate decides pass/fail on Codex's live verdict, not on a receipt in the audit log — but the audit record is still the operator's only forensic trail for an interactive `/codex-review` run. Without it, "did this review actually happen" becomes unanswerable, which is exactly the failure mode helixir flagged across rounds 65/66/73 in the 0.13–0.17 cycle. Runtime always emits; the agent always emits; the slash command verifies. Three checkpoints, one contract.
|
|
71
|
+
|
|
72
|
+
(Earlier docs in 0.15+ said this step was "optional"; that wording contradicted both the agent's step 8 ("Emit an audit entry") and the runtime behavior of `safeAppend` in `src/hooks/push-gate/index.ts`. Reconciled in 0.18.0 — helixir Finding #6 across cycles 1–7.)
|
|
69
73
|
|
|
70
74
|
## Step 4 — Report
|
|
71
75
|
|
package/dist/cli/init.js
CHANGED
|
@@ -297,6 +297,23 @@ function writePolicyYaml(targetDir, config, layered) {
|
|
|
297
297
|
lines.push(` max_bash_output_lines: ${cp.max_bash_output_lines}`);
|
|
298
298
|
}
|
|
299
299
|
}
|
|
300
|
+
// 0.18.1+ helixir #9: emit audit.rotation when the layered profile
|
|
301
|
+
// declared it. Empty `rotation: {}` opts in to documented defaults
|
|
302
|
+
// (50 MiB / 30 days); explicit values override.
|
|
303
|
+
if (layered.audit !== undefined) {
|
|
304
|
+
lines.push(`audit:`);
|
|
305
|
+
if (layered.audit.rotation !== undefined) {
|
|
306
|
+
const rot = layered.audit.rotation;
|
|
307
|
+
const hasFields = rot.max_bytes !== undefined || rot.max_age_days !== undefined;
|
|
308
|
+
lines.push(hasFields ? ` rotation:` : ` rotation: {}`);
|
|
309
|
+
if (rot.max_bytes !== undefined) {
|
|
310
|
+
lines.push(` max_bytes: ${rot.max_bytes}`);
|
|
311
|
+
}
|
|
312
|
+
if (rot.max_age_days !== undefined) {
|
|
313
|
+
lines.push(` max_age_days: ${rot.max_age_days}`);
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
}
|
|
300
317
|
// G11.4: always emit the review block explicitly. Making the value
|
|
301
318
|
// visible in the generated file helps the operator notice what was
|
|
302
319
|
// chosen at init time and simplifies switching modes later (edit a
|
package/dist/cli/upgrade.js
CHANGED
|
@@ -635,7 +635,22 @@ export async function runUpgrade(options = {}) {
|
|
|
635
635
|
}
|
|
636
636
|
const now = new Date().toISOString();
|
|
637
637
|
const installedAt = existingManifest?.installed_at ?? now;
|
|
638
|
-
|
|
638
|
+
// 0.18.0 helix-020 G6 fix: pre-fix, the upgrade path read profile from
|
|
639
|
+
// the existing manifest only — and pre-0.2.0 manifests recorded
|
|
640
|
+
// `"unknown"` as a placeholder. Every subsequent `rea upgrade` then
|
|
641
|
+
// re-stamped `"unknown"` forever. Authoritative source for the
|
|
642
|
+
// profile is `.rea/policy.yaml`; the manifest is a derivative
|
|
643
|
+
// record. Read policy first; fall back to existing manifest only
|
|
644
|
+
// when policy load fails (covers the bootstrap case where the
|
|
645
|
+
// manifest exists but policy is malformed).
|
|
646
|
+
let profile;
|
|
647
|
+
try {
|
|
648
|
+
const livePolicy = loadPolicy(resolvedRoot);
|
|
649
|
+
profile = livePolicy.profile;
|
|
650
|
+
}
|
|
651
|
+
catch {
|
|
652
|
+
profile = existingManifest?.profile ?? 'unknown';
|
|
653
|
+
}
|
|
639
654
|
const freshManifest = {
|
|
640
655
|
version: getPkgVersion(),
|
|
641
656
|
profile,
|
|
@@ -136,18 +136,29 @@ function escapeTomlString(value) {
|
|
|
136
136
|
*/
|
|
137
137
|
export async function runCodexReview(options) {
|
|
138
138
|
const spawner = options.spawnImpl ?? spawn;
|
|
139
|
+
// 0.18.0 iron-gate runtime default: ALWAYS pass model + reasoning
|
|
140
|
+
// effort to codex. Pre-fix, undefined options fell back to codex's
|
|
141
|
+
// own default (`codex-auto-review` at medium reasoning), which
|
|
142
|
+
// bypassed the iron-gate intent and let weaker reviews ship. Now
|
|
143
|
+
// the runtime hardcodes `gpt-5.4` + `high` as the floor; policy
|
|
144
|
+
// can OVERRIDE to a different model/effort but cannot opt out into
|
|
145
|
+
// codex's defaults (config.toml or otherwise). The user's directive
|
|
146
|
+
// — "we want codex to be using its BEST. EVERY TIME" — is enforced
|
|
147
|
+
// here, not at the policy layer.
|
|
148
|
+
//
|
|
139
149
|
// Model + reasoning overrides go BEFORE the `exec` subcommand because
|
|
140
150
|
// `-c key=value` is a top-level codex CLI flag, not an `exec` flag.
|
|
141
151
|
// Codex's TOML parser interprets the value, so we wrap strings in TOML
|
|
142
152
|
// quotes — `-c model="gpt-5.4"` not `-c model=gpt-5.4` — to ensure the
|
|
143
153
|
// value lands as a string regardless of upstream parsing changes.
|
|
144
|
-
const
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
154
|
+
const effectiveModel = options.model !== undefined && options.model.length > 0 ? options.model : 'gpt-5.4';
|
|
155
|
+
const effectiveReasoning = options.reasoningEffort ?? 'high';
|
|
156
|
+
const overrideArgs = [
|
|
157
|
+
'-c',
|
|
158
|
+
`model="${escapeTomlString(effectiveModel)}"`,
|
|
159
|
+
'-c',
|
|
160
|
+
`model_reasoning_effort="${escapeTomlString(effectiveReasoning)}"`,
|
|
161
|
+
];
|
|
151
162
|
const baseArgs = [
|
|
152
163
|
...overrideArgs,
|
|
153
164
|
'exec',
|
|
@@ -30,6 +30,7 @@ import { resolveBaseRef } from './base.js';
|
|
|
30
30
|
import { createRealGitExecutor, runCodexReview, CodexNotInstalledError, CodexProtocolError, CodexSubprocessError, CodexTimeoutError, } from './codex-runner.js';
|
|
31
31
|
import { summarizeReview } from './findings.js';
|
|
32
32
|
import { renderBanner, writeLastReview } from './report.js';
|
|
33
|
+
import { isFlip, lookupVerdict, writeVerdict, } from './verdict-cache.js';
|
|
33
34
|
/**
|
|
34
35
|
* Parse the raw pre-push stdin text into refspecs. Each line is four
|
|
35
36
|
* whitespace-separated fields. Blank lines and malformed lines are
|
|
@@ -72,6 +73,8 @@ const EVT_DISABLED = 'rea.push_gate.disabled';
|
|
|
72
73
|
const EVT_SKIPPED = 'rea.push_gate.skipped';
|
|
73
74
|
const EVT_EMPTY = 'rea.push_gate.empty_diff';
|
|
74
75
|
const EVT_ERROR = 'rea.push_gate.error';
|
|
76
|
+
const EVT_CACHE_HIT = 'rea.push_gate.cache_hit';
|
|
77
|
+
const EVT_VERDICT_FLIP = 'rea.push_gate.verdict_flip';
|
|
75
78
|
// ---------------------------------------------------------------------------
|
|
76
79
|
// Composer
|
|
77
80
|
// ---------------------------------------------------------------------------
|
|
@@ -335,7 +338,46 @@ export async function runPushGate(deps) {
|
|
|
335
338
|
headSha,
|
|
336
339
|
};
|
|
337
340
|
}
|
|
338
|
-
//
|
|
341
|
+
// 6a. Verdict cache lookup (0.18.1 helixir #1, #4, #7, #8). Same-SHA
|
|
342
|
+
// pushes within the configured TTL skip the codex invocation and
|
|
343
|
+
// reuse the cached verdict — durable PASS. Cache is bypassed when
|
|
344
|
+
// policy.review.cache_ttl_ms is 0. Cache miss / expired falls
|
|
345
|
+
// through to the codex call below.
|
|
346
|
+
const cacheLookup = policy.cache_ttl_ms > 0 ? lookupVerdict(deps.baseDir, headSha) : { hit: false };
|
|
347
|
+
if (cacheLookup.hit && cacheLookup.entry !== undefined) {
|
|
348
|
+
const cached = cacheLookup.entry;
|
|
349
|
+
const cachedBlocked = cached.verdict === 'blocking'
|
|
350
|
+
|| (cached.verdict === 'concerns' && policy.concerns_blocks && !isConcernsOverrideSet(env));
|
|
351
|
+
await safeAppend(appendAuditFn, deps.baseDir, EVT_CACHE_HIT, {
|
|
352
|
+
verdict: cached.verdict,
|
|
353
|
+
finding_count: cached.finding_count,
|
|
354
|
+
base_ref: base.ref,
|
|
355
|
+
base_source: base.source,
|
|
356
|
+
head_sha: headSha,
|
|
357
|
+
cached_reviewed_at: cached.reviewed_at,
|
|
358
|
+
cached_model: cached.model,
|
|
359
|
+
cached_reasoning_effort: cached.reasoning_effort,
|
|
360
|
+
blocked: cachedBlocked,
|
|
361
|
+
});
|
|
362
|
+
return {
|
|
363
|
+
status: cachedBlocked
|
|
364
|
+
? cached.verdict === 'blocking'
|
|
365
|
+
? 'blocking'
|
|
366
|
+
: 'concerns'
|
|
367
|
+
: cached.verdict === 'blocking'
|
|
368
|
+
? 'blocking'
|
|
369
|
+
: cached.verdict === 'concerns'
|
|
370
|
+
? 'concerns'
|
|
371
|
+
: 'pass',
|
|
372
|
+
exitCode: cachedBlocked ? 2 : 0,
|
|
373
|
+
summary: `${cached.verdict}: ${cached.finding_count} finding(s) (cached)`,
|
|
374
|
+
verdict: cached.verdict,
|
|
375
|
+
findingCount: cached.finding_count,
|
|
376
|
+
baseRef: base.ref,
|
|
377
|
+
headSha,
|
|
378
|
+
};
|
|
379
|
+
}
|
|
380
|
+
// 6b. Run Codex. Typed errors translate to exit 2 with distinct stderr.
|
|
339
381
|
try {
|
|
340
382
|
const codexResult = await runCodexFn({
|
|
341
383
|
baseRef: base.ref,
|
|
@@ -372,6 +414,40 @@ export async function runPushGate(deps) {
|
|
|
372
414
|
blocked,
|
|
373
415
|
lastReviewPath,
|
|
374
416
|
}));
|
|
417
|
+
// 0.18.1 verdict cache write + flip detection. The lookup at step
|
|
418
|
+
// 6a already returned miss/expired; if `cacheLookup.entry` is set,
|
|
419
|
+
// a stale entry existed — compare its verdict to the fresh one and
|
|
420
|
+
// emit a flip event when they differ. Operators can grep
|
|
421
|
+
// `rea.push_gate.verdict_flip` in the audit log to detect codex
|
|
422
|
+
// non-determinism (helixir #8).
|
|
423
|
+
if (policy.cache_ttl_ms > 0) {
|
|
424
|
+
const flipped = isFlip(cacheLookup.entry, summary.verdict);
|
|
425
|
+
if (flipped && cacheLookup.entry !== undefined) {
|
|
426
|
+
await safeAppend(appendAuditFn, deps.baseDir, EVT_VERDICT_FLIP, {
|
|
427
|
+
head_sha: headSha,
|
|
428
|
+
prior_verdict: cacheLookup.entry.verdict,
|
|
429
|
+
fresh_verdict: summary.verdict,
|
|
430
|
+
prior_reviewed_at: cacheLookup.entry.reviewed_at,
|
|
431
|
+
base_ref: base.ref,
|
|
432
|
+
});
|
|
433
|
+
}
|
|
434
|
+
const entry = {
|
|
435
|
+
verdict: summary.verdict,
|
|
436
|
+
finding_count: summary.findings.length,
|
|
437
|
+
reviewed_at: deps.now !== undefined ? deps.now().toISOString() : new Date().toISOString(),
|
|
438
|
+
model: policy.codex_model ?? 'gpt-5.4',
|
|
439
|
+
reasoning_effort: policy.codex_reasoning_effort ?? 'high',
|
|
440
|
+
ttl_ms: policy.cache_ttl_ms,
|
|
441
|
+
};
|
|
442
|
+
try {
|
|
443
|
+
writeVerdict(deps.baseDir, headSha, entry);
|
|
444
|
+
}
|
|
445
|
+
catch {
|
|
446
|
+
// Cache writes are best-effort. A failure here must NOT
|
|
447
|
+
// affect the verdict — log to stderr (already done by the
|
|
448
|
+
// caller via banner) and proceed.
|
|
449
|
+
}
|
|
450
|
+
}
|
|
375
451
|
await safeAppend(appendAuditFn, deps.baseDir, EVT_REVIEWED, {
|
|
376
452
|
verdict: summary.verdict,
|
|
377
453
|
finding_count: summary.findings.length,
|
|
@@ -386,6 +462,9 @@ export async function runPushGate(deps) {
|
|
|
386
462
|
last_n_commits_requested: base.lastNCommitsRequested,
|
|
387
463
|
auto_narrowed: autoNarrowed ? true : undefined,
|
|
388
464
|
original_commit_count: originalCommitCount !== null ? originalCommitCount : undefined,
|
|
465
|
+
flipped: cacheLookup.entry !== undefined && isFlip(cacheLookup.entry, summary.verdict)
|
|
466
|
+
? true
|
|
467
|
+
: undefined,
|
|
389
468
|
});
|
|
390
469
|
if (blocked) {
|
|
391
470
|
return {
|
|
@@ -56,6 +56,12 @@ export interface ResolvedReviewPolicy {
|
|
|
56
56
|
* codex's own default (currently `medium`).
|
|
57
57
|
*/
|
|
58
58
|
codex_reasoning_effort: 'low' | 'medium' | 'high' | undefined;
|
|
59
|
+
/**
|
|
60
|
+
* Verdict cache TTL in milliseconds (0.18.1+). `0` disables caching;
|
|
61
|
+
* positive values enable the same-SHA short-circuit. Default 86_400_000
|
|
62
|
+
* (24 hours) when policy.review.cache_ttl_ms is unset.
|
|
63
|
+
*/
|
|
64
|
+
cache_ttl_ms: number;
|
|
59
65
|
/** `true` when `.rea/policy.yaml` was absent; defaults apply. */
|
|
60
66
|
policyMissing: boolean;
|
|
61
67
|
}
|
|
@@ -97,6 +103,17 @@ export declare const PUSH_GATE_DEFAULT_CODEX_MODEL = "gpt-5.4";
|
|
|
97
103
|
* `.rea/policy.yaml` for cost-bounded environments.
|
|
98
104
|
*/
|
|
99
105
|
export declare const PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT: 'low' | 'medium' | 'high';
|
|
106
|
+
/**
|
|
107
|
+
* Default verdict-cache TTL in milliseconds (0.18.1+). 24 hours: long
|
|
108
|
+
* enough to amortize multi-push iteration of the same SHA (push, push
|
|
109
|
+
* --force-with-lease after a quick fixup, push again post-rebase),
|
|
110
|
+
* short enough that a stale cache from yesterday doesn't suppress
|
|
111
|
+
* review of code whose context (env, dependencies, .rea/policy.yaml)
|
|
112
|
+
* has changed. Operators can shorten to a few minutes for tighter
|
|
113
|
+
* loops or extend via `policy.review.cache_ttl_ms`. `0` disables
|
|
114
|
+
* caching — every push re-invokes codex (pre-0.18.1 behavior).
|
|
115
|
+
*/
|
|
116
|
+
export declare const PUSH_GATE_DEFAULT_CACHE_TTL_MS: number;
|
|
100
117
|
/**
|
|
101
118
|
* Resolve the push-gate policy for `baseDir`. Never throws — a malformed
|
|
102
119
|
* policy file surfaces as a typed error via the underlying zod validator,
|
|
@@ -66,6 +66,17 @@ export const PUSH_GATE_DEFAULT_CODEX_MODEL = 'gpt-5.4';
|
|
|
66
66
|
* `.rea/policy.yaml` for cost-bounded environments.
|
|
67
67
|
*/
|
|
68
68
|
export const PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT = 'high';
|
|
69
|
+
/**
|
|
70
|
+
* Default verdict-cache TTL in milliseconds (0.18.1+). 24 hours: long
|
|
71
|
+
* enough to amortize multi-push iteration of the same SHA (push, push
|
|
72
|
+
* --force-with-lease after a quick fixup, push again post-rebase),
|
|
73
|
+
* short enough that a stale cache from yesterday doesn't suppress
|
|
74
|
+
* review of code whose context (env, dependencies, .rea/policy.yaml)
|
|
75
|
+
* has changed. Operators can shorten to a few minutes for tighter
|
|
76
|
+
* loops or extend via `policy.review.cache_ttl_ms`. `0` disables
|
|
77
|
+
* caching — every push re-invokes codex (pre-0.18.1 behavior).
|
|
78
|
+
*/
|
|
79
|
+
export const PUSH_GATE_DEFAULT_CACHE_TTL_MS = 24 * 60 * 60 * 1_000;
|
|
69
80
|
/**
|
|
70
81
|
* Resolve the push-gate policy for `baseDir`. Never throws — a malformed
|
|
71
82
|
* policy file surfaces as a typed error via the underlying zod validator,
|
|
@@ -87,6 +98,7 @@ export async function resolvePushGatePolicy(baseDir) {
|
|
|
87
98
|
auto_narrow_threshold: PUSH_GATE_DEFAULT_AUTO_NARROW_THRESHOLD,
|
|
88
99
|
codex_model: PUSH_GATE_DEFAULT_CODEX_MODEL,
|
|
89
100
|
codex_reasoning_effort: PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT,
|
|
101
|
+
cache_ttl_ms: PUSH_GATE_DEFAULT_CACHE_TTL_MS,
|
|
90
102
|
policyMissing: true,
|
|
91
103
|
};
|
|
92
104
|
}
|
|
@@ -100,6 +112,7 @@ export async function resolvePushGatePolicy(baseDir) {
|
|
|
100
112
|
auto_narrow_threshold: review.auto_narrow_threshold ?? PUSH_GATE_DEFAULT_AUTO_NARROW_THRESHOLD,
|
|
101
113
|
codex_model: review.codex_model ?? PUSH_GATE_DEFAULT_CODEX_MODEL,
|
|
102
114
|
codex_reasoning_effort: review.codex_reasoning_effort ?? PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT,
|
|
115
|
+
cache_ttl_ms: review.cache_ttl_ms ?? PUSH_GATE_DEFAULT_CACHE_TTL_MS,
|
|
103
116
|
policyMissing: false,
|
|
104
117
|
};
|
|
105
118
|
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Durable verdict cache for the push-gate (helixir #1, #4, #7, #8 / 0.18.1).
|
|
3
|
+
*
|
|
4
|
+
* Pre-0.18.1 the push-gate was strictly stateless: every push of the same
|
|
5
|
+
* `head_sha` invoked `codex exec review` afresh. helixir round 82 reproduced
|
|
6
|
+
* the failure mode — push #1 of `9fbdfb63` returned PASS, push #2 of the
|
|
7
|
+
* IDENTICAL commit returned CONCERNS — 1 P2. The verdict instability is
|
|
8
|
+
* a property of codex's stochastic decoding at `reasoning_effort: high`;
|
|
9
|
+
* rea cannot eliminate it, but rea CAN make a clean PASS DURABLE so the
|
|
10
|
+
* second push of the same SHA doesn't roll the dice again.
|
|
11
|
+
*
|
|
12
|
+
* Design:
|
|
13
|
+
*
|
|
14
|
+
* .rea/last-review.cache.json
|
|
15
|
+
* {
|
|
16
|
+
* schema_version: 2,
|
|
17
|
+
* entries: {
|
|
18
|
+
* "<head_sha>": {
|
|
19
|
+
* verdict: "pass" | "concerns" | "blocking",
|
|
20
|
+
* finding_count: number,
|
|
21
|
+
* reviewed_at: ISO8601,
|
|
22
|
+
* model: string,
|
|
23
|
+
* reasoning_effort: "low" | "medium" | "high",
|
|
24
|
+
* ttl_ms: number, // policy.review.cache_ttl_ms at write time
|
|
25
|
+
* },
|
|
26
|
+
* ...
|
|
27
|
+
* }
|
|
28
|
+
* }
|
|
29
|
+
*
|
|
30
|
+
* - Hit (within TTL): emit `rea.push_gate.cache_hit` audit event, exit
|
|
31
|
+
* with the cached verdict + finding count; codex is NOT invoked.
|
|
32
|
+
* - Miss or expired: invoke codex; on success, write the new entry.
|
|
33
|
+
* - Flip detection: if a new codex result on the same SHA produces a
|
|
34
|
+
* verdict different from the cached one, set `last-review.json.flip_flag = true`,
|
|
35
|
+
* emit `rea.push_gate.verdict_flip`, and overwrite the cache with
|
|
36
|
+
* the fresh result. Operators can detect non-determinism from the
|
|
37
|
+
* audit log alone (helixir #8).
|
|
38
|
+
* - REA_SKIP_CODEX_REVIEW short-circuits BEFORE cache lookup (unchanged).
|
|
39
|
+
*
|
|
40
|
+
* The cache is OPTIONAL by design: setting `policy.review.cache_ttl_ms`
|
|
41
|
+
* to `0` skips both the lookup and the write (the legacy stateless path).
|
|
42
|
+
*/
|
|
43
|
+
import type { Verdict as ReviewVerdict } from './findings.js';
|
|
44
|
+
export declare const VERDICT_CACHE_FILE = "last-review.cache.json";
|
|
45
|
+
export declare const VERDICT_CACHE_SCHEMA_VERSION: 2;
|
|
46
|
+
export declare const DEFAULT_CACHE_TTL_MS: number;
|
|
47
|
+
export interface VerdictCacheEntry {
|
|
48
|
+
verdict: ReviewVerdict;
|
|
49
|
+
finding_count: number;
|
|
50
|
+
reviewed_at: string;
|
|
51
|
+
model: string;
|
|
52
|
+
reasoning_effort: 'low' | 'medium' | 'high';
|
|
53
|
+
ttl_ms: number;
|
|
54
|
+
}
|
|
55
|
+
export interface VerdictCacheLookupResult {
|
|
56
|
+
/** True if a non-expired entry exists for this SHA. */
|
|
57
|
+
hit: boolean;
|
|
58
|
+
/** The entry, present on both hit and miss-of-stale-entry. Used for flip detection. */
|
|
59
|
+
entry?: VerdictCacheEntry;
|
|
60
|
+
/** True if the entry exists but is past TTL. */
|
|
61
|
+
expired?: boolean;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Read the cache file and look up `head_sha`. Missing file, malformed
|
|
65
|
+
* JSON, missing entry, and unsupported schema_version all resolve to a
|
|
66
|
+
* miss with `entry: undefined` — the caller proceeds to codex.
|
|
67
|
+
*/
|
|
68
|
+
export declare function lookupVerdict(baseDir: string, headSha: string, now?: Date): VerdictCacheLookupResult;
|
|
69
|
+
/**
|
|
70
|
+
* Write a fresh verdict entry. Atomic via tmp-file + rename. Unrecognized
|
|
71
|
+
* pre-existing entries are preserved (forward-compat for v3+).
|
|
72
|
+
*/
|
|
73
|
+
export declare function writeVerdict(baseDir: string, headSha: string, entry: VerdictCacheEntry): void;
|
|
74
|
+
/**
|
|
75
|
+
* Detect whether a new verdict contradicts a previously-cached verdict
|
|
76
|
+
* on the same SHA. Used by `runPushGate` to set the flip-flag on
|
|
77
|
+
* last-review.json and emit the `verdict_flip` audit event.
|
|
78
|
+
*/
|
|
79
|
+
export declare function isFlip(prior: VerdictCacheEntry | undefined, fresh: ReviewVerdict): boolean;
|
|
80
|
+
/**
|
|
81
|
+
* Remove a single SHA from the cache. Returns true if the entry existed.
|
|
82
|
+
*/
|
|
83
|
+
export declare function clearVerdict(baseDir: string, headSha: string): boolean;
|
|
84
|
+
/**
|
|
85
|
+
* Remove ALL entries from the cache. Returns the count of removed entries.
|
|
86
|
+
*/
|
|
87
|
+
export declare function clearAll(baseDir: string): number;
|
|
88
|
+
/**
|
|
89
|
+
* Remove entries whose `reviewed_at` is older than `olderThanMs` from `now`.
|
|
90
|
+
* Returns the count of removed entries.
|
|
91
|
+
*/
|
|
92
|
+
export declare function pruneOlderThan(baseDir: string, olderThanMs: number, now?: Date): number;
|
|
93
|
+
/**
|
|
94
|
+
* Read all entries (used by `rea cache stats` / `rea cache show`).
|
|
95
|
+
* Returns empty object on any read error (missing file, malformed JSON,
|
|
96
|
+
* unsupported schema_version).
|
|
97
|
+
*/
|
|
98
|
+
export declare function listEntries(baseDir: string): Record<string, VerdictCacheEntry>;
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Durable verdict cache for the push-gate (helixir #1, #4, #7, #8 / 0.18.1).
|
|
3
|
+
*
|
|
4
|
+
* Pre-0.18.1 the push-gate was strictly stateless: every push of the same
|
|
5
|
+
* `head_sha` invoked `codex exec review` afresh. helixir round 82 reproduced
|
|
6
|
+
* the failure mode — push #1 of `9fbdfb63` returned PASS, push #2 of the
|
|
7
|
+
* IDENTICAL commit returned CONCERNS — 1 P2. The verdict instability is
|
|
8
|
+
* a property of codex's stochastic decoding at `reasoning_effort: high`;
|
|
9
|
+
* rea cannot eliminate it, but rea CAN make a clean PASS DURABLE so the
|
|
10
|
+
* second push of the same SHA doesn't roll the dice again.
|
|
11
|
+
*
|
|
12
|
+
* Design:
|
|
13
|
+
*
|
|
14
|
+
* .rea/last-review.cache.json
|
|
15
|
+
* {
|
|
16
|
+
* schema_version: 2,
|
|
17
|
+
* entries: {
|
|
18
|
+
* "<head_sha>": {
|
|
19
|
+
* verdict: "pass" | "concerns" | "blocking",
|
|
20
|
+
* finding_count: number,
|
|
21
|
+
* reviewed_at: ISO8601,
|
|
22
|
+
* model: string,
|
|
23
|
+
* reasoning_effort: "low" | "medium" | "high",
|
|
24
|
+
* ttl_ms: number, // policy.review.cache_ttl_ms at write time
|
|
25
|
+
* },
|
|
26
|
+
* ...
|
|
27
|
+
* }
|
|
28
|
+
* }
|
|
29
|
+
*
|
|
30
|
+
* - Hit (within TTL): emit `rea.push_gate.cache_hit` audit event, exit
|
|
31
|
+
* with the cached verdict + finding count; codex is NOT invoked.
|
|
32
|
+
* - Miss or expired: invoke codex; on success, write the new entry.
|
|
33
|
+
* - Flip detection: if a new codex result on the same SHA produces a
|
|
34
|
+
* verdict different from the cached one, set `last-review.json.flip_flag = true`,
|
|
35
|
+
* emit `rea.push_gate.verdict_flip`, and overwrite the cache with
|
|
36
|
+
* the fresh result. Operators can detect non-determinism from the
|
|
37
|
+
* audit log alone (helixir #8).
|
|
38
|
+
* - REA_SKIP_CODEX_REVIEW short-circuits BEFORE cache lookup (unchanged).
|
|
39
|
+
*
|
|
40
|
+
* The cache is OPTIONAL by design: setting `policy.review.cache_ttl_ms`
|
|
41
|
+
* to `0` skips both the lookup and the write (the legacy stateless path).
|
|
42
|
+
*/
|
|
43
|
+
import fs from 'node:fs';
|
|
44
|
+
import path from 'node:path';
|
|
45
|
+
export const VERDICT_CACHE_FILE = 'last-review.cache.json';
|
|
46
|
+
export const VERDICT_CACHE_SCHEMA_VERSION = 2;
|
|
47
|
+
export const DEFAULT_CACHE_TTL_MS = 24 * 60 * 60 * 1_000; // 24h
|
|
48
|
+
/**
 * Look up the cached verdict recorded for `headSha`.
 *
 * The cache contents come from `readCacheFile(baseDir)`. The lookup resolves
 * to a miss (`hit: false`) when:
 *   - `readCacheFile` returns `undefined`;
 *   - no own entry is recorded for `headSha` (inherited keys such as
 *     `__proto__` never count), or the recorded value is not an object;
 *   - `reviewed_at` does not parse as a date;
 *   - `ttl_ms` is not a finite number (e.g. a hand-edited or truncated
 *     entry) — without this guard the `>=` comparison below is always
 *     false and the entry would be served as a hit forever;
 *   - `reviewed_at` lies in the future relative to `now` (negative age);
 *   - the entry's age has reached its own `ttl_ms` (`expired: true`).
 *
 * A hit short-circuits the codex review, so every doubtful case falls
 * through to a miss rather than to a hit.
 *
 * @param {string} baseDir  Directory handed to `readCacheFile`.
 * @param {string} headSha  Cache key.
 * @param {Date} [now]      Reference time; defaults to the current time.
 * @returns {{ hit: boolean, entry?: object, expired?: boolean }}
 *   `entry` is included whenever a recorded entry object was found, even
 *   on a miss, so the caller can compare it against a fresh verdict.
 */
export function lookupVerdict(baseDir, headSha, now = new Date()) {
    const file = readCacheFile(baseDir);
    if (file === undefined) {
        return { hit: false };
    }
    // Own-property check: a key like "__proto__" must not resolve to
    // Object.prototype and masquerade as an entry.
    if (!Object.prototype.hasOwnProperty.call(file.entries, headSha)) {
        return { hit: false };
    }
    const entry = file.entries[headSha];
    if (entry === undefined || entry === null || typeof entry !== 'object') {
        return { hit: false };
    }
    const reviewedAtMs = Date.parse(entry.reviewed_at);
    if (Number.isNaN(reviewedAtMs)) {
        return { hit: false, entry };
    }
    // A missing / non-numeric TTL cannot be compared meaningfully; never
    // let it turn into a permanent hit.
    if (typeof entry.ttl_ms !== 'number' || !Number.isFinite(entry.ttl_ms)) {
        return { hit: false, entry };
    }
    const ageMs = now.getTime() - reviewedAtMs;
    // A future-dated timestamp (clock skew or tampering) is not "fresh".
    if (ageMs < 0) {
        return { hit: false, entry };
    }
    if (ageMs >= entry.ttl_ms) {
        return { hit: false, entry, expired: true };
    }
    return { hit: true, entry };
}
|
|
69
|
+
/**
 * Record `entry` as the verdict for `headSha` in the cache file under
 * `<baseDir>/.rea/`.
 *
 * The new file content is the entries returned by `readCacheFile(baseDir)`
 * (if any) plus/overridden by `headSha`, stamped with the current
 * `VERDICT_CACHE_SCHEMA_VERSION`. The content is written to a
 * `<cache>.tmp.<pid>` sibling and then renamed over the cache file, so a
 * reader never observes a partially written file.
 *
 * NOTE(review): whether entries from a file with an unsupported
 * schema_version survive depends on what `readCacheFile` returns for such
 * files — confirm against that helper.
 *
 * @param {string} baseDir  Directory containing (or to contain) `.rea/`.
 * @param {string} headSha  Cache key.
 * @param {object} entry    Verdict entry to store.
 * @throws Propagates any filesystem error from mkdir / write / rename.
 *   The temp file is removed first so failed writes do not accumulate
 *   `*.tmp.<pid>` litter.
 */
export function writeVerdict(baseDir, headSha, entry) {
    const reaDir = path.join(baseDir, '.rea');
    // `recursive: true` is a no-op when the directory already exists, so
    // no separate existence check (and no check-then-create race).
    fs.mkdirSync(reaDir, { recursive: true });
    const cachePath = path.join(reaDir, VERDICT_CACHE_FILE);
    const existing = readCacheFile(baseDir);
    const next = {
        schema_version: VERDICT_CACHE_SCHEMA_VERSION,
        entries: { ...(existing?.entries ?? {}), [headSha]: entry },
    };
    const tmp = `${cachePath}.tmp.${process.pid}`;
    try {
        fs.writeFileSync(tmp, `${JSON.stringify(next, null, 2)}\n`, 'utf8');
        fs.renameSync(tmp, cachePath);
    }
    catch (err) {
        // Best-effort cleanup; the original failure is what the caller
        // needs to see, so a cleanup error is deliberately ignored.
        try {
            fs.unlinkSync(tmp);
        }
        catch {
            // temp file was never created or is already gone
        }
        throw err;
    }
}
|
|
88
|
+
/**
 * Report whether a fresh verdict contradicts the verdict previously cached
 * for the same SHA. Used by `runPushGate` to set the flip-flag on
 * last-review.json and emit the `verdict_flip` audit event.
 *
 * @param {{ verdict: string } | null | undefined} prior - Previously cached
 *   entry, or nothing when the SHA had no cached verdict.
 * @param {string} fresh - Verdict produced by the new review.
 * @returns {boolean} `true` only when a prior entry exists and its `verdict`
 *   differs from `fresh`.
 */
export function isFlip(prior, fresh) {
  // No prior entry means there is nothing to contradict. `null` is treated
  // like `undefined` so a null entry cannot throw on `.verdict`.
  if (prior === undefined || prior === null) {
    return false;
  }
  return prior.verdict !== fresh;
}
|
|
98
|
+
/**
 * Remove the entry for a single SHA from the cache.
 *
 * The cache is left untouched when there is no readable cache file or when
 * `headSha` is not one of its own keys. Otherwise the remaining entries are
 * rewritten through a temp file + rename (the same approach `writeVerdict`
 * uses) so an interrupted write cannot leave truncated JSON behind.
 *
 * @param {string} baseDir - Directory that contains the `.rea/` folder.
 * @param {string} headSha - Commit SHA whose entry should be dropped.
 * @returns {boolean} `true` if an entry existed and was removed.
 * @throws {Error} Any filesystem error raised while rewriting the cache.
 */
export function clearVerdict(baseDir, headSha) {
  const file = readCacheFile(baseDir);
  // Own-property check so inherited names like "constructor" are not reported
  // as removed entries.
  if (file === undefined ||
    !Object.prototype.hasOwnProperty.call(file.entries, headSha)) {
    return false;
  }
  const remaining = Object.fromEntries(
    Object.entries(file.entries).filter(([sha]) => sha !== headSha));
  const next = {
    schema_version: VERDICT_CACHE_SCHEMA_VERSION,
    entries: remaining,
  };
  const cachePath = path.join(baseDir, '.rea', VERDICT_CACHE_FILE);
  const tmp = `${cachePath}.tmp.${process.pid}`;
  try {
    fs.writeFileSync(tmp, `${JSON.stringify(next, null, 2)}\n`, 'utf8');
    fs.renameSync(tmp, cachePath);
  }
  catch (err) {
    try {
      fs.unlinkSync(tmp);
    }
    catch {
      // Best-effort cleanup; the original failure is what gets reported.
    }
    throw err;
  }
  return true;
}
|
|
114
|
+
/**
 * Remove ALL entries from the cache.
 *
 * An empty cache file of the current schema version is always written, even
 * when no readable cache existed beforehand (the `.rea/` directory is created
 * if missing). The write goes through a temp file + rename, matching
 * `writeVerdict`, so readers never see a half-written file.
 *
 * @param {string} baseDir - Directory that contains (or will contain) `.rea/`.
 * @returns {number} Count of entries that were present in the readable cache
 *   before it was emptied; `0` when there was no readable cache.
 * @throws {Error} Any filesystem error raised while writing the cache.
 */
export function clearAll(baseDir) {
  const file = readCacheFile(baseDir);
  const count = file === undefined ? 0 : Object.keys(file.entries).length;
  const cachePath = path.join(baseDir, '.rea', VERDICT_CACHE_FILE);
  // Idempotent: a no-op when the directory already exists.
  fs.mkdirSync(path.dirname(cachePath), { recursive: true });
  const empty = {
    schema_version: VERDICT_CACHE_SCHEMA_VERSION,
    entries: {},
  };
  const tmp = `${cachePath}.tmp.${process.pid}`;
  try {
    fs.writeFileSync(tmp, `${JSON.stringify(empty, null, 2)}\n`, 'utf8');
    fs.renameSync(tmp, cachePath);
  }
  catch (err) {
    try {
      fs.unlinkSync(tmp);
    }
    catch {
      // Best-effort cleanup; the original failure is what gets reported.
    }
    throw err;
  }
  return count;
}
|
|
131
|
+
/**
 * Remove entries whose `reviewed_at` is more than `olderThanMs` before `now`.
 *
 * Entries whose timestamp cannot be parsed (including entries that are not
 * objects, e.g. JSON `null`) are kept: their age is unknown, so they are not
 * counted as "older than" the cutoff. The file is rewritten only when at
 * least one entry was removed, through a temp file + rename (matching
 * `writeVerdict`) so an interrupted write cannot corrupt the cache.
 *
 * @param {string} baseDir - Directory that contains the `.rea/` folder.
 * @param {number} olderThanMs - Maximum age, in milliseconds, an entry may
 *   have and still survive.
 * @param {Date} [now] - Clock override; defaults to the current time.
 * @returns {number} Count of removed entries (`0` when there is no readable
 *   cache).
 * @throws {Error} Any filesystem error raised while rewriting the cache.
 */
export function pruneOlderThan(baseDir, olderThanMs, now = new Date()) {
  const file = readCacheFile(baseDir);
  if (file === undefined) {
    return 0;
  }
  const cutoff = now.getTime() - olderThanMs;
  const surviving = {};
  let removed = 0;
  for (const [sha, entry] of Object.entries(file.entries)) {
    // `?.` keeps a null entry from throwing; it parses to NaN and is kept
    // like any other entry with an unreadable timestamp.
    const reviewedAtMs = Date.parse(entry?.reviewed_at);
    if (Number.isNaN(reviewedAtMs) || reviewedAtMs >= cutoff) {
      surviving[sha] = entry;
    }
    else {
      removed += 1;
    }
  }
  if (removed === 0) {
    return 0;
  }
  const next = {
    schema_version: VERDICT_CACHE_SCHEMA_VERSION,
    entries: surviving,
  };
  const cachePath = path.join(baseDir, '.rea', VERDICT_CACHE_FILE);
  const tmp = `${cachePath}.tmp.${process.pid}`;
  try {
    fs.writeFileSync(tmp, `${JSON.stringify(next, null, 2)}\n`, 'utf8');
    fs.renameSync(tmp, cachePath);
  }
  catch (err) {
    try {
      fs.unlinkSync(tmp);
    }
    catch {
      // Best-effort cleanup; the original failure is what gets reported.
    }
    throw err;
  }
  return removed;
}
|
|
161
|
+
/**
 * Return every cached entry, keyed by SHA (used by `rea cache stats` /
 * `rea cache show`).
 *
 * Any condition that makes the cache unreadable — missing file, malformed
 * JSON, unsupported schema_version — yields an empty object rather than an
 * error.
 *
 * @param {string} baseDir - Directory that contains the `.rea/` folder.
 * @returns {object} Map of SHA to cached entry; empty when nothing is readable.
 */
export function listEntries(baseDir) {
  const cache = readCacheFile(baseDir);
  if (cache === undefined || cache === null) {
    return {};
  }
  const { entries } = cache;
  return entries ?? {};
}
|
|
170
|
+
/**
 * Load and validate `<baseDir>/.rea/<VERDICT_CACHE_FILE>`.
 *
 * This is deliberately best-effort: any read or parse failure (missing file,
 * unreadable file, malformed JSON) is reported as "no cache" rather than
 * thrown, so callers can fall back to a fresh review.
 *
 * @param {string} baseDir - Directory that contains the `.rea/` folder.
 * @returns {{ schema_version: number, entries: object } | undefined} The
 *   parsed file when it is an object carrying the current `schema_version`
 *   and a plain (non-null, non-array) `entries` object; otherwise `undefined`.
 */
function readCacheFile(baseDir) {
  const cachePath = path.join(baseDir, '.rea', VERDICT_CACHE_FILE);
  let parsed;
  try {
    // A missing file surfaces as ENOENT here, so no separate existence check
    // (and no check-then-read race) is needed.
    parsed = JSON.parse(fs.readFileSync(cachePath, 'utf8'));
  }
  catch {
    return undefined;
  }
  if (typeof parsed !== 'object' ||
    parsed === null ||
    parsed.schema_version !== VERDICT_CACHE_SCHEMA_VERSION) {
    return undefined;
  }
  const { entries } = parsed;
  // `typeof [] === 'object'`, so arrays must be rejected explicitly: callers
  // treat `entries` as a SHA-keyed map.
  if (typeof entries !== 'object' || entries === null || Array.isArray(entries)) {
    return undefined;
  }
  return parsed;
}
|