@bookedsolid/rea 0.13.3 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/MIGRATING.md +46 -0
- package/dist/hooks/push-gate/codex-runner.d.ts +14 -0
- package/dist/hooks/push-gate/codex-runner.js +37 -1
- package/dist/hooks/push-gate/index.js +8 -0
- package/dist/hooks/push-gate/policy.d.ts +34 -0
- package/dist/hooks/push-gate/policy.js +25 -0
- package/dist/policy/loader.d.ts +38 -0
- package/dist/policy/loader.js +30 -0
- package/dist/policy/types.d.ts +28 -0
- package/hooks/blocked-paths-enforcer.sh +38 -0
- package/hooks/secret-scanner.sh +30 -0
- package/package.json +1 -1
package/MIGRATING.md
CHANGED
|
@@ -261,6 +261,50 @@ After migration, run `pnpm rea doctor`. The relevant lines:
|
|
|
261
261
|
- `[info] extension-hook fragments detected: N pre-push.d, M
|
|
262
262
|
commit-msg.d` — your fragment chain is active
|
|
263
263
|
|
|
264
|
+
## Codex model knobs (added in 0.14.0)
|
|
265
|
+
|
|
266
|
+
The push-gate now pins the flagship codex model and `high` reasoning
|
|
267
|
+
effort by default. Pre-0.14.0 it used codex's built-in default, which
|
|
268
|
+
is the special-purpose `codex-auto-review` model at `medium`
|
|
269
|
+
reasoning — a meaningfully weaker reviewer than the flagship.
|
|
270
|
+
Same-code-different-verdict thrashing on long-running branches was
|
|
271
|
+
substantially driven by the lower-reasoning default.
|
|
272
|
+
|
|
273
|
+
**Defaults (0.14.0+):**
|
|
274
|
+
|
|
275
|
+
```yaml
|
|
276
|
+
review:
|
|
277
|
+
codex_model: gpt-5.4 # was codex-auto-review (codex's own default)
|
|
278
|
+
codex_reasoning_effort: high # was medium (codex's own default)
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
You don't need to set these — `gpt-5.4` + `high` are baked in at the
|
|
282
|
+
package level. The policy keys exist for cost-bounded environments
|
|
283
|
+
that want to opt into a weaker model:
|
|
284
|
+
|
|
285
|
+
```yaml
|
|
286
|
+
review:
|
|
287
|
+
codex_model: codex-auto-review # opts back into the prior default
|
|
288
|
+
codex_reasoning_effort: medium
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
The model name is passed through to codex's TOML config layer
|
|
292
|
+
(`-c model="…"`); codex itself validates it. An unknown model name
|
|
293
|
+
surfaces as a clear runtime error at first push, not a silent
|
|
294
|
+
fallback. Codex's current catalog (as of 2026-05-03):
|
|
295
|
+
|
|
296
|
+
- `gpt-5.4` — flagship, reasoning-capable (recommended for review)
|
|
297
|
+
- `gpt-5.4-mini` — smaller, faster, cheaper, less reasoning depth
|
|
298
|
+
- `gpt-5.3-codex` — prior generation, code-specialized
|
|
299
|
+
- `gpt-5.3-codex-spark` — even faster prior gen
|
|
300
|
+
- `gpt-5.2` — older, generally avoid for security-relevant review
|
|
301
|
+
- `codex-auto-review` — special-purpose, lower reasoning ceiling
|
|
302
|
+
|
|
303
|
+
Reasoning effort is `low | medium | high`. `high` spends more compute
|
|
304
|
+
per finding and produces more consistent verdicts — fewer
|
|
305
|
+
same-code-different-verdict round-trips. The trade-off is higher push-gate
|
|
306
|
+
latency.
|
|
307
|
+
|
|
264
308
|
## Policy knobs worth setting
|
|
265
309
|
|
|
266
310
|
For consumers with a long-running migration branch (>30 commits since
|
|
@@ -274,6 +318,8 @@ review:
|
|
|
274
318
|
timeout_ms: 1800000 # 30 min — explicit pin
|
|
275
319
|
auto_narrow_threshold: 30 # 0 to disable auto-narrow
|
|
276
320
|
last_n_commits: 10 # explicit scope window
|
|
321
|
+
codex_model: gpt-5.4 # 0.14.0+ default; iron-gate
|
|
322
|
+
codex_reasoning_effort: high # 0.14.0+ default; iron-gate
|
|
277
323
|
```
|
|
278
324
|
|
|
279
325
|
## Bypass when you genuinely need to
|
|
@@ -72,6 +72,20 @@ export interface CodexRunOptions {
|
|
|
72
72
|
timeoutMs: number;
|
|
73
73
|
/** Optional custom review prompt; defaults to Codex's built-in. */
|
|
74
74
|
prompt?: string;
|
|
75
|
+
/**
|
|
76
|
+
* Codex CLI model override (0.14.0+). When set, the runner passes
|
|
77
|
+
* `-c model="<value>"` to `codex exec review`. Codex itself validates
|
|
78
|
+
* the name. `undefined` falls back to codex's own default
|
|
79
|
+
* (`codex-auto-review` today, NOT the `gpt-5.4` flagship).
|
|
80
|
+
*/
|
|
81
|
+
model?: string;
|
|
82
|
+
/**
|
|
83
|
+
* Codex reasoning effort (0.14.0+). When set, the runner passes
|
|
84
|
+
* `-c model_reasoning_effort="<value>"`. Only meaningful when paired
|
|
85
|
+
* with a reasoning-capable model (gpt-5.4, gpt-5.3-codex). Codex's
|
|
86
|
+
* own default is `medium`.
|
|
87
|
+
*/
|
|
88
|
+
reasoningEffort?: 'low' | 'medium' | 'high';
|
|
75
89
|
/**
|
|
76
90
|
* Env passthrough. Tests inject a clean env to prevent ambient overrides.
|
|
77
91
|
* Production passes `process.env`.
|
|
@@ -110,6 +110,22 @@ export function createRealGitExecutor(cwd) {
|
|
|
110
110
|
},
|
|
111
111
|
};
|
|
112
112
|
}
|
|
113
|
+
// ---------------------------------------------------------------------------
|
|
114
|
+
// Codex invocation
|
|
115
|
+
// ---------------------------------------------------------------------------
|
|
116
|
+
/**
|
|
117
|
+
* Escape a string for safe inclusion inside a TOML basic-string literal.
|
|
118
|
+
* Codex's `-c key=value` parser runs the value through TOML, so we have to
|
|
119
|
+
* close over the same escape contract — namely backslash and double-quote
|
|
120
|
+
* (TOML basic strings forbid raw `"` and `\` in the body). The model names
|
|
121
|
+
* and reasoning levels we expect (`gpt-5.4`, `high`, etc.) never contain
|
|
122
|
+
* either character; this guard exists so a future model-name typo with a
|
|
123
|
+
* shell metacharacter cannot smuggle a TOML escape that codex misparses
|
|
124
|
+
* into something dangerous.
|
|
125
|
+
*/
|
|
126
|
+
function escapeTomlString(value) {
|
|
127
|
+
return value.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
|
|
128
|
+
}
|
|
113
129
|
/**
|
|
114
130
|
* Execute `codex exec review` and return the concatenated review text on
|
|
115
131
|
* success. Callers then pass the text to `summarizeReview()` to get a
|
|
@@ -120,7 +136,27 @@ export function createRealGitExecutor(cwd) {
|
|
|
120
136
|
*/
|
|
121
137
|
export async function runCodexReview(options) {
|
|
122
138
|
const spawner = options.spawnImpl ?? spawn;
|
|
123
|
-
|
|
139
|
+
// Model + reasoning overrides go BEFORE the `exec` subcommand because
|
|
140
|
+
// `-c key=value` is a top-level codex CLI flag, not an `exec` flag.
|
|
141
|
+
// Codex's TOML parser interprets the value, so we wrap strings in TOML
|
|
142
|
+
// quotes — `-c model="gpt-5.4"` not `-c model=gpt-5.4` — to ensure the
|
|
143
|
+
// value lands as a string regardless of upstream parsing changes.
|
|
144
|
+
const overrideArgs = [];
|
|
145
|
+
if (options.model !== undefined && options.model.length > 0) {
|
|
146
|
+
overrideArgs.push('-c', `model="${escapeTomlString(options.model)}"`);
|
|
147
|
+
}
|
|
148
|
+
if (options.reasoningEffort !== undefined) {
|
|
149
|
+
overrideArgs.push('-c', `model_reasoning_effort="${escapeTomlString(options.reasoningEffort)}"`);
|
|
150
|
+
}
|
|
151
|
+
const baseArgs = [
|
|
152
|
+
...overrideArgs,
|
|
153
|
+
'exec',
|
|
154
|
+
'review',
|
|
155
|
+
'--base',
|
|
156
|
+
options.baseRef,
|
|
157
|
+
'--json',
|
|
158
|
+
'--ephemeral',
|
|
159
|
+
];
|
|
124
160
|
const args = options.prompt !== undefined && options.prompt.length > 0 ? [...baseArgs, options.prompt] : baseArgs;
|
|
125
161
|
let child;
|
|
126
162
|
try {
|
|
@@ -342,6 +342,14 @@ export async function runPushGate(deps) {
|
|
|
342
342
|
cwd: deps.baseDir,
|
|
343
343
|
timeoutMs: policy.timeout_ms,
|
|
344
344
|
env,
|
|
345
|
+
// 0.14.0+: pass the resolved policy's model + reasoning overrides so
|
|
346
|
+
// codex spawns with `-c model="<name>" -c model_reasoning_effort="<level>"`.
|
|
347
|
+
// Defaults (gpt-5.4 + high) are baked into resolvePushGatePolicy so
|
|
348
|
+
// policies that omit these keys still get the iron-gate defaults.
|
|
349
|
+
...(policy.codex_model !== undefined ? { model: policy.codex_model } : {}),
|
|
350
|
+
...(policy.codex_reasoning_effort !== undefined
|
|
351
|
+
? { reasoningEffort: policy.codex_reasoning_effort }
|
|
352
|
+
: {}),
|
|
345
353
|
});
|
|
346
354
|
const summary = summarizeReview(codexResult.reviewText);
|
|
347
355
|
const blocked = summary.verdict === 'blocking'
|
|
@@ -43,6 +43,19 @@ export interface ResolvedReviewPolicy {
|
|
|
43
43
|
* emits a stderr warning. Defaults to 30 when unset; 0 disables.
|
|
44
44
|
*/
|
|
45
45
|
auto_narrow_threshold: number;
|
|
46
|
+
/**
|
|
47
|
+
* Codex CLI model override (0.14.0+). When set, the runner passes
|
|
48
|
+
* `-c model="<value>"` to every `codex exec review`. `undefined` falls
|
|
49
|
+
* back to codex's own default (currently `codex-auto-review`, NOT the
|
|
50
|
+
* flagship `gpt-5.4`).
|
|
51
|
+
*/
|
|
52
|
+
codex_model: string | undefined;
|
|
53
|
+
/**
|
|
54
|
+
* Codex reasoning effort (0.14.0+). When set, the runner passes
|
|
55
|
+
* `-c model_reasoning_effort="<value>"`. `undefined` falls back to
|
|
56
|
+
* codex's own default (currently `medium`).
|
|
57
|
+
*/
|
|
58
|
+
codex_reasoning_effort: 'low' | 'medium' | 'high' | undefined;
|
|
46
59
|
/** `true` when `.rea/policy.yaml` was absent; defaults apply. */
|
|
47
60
|
policyMissing: boolean;
|
|
48
61
|
}
|
|
@@ -63,6 +76,27 @@ export declare const PUSH_GATE_DEFAULT_AUTO_NARROW_THRESHOLD = 30;
|
|
|
63
76
|
* recent work.
|
|
64
77
|
*/
|
|
65
78
|
export declare const PUSH_GATE_DEFAULT_LAST_N_COMMITS_FALLBACK = 10;
|
|
79
|
+
/**
|
|
80
|
+
* Default codex model for the push-gate (0.14.0+). Pinned to the flagship
|
|
81
|
+
* (`gpt-5.4`) instead of falling through to codex's own default of
|
|
82
|
+
* `codex-auto-review` (a lower-reasoning special-purpose model). Verdict
|
|
83
|
+
* stability matters more than per-push compute cost for adversarial
|
|
84
|
+
* review of consumer codebases — the helixir 2026-04-26 thrashing came
|
|
85
|
+
* from the lower-reasoning default.
|
|
86
|
+
*
|
|
87
|
+
* Override via `policy.review.codex_model: <name>` in `.rea/policy.yaml`
|
|
88
|
+
* for cost-bounded environments. `codex-auto-review` is the explicit
|
|
89
|
+
* opt-in to the prior 0.13.x behavior.
|
|
90
|
+
*/
|
|
91
|
+
export declare const PUSH_GATE_DEFAULT_CODEX_MODEL = "gpt-5.4";
|
|
92
|
+
/**
|
|
93
|
+
* Default codex reasoning effort (0.14.0+). Pinned to `high` for maximum
|
|
94
|
+
* compute per finding — fewer same-code-different-verdict round-trips.
|
|
95
|
+
* Trades latency for stability. Override via
|
|
96
|
+
* `policy.review.codex_reasoning_effort: medium | low` in
|
|
97
|
+
* `.rea/policy.yaml` for cost-bounded environments.
|
|
98
|
+
*/
|
|
99
|
+
export declare const PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT: 'low' | 'medium' | 'high';
|
|
66
100
|
/**
|
|
67
101
|
* Resolve the push-gate policy for `baseDir`. Never throws — a malformed
|
|
68
102
|
* policy file surfaces as a typed error via the underlying zod validator,
|
|
@@ -45,6 +45,27 @@ export const PUSH_GATE_DEFAULT_AUTO_NARROW_THRESHOLD = 30;
|
|
|
45
45
|
* recent work.
|
|
46
46
|
*/
|
|
47
47
|
export const PUSH_GATE_DEFAULT_LAST_N_COMMITS_FALLBACK = 10;
|
|
48
|
+
/**
|
|
49
|
+
* Default codex model for the push-gate (0.14.0+). Pinned to the flagship
|
|
50
|
+
* (`gpt-5.4`) instead of falling through to codex's own default of
|
|
51
|
+
* `codex-auto-review` (a lower-reasoning special-purpose model). Verdict
|
|
52
|
+
* stability matters more than per-push compute cost for adversarial
|
|
53
|
+
* review of consumer codebases — the helixir 2026-04-26 thrashing came
|
|
54
|
+
* from the lower-reasoning default.
|
|
55
|
+
*
|
|
56
|
+
* Override via `policy.review.codex_model: <name>` in `.rea/policy.yaml`
|
|
57
|
+
* for cost-bounded environments. `codex-auto-review` is the explicit
|
|
58
|
+
* opt-in to the prior 0.13.x behavior.
|
|
59
|
+
*/
|
|
60
|
+
export const PUSH_GATE_DEFAULT_CODEX_MODEL = 'gpt-5.4';
|
|
61
|
+
/**
|
|
62
|
+
* Default codex reasoning effort (0.14.0+). Pinned to `high` for maximum
|
|
63
|
+
* compute per finding — fewer same-code-different-verdict round-trips.
|
|
64
|
+
* Trades latency for stability. Override via
|
|
65
|
+
* `policy.review.codex_reasoning_effort: medium | low` in
|
|
66
|
+
* `.rea/policy.yaml` for cost-bounded environments.
|
|
67
|
+
*/
|
|
68
|
+
export const PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT = 'high';
|
|
48
69
|
/**
|
|
49
70
|
* Resolve the push-gate policy for `baseDir`. Never throws — a malformed
|
|
50
71
|
* policy file surfaces as a typed error via the underlying zod validator,
|
|
@@ -64,6 +85,8 @@ export async function resolvePushGatePolicy(baseDir) {
|
|
|
64
85
|
timeout_ms: PUSH_GATE_DEFAULT_TIMEOUT_MS,
|
|
65
86
|
last_n_commits: undefined,
|
|
66
87
|
auto_narrow_threshold: PUSH_GATE_DEFAULT_AUTO_NARROW_THRESHOLD,
|
|
88
|
+
codex_model: PUSH_GATE_DEFAULT_CODEX_MODEL,
|
|
89
|
+
codex_reasoning_effort: PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT,
|
|
67
90
|
policyMissing: true,
|
|
68
91
|
};
|
|
69
92
|
}
|
|
@@ -75,6 +98,8 @@ export async function resolvePushGatePolicy(baseDir) {
|
|
|
75
98
|
timeout_ms: review.timeout_ms ?? PUSH_GATE_DEFAULT_TIMEOUT_MS,
|
|
76
99
|
last_n_commits: review.last_n_commits,
|
|
77
100
|
auto_narrow_threshold: review.auto_narrow_threshold ?? PUSH_GATE_DEFAULT_AUTO_NARROW_THRESHOLD,
|
|
101
|
+
codex_model: review.codex_model ?? PUSH_GATE_DEFAULT_CODEX_MODEL,
|
|
102
|
+
codex_reasoning_effort: review.codex_reasoning_effort ?? PUSH_GATE_DEFAULT_CODEX_REASONING_EFFORT,
|
|
78
103
|
policyMissing: false,
|
|
79
104
|
};
|
|
80
105
|
}
|
package/dist/policy/loader.d.ts
CHANGED
|
@@ -45,18 +45,52 @@ declare const PolicySchema: z.ZodObject<{
|
|
|
45
45
|
* intent and auto-narrow stays out of the way).
|
|
46
46
|
*/
|
|
47
47
|
auto_narrow_threshold: z.ZodOptional<z.ZodNumber>;
|
|
48
|
+
/**
|
|
49
|
+
* Codex CLI model override (0.14.0+). Pinned via `-c model="<name>"` on
|
|
50
|
+
* every `codex exec review` invocation. When unset, codex's own default
|
|
51
|
+
* applies — which today is the special-purpose `codex-auto-review`
|
|
52
|
+
* model at `medium` reasoning, NOT the flagship.
|
|
53
|
+
*
|
|
54
|
+
* For serious adversarial review on consumer codebases (where verdict
|
|
55
|
+
* stability matters) the recommended setting is `gpt-5.4` with
|
|
56
|
+
* `codex_reasoning_effort: high`. Higher reasoning trades push-gate
|
|
57
|
+
* latency for finding consistency — fewer same-code-different-verdict
|
|
58
|
+
* round-trips like the 2026-04-26 helixir migration session.
|
|
59
|
+
*
|
|
60
|
+
* Loose string type: codex's model catalog evolves over time and we do
|
|
61
|
+
* NOT want to lock consumers to a hardcoded enum that drifts behind
|
|
62
|
+
* upstream. Codex itself validates the model name at exec time.
|
|
63
|
+
*/
|
|
64
|
+
codex_model: z.ZodOptional<z.ZodString>;
|
|
65
|
+
/**
|
|
66
|
+
* Codex reasoning effort knob (0.14.0+). Pinned via
|
|
67
|
+
* `-c model_reasoning_effort="<level>"` on every invocation. Only
|
|
68
|
+
* meaningful when paired with a reasoning-capable model (gpt-5.4,
|
|
69
|
+
* gpt-5.3-codex, etc.). The `codex-auto-review` model honors this
|
|
70
|
+
* but caps lower than gpt-5.4.
|
|
71
|
+
*
|
|
72
|
+
* Recommended: `high` for serious review on long-running branches
|
|
73
|
+
* (more compute spent per finding, fewer flips). `medium` is codex's
|
|
74
|
+
* own default. `low` for cost-bounded environments where consistency
|
|
75
|
+
* matters less than throughput.
|
|
76
|
+
*/
|
|
77
|
+
codex_reasoning_effort: z.ZodOptional<z.ZodEnum<["low", "medium", "high"]>>;
|
|
48
78
|
}, "strict", z.ZodTypeAny, {
|
|
49
79
|
codex_required?: boolean | undefined;
|
|
50
80
|
concerns_blocks?: boolean | undefined;
|
|
51
81
|
timeout_ms?: number | undefined;
|
|
52
82
|
last_n_commits?: number | undefined;
|
|
53
83
|
auto_narrow_threshold?: number | undefined;
|
|
84
|
+
codex_model?: string | undefined;
|
|
85
|
+
codex_reasoning_effort?: "low" | "medium" | "high" | undefined;
|
|
54
86
|
}, {
|
|
55
87
|
codex_required?: boolean | undefined;
|
|
56
88
|
concerns_blocks?: boolean | undefined;
|
|
57
89
|
timeout_ms?: number | undefined;
|
|
58
90
|
last_n_commits?: number | undefined;
|
|
59
91
|
auto_narrow_threshold?: number | undefined;
|
|
92
|
+
codex_model?: string | undefined;
|
|
93
|
+
codex_reasoning_effort?: "low" | "medium" | "high" | undefined;
|
|
60
94
|
}>>;
|
|
61
95
|
redact: z.ZodOptional<z.ZodObject<{
|
|
62
96
|
match_timeout_ms: z.ZodOptional<z.ZodNumber>;
|
|
@@ -152,6 +186,8 @@ declare const PolicySchema: z.ZodObject<{
|
|
|
152
186
|
timeout_ms?: number | undefined;
|
|
153
187
|
last_n_commits?: number | undefined;
|
|
154
188
|
auto_narrow_threshold?: number | undefined;
|
|
189
|
+
codex_model?: string | undefined;
|
|
190
|
+
codex_reasoning_effort?: "low" | "medium" | "high" | undefined;
|
|
155
191
|
} | undefined;
|
|
156
192
|
redact?: {
|
|
157
193
|
match_timeout_ms?: number | undefined;
|
|
@@ -197,6 +233,8 @@ declare const PolicySchema: z.ZodObject<{
|
|
|
197
233
|
timeout_ms?: number | undefined;
|
|
198
234
|
last_n_commits?: number | undefined;
|
|
199
235
|
auto_narrow_threshold?: number | undefined;
|
|
236
|
+
codex_model?: string | undefined;
|
|
237
|
+
codex_reasoning_effort?: "low" | "medium" | "high" | undefined;
|
|
200
238
|
} | undefined;
|
|
201
239
|
redact?: {
|
|
202
240
|
match_timeout_ms?: number | undefined;
|
package/dist/policy/loader.js
CHANGED
|
@@ -38,6 +38,36 @@ const ReviewPolicySchema = z
|
|
|
38
38
|
* intent and auto-narrow stays out of the way).
|
|
39
39
|
*/
|
|
40
40
|
auto_narrow_threshold: z.number().int().nonnegative().optional(),
|
|
41
|
+
/**
|
|
42
|
+
* Codex CLI model override (0.14.0+). Pinned via `-c model="<name>"` on
|
|
43
|
+
* every `codex exec review` invocation. When unset, codex's own default
|
|
44
|
+
* applies — which today is the special-purpose `codex-auto-review`
|
|
45
|
+
* model at `medium` reasoning, NOT the flagship.
|
|
46
|
+
*
|
|
47
|
+
* For serious adversarial review on consumer codebases (where verdict
|
|
48
|
+
* stability matters) the recommended setting is `gpt-5.4` with
|
|
49
|
+
* `codex_reasoning_effort: high`. Higher reasoning trades push-gate
|
|
50
|
+
* latency for finding consistency — fewer same-code-different-verdict
|
|
51
|
+
* round-trips like the 2026-04-26 helixir migration session.
|
|
52
|
+
*
|
|
53
|
+
* Loose string type: codex's model catalog evolves over time and we do
|
|
54
|
+
* NOT want to lock consumers to a hardcoded enum that drifts behind
|
|
55
|
+
* upstream. Codex itself validates the model name at exec time.
|
|
56
|
+
*/
|
|
57
|
+
codex_model: z.string().min(1).optional(),
|
|
58
|
+
/**
|
|
59
|
+
* Codex reasoning effort knob (0.14.0+). Pinned via
|
|
60
|
+
* `-c model_reasoning_effort="<level>"` on every invocation. Only
|
|
61
|
+
* meaningful when paired with a reasoning-capable model (gpt-5.4,
|
|
62
|
+
* gpt-5.3-codex, etc.). The `codex-auto-review` model honors this
|
|
63
|
+
* but caps lower than gpt-5.4.
|
|
64
|
+
*
|
|
65
|
+
* Recommended: `high` for serious review on long-running branches
|
|
66
|
+
* (more compute spent per finding, fewer flips). `medium` is codex's
|
|
67
|
+
* own default. `low` for cost-bounded environments where consistency
|
|
68
|
+
* matters less than throughput.
|
|
69
|
+
*/
|
|
70
|
+
codex_reasoning_effort: z.enum(['low', 'medium', 'high']).optional(),
|
|
41
71
|
})
|
|
42
72
|
.strict();
|
|
43
73
|
/**
|
package/dist/policy/types.d.ts
CHANGED
|
@@ -130,6 +130,34 @@ export interface ReviewPolicy {
|
|
|
130
130
|
* Non-negative integer. The loader rejects negative values.
|
|
131
131
|
*/
|
|
132
132
|
auto_narrow_threshold?: number;
|
|
133
|
+
/**
|
|
134
|
+
* Codex CLI model override (0.14.0+). Pinned via `-c model="<name>"` on
|
|
135
|
+
* every `codex exec review` invocation. When unset, codex's own default
|
|
136
|
+
* applies — which today is the special-purpose `codex-auto-review` model
|
|
137
|
+
* at medium reasoning, NOT the flagship.
|
|
138
|
+
*
|
|
139
|
+
* Recommended for serious adversarial review: `gpt-5.4` paired with
|
|
140
|
+
* `codex_reasoning_effort: high`. Higher reasoning trades push-gate
|
|
141
|
+
* latency for verdict consistency — fewer same-code-different-verdict
|
|
142
|
+
* round-trips like the 2026-04-26 helixir migration session.
|
|
143
|
+
*
|
|
144
|
+
* Loose string type — codex's model catalog evolves. Codex itself
|
|
145
|
+
* validates the model name at exec time; an unknown name surfaces as
|
|
146
|
+
* a clear runtime error rather than a silent fallback.
|
|
147
|
+
*/
|
|
148
|
+
codex_model?: string;
|
|
149
|
+
/**
|
|
150
|
+
* Codex reasoning effort (0.14.0+). Pinned via
|
|
151
|
+
* `-c model_reasoning_effort="<level>"` on every invocation. Only
|
|
152
|
+
* meaningful when paired with a reasoning-capable model (gpt-5.4,
|
|
153
|
+
* gpt-5.3-codex). Codex's own default is `medium`.
|
|
154
|
+
*
|
|
155
|
+
* Recommended: `high` for serious review on long-running branches
|
|
156
|
+
* (more compute spent per finding, fewer flips). `low` for
|
|
157
|
+
* cost-bounded environments where consistency matters less than
|
|
158
|
+
* throughput.
|
|
159
|
+
*/
|
|
160
|
+
codex_reasoning_effort?: 'low' | 'medium' | 'high';
|
|
133
161
|
}
|
|
134
162
|
/**
|
|
135
163
|
* User-supplied redaction pattern entry. Each pattern has a stable `name` used
|
|
@@ -113,6 +113,44 @@ normalize_path() {
|
|
|
113
113
|
|
|
114
114
|
NORMALIZED=$(normalize_path "$FILE_PATH")
|
|
115
115
|
|
|
116
|
+
# ── 5a. Path-traversal rejection (0.14.0 iron-gate fix) ───────────────────────
|
|
117
|
+
# Reject any path containing a `..` segment BEFORE the literal-match below.
|
|
118
|
+
# Without this, `foo/../CODEOWNERS` would get past `normalize_path()` (which
|
|
119
|
+
# only strips leading project root + URL-decodes) and the literal-match
|
|
120
|
+
# loop would compare `foo/../CODEOWNERS` against the literal `CODEOWNERS`
|
|
121
|
+
# entry — which doesn't match, so the policy lets the write through. The
|
|
122
|
+
# downstream Write/Edit tool then resolves the traversal and writes to
|
|
123
|
+
# `CODEOWNERS` anyway, defeating the gate.
|
|
124
|
+
#
|
|
125
|
+
# Mirrors settings-protection.sh §5a (which has had this guard since
|
|
126
|
+
# 0.10.x). Both pre- and post-decode forms are checked because
|
|
127
|
+
# normalize_path() URL-decodes earlier and an attacker could split the
|
|
128
|
+
# traversal across encodings (`%2E%2E/`, `..%2F`, etc.).
|
|
129
|
+
raw_has_traversal=0
|
|
130
|
+
norm_has_traversal=0
|
|
131
|
+
case "/$FILE_PATH/" in
|
|
132
|
+
*/../*) raw_has_traversal=1 ;;
|
|
133
|
+
esac
|
|
134
|
+
case "/$NORMALIZED/" in
|
|
135
|
+
*/../*) norm_has_traversal=1 ;;
|
|
136
|
+
esac
|
|
137
|
+
# Also catch URL-encoded traversal in case some tool routes raw-encoded
|
|
138
|
+
# paths through here (e.g. file:// inputs). normalize_path()'s decoder
|
|
139
|
+
# only handles a fixed set; an unrecognized encoding would slip past.
|
|
140
|
+
case "$FILE_PATH" in
|
|
141
|
+
*%2[Ee]%2[Ee]*|*%2[Ee].*|*.%2[Ee]*) raw_has_traversal=1 ;;
|
|
142
|
+
esac
|
|
143
|
+
if [[ "$raw_has_traversal" -eq 1 ]] || [[ "$norm_has_traversal" -eq 1 ]]; then
|
|
144
|
+
{
|
|
145
|
+
printf 'BLOCKED PATH: path traversal rejected\n'
|
|
146
|
+
printf '\n'
|
|
147
|
+
printf ' File: %s\n' "$FILE_PATH"
|
|
148
|
+
printf " Rule: path contains a '..' segment; rewrite to a canonical\n"
|
|
149
|
+
printf ' project-relative path without traversal.\n'
|
|
150
|
+
} >&2
|
|
151
|
+
exit 2
|
|
152
|
+
fi
|
|
153
|
+
|
|
116
154
|
for writable in "${AGENT_WRITABLE[@]}"; do
|
|
117
155
|
if [[ "$NORMALIZED" == "$writable" ]] || [[ "$NORMALIZED" == "$writable"* && "$writable" == */ ]]; then
|
|
118
156
|
exit 0
|
package/hooks/secret-scanner.sh
CHANGED
|
@@ -40,11 +40,41 @@ fi
|
|
|
40
40
|
FILE_PATH=$(printf '%s' "$INPUT" | jq -r '.tool_input.file_path // empty' 2>/dev/null)
|
|
41
41
|
CONTENT_WRITE=$(printf '%s' "$INPUT" | jq -r '.tool_input.content // empty' 2>/dev/null)
|
|
42
42
|
CONTENT_EDIT=$(printf '%s' "$INPUT" | jq -r '.tool_input.new_string // empty' 2>/dev/null)
|
|
43
|
+
# MultiEdit (0.14.0 fix): the payload is at tool_input.edits[].new_string —
|
|
44
|
+
# an array, not a scalar — and the prior versions of this hook never read
|
|
45
|
+
# it. Result: any agent could route credential writes through MultiEdit and
|
|
46
|
+
# bypass the secret scanner entirely. We extract every `new_string` value
|
|
47
|
+
# from the edits array and concatenate them with newlines so the awk-based
|
|
48
|
+
# pattern scan below treats them like any other write content.
|
|
49
|
+
#
|
|
50
|
+
# Defensive coercion (codex round-1 P1): a malformed payload where
|
|
51
|
+
# `new_string` is a number, object, or array would make jq error out, the
|
|
52
|
+
# `2>/dev/null` would swallow stderr, `CONTENT_MULTIEDIT` would be empty,
|
|
53
|
+
# and the precedence chain below would fall through to `exit 0` —
|
|
54
|
+
# silently allowing the write. Same fail-open mode for a non-array
|
|
55
|
+
# `edits` value. We:
|
|
56
|
+
#
|
|
57
|
+
# 1. Coerce `.tool_input.edits` to `[]` if it's anything other than an
|
|
58
|
+
# array (`if type=="array" then . else [] end`)
|
|
59
|
+
# 2. Coerce every `new_string` to a string via `tostring` so jq cannot
|
|
60
|
+
# fail on heterogeneous types
|
|
61
|
+
#
|
|
62
|
+
# Both layers fail closed: a malformed payload either yields the empty
|
|
63
|
+
# string (no scan needed, exit 0 from the precedence chain) or yields a
|
|
64
|
+
# pattern-scannable string. There is no path where jq errors silently and
|
|
65
|
+
# the hook falls through to allow.
|
|
66
|
+
CONTENT_MULTIEDIT=$(printf '%s' "$INPUT" | jq -r '
|
|
67
|
+
(.tool_input.edits // [] | if type=="array" then . else [] end)
|
|
68
|
+
| map((.new_string // "") | tostring)
|
|
69
|
+
| join("\n")
|
|
70
|
+
' 2>/dev/null)
|
|
43
71
|
|
|
44
72
|
if [[ -n "$CONTENT_WRITE" ]]; then
|
|
45
73
|
CONTENT="$CONTENT_WRITE"
|
|
46
74
|
elif [[ -n "$CONTENT_EDIT" ]]; then
|
|
47
75
|
CONTENT="$CONTENT_EDIT"
|
|
76
|
+
elif [[ -n "$CONTENT_MULTIEDIT" ]]; then
|
|
77
|
+
CONTENT="$CONTENT_MULTIEDIT"
|
|
48
78
|
else
|
|
49
79
|
exit 0
|
|
50
80
|
fi
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bookedsolid/rea",
|
|
3
|
-
"version": "0.13.3",
|
|
3
|
+
"version": "0.14.0",
|
|
4
4
|
"description": "Agentic governance layer for Claude Code — policy enforcement, hook-based safety gates, audit logging, and Codex-integrated adversarial review for AI-assisted projects",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Booked Solid Technology <oss@bookedsolid.tech> (https://bookedsolid.tech)",
|