@bookedsolid/rea 0.25.0 → 0.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -89,6 +89,36 @@ const ReviewPolicySchema = z
89
89
  * verdict. Set to 0 to disable caching (every push re-invokes codex).
90
90
  */
91
91
  cache_ttl_ms: z.number().int().nonnegative().optional(),
92
+ /**
93
+ * 0.26.0 local-first enforcement. Strict so a typo in the off-switch
94
+ * surface (`mode: of`, `refuse_at: pushh`) fails policy load instead
95
+ * of silently disabling. `bypass_env_var` is constrained to the
96
+ * shell-safe identifier alphabet so a nonsense value can't smuggle
97
+ * shell metacharacters through the Bash-tier gate that reads it.
98
+ */
99
+ local_review: z
100
+ .object({
101
+ mode: z.enum(['enforced', 'off']).optional(),
102
+ max_age_seconds: z.number().int().positive().optional(),
103
+ refuse_at: z.enum(['push', 'commit', 'both']).optional(),
104
+ bypass_env_var: z
105
+ .string()
106
+ .regex(/^[A-Z][A-Z0-9_]{0,63}$/)
107
+ .optional(),
108
+ })
109
+ .strict()
110
+ .optional(),
111
+ })
112
+ .strict();
113
/**
 * Commit-hygiene thresholds introduced in 0.26.0.
 *
 * This is a top-level policy block rather than a child of `review`: it
 * tunes process discipline, not review behaviour. `rea preflight` reads
 * it; the push-gate ignores it.
 *
 * Both fields are optional non-negative integers. The object is strict,
 * so an unknown (e.g. misspelled) key fails policy load.
 */
const CommitHygienePolicySchema = z
  .object({
    warn_at_commits: z.optional(z.number().int().nonnegative()),
    refuse_at_commits: z.optional(z.number().int().nonnegative()),
  })
  .strict();
94
124
  /**
@@ -214,6 +244,9 @@ const PolicySchema = z
214
244
  patterns: z.array(z.string()).optional(),
215
245
  })
216
246
  .optional(),
247
+ // 0.26.0 commit-hygiene thresholds — top-level so it's discoverable
248
+ // separately from `review.local_review`. `rea preflight` consumes it.
249
+ commit_hygiene: CommitHygienePolicySchema.optional(),
217
250
  })
218
251
  .strict();
219
252
  const DEFAULT_CACHE_TTL_MS = 30_000;
@@ -169,6 +169,84 @@ export interface ReviewPolicy {
169
169
  * a `rea.push_gate.verdict_flip` audit event and overwrite the cache.
170
170
  */
171
171
  cache_ttl_ms?: number;
172
+ /**
173
+ * Local-first review enforcement (0.26.0+ — CTO directive 2026-05-05).
174
+ *
175
+ * The push-gate is the BACKUP layer. The primary review surface is the
176
+ * working tree BEFORE commit, run via `rea review`, recorded as a
177
+ * `rea.local_review` audit entry. The Bash-tier `local-review-gate.sh`
178
+ * hook + husky `rea preflight --strict` refuse `git push` (and optionally
179
+ * `git commit`) when no recent matching audit entry exists for HEAD.
180
+ *
181
+ * The off-switch is the FIRST-class concern. Teams without codex/claude
182
+ * installed set `mode: off` to disable the new enforcement layers
183
+ * cleanly — no env-var hacks, no policy strip, no special init flag.
184
+ *
185
+ * The provider seam is the audit-record `provider` field, NOT this
186
+ * policy block. Future providers (Claude-subagent, Pi, Gemma) write
187
+ * `rea.local_review` records with their own `provider:` value; this
188
+ * block governs WHETHER the gate fires, not WHO runs the review.
189
+ */
190
+ local_review?: LocalReviewPolicy;
191
+ }
192
/**
 * Settings for local-first review enforcement (0.26.0+).
 *
 * - `mode: 'enforced'`: the Bash-tier gate, the husky preflight, and the
 *   `rea review` requirement are all active. A push is refused unless a
 *   recent matching `rea.local_review` audit entry exists, or the
 *   variable named by `bypass_env_var` is set to a non-empty reason.
 * - `mode: 'off'`: every one of those enforcement layers becomes a
 *   silent no-op, so teams without codex/claude can opt out cleanly.
 *   The push-gate is a separate layer governed by `codex_required` and
 *   is not affected by this setting.
 *
 * When `mode` is unset the default is `enforced`; the off-switch is
 * opt-out, never opt-in (CTO directive 2026-05-05, which applies to all
 * rea work, OSS and enterprise).
 */
export interface LocalReviewPolicy {
  /** Whether the local-review enforcement layers fire. Default `enforced`. */
  mode?: 'enforced' | 'off';

  /**
   * Upper bound, in seconds, on the age of a `rea.local_review` audit
   * entry that `rea preflight` accepts as covering the current HEAD.
   * An older review is treated as missing and the gate refuses.
   * Default 86400 (24 hours).
   */
  max_age_seconds?: number;

  /**
   * Git operations the Bash-tier gate refuses when no recent review
   * covers HEAD:
   *   - `'push'`   refuse `git push` only (default)
   *   - `'commit'` refuse `git commit` only
   *   - `'both'`   refuse both
   *
   * The husky pre-push hook honors `'push' | 'both'`; the Bash-tier
   * hook honors all three values.
   */
  refuse_at?: 'push' | 'commit' | 'both';

  /**
   * Name of the environment variable that, when set to a non-empty
   * value, makes `rea preflight` short-circuit (exit 0) AFTER it writes
   * a `rea.local_review.skipped_override` audit entry recording the
   * reason. Default `REA_SKIP_LOCAL_REVIEW`.
   *
   * The override applies per invocation and is audited every time. It
   * is a release valve, not a sustained way to disable enforcement;
   * teams that need enforcement disabled set `mode: off`.
   */
  bypass_env_var?: string;
}
241
/**
 * Commit-hygiene thresholds (0.26.0+).
 *
 * `rea preflight` runs `git rev-list --count <base>..HEAD` and compares
 * the count against these values. To effectively disable a threshold,
 * set it to a sentinel such as a very large integer.
 */
export interface CommitHygienePolicy {
  /** Commit count compared against for the warning threshold. */
  warn_at_commits?: number;
  /** Commit count compared against for the refusal threshold. */
  refuse_at_commits?: number;
}
173
251
  /**
174
252
  * User-supplied redaction pattern entry. Each pattern has a stable `name` used
@@ -301,4 +379,15 @@ export interface Policy {
301
379
  architecture_review?: {
302
380
  patterns?: string[];
303
381
  };
382
+ /**
383
+ * Commit-hygiene refusal thresholds (0.26.0+). `rea preflight` checks
384
+ * `git rev-list --count <base>..HEAD`; `> warn_at_commits` warns
385
+ * (exit 1), `> refuse_at_commits` refuses (exit 2). The CTO directive
386
+ * 2026-05-05 sets the new BST default at warn_at=1 / refuse_at=5 to
387
+ * push every change toward squash-on-commit hygiene.
388
+ *
389
+ * Top-level (not under `review`) because it's a process-discipline
390
+ * knob, not a review knob. The push-gate doesn't consume it.
391
+ */
392
+ commit_hygiene?: CommitHygienePolicy;
304
393
  }
@@ -138,8 +138,32 @@ _rea_unwrap_at_depth() {
138
138
  # \x03 ETX — replaces in-quote `;`
139
139
  # \x05 ENQ — replaces in-quote `&`
140
140
  # \x06 ACK — replaces in-quote `|`
141
+ #
142
+ # 0.26.1 helix-028 P1 fix: feed the entire (possibly multiline) `$cmd`
143
+ # to awk as a SINGLE record using a multi-byte record separator
144
+ # (`\x1c\x1d` = FS+GS, control bytes that cannot appear in real shell
145
+ # input). Pre-fix, awk's default RS=`\n` made the masking awk process
146
+ # each line independently, which (a) dropped the newlines from the
147
+ # masked output and (b) reset the in-quote `mode` state per-line — so
148
+ # `bash -lc "printf x > .rea/HALT\ntrue"` had its closing `"` on line 2
149
+ # treated as an opening quote in plain mode, scrambling the mask. macOS
150
+ # BSD awk does NOT support NUL as RS (truncates after first record);
151
+ # `\x1c\x1d` is a portable multi-byte sentinel that awk's RS handles
152
+ # uniformly across BSD/GNU awk implementations.
153
+ #
154
+ # 0.26.1 ANSI-C sibling: also recognize `$'...'` (mode 3) as a quoted
155
+ # span. Pre-fix, the masker treated `$` as plain text in mode 0, so
156
+ # the closing `'` of `$'...'` was the only `'` the masker saw and it
157
+ # entered mode 2 there — flipping the mask state for the rest of the
158
+ # input. Mode 3 honors `\\`-escape semantics (so `\'` and `\\` inside
159
+ # the body do not prematurely terminate the span); on exit the closing
160
+ # `'` is masked to `\x02` (same as mode 2's exit) so the wrapper-scan
161
+ # can no longer treat in-quote `'` as a payload-opening quote.
162
+ local _unwrap_sep
163
+ _unwrap_sep=$'\x1c\x1d'
141
164
  local masked
142
- masked=$(printf '%s' "$cmd" | awk '
165
+ masked=$(printf '%s%s' "$cmd" "$_unwrap_sep" | awk '
166
+ BEGIN { RS = "\034\035" }
143
167
  {
144
168
  line = $0
145
169
  out = ""
@@ -149,12 +173,40 @@ _rea_unwrap_at_depth() {
149
173
  while (i <= n) {
150
174
  ch = substr(line, i, 1)
151
175
  if (mode == 0) {
176
+ # ANSI-C `$'\''...'\''` introducer: emit `$` and opening quote
177
+ # literally (so the wrapper-scan can detect the introducer)
178
+ # and enter mode 3.
179
+ if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
180
+ mode = 3
181
+ out = out "$" "'\''"
182
+ i += 2
183
+ continue
184
+ }
152
185
  if (ch == "\"") { mode = 1; out = out ch; i++; continue }
153
186
  if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
154
187
  out = out ch
155
188
  i++
156
189
  continue
157
190
  }
191
+ if (mode == 3) {
192
+ # ANSI-C: `\\X` is a literal escape pair (`\\\''`, `\\\\`, `\\n`,
193
+ # etc.). Preserve the pair so the closing `'\''` detector below
194
+ # does not exit on `\\\''`.
195
+ if (ch == "\\" && i < n) {
196
+ nxt = substr(line, i + 1, 1)
197
+ out = out ch nxt
198
+ i += 2
199
+ continue
200
+ }
201
+ if (ch == "'\''") { mode = 0; out = out "\002"; i++; continue }
202
+ if (ch == ";") { out = out "\003"; i++; continue }
203
+ if (ch == "&") { out = out "\005"; i++; continue }
204
+ if (ch == "|") { out = out "\006"; i++; continue }
205
+ if (ch == "\"") { out = out "\001"; i++; continue }
206
+ out = out ch
207
+ i++
208
+ continue
209
+ }
158
210
  if (mode == 2) {
159
211
  if (ch == "'\''") { mode = 0; out = out "\002"; i++; continue }
160
212
  if (ch == ";") { out = out "\003"; i++; continue }
@@ -183,15 +235,27 @@ _rea_unwrap_at_depth() {
183
235
  }
184
236
  printf "%s", out
185
237
  }')
186
- # Pass both raw and masked into awk. Wrapper-regex matches against the
187
- # masked form; payload extraction reads the raw form using the same
188
- # offsets. Because the mask is byte-for-byte width-preserving, the
189
- # same RSTART/RLENGTH applies to both.
238
+ # Pass both raw and masked into awk via stdin as sentinel-separated
239
+ # records: `awk -v raw="$cmd" -v masked="$masked"` errors with
240
+ # `awk: newline in string` the moment either string contains a literal
241
+ # newline. RS=`\x1c\x1d` (FS+GS multi-byte sentinel) survives newlines
242
+ # in either record. (BSD awk does not support NUL-as-RS reliably.)
243
+ # Wrapper-regex matches against the masked form; payload extraction
244
+ # reads the raw form using the same offsets. Because the mask is
245
+ # byte-for-byte width-preserving, the same RSTART/RLENGTH applies to
246
+ # both.
190
247
  #
191
248
  # 0.21.2: capture payloads to a local var; iterate to recurse.
249
+ # 0.26.1 helix-028 P1: switch from `awk -v` to sentinel-separated stdin records.
250
+ # 0.26.1 ANSI-C sibling: handle `$'\''...'\''` as a third quoted-body
251
+ # form alongside `'\''...'\''` and `"..."`. Decode common escape
252
+ # sequences (`\\n`, `\\t`, `\\r`, `\\\\`, `\\\''`, `\\"`) when emitting
253
+ # the payload so the downstream segment splitter sees real newlines
254
+ # and splits on them.
192
255
  local _unwrap_payloads
193
- _unwrap_payloads=$(printf '' | awk -v raw="$cmd" -v masked="$masked" '
256
+ _unwrap_payloads=$(printf '%s%s%s%s' "$cmd" "$_unwrap_sep" "$masked" "$_unwrap_sep" | awk '
194
257
  BEGIN {
258
+ RS = "\034\035"
195
259
  # Wrapper-prefix regex: shell-name + optional flag tokens + -c-style flag.
196
260
  # Each flag token is `-` followed by 1+ letters and trailing space.
197
261
  # NOTE: matches only OUTSIDE outer quoted spans because in-quote
@@ -207,6 +271,10 @@ _rea_unwrap_at_depth() {
207
271
  # NOT covered here. Adding pwsh requires a separate code path
208
272
  # because EncodedCommand base64-decodes at runtime.
209
273
  WRAP = "(^|[[:space:]&|;])(bash|sh|zsh|dash|ksh|mksh|oksh|posh|yash|csh|tcsh|fish)([[:space:]]+-[a-zA-Z]+)*[[:space:]]+-(c|lc|lic|ic|cl|cli|li|il)[[:space:]]+"
274
+ }
275
+ NR == 1 { raw = $0; next }
276
+ NR == 2 {
277
+ masked = $0
210
278
  # Track the cursor in BOTH raw and masked. Because the mask is
211
279
  # byte-for-byte width-preserving, the same RSTART/RLENGTH applies
212
280
  # to both — but each iteration of the loop must SLICE both strings
@@ -225,8 +293,120 @@ _rea_unwrap_at_depth() {
225
293
  # verbatim only when it was an outer quote.
226
294
  first = substr(rtail, 1, 1)
227
295
  mfirst = substr(mtail, 1, 1)
296
+ # ANSI-C: `$'\''...'\''` introducer (raw and masked must both
297
+ # carry `$` followed by literal `'\''` — the masker preserves the
298
+ # opening pair when it transitions into mode 3).
299
+ if (first == "$" && substr(rtail, 2, 1) == "'\''" \
300
+ && mfirst == "$" && substr(mtail, 2, 1) == "'\''") {
301
+ body = substr(rtail, 3)
302
+ n = length(body)
303
+ j = 1
304
+ out = ""
305
+ closed = 0
306
+ while (j <= n) {
307
+ c = substr(body, j, 1)
308
+ if (c == "\\" && j < n) {
309
+ nxt = substr(body, j + 1, 1)
310
+ # Decode common ANSI-C escape sequences so the splitter
311
+ # downstream sees real bytes (e.g. `\n` → newline → segment
312
+ # boundary at protected/blocked-path detection time).
313
+ if (nxt == "n") { out = out "\n"; j += 2; continue }
314
+ if (nxt == "t") { out = out "\t"; j += 2; continue }
315
+ if (nxt == "r") { out = out "\r"; j += 2; continue }
316
+ if (nxt == "\\") { out = out "\\"; j += 2; continue }
317
+ if (nxt == "'\''") { out = out "'\''"; j += 2; continue }
318
+ if (nxt == "\"") { out = out "\""; j += 2; continue }
319
+ if (nxt == "a") { out = out "\007"; j += 2; continue }
320
+ if (nxt == "b") { out = out "\010"; j += 2; continue }
321
+ if (nxt == "e" || nxt == "E") { out = out "\033"; j += 2; continue }
322
+ if (nxt == "f") { out = out "\014"; j += 2; continue }
323
+ if (nxt == "v") { out = out "\013"; j += 2; continue }
324
+ if (nxt == "?") { out = out "?"; j += 2; continue }
325
+ # 0.26.1 helix-028 P1-2: `\xHH` (1–2 hex digits). Pre-fix
326
+ # `bash -lc $'\''echo > .rea/HALT\\x0Atrue'\''` had `\x0A`
327
+ # preserved as the literal pair `\x0A`, so the segment
328
+ # splitter never saw the real LF and the second statement
329
+ # (`true` / arbitrary attacker payload) was hidden in the
330
+ # same segment as the first. Decode here so the LF reaches
331
+ # the splitter.
332
+ if (nxt == "x") {
333
+ hex = ""
334
+ k = j + 2
335
+ while (k <= n && length(hex) < 2 \
336
+ && index("0123456789abcdefABCDEF", substr(body, k, 1)) > 0) {
337
+ hex = hex substr(body, k, 1)
338
+ k++
339
+ }
340
+ if (length(hex) > 0) {
341
+ # awk has no native hex parser. Walk the digits.
342
+ hv = 0
343
+ for (h = 1; h <= length(hex); h++) {
344
+ hd = substr(hex, h, 1)
345
+ di = index("0123456789abcdef", tolower(hd)) - 1
346
+ hv = hv * 16 + di
347
+ }
348
+ out = out sprintf("%c", hv)
349
+ j = k
350
+ continue
351
+ }
352
+ # `\x` with no digits — preserve pair literally.
353
+ out = out c nxt
354
+ j += 2
355
+ continue
356
+ }
357
+ # 0.26.1 helix-028 P1-2: `\NNN` octal (1–3 digits). Pre-fix
358
+ # `\012` (= LF) was preserved as a literal pair, same bypass
359
+ # class as `\xHH`.
360
+ if (nxt >= "0" && nxt <= "7") {
361
+ oct = nxt
362
+ k = j + 2
363
+ while (k <= n && length(oct) < 3 \
364
+ && substr(body, k, 1) >= "0" \
365
+ && substr(body, k, 1) <= "7") {
366
+ oct = oct substr(body, k, 1)
367
+ k++
368
+ }
369
+ ov = 0
370
+ for (h = 1; h <= length(oct); h++) {
371
+ ov = ov * 8 + (substr(oct, h, 1) + 0)
372
+ }
373
+ # Bash truncates to 8 bits.
374
+ ov = ov % 256
375
+ out = out sprintf("%c", ov)
376
+ j = k
377
+ continue
378
+ }
379
+ # Default: preserve pair (covers `\u…`, `\U…`, `\cX` — rarer
380
+ # shapes; the literal pair is still safer than silent decoding
381
+ # for unsupported escapes in this legacy-gate layer. The Node
382
+ # scanner — primary enforcement for protected/blocked paths
383
+ # since 0.23.0 — fails closed on these via decodeAnsiC).
384
+ out = out c nxt
385
+ j += 2
386
+ continue
387
+ }
388
+ if (c == "'\''") { closed = j; break }
389
+ out = out c
390
+ j++
391
+ }
392
+ if (closed == 0) {
393
+ mrest = substr(mtail, 3)
394
+ rrest = substr(rtail, 3)
395
+ continue
396
+ }
397
+ print out
398
+ # Skip past `$` (1) + opening `'\''` (1) + body (closed-1) +
399
+ # closing `'\''` (1) = 2 + closed bytes from mtail/rtail start.
400
+ mrest = substr(mtail, 2 + closed + 1)
401
+ rrest = substr(rtail, 2 + closed + 1)
402
+ continue
403
+ }
228
404
  if (first == "'\''" && mfirst == "'\''") {
229
405
  # Single-quoted body: no escape semantics; runs to next `'\''`.
406
+ # NOTE: index against the RAW body — the masker replaces the
407
+ # closing `'\''` of an outer single-quoted span with `\002`, so
408
+ # `index(mbody, "'\''")` would never find it. The raw body
409
+ # carries the literal closing `'\''` byte verbatim.
230
410
  body = substr(rtail, 2)
231
411
  mbody = substr(mtail, 2)
232
412
  end = index(body, "'\''")
@@ -278,10 +458,7 @@ _rea_unwrap_at_depth() {
278
458
  mrest = mtail
279
459
  rrest = rtail
280
460
  }
281
- }
282
- # Empty action with no input rules — explicitly drive the loop from
283
- # END so awk does not require any input records.
284
- END {}')
461
+ }')
285
462
  # Recurse on each extracted payload with depth+1.
286
463
  if [[ -n "$_unwrap_payloads" ]]; then
287
464
  while IFS= read -r _unwrap_p; do
@@ -363,16 +540,38 @@ _rea_split_segments() {
363
540
  out = ""
364
541
  i = 1
365
542
  n = length(line)
366
- mode = 0 # 0=plain, 1=double, 2=single
543
+ mode = 0 # 0=plain, 1=double, 2=single, 3=ANSI-C $'\''...'\''
367
544
  while (i <= n) {
368
545
  ch = substr(line, i, 1)
369
546
  if (mode == 0) {
547
+ # 0.26.1 helix-028 sibling: ANSI-C `$'\''...'\''` introducer.
548
+ # Pre-fix `echo $'\''a;b'\''` had its in-quote `;` un-masked and
549
+ # the splitter broke the segment at the `;`. Mode 3 honors
550
+ # backslash-escape pairs so `\\\''` and `\\\\` do not exit early.
551
+ if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
552
+ mode = 3; out = out "$" "'\''"; i += 2; continue
553
+ }
370
554
  if (ch == "\"") { mode = 1; out = out ch; i++; continue }
371
555
  if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
372
556
  out = out ch
373
557
  i++
374
558
  continue
375
559
  }
560
+ if (mode == 3) {
561
+ if (ch == "\\" && i < n) {
562
+ nxt = substr(line, i + 1, 1)
563
+ out = out ch nxt
564
+ i += 2
565
+ continue
566
+ }
567
+ if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
568
+ if (ch == ";") { out = out SC; i++; continue }
569
+ if (ch == "&") { out = out AMP; i++; continue }
570
+ if (ch == "|") { out = out PIPE; i++; continue }
571
+ out = out ch
572
+ i++
573
+ continue
574
+ }
376
575
  if (mode == 2) {
377
576
  # Single quotes: no escape semantics. Only `'\''` ends.
378
577
  if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
@@ -423,9 +622,24 @@ _rea_split_segments() {
423
622
  # in-quote `|` characters.
424
623
  quote_masked_cmd() {
425
624
  local cmd="$1"
426
- printf '%s' "$cmd" \
625
+ # 0.26.1 helix-028 sibling: feed the entire (possibly multiline) `$cmd`
626
+ # to awk as a SINGLE record using a multi-byte record separator
627
+ # (`\x1c\x1d` = FS+GS). Pre-fix, the default `RS=\n` split a multiline
628
+ # input across records and reset in-quote `mode` per-line, which both
629
+ # dropped the newlines AND scrambled the mask (the closing `"` on
630
+ # line 2 was treated as opening a new quoted span in plain mode).
631
+ # Also adds ANSI-C `$'\''...'\''` (mode 3) to mirror _rea_unwrap_at_depth's
632
+ # masker — same scope: in-quote `|`/`;`/`&` get masked, opening-pair
633
+ # `$'\''` is preserved for downstream detection, closing `'\''` is left
634
+ # literal here (this helper does not need a mode-exit-mask byte; the
635
+ # caller pattern-matches against literal `|`/`;`/`&` in the masked
636
+ # stream and benefits from preserving quote boundaries verbatim).
637
+ local _qm_sep
638
+ _qm_sep=$'\x1c\x1d'
639
+ printf '%s%s' "$cmd" "$_qm_sep" \
427
640
  | awk '
428
641
  BEGIN {
642
+ RS = "\034\035"
429
643
  INQ_PIPE = "__REA_INQUOTE_PIPE_a8f2c1__"
430
644
  INQ_SC = "__REA_INQUOTE_SC_a8f2c1__"
431
645
  INQ_AMP = "__REA_INQUOTE_AMP_a8f2c1__"
@@ -439,12 +653,29 @@ quote_masked_cmd() {
439
653
  while (i <= n) {
440
654
  ch = substr(line, i, 1)
441
655
  if (mode == 0) {
656
+ if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
657
+ mode = 3; out = out "$" "'\''"; i += 2; continue
658
+ }
442
659
  if (ch == "\"") { mode = 1; out = out ch; i++; continue }
443
660
  if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
444
661
  out = out ch
445
662
  i++
446
663
  continue
447
664
  }
665
+ if (mode == 3) {
666
+ if (ch == "\\" && i < n) {
667
+ out = out ch substr(line, i + 1, 1)
668
+ i += 2
669
+ continue
670
+ }
671
+ if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
672
+ if (ch == "|") { out = out INQ_PIPE; i++; continue }
673
+ if (ch == ";") { out = out INQ_SC; i++; continue }
674
+ if (ch == "&") { out = out INQ_AMP; i++; continue }
675
+ out = out ch
676
+ i++
677
+ continue
678
+ }
448
679
  if (mode == 2) {
449
680
  if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
450
681
  if (ch == "|") { out = out INQ_PIPE; i++; continue }
@@ -493,8 +724,21 @@ _rea_strip_prefix() {
493
724
  *)
494
725
  # Env-var assignment prefix (`KEY=value `) — only strip if the
495
726
  # token before the first space looks like NAME=value.
496
- if [[ "$seg" =~ ^[A-Za-z_][A-Za-z0-9_]*=[^[:space:]]+[[:space:]]+ ]]; then
497
- seg="${seg#* }"
727
+ #
728
+ # 0.26.0 round-25 P2-A fix: ANSI-C quoting `$'...'` was previously
729
+ # uncovered. `FOO=$'a b' git push` evaded the env-prefix stripper
730
+ # (whose value pattern `[^[:space:]]+` bailed at the space inside
731
+ # the ANSI-C body) AND the local-review-gate's raw-fallback regex.
732
+ # Add an explicit ANSI-C alternative here so the prefix is stripped
733
+ # cleanly even when the value carries embedded whitespace inside
734
+ # `$'...'`. Bash 3.2+ regex doesn't support non-capturing groups,
735
+ # so we keep the alternation flat.
736
+ if [[ "$seg" =~ ^[A-Za-z_][A-Za-z0-9_]*=([^[:space:]\"\'$]+|\"[^\"]*\"|\'[^\']*\'|\$\'[^\']*\')[[:space:]]+ ]]; then
737
+ # Compute prefix length and slice — `seg=${seg#* }` would split
738
+ # on the first space, which is INSIDE the value for ANSI-C and
739
+ # quoted forms. Slice by the matched length instead.
740
+ local _prefix_len=${#BASH_REMATCH[0]}
741
+ seg="${seg:_prefix_len}"
498
742
  seg="${seg#"${seg%%[![:space:]]*}"}"
499
743
  else
500
744
  break
@@ -621,3 +865,128 @@ any_segment_starts_with() {
621
865
  done < <(_rea_split_segments "$cmd")
622
866
  return 1
623
867
  }
868
+
869
# Print on stdout the FIRST segment of $1 (RAW form, env-var prefixes
# preserved) whose prefix-stripped form starts with the extended regex
# $2 (matched case-insensitively). Prints nothing and returns 1 when no
# segment matches; returns 0 on a match.
#
# Arguments:
#   $1  full command string; split with `_rea_split_segments`
#   $2  extended regex; anchored here with a leading `^`
#
# Use this when a downstream check must scope further parsing to the
# specific segment that triggered detection — e.g. local-review-gate's
# inline-bypass regex must only match `VAR=val git push` shapes inside
# the SAME segment that contained the `git push`, not anywhere in $CMD.
# Segment-scoped capture closes the round-24 P1 bypass class where
# `true VAR=fake git status; git push origin main` had the bypass shape
# in segment 1 and the real push in segment 2 — the un-scoped regex
# previously honored the bypass for the unrelated push.
#
# 0.26.0 helix-024 round-24 fix.
find_first_segment_starting_with() {
  local cmd="$1"
  local pattern="$2"
  local segment stripped
  # `read` returns non-zero on a final line that lacks a trailing newline
  # even though it has filled `segment`; the `-n` guard keeps that last
  # segment from being silently skipped.
  while IFS= read -r segment || [[ -n "$segment" ]]; do
    stripped=$(_rea_strip_prefix "$segment")
    if printf '%s' "$stripped" | grep -qiE "^${pattern}"; then
      # Emit the RAW segment (prefixes intact), not the stripped form.
      printf '%s' "$segment"
      return 0
    fi
  done < <(_rea_split_segments "$cmd")
  return 1
}
896
+
897
# Print on stdout the FIRST segment of $1 (RAW — no prefix-stripping,
# but with leading whitespace trimmed for clean anchor matching) that
# matches the extended regex $2 (case-insensitive). Prints nothing and
# returns 1 when no segment matches; returns 0 on a match.
#
# Arguments:
#   $1  full command string; split with `_rea_split_segments`
#   $2  extended regex, used as given (the caller supplies any anchors)
#
# Companion to `any_segment_raw_matches`. Used by local-review-gate to
# capture the segment whose RAW shape (env-var prefixes intact) triggered
# the fallback `^([NAME=...])+git push` detector. That fallback exists
# for env-prefix shapes the prefix-stripper leaves in place — the
# motivating case was a quoted value containing spaces, e.g.
# `REA_SKIP="urgent fix" git push`, which `any_segment_starts_with`
# missed. Round-24's segment-scoped bypass regex must run against the
# SAME segment (raw form) that the fallback matched, not against the
# whole $CMD.
#
# 0.26.0 helix-024 round-24 fix.
find_first_segment_raw_matches() {
  local cmd="$1"
  local pattern="$2"
  local segment
  # The `-n` guard keeps a final segment that has no trailing newline;
  # `read` alone reports failure for it and the loop would end early.
  while IFS= read -r segment || [[ -n "$segment" ]]; do
    # Trim leading whitespace so `^`-anchored patterns line up.
    segment="${segment#"${segment%%[![:space:]]*}"}"
    # `-e` marks the pattern explicitly so a caller regex beginning with
    # `-` is never parsed as a grep option.
    if printf '%s' "$segment" | grep -qiE -e "$pattern"; then
      printf '%s' "$segment"
      return 0
    fi
  done < <(_rea_split_segments "$cmd")
  return 1
}
925
+
926
# Print on stdout EVERY segment of $1 (RAW form) whose prefix-stripped
# form starts with the extended regex $2 (case-insensitive), one match
# per line. Prints nothing and returns 1 when no segment matches;
# returns 0 when at least one does.
#
# Arguments:
#   $1  full command string; split with `_rea_split_segments`
#   $2  extended regex; anchored here with a leading `^`
#
# Use this when a downstream check needs to validate EVERY trigger
# segment — e.g. local-review-gate's per-segment bypass requirement.
# Before the round-25 fix the gate captured only the FIRST trigger
# segment via `find_first_segment_starting_with` and scoped the
# inline-bypass regex there. Multi-push laundering PoC:
#
#   REA_SKIP="x" git push fake --dry-run; git push origin main
#     → first push has bypass, second does not. Pre-fix: bypass honored
#       for FIRST segment only, but the gate exited 0 globally; second
#       (real) push went through ungated.
#
# Round-25 P1-B closes that class by sweeping ALL trigger segments and
# requiring that EVERY one carries its own bypass. Any trigger segment
# without a bypass forces preflight invocation.
#
# 0.26.0 helix-026 round-25 P1-B fix.
find_all_segments_starting_with() {
  local cmd="$1"
  local pattern="$2"
  local segment stripped
  local _matched=0
  # The `-n` guard keeps a final segment that has no trailing newline;
  # skipping it would let a trailing trigger segment escape the sweep.
  while IFS= read -r segment || [[ -n "$segment" ]]; do
    stripped=$(_rea_strip_prefix "$segment")
    if printf '%s' "$stripped" | grep -qiE "^${pattern}"; then
      # Emit the RAW segment so callers can inspect its env-var prefixes.
      printf '%s\n' "$segment"
      _matched=1
    fi
  done < <(_rea_split_segments "$cmd")
  if [ "$_matched" -eq 0 ]; then
    return 1
  fi
  return 0
}
963
+
964
# Print on stdout EVERY segment of $1 (RAW — no prefix-stripping, but
# with leading whitespace trimmed for clean anchor matching) that matches
# the extended regex $2 (case-insensitive), one match per line. Prints
# nothing and returns 1 when no segment matches; returns 0 otherwise.
#
# Arguments:
#   $1  full command string; split with `_rea_split_segments`
#   $2  extended regex, used as given (the caller supplies any anchors)
#
# Companion to `find_all_segments_starting_with`. Used by
# local-review-gate's round-25 P1-B fix to sweep every trigger segment
# whose RAW shape (env-var prefixes intact) triggered the
# `^([NAME=...])+git push` fallback detector — so each can be validated
# for bypass independently.
#
# 0.26.0 helix-026 round-25 P1-B fix.
find_all_segments_raw_matches() {
  local cmd="$1"
  local pattern="$2"
  local segment
  local _matched=0
  # The `-n` guard keeps a final segment that has no trailing newline;
  # `read` alone reports failure for it and the loop would end early.
  while IFS= read -r segment || [[ -n "$segment" ]]; do
    # Trim leading whitespace so `^`-anchored patterns line up.
    segment="${segment#"${segment%%[![:space:]]*}"}"
    # `-e` marks the pattern explicitly so a caller regex beginning with
    # `-` is never parsed as a grep option.
    if printf '%s' "$segment" | grep -qiE -e "$pattern"; then
      printf '%s\n' "$segment"
      _matched=1
    fi
  done < <(_rea_split_segments "$cmd")
  if [ "$_matched" -eq 0 ]; then
    return 1
  fi
  return 0
}