@bookedsolid/rea 0.26.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -262,7 +262,20 @@ export async function runCodexReview(options) {
262
262
  reject(new CodexTimeoutError(options.timeoutMs));
263
263
  }, options.timeoutMs);
264
264
  timer.unref?.();
265
- child.stdout.on('data', (chunk) => stdoutChunks.push(chunk));
265
+ child.stdout.on('data', (chunk) => {
266
+ stdoutChunks.push(chunk);
267
+ // 0.27.0 raw-stdout tee for `rea hook codex-review`. Sink errors
268
+ // are swallowed — a bad sink must not make a passing review fail.
269
+ const sink = options.rawStdoutSink;
270
+ if (sink !== undefined) {
271
+ try {
272
+ sink(chunk);
273
+ }
274
+ catch {
275
+ /* sink failure is non-fatal */
276
+ }
277
+ }
278
+ });
266
279
  child.stderr.on('data', (chunk) => stderrChunks.push(chunk));
267
280
  child.on('error', (e) => {
268
281
  clearTimeout(timer);
@@ -138,8 +138,37 @@ _rea_unwrap_at_depth() {
138
138
  # \x03 ETX — replaces in-quote `;`
139
139
  # \x05 ENQ — replaces in-quote `&`
140
140
  # \x06 ACK — replaces in-quote `|`
141
+ #
142
+ # 0.26.1 helix-028 P1 fix: feed the entire (possibly multiline) `$cmd`
143
+ # to awk as a SINGLE record using a multi-byte record separator
144
+ # (`\x1c\x1d` = FS+GS, control bytes that cannot appear in real shell
145
+ # input). Pre-fix, awk's default RS=`\n` made the masking awk process
146
+ # each line independently, which (a) dropped the newlines from the
147
+ # masked output and (b) reset the in-quote `mode` state per-line — so
148
+ # `bash -lc "printf x > .rea/HALT\ntrue"` had its closing `"` on line 2
149
+ # treated as an opening quote in plain mode, scrambling the mask. macOS
150
+ # BSD awk does NOT support NUL as RS (truncates after first record);
151
+ # `\x1c\x1d` is a portable multi-byte sentinel that awk's RS handles
152
+ # uniformly across BSD/GNU awk implementations.
153
+ #
154
+ # 0.26.1 ANSI-C sibling: also recognize `$'...'` (mode 3) as a quoted
155
+ # span. Pre-fix, the masker treated `$` as plain text in mode 0, so
156
+ # the closing `'` of `$'...'` was the only `'` the masker saw and it
157
+ # entered mode 2 there — flipping the mask state for the rest of the
158
+ # input. Mode 3 honors `\\`-escape semantics (so `\'` and `\\` inside
159
+ # the body do not prematurely terminate the span); on exit the closing
160
+ # `'` is masked to `\x02` (same as mode 2's exit) so the wrapper-scan
161
+ # can no longer treat in-quote `'` as a payload-opening quote.
162
+ local _unwrap_sep
163
+ _unwrap_sep=$'\x1c\x1d'
141
164
  local masked
142
- masked=$(printf '%s' "$cmd" | awk '
165
+ # shellcheck disable=SC1078
166
+ # SC1078 fires inside the awk program because shellcheck's bash parser
167
+ # cannot model awk's nested-quote semantics (`'\''` here is the
168
+ # bash-to-awk single-apostrophe escape pattern, not an unclosed shell
169
+ # string). Verified false-positive — the awk program parses cleanly.
170
+ masked=$(printf '%s%s' "$cmd" "$_unwrap_sep" | awk '
171
+ BEGIN { RS = "\034\035" }
143
172
  {
144
173
  line = $0
145
174
  out = ""
@@ -149,12 +178,40 @@ _rea_unwrap_at_depth() {
149
178
  while (i <= n) {
150
179
  ch = substr(line, i, 1)
151
180
  if (mode == 0) {
181
+ # ANSI-C `$'\''...'\''` introducer: emit `$` and opening quote
182
+ # literally (so the wrapper-scan can detect the introducer)
183
+ # and enter mode 3.
184
+ if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
185
+ mode = 3
186
+ out = out "$" "'\''"
187
+ i += 2
188
+ continue
189
+ }
152
190
  if (ch == "\"") { mode = 1; out = out ch; i++; continue }
153
191
  if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
154
192
  out = out ch
155
193
  i++
156
194
  continue
157
195
  }
196
+ if (mode == 3) {
197
+ # ANSI-C: `\\X` is a literal escape pair (`\\\''`, `\\\\`, `\\n`,
198
+ # etc.). Preserve the pair so the closing `'\''` detector below
199
+ # does not exit on `\\\''`.
200
+ if (ch == "\\" && i < n) {
201
+ nxt = substr(line, i + 1, 1)
202
+ out = out ch nxt
203
+ i += 2
204
+ continue
205
+ }
206
+ if (ch == "'\''") { mode = 0; out = out "\002"; i++; continue }
207
+ if (ch == ";") { out = out "\003"; i++; continue }
208
+ if (ch == "&") { out = out "\005"; i++; continue }
209
+ if (ch == "|") { out = out "\006"; i++; continue }
210
+ if (ch == "\"") { out = out "\001"; i++; continue }
211
+ out = out ch
212
+ i++
213
+ continue
214
+ }
158
215
  if (mode == 2) {
159
216
  if (ch == "'\''") { mode = 0; out = out "\002"; i++; continue }
160
217
  if (ch == ";") { out = out "\003"; i++; continue }
@@ -183,15 +240,27 @@ _rea_unwrap_at_depth() {
183
240
  }
184
241
  printf "%s", out
185
242
  }')
186
- # Pass both raw and masked into awk. Wrapper-regex matches against the
187
- # masked form; payload extraction reads the raw form using the same
188
- # offsets. Because the mask is byte-for-byte width-preserving, the
189
- # same RSTART/RLENGTH applies to both.
243
+ # Pass both raw and masked into awk via stdin as sentinel-separated
244
+ # records, because `awk -v raw="$cmd" -v masked="$masked"` errors with
245
+ # `awk: newline in string` the moment either string contains a literal
246
+ # newline. RS=`\x1c\x1d` (FS+GS multi-byte sentinel) survives newlines
247
+ # in either record. (BSD awk does not support NUL-as-RS reliably.)
248
+ # Wrapper-regex matches against the masked form; payload extraction
249
+ # reads the raw form using the same offsets. Because the mask is
250
+ # byte-for-byte width-preserving, the same RSTART/RLENGTH applies to
251
+ # both.
190
252
  #
191
253
  # 0.21.2: capture payloads to a local var; iterate to recurse.
254
+ # 0.26.1 helix-028 P1: switch from `awk -v` to `\x1c\x1d`-separated stdin records.
255
+ # 0.26.1 ANSI-C sibling: handle `$'\''...'\''` as a third quoted-body
256
+ # form alongside `'\''...'\''` and `"..."`. Decode common escape
257
+ # sequences (`\\n`, `\\t`, `\\r`, `\\\\`, `\\\''`, `\\"`) when emitting
258
+ # the payload so the downstream segment splitter sees real newlines
259
+ # and splits on them.
192
260
  local _unwrap_payloads
193
- _unwrap_payloads=$(printf '' | awk -v raw="$cmd" -v masked="$masked" '
261
+ _unwrap_payloads=$(printf '%s%s%s%s' "$cmd" "$_unwrap_sep" "$masked" "$_unwrap_sep" | awk '
194
262
  BEGIN {
263
+ RS = "\034\035"
195
264
  # Wrapper-prefix regex: shell-name + optional flag tokens + -c-style flag.
196
265
  # Each flag token is `-` followed by 1+ letters and trailing space.
197
266
  # NOTE: matches only OUTSIDE outer quoted spans because in-quote
@@ -207,6 +276,10 @@ _rea_unwrap_at_depth() {
207
276
  # NOT covered here. Adding pwsh requires a separate code path
208
277
  # because EncodedCommand base64-decodes at runtime.
209
278
  WRAP = "(^|[[:space:]&|;])(bash|sh|zsh|dash|ksh|mksh|oksh|posh|yash|csh|tcsh|fish)([[:space:]]+-[a-zA-Z]+)*[[:space:]]+-(c|lc|lic|ic|cl|cli|li|il)[[:space:]]+"
279
+ }
280
+ NR == 1 { raw = $0; next }
281
+ NR == 2 {
282
+ masked = $0
210
283
  # Track the cursor in BOTH raw and masked. Because the mask is
211
284
  # byte-for-byte width-preserving, the same RSTART/RLENGTH applies
212
285
  # to both — but each iteration of the loop must SLICE both strings
@@ -225,8 +298,120 @@ _rea_unwrap_at_depth() {
225
298
  # verbatim only when it was an outer quote.
226
299
  first = substr(rtail, 1, 1)
227
300
  mfirst = substr(mtail, 1, 1)
301
+ # ANSI-C: `$'\''...'\''` introducer (raw and masked must both
302
+ # carry `$` followed by literal `'\''` — the masker preserves the
303
+ # opening pair when it transitions into mode 3).
304
+ if (first == "$" && substr(rtail, 2, 1) == "'\''" \
305
+ && mfirst == "$" && substr(mtail, 2, 1) == "'\''") {
306
+ body = substr(rtail, 3)
307
+ n = length(body)
308
+ j = 1
309
+ out = ""
310
+ closed = 0
311
+ while (j <= n) {
312
+ c = substr(body, j, 1)
313
+ if (c == "\\" && j < n) {
314
+ nxt = substr(body, j + 1, 1)
315
+ # Decode common ANSI-C escape sequences so the splitter
316
+ # downstream sees real bytes (e.g. `\n` → newline → segment
317
+ # boundary at protected/blocked-path detection time).
318
+ if (nxt == "n") { out = out "\n"; j += 2; continue }
319
+ if (nxt == "t") { out = out "\t"; j += 2; continue }
320
+ if (nxt == "r") { out = out "\r"; j += 2; continue }
321
+ if (nxt == "\\") { out = out "\\"; j += 2; continue }
322
+ if (nxt == "'\''") { out = out "'\''"; j += 2; continue }
323
+ if (nxt == "\"") { out = out "\""; j += 2; continue }
324
+ if (nxt == "a") { out = out "\007"; j += 2; continue }
325
+ if (nxt == "b") { out = out "\010"; j += 2; continue }
326
+ if (nxt == "e" || nxt == "E") { out = out "\033"; j += 2; continue }
327
+ if (nxt == "f") { out = out "\014"; j += 2; continue }
328
+ if (nxt == "v") { out = out "\013"; j += 2; continue }
329
+ if (nxt == "?") { out = out "?"; j += 2; continue }
330
+ # 0.26.1 helix-028 P1-2: `\xHH` (1–2 hex digits). Pre-fix
331
+ # `bash -lc $'\''echo > .rea/HALT\\x0Atrue'\''` had `\x0A`
332
+ # preserved as the literal pair `\x0A`, so the segment
333
+ # splitter never saw the real LF and the second statement
334
+ # (`true` / arbitrary attacker payload) was hidden in the
335
+ # same segment as the first. Decode here so the LF reaches
336
+ # the splitter.
337
+ if (nxt == "x") {
338
+ hex = ""
339
+ k = j + 2
340
+ while (k <= n && length(hex) < 2 \
341
+ && index("0123456789abcdefABCDEF", substr(body, k, 1)) > 0) {
342
+ hex = hex substr(body, k, 1)
343
+ k++
344
+ }
345
+ if (length(hex) > 0) {
346
+ # awk has no native hex parser. Walk the digits.
347
+ hv = 0
348
+ for (h = 1; h <= length(hex); h++) {
349
+ hd = substr(hex, h, 1)
350
+ di = index("0123456789abcdef", tolower(hd)) - 1
351
+ hv = hv * 16 + di
352
+ }
353
+ out = out sprintf("%c", hv)
354
+ j = k
355
+ continue
356
+ }
357
+ # `\x` with no digits — preserve pair literally.
358
+ out = out c nxt
359
+ j += 2
360
+ continue
361
+ }
362
+ # 0.26.1 helix-028 P1-2: `\NNN` octal (1–3 digits). Pre-fix
363
+ # `\012` (= LF) was preserved as a literal pair, same bypass
364
+ # class as `\xHH`.
365
+ if (nxt >= "0" && nxt <= "7") {
366
+ oct = nxt
367
+ k = j + 2
368
+ while (k <= n && length(oct) < 3 \
369
+ && substr(body, k, 1) >= "0" \
370
+ && substr(body, k, 1) <= "7") {
371
+ oct = oct substr(body, k, 1)
372
+ k++
373
+ }
374
+ ov = 0
375
+ for (h = 1; h <= length(oct); h++) {
376
+ ov = ov * 8 + (substr(oct, h, 1) + 0)
377
+ }
378
+ # Bash truncates to 8 bits.
379
+ ov = ov % 256
380
+ out = out sprintf("%c", ov)
381
+ j = k
382
+ continue
383
+ }
384
+ # Default: preserve pair (covers `\u…`, `\U…`, `\cX` — rarer
385
+ # shapes; the literal pair is still safer than silent decoding
386
+ # for unsupported escapes in this legacy-gate layer. The Node
387
+ # scanner — primary enforcement for protected/blocked paths
388
+ # since 0.23.0 — fails closed on these via decodeAnsiC).
389
+ out = out c nxt
390
+ j += 2
391
+ continue
392
+ }
393
+ if (c == "'\''") { closed = j; break }
394
+ out = out c
395
+ j++
396
+ }
397
+ if (closed == 0) {
398
+ mrest = substr(mtail, 3)
399
+ rrest = substr(rtail, 3)
400
+ continue
401
+ }
402
+ print out
403
+ # Skip past `$` (1) + opening `'\''` (1) + body (closed-1) +
404
+ # closing `'\''` (1) = 2 + closed bytes from mtail/rtail start.
405
+ mrest = substr(mtail, 2 + closed + 1)
406
+ rrest = substr(rtail, 2 + closed + 1)
407
+ continue
408
+ }
228
409
  if (first == "'\''" && mfirst == "'\''") {
229
410
  # Single-quoted body: no escape semantics; runs to next `'\''`.
411
+ # NOTE: index against the RAW body — the masker replaces the
412
+ # closing `'\''` of an outer single-quoted span with `\002`, so
413
+ # `index(mbody, "'\''")` would never find it. The raw body
414
+ # carries the literal closing `'\''` byte verbatim.
230
415
  body = substr(rtail, 2)
231
416
  mbody = substr(mtail, 2)
232
417
  end = index(body, "'\''")
@@ -278,10 +463,7 @@ _rea_unwrap_at_depth() {
278
463
  mrest = mtail
279
464
  rrest = rtail
280
465
  }
281
- }
282
- # Empty action with no input rules — explicitly drive the loop from
283
- # END so awk does not require any input records.
284
- END {}')
466
+ }')
285
467
  # Recurse on each extracted payload with depth+1.
286
468
  if [[ -n "$_unwrap_payloads" ]]; then
287
469
  while IFS= read -r _unwrap_p; do
@@ -350,6 +532,11 @@ _rea_split_segments() {
350
532
  # records; the existing pipeline then quote-masks and splits each
351
533
  # record independently. Inner payload anchors trigger words for the
352
534
  # `any_segment_*` checks downstream.
535
+ # shellcheck disable=SC1078
536
+ # SC1078 fires inside the awk program because shellcheck's bash parser
537
+ # cannot model awk's nested-quote semantics (`'\''` here is the
538
+ # bash-to-awk single-apostrophe escape pattern, not an unclosed shell
539
+ # string). Verified false-positive — the awk program parses cleanly.
353
540
  _rea_unwrap_nested_shells "$cmd" \
354
541
  | awk '
355
542
  BEGIN {
@@ -363,16 +550,38 @@ _rea_split_segments() {
363
550
  out = ""
364
551
  i = 1
365
552
  n = length(line)
366
- mode = 0 # 0=plain, 1=double, 2=single
553
+ mode = 0 # 0=plain, 1=double, 2=single, 3=ANSI-C $'\''...'\''
367
554
  while (i <= n) {
368
555
  ch = substr(line, i, 1)
369
556
  if (mode == 0) {
557
+ # 0.26.1 helix-028 sibling: ANSI-C `$'\''...'\''` introducer.
558
+ # Pre-fix `echo $'\''a;b'\''` had its in-quote `;` un-masked and
559
+ # the splitter broke the segment at the `;`. Mode 3 honors
560
+ # backslash-escape pairs so `\\\''` and `\\\\` do not exit early.
561
+ if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
562
+ mode = 3; out = out "$" "'\''"; i += 2; continue
563
+ }
370
564
  if (ch == "\"") { mode = 1; out = out ch; i++; continue }
371
565
  if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
372
566
  out = out ch
373
567
  i++
374
568
  continue
375
569
  }
570
+ if (mode == 3) {
571
+ if (ch == "\\" && i < n) {
572
+ nxt = substr(line, i + 1, 1)
573
+ out = out ch nxt
574
+ i += 2
575
+ continue
576
+ }
577
+ if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
578
+ if (ch == ";") { out = out SC; i++; continue }
579
+ if (ch == "&") { out = out AMP; i++; continue }
580
+ if (ch == "|") { out = out PIPE; i++; continue }
581
+ out = out ch
582
+ i++
583
+ continue
584
+ }
376
585
  if (mode == 2) {
377
586
  # Single quotes: no escape semantics. Only `'\''` ends.
378
587
  if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
@@ -423,9 +632,24 @@ _rea_split_segments() {
423
632
  # in-quote `|` characters.
424
633
  quote_masked_cmd() {
425
634
  local cmd="$1"
426
- printf '%s' "$cmd" \
635
+ # 0.26.1 helix-028 sibling: feed the entire (possibly multiline) `$cmd`
636
+ # to awk as a SINGLE record using a multi-byte record separator
637
+ # (`\x1c\x1d` = FS+GS). Pre-fix, the default `RS=\n` split a multiline
638
+ # input across records and reset in-quote `mode` per-line, which both
639
+ # dropped the newlines AND scrambled the mask (the closing `"` on
640
+ # line 2 was treated as opening a new quoted span in plain mode).
641
+ # Also adds ANSI-C `$'\''...'\''` (mode 3) to mirror _rea_unwrap_at_depth's
642
+ # masker — same scope: in-quote `|`/`;`/`&` get masked, opening-pair
643
+ # `$'\''` is preserved for downstream detection, closing `'\''` is left
644
+ # literal here (this helper does not need a mode-exit-mask byte; the
645
+ # caller pattern-matches against literal `|`/`;`/`&` in the masked
646
+ # stream and benefits from preserving quote boundaries verbatim).
647
+ local _qm_sep
648
+ _qm_sep=$'\x1c\x1d'
649
+ printf '%s%s' "$cmd" "$_qm_sep" \
427
650
  | awk '
428
651
  BEGIN {
652
+ RS = "\034\035"
429
653
  INQ_PIPE = "__REA_INQUOTE_PIPE_a8f2c1__"
430
654
  INQ_SC = "__REA_INQUOTE_SC_a8f2c1__"
431
655
  INQ_AMP = "__REA_INQUOTE_AMP_a8f2c1__"
@@ -439,12 +663,29 @@ quote_masked_cmd() {
439
663
  while (i <= n) {
440
664
  ch = substr(line, i, 1)
441
665
  if (mode == 0) {
666
+ if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
667
+ mode = 3; out = out "$" "'\''"; i += 2; continue
668
+ }
442
669
  if (ch == "\"") { mode = 1; out = out ch; i++; continue }
443
670
  if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
444
671
  out = out ch
445
672
  i++
446
673
  continue
447
674
  }
675
+ if (mode == 3) {
676
+ if (ch == "\\" && i < n) {
677
+ out = out ch substr(line, i + 1, 1)
678
+ i += 2
679
+ continue
680
+ }
681
+ if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
682
+ if (ch == "|") { out = out INQ_PIPE; i++; continue }
683
+ if (ch == ";") { out = out INQ_SC; i++; continue }
684
+ if (ch == "&") { out = out INQ_AMP; i++; continue }
685
+ out = out ch
686
+ i++
687
+ continue
688
+ }
448
689
  if (mode == 2) {
449
690
  if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
450
691
  if (ch == "|") { out = out INQ_PIPE; i++; continue }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bookedsolid/rea",
3
- "version": "0.26.0",
3
+ "version": "0.27.0",
4
4
  "description": "Agentic governance layer for Claude Code — policy enforcement, hook-based safety gates, audit logging, and Codex-integrated adversarial review for AI-assisted projects",
5
5
  "license": "MIT",
6
6
  "author": "Booked Solid Technology <oss@bookedsolid.tech> (https://bookedsolid.tech)",