@bookedsolid/rea 0.26.0 → 0.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8968,9 +8968,37 @@ function wordToString(word) {
8968
8968
  case 'Lit':
8969
8969
  value += stringifyField(part['Value']);
8970
8970
  break;
8971
- case 'SglQuoted':
8972
- value += stringifyField(part['Value']);
8971
+ case 'SglQuoted': {
8972
+ // 0.26.1 helix-028 P1-1: ANSI-C `$'...'` quoting expands
8973
+ // `\n`/`\t`/`\xHH`/`\NNN`/`\u…`/`\cX` etc. at parse time. mvdan-sh
8974
+ // emits `$'...'` as `SglQuoted` with `Dollar: true` and the RAW
8975
+ // escape source in `Value` (e.g. `\n` arrives as backslash-n, not
8976
+ // LF). Pre-fix the walker concatenated the raw value verbatim,
8977
+ // and the downstream `stripBashBackslashEscapes` mangled `\n` →
8978
+ // `n` (regex `\\([A-Za-z0-9./_~-])` strips backslash from any
8979
+ // letter), turning `.rea/HALT\ntrue` into `.rea/HALTntrue` — which
8980
+ // never matched the protected pattern. Real bash, of course,
8981
+ // expanded `\n` to LF, so the redirect target was actually
8982
+ // `.rea/HALT` and the kill-switch got overwritten. Decode
8983
+ // explicitly here so downstream consumers see real bytes.
8984
+ const raw = stringifyField(part['Value']);
8985
+ const isAnsiC = part['Dollar'] === true;
8986
+ if (isAnsiC) {
8987
+ const decoded = decodeAnsiC(raw);
8988
+ if (decoded === null) {
8989
+ // Unsupported escape — fail closed. Mark word as dynamic so
8990
+ // the protected/blocked path matchers refuse on uncertainty.
8991
+ dynamic = true;
8992
+ }
8993
+ else {
8994
+ value += decoded;
8995
+ }
8996
+ }
8997
+ else {
8998
+ value += raw;
8999
+ }
8973
9000
  break;
9001
+ }
8974
9002
  case 'DblQuoted': {
8975
9003
  const innerParts = asArray(part['Parts']);
8976
9004
  for (const ip of innerParts) {
@@ -9010,6 +9038,208 @@ function stringifyField(v) {
9010
9038
  return v;
9011
9039
  return '';
9012
9040
  }
9041
/**
 * Decode the body of a bash ANSI-C quoted word (`$'...'`).
 *
 * Returns the decoded string, or `null` when the input contains an escape
 * that cannot be modelled faithfully. Callers must fail closed on `null`
 * (treat the word as dynamic so the protected/blocked path matchers refuse
 * on uncertainty).
 *
 * Escapes handled (per the bash manual, "ANSI-C Quoting"):
 *   - `\\`                literal backslash
 *   - `\'` `\"`           literal quote
 *   - `\?`                literal question mark
 *   - `\a` `\b`           BEL / BS
 *   - `\e` `\E`           ESC
 *   - `\f` `\n`           FF / LF
 *   - `\r` `\t`           CR / TAB
 *   - `\v`                VT
 *   - `\NNN`              octal byte (1-3 digits, truncated to 8 bits)
 *   - `\xHH`              hex byte (1-2 digits)
 *   - `\uHHHH`            unicode code point (1-4 hex digits)
 *   - `\UHHHHHHHH`        unicode code point (1-8 hex digits)
 *   - `\cX`               control char: `toupper(X) & 0x1f`, `\c?` is DEL
 *
 * Deliberate choices:
 *   - `\x`, `\u`, `\U` with no digits, a trailing lone `\`, and a trailing
 *     `\c` are kept literally.
 *   - Any other `\X` returns `null` rather than being preserved, so payload
 *     bytes cannot hide behind an escape this decoder does not model.
 *   - The result is cut at the first NUL: bash words are C strings, so
 *     everything after a decoded NUL never reaches the command.
 *
 * 0.26.1 helix-028 P1-1.
 *
 * @param {string} raw - undecoded escape source found between the quotes
 * @returns {string|null} decoded text, or `null` to signal "fail closed"
 */
function decodeAnsiC(raw) {
    const n = raw.length;
    // Escapes that map one character to one fixed replacement.
    const simpleEscapes = new Map([
        ['\\', '\\'],
        ["'", "'"],
        ['"', '"'],
        ['?', '?'],
        ['a', '\x07'],
        ['b', '\x08'],
        ['e', '\x1b'],
        ['E', '\x1b'],
        ['f', '\x0c'],
        ['n', '\n'],
        ['r', '\r'],
        ['t', '\t'],
        ['v', '\x0b'],
    ]);
    const isHexDigit = (code) => (code >= 0x30 && code <= 0x39) ||
        (code >= 0x41 && code <= 0x46) ||
        (code >= 0x61 && code <= 0x66);
    const isOctalDigit = (code) => code >= 0x30 && code <= 0x37;
    // Collect up to `maxDigits` consecutive digits starting at `start`.
    const takeDigits = (start, maxDigits, isDigit) => {
        let end = start;
        while (end < n && end - start < maxDigits && isDigit(raw.charCodeAt(end))) {
            end += 1;
        }
        return raw.slice(start, end);
    };
    let out = '';
    let i = 0;
    while (i < n) {
        const ch = raw[i];
        if (ch !== '\\') {
            out += ch;
            i += 1;
            continue;
        }
        // Lone trailing backslash: bash keeps it literal.
        if (i + 1 >= n) {
            out += '\\';
            i += 1;
            continue;
        }
        const next = raw[i + 1];
        const simple = simpleEscapes.get(next);
        if (simple !== undefined) {
            out += simple;
            i += 2;
            continue;
        }
        // \xHH (1-2 digits), \uHHHH (1-4 digits), \UHHHHHHHH (1-8 digits).
        if (next === 'x' || next === 'u' || next === 'U') {
            const maxDigits = next === 'x' ? 2 : next === 'u' ? 4 : 8;
            const hex = takeDigits(i + 2, maxDigits, isHexDigit);
            if (hex.length === 0) {
                // No digits: bash leaves the backslash pair untouched.
                out += '\\' + next;
                i += 2;
                continue;
            }
            const value = Number.parseInt(hex, 16);
            if (next === 'x') {
                out += String.fromCharCode(value);
            }
            else {
                // String.fromCodePoint throws above U+10FFFF. Bash silently
                // truncates such values; we fail closed instead.
                if (value > 0x10ffff) {
                    return null;
                }
                out += String.fromCodePoint(value);
            }
            i += 2 + hex.length;
            continue;
        }
        // \NNN: 1-3 octal digits, `next` itself being the first one.
        if (next >= '0' && next <= '7') {
            const oct = takeDigits(i + 1, 3, isOctalDigit);
            out += String.fromCharCode(Number.parseInt(oct, 8) & 0xff);
            i += 1 + oct.length;
            continue;
        }
        // \cX: control character.
        if (next === 'c') {
            if (i + 2 >= n) {
                // Lone `\c` at end: treat as literal.
                out += '\\c';
                i += 2;
                continue;
            }
            let code = raw.charCodeAt(i + 2);
            // Bash applies \c to a single byte; a non-ASCII X cannot be
            // reproduced byte-accurately in a JS string, so refuse.
            if (code > 0x7f) {
                return null;
            }
            i += 3;
            // Inside $'...' bash consumes BOTH backslashes of `\c\\`.
            if (code === 0x5c && raw[i] === '\\') {
                i += 1;
            }
            if (code === 0x3f) {
                out += '\x7f'; // `\c?` is DEL
            }
            else {
                // bash: toupper(X) & 0x1f, so `\cj` and `\cJ` are both LF.
                if (code >= 0x61 && code <= 0x7a) {
                    code -= 0x20;
                }
                out += String.fromCharCode(code & 0x1f);
            }
            continue;
        }
        // Unknown escape: bash would keep `\X` literally, but a security
        // scanner is safer refusing than guessing.
        return null;
    }
    // A NUL terminates the word in bash; drop it and everything after it so
    // downstream matchers see exactly the bytes the shell will use.
    const nul = out.indexOf('\0');
    return nul === -1 ? out : out.slice(0, nul);
}
9013
9243
  function asArray(v) {
9014
9244
  if (Array.isArray(v)) {
9015
9245
  return v;
@@ -138,8 +138,32 @@ _rea_unwrap_at_depth() {
138
138
  # \x03 ETX — replaces in-quote `;`
139
139
  # \x05 ENQ — replaces in-quote `&`
140
140
  # \x06 ACK — replaces in-quote `|`
141
+ #
142
+ # 0.26.1 helix-028 P1 fix: feed the entire (possibly multiline) `$cmd`
143
+ # to awk as a SINGLE record using a multi-byte record separator
144
+ # (`\x1c\x1d` = FS+GS, control bytes that cannot appear in real shell
145
+ # input). Pre-fix, awk's default RS=`\n` made the masking awk process
146
+ # each line independently, which (a) dropped the newlines from the
147
+ # masked output and (b) reset the in-quote `mode` state per-line — so
148
+ # `bash -lc "printf x > .rea/HALT\ntrue"` had its closing `"` on line 2
149
+ # treated as an opening quote in plain mode, scrambling the mask. macOS
150
+ # BSD awk does NOT support NUL as RS (truncates after first record);
151
+ # `\x1c\x1d` is a portable multi-byte sentinel that awk's RS handles
152
+ # uniformly across BSD/GNU awk implementations.
153
+ #
154
+ # 0.26.1 ANSI-C sibling: also recognize `$'...'` (mode 3) as a quoted
155
+ # span. Pre-fix, the masker treated `$` as plain text in mode 0, so
156
+ # the closing `'` of `$'...'` was the only `'` the masker saw and it
157
+ # entered mode 2 there — flipping the mask state for the rest of the
158
+ # input. Mode 3 honors `\\`-escape semantics (so `\'` and `\\` inside
159
+ # the body do not prematurely terminate the span); on exit the closing
160
+ # `'` is masked to `\x02` (same as mode 2's exit) so the wrapper-scan
161
+ # can no longer treat in-quote `'` as a payload-opening quote.
162
+ local _unwrap_sep
163
+ _unwrap_sep=$'\x1c\x1d'
141
164
  local masked
142
- masked=$(printf '%s' "$cmd" | awk '
165
+ masked=$(printf '%s%s' "$cmd" "$_unwrap_sep" | awk '
166
+ BEGIN { RS = "\034\035" }
143
167
  {
144
168
  line = $0
145
169
  out = ""
@@ -149,12 +173,40 @@ _rea_unwrap_at_depth() {
149
173
  while (i <= n) {
150
174
  ch = substr(line, i, 1)
151
175
  if (mode == 0) {
176
+ # ANSI-C `$'\''...'\''` introducer: emit `$` and opening quote
177
+ # literally (so the wrapper-scan can detect the introducer)
178
+ # and enter mode 3.
179
+ if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
180
+ mode = 3
181
+ out = out "$" "'\''"
182
+ i += 2
183
+ continue
184
+ }
152
185
  if (ch == "\"") { mode = 1; out = out ch; i++; continue }
153
186
  if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
154
187
  out = out ch
155
188
  i++
156
189
  continue
157
190
  }
191
+ if (mode == 3) {
192
+ # ANSI-C: `\\X` is a literal escape pair (`\\\''`, `\\\\`, `\\n`,
193
+ # etc.). Preserve the pair so the closing `'\''` detector below
194
+ # does not exit on `\\\''`.
195
+ if (ch == "\\" && i < n) {
196
+ nxt = substr(line, i + 1, 1)
197
+ out = out ch nxt
198
+ i += 2
199
+ continue
200
+ }
201
+ if (ch == "'\''") { mode = 0; out = out "\002"; i++; continue }
202
+ if (ch == ";") { out = out "\003"; i++; continue }
203
+ if (ch == "&") { out = out "\005"; i++; continue }
204
+ if (ch == "|") { out = out "\006"; i++; continue }
205
+ if (ch == "\"") { out = out "\001"; i++; continue }
206
+ out = out ch
207
+ i++
208
+ continue
209
+ }
158
210
  if (mode == 2) {
159
211
  if (ch == "'\''") { mode = 0; out = out "\002"; i++; continue }
160
212
  if (ch == ";") { out = out "\003"; i++; continue }
@@ -183,15 +235,27 @@ _rea_unwrap_at_depth() {
183
235
  }
184
236
  printf "%s", out
185
237
  }')
186
- # Pass both raw and masked into awk. Wrapper-regex matches against the
187
- # masked form; payload extraction reads the raw form using the same
188
- # offsets. Because the mask is byte-for-byte width-preserving, the
189
- # same RSTART/RLENGTH applies to both.
238
+ # Pass both raw and masked into awk via stdin as sentinel-separated
239
+ # records, because `awk -v raw="$cmd" -v masked="$masked"` errors with
240
+ # `awk: newline in string` the moment either string contains a literal
241
+ # newline. RS=`\x1c\x1d` (FS+GS multi-byte sentinel) survives newlines
242
+ # in either record. (BSD awk does not support NUL-as-RS reliably.)
243
+ # Wrapper-regex matches against the masked form; payload extraction
244
+ # reads the raw form using the same offsets. Because the mask is
245
+ # byte-for-byte width-preserving, the same RSTART/RLENGTH applies to
246
+ # both.
190
247
  #
191
248
  # 0.21.2: capture payloads to a local var; iterate to recurse.
249
+ # 0.26.1 helix-028 P1: switch from `awk -v` to stdin records separated by `\x1c\x1d`.
250
+ # 0.26.1 ANSI-C sibling: handle `$'\''...'\''` as a third quoted-body
251
+ # form alongside `'\''...'\''` and `"..."`. Decode common escape
252
+ # sequences (`\\n`, `\\t`, `\\r`, `\\\\`, `\\\''`, `\\"`) when emitting
253
+ # the payload so the downstream segment splitter sees real newlines
254
+ # and splits on them.
192
255
  local _unwrap_payloads
193
- _unwrap_payloads=$(printf '' | awk -v raw="$cmd" -v masked="$masked" '
256
+ _unwrap_payloads=$(printf '%s%s%s%s' "$cmd" "$_unwrap_sep" "$masked" "$_unwrap_sep" | awk '
194
257
  BEGIN {
258
+ RS = "\034\035"
195
259
  # Wrapper-prefix regex: shell-name + optional flag tokens + -c-style flag.
196
260
  # Each flag token is `-` followed by 1+ letters and trailing space.
197
261
  # NOTE: matches only OUTSIDE outer quoted spans because in-quote
@@ -207,6 +271,10 @@ _rea_unwrap_at_depth() {
207
271
  # NOT covered here. Adding pwsh requires a separate code path
208
272
  # because EncodedCommand base64-decodes at runtime.
209
273
  WRAP = "(^|[[:space:]&|;])(bash|sh|zsh|dash|ksh|mksh|oksh|posh|yash|csh|tcsh|fish)([[:space:]]+-[a-zA-Z]+)*[[:space:]]+-(c|lc|lic|ic|cl|cli|li|il)[[:space:]]+"
274
+ }
275
+ NR == 1 { raw = $0; next }
276
+ NR == 2 {
277
+ masked = $0
210
278
  # Track the cursor in BOTH raw and masked. Because the mask is
211
279
  # byte-for-byte width-preserving, the same RSTART/RLENGTH applies
212
280
  # to both — but each iteration of the loop must SLICE both strings
@@ -225,8 +293,120 @@ _rea_unwrap_at_depth() {
225
293
  # verbatim only when it was an outer quote.
226
294
  first = substr(rtail, 1, 1)
227
295
  mfirst = substr(mtail, 1, 1)
296
+ # ANSI-C: `$'\''...'\''` introducer (raw and masked must both
297
+ # carry `$` followed by literal `'\''` — the masker preserves the
298
+ # opening pair when it transitions into mode 3).
299
+ if (first == "$" && substr(rtail, 2, 1) == "'\''" \
300
+ && mfirst == "$" && substr(mtail, 2, 1) == "'\''") {
301
+ body = substr(rtail, 3)
302
+ n = length(body)
303
+ j = 1
304
+ out = ""
305
+ closed = 0
306
+ while (j <= n) {
307
+ c = substr(body, j, 1)
308
+ if (c == "\\" && j < n) {
309
+ nxt = substr(body, j + 1, 1)
310
+ # Decode common ANSI-C escape sequences so the splitter
311
+ # downstream sees real bytes (e.g. `\n` → newline → segment
312
+ # boundary at protected/blocked-path detection time).
313
+ if (nxt == "n") { out = out "\n"; j += 2; continue }
314
+ if (nxt == "t") { out = out "\t"; j += 2; continue }
315
+ if (nxt == "r") { out = out "\r"; j += 2; continue }
316
+ if (nxt == "\\") { out = out "\\"; j += 2; continue }
317
+ if (nxt == "'\''") { out = out "'\''"; j += 2; continue }
318
+ if (nxt == "\"") { out = out "\""; j += 2; continue }
319
+ if (nxt == "a") { out = out "\007"; j += 2; continue }
320
+ if (nxt == "b") { out = out "\010"; j += 2; continue }
321
+ if (nxt == "e" || nxt == "E") { out = out "\033"; j += 2; continue }
322
+ if (nxt == "f") { out = out "\014"; j += 2; continue }
323
+ if (nxt == "v") { out = out "\013"; j += 2; continue }
324
+ if (nxt == "?") { out = out "?"; j += 2; continue }
325
+ # 0.26.1 helix-028 P1-2: `\xHH` (1–2 hex digits). Pre-fix
326
+ # `bash -lc $'\''echo > .rea/HALT\\x0Atrue'\''` had `\x0A`
327
+ # preserved as the literal pair `\x0A`, so the segment
328
+ # splitter never saw the real LF and the second statement
329
+ # (`true` / arbitrary attacker payload) was hidden in the
330
+ # same segment as the first. Decode here so the LF reaches
331
+ # the splitter.
332
+ if (nxt == "x") {
333
+ hex = ""
334
+ k = j + 2
335
+ while (k <= n && length(hex) < 2 \
336
+ && index("0123456789abcdefABCDEF", substr(body, k, 1)) > 0) {
337
+ hex = hex substr(body, k, 1)
338
+ k++
339
+ }
340
+ if (length(hex) > 0) {
341
+ # awk has no native hex parser. Walk the digits.
342
+ hv = 0
343
+ for (h = 1; h <= length(hex); h++) {
344
+ hd = substr(hex, h, 1)
345
+ di = index("0123456789abcdef", tolower(hd)) - 1
346
+ hv = hv * 16 + di
347
+ }
348
+ out = out sprintf("%c", hv)
349
+ j = k
350
+ continue
351
+ }
352
+ # `\x` with no digits — preserve pair literally.
353
+ out = out c nxt
354
+ j += 2
355
+ continue
356
+ }
357
+ # 0.26.1 helix-028 P1-2: `\NNN` octal (1–3 digits). Pre-fix
358
+ # `\012` (= LF) was preserved as a literal pair, same bypass
359
+ # class as `\xHH`.
360
+ if (nxt >= "0" && nxt <= "7") {
361
+ oct = nxt
362
+ k = j + 2
363
+ while (k <= n && length(oct) < 3 \
364
+ && substr(body, k, 1) >= "0" \
365
+ && substr(body, k, 1) <= "7") {
366
+ oct = oct substr(body, k, 1)
367
+ k++
368
+ }
369
+ ov = 0
370
+ for (h = 1; h <= length(oct); h++) {
371
+ ov = ov * 8 + (substr(oct, h, 1) + 0)
372
+ }
373
+ # Bash truncates to 8 bits.
374
+ ov = ov % 256
375
+ out = out sprintf("%c", ov)
376
+ j = k
377
+ continue
378
+ }
379
+ # Default: preserve pair (covers `\u…`, `\U…`, `\cX` — rarer
380
+ # shapes; the literal pair is still safer than silent decoding
381
+ # for unsupported escapes in this legacy-gate layer. The Node
382
+ # scanner — primary enforcement for protected/blocked paths
383
+ # since 0.23.0 — fails closed on these via decodeAnsiC).
384
+ out = out c nxt
385
+ j += 2
386
+ continue
387
+ }
388
+ if (c == "'\''") { closed = j; break }
389
+ out = out c
390
+ j++
391
+ }
392
+ if (closed == 0) {
393
+ mrest = substr(mtail, 3)
394
+ rrest = substr(rtail, 3)
395
+ continue
396
+ }
397
+ print out
398
+ # Skip past `$` (1) + opening `'\''` (1) + body (closed-1) +
399
+ # closing `'\''` (1) = 2 + closed bytes from mtail/rtail start.
400
+ mrest = substr(mtail, 2 + closed + 1)
401
+ rrest = substr(rtail, 2 + closed + 1)
402
+ continue
403
+ }
228
404
  if (first == "'\''" && mfirst == "'\''") {
229
405
  # Single-quoted body: no escape semantics; runs to next `'\''`.
406
+ # NOTE: index against the RAW body — the masker replaces the
407
+ # closing `'\''` of an outer single-quoted span with `\002`, so
408
+ # `index(mbody, "'\''")` would never find it. The raw body
409
+ # carries the literal closing `'\''` byte verbatim.
230
410
  body = substr(rtail, 2)
231
411
  mbody = substr(mtail, 2)
232
412
  end = index(body, "'\''")
@@ -278,10 +458,7 @@ _rea_unwrap_at_depth() {
278
458
  mrest = mtail
279
459
  rrest = rtail
280
460
  }
281
- }
282
- # Empty action with no input rules — explicitly drive the loop from
283
- # END so awk does not require any input records.
284
- END {}')
461
+ }')
285
462
  # Recurse on each extracted payload with depth+1.
286
463
  if [[ -n "$_unwrap_payloads" ]]; then
287
464
  while IFS= read -r _unwrap_p; do
@@ -363,16 +540,38 @@ _rea_split_segments() {
363
540
  out = ""
364
541
  i = 1
365
542
  n = length(line)
366
- mode = 0 # 0=plain, 1=double, 2=single
543
+ mode = 0 # 0=plain, 1=double, 2=single, 3=ANSI-C $'\''...'\''
367
544
  while (i <= n) {
368
545
  ch = substr(line, i, 1)
369
546
  if (mode == 0) {
547
+ # 0.26.1 helix-028 sibling: ANSI-C `$'\''...'\''` introducer.
548
+ # Pre-fix `echo $'\''a;b'\''` had its in-quote `;` un-masked and
549
+ # the splitter broke the segment at the `;`. Mode 3 honors
550
+ # backslash-escape pairs so `\\\''` and `\\\\` do not exit early.
551
+ if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
552
+ mode = 3; out = out "$" "'\''"; i += 2; continue
553
+ }
370
554
  if (ch == "\"") { mode = 1; out = out ch; i++; continue }
371
555
  if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
372
556
  out = out ch
373
557
  i++
374
558
  continue
375
559
  }
560
+ if (mode == 3) {
561
+ if (ch == "\\" && i < n) {
562
+ nxt = substr(line, i + 1, 1)
563
+ out = out ch nxt
564
+ i += 2
565
+ continue
566
+ }
567
+ if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
568
+ if (ch == ";") { out = out SC; i++; continue }
569
+ if (ch == "&") { out = out AMP; i++; continue }
570
+ if (ch == "|") { out = out PIPE; i++; continue }
571
+ out = out ch
572
+ i++
573
+ continue
574
+ }
376
575
  if (mode == 2) {
377
576
  # Single quotes: no escape semantics. Only `'\''` ends.
378
577
  if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
@@ -423,9 +622,24 @@ _rea_split_segments() {
423
622
  # in-quote `|` characters.
424
623
  quote_masked_cmd() {
425
624
  local cmd="$1"
426
- printf '%s' "$cmd" \
625
+ # 0.26.1 helix-028 sibling: feed the entire (possibly multiline) `$cmd`
626
+ # to awk as a SINGLE record using a multi-byte record separator
627
+ # (`\x1c\x1d` = FS+GS). Pre-fix, the default `RS=\n` split a multiline
628
+ # input across records and reset in-quote `mode` per-line, which both
629
+ # dropped the newlines AND scrambled the mask (the closing `"` on
630
+ # line 2 was treated as opening a new quoted span in plain mode).
631
+ # Also adds ANSI-C `$'\''...'\''` (mode 3) to mirror _rea_unwrap_at_depth's
632
+ # masker — same scope: in-quote `|`/`;`/`&` get masked, opening-pair
633
+ # `$'\''` is preserved for downstream detection, closing `'\''` is left
634
+ # literal here (this helper does not need a mode-exit-mask byte; the
635
+ # caller pattern-matches against literal `|`/`;`/`&` in the masked
636
+ # stream and benefits from preserving quote boundaries verbatim).
637
+ local _qm_sep
638
+ _qm_sep=$'\x1c\x1d'
639
+ printf '%s%s' "$cmd" "$_qm_sep" \
427
640
  | awk '
428
641
  BEGIN {
642
+ RS = "\034\035"
429
643
  INQ_PIPE = "__REA_INQUOTE_PIPE_a8f2c1__"
430
644
  INQ_SC = "__REA_INQUOTE_SC_a8f2c1__"
431
645
  INQ_AMP = "__REA_INQUOTE_AMP_a8f2c1__"
@@ -439,12 +653,29 @@ quote_masked_cmd() {
439
653
  while (i <= n) {
440
654
  ch = substr(line, i, 1)
441
655
  if (mode == 0) {
656
+ if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
657
+ mode = 3; out = out "$" "'\''"; i += 2; continue
658
+ }
442
659
  if (ch == "\"") { mode = 1; out = out ch; i++; continue }
443
660
  if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
444
661
  out = out ch
445
662
  i++
446
663
  continue
447
664
  }
665
+ if (mode == 3) {
666
+ if (ch == "\\" && i < n) {
667
+ out = out ch substr(line, i + 1, 1)
668
+ i += 2
669
+ continue
670
+ }
671
+ if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
672
+ if (ch == "|") { out = out INQ_PIPE; i++; continue }
673
+ if (ch == ";") { out = out INQ_SC; i++; continue }
674
+ if (ch == "&") { out = out INQ_AMP; i++; continue }
675
+ out = out ch
676
+ i++
677
+ continue
678
+ }
448
679
  if (mode == 2) {
449
680
  if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
450
681
  if (ch == "|") { out = out INQ_PIPE; i++; continue }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bookedsolid/rea",
3
- "version": "0.26.0",
3
+ "version": "0.26.1",
4
4
  "description": "Agentic governance layer for Claude Code — policy enforcement, hook-based safety gates, audit logging, and Codex-integrated adversarial review for AI-assisted projects",
5
5
  "license": "MIT",
6
6
  "author": "Booked Solid Technology <oss@bookedsolid.tech> (https://bookedsolid.tech)",