@bookedsolid/rea 0.26.0 → 0.26.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/hooks/bash-scanner/walker.js +232 -2
- package/hooks/_lib/cmd-segments.sh +243 -12
- package/package.json +1 -1
|
@@ -8968,9 +8968,37 @@ function wordToString(word) {
|
|
|
8968
8968
|
case 'Lit':
|
|
8969
8969
|
value += stringifyField(part['Value']);
|
|
8970
8970
|
break;
|
|
8971
|
-
case 'SglQuoted':
|
|
8972
|
-
|
|
8971
|
+
case 'SglQuoted': {
|
|
8972
|
+
// 0.26.1 helix-028 P1-1: ANSI-C `$'...'` quoting expands
|
|
8973
|
+
// `\n`/`\t`/`\xHH`/`\NNN`/`\u…`/`\cX` etc. at parse time. mvdan-sh
|
|
8974
|
+
// emits `$'...'` as `SglQuoted` with `Dollar: true` and the RAW
|
|
8975
|
+
// escape source in `Value` (e.g. `\n` arrives as backslash-n, not
|
|
8976
|
+
// LF). Pre-fix the walker concatenated the raw value verbatim,
|
|
8977
|
+
// and the downstream `stripBashBackslashEscapes` mangled `\n` →
|
|
8978
|
+
// `n` (regex `\\([A-Za-z0-9./_~-])` strips backslash from any
|
|
8979
|
+
// letter), turning `.rea/HALT\ntrue` into `.rea/HALTntrue` — which
|
|
8980
|
+
// never matched the protected pattern. Real bash, of course,
|
|
8981
|
+
// expanded `\n` to LF, so the redirect target was actually
|
|
8982
|
+
// `.rea/HALT` and the kill-switch got overwritten. Decode
|
|
8983
|
+
// explicitly here so downstream consumers see real bytes.
|
|
8984
|
+
const raw = stringifyField(part['Value']);
|
|
8985
|
+
const isAnsiC = part['Dollar'] === true;
|
|
8986
|
+
if (isAnsiC) {
|
|
8987
|
+
const decoded = decodeAnsiC(raw);
|
|
8988
|
+
if (decoded === null) {
|
|
8989
|
+
// Unsupported escape — fail closed. Mark word as dynamic so
|
|
8990
|
+
// the protected/blocked path matchers refuse on uncertainty.
|
|
8991
|
+
dynamic = true;
|
|
8992
|
+
}
|
|
8993
|
+
else {
|
|
8994
|
+
value += decoded;
|
|
8995
|
+
}
|
|
8996
|
+
}
|
|
8997
|
+
else {
|
|
8998
|
+
value += raw;
|
|
8999
|
+
}
|
|
8973
9000
|
break;
|
|
9001
|
+
}
|
|
8974
9002
|
case 'DblQuoted': {
|
|
8975
9003
|
const innerParts = asArray(part['Parts']);
|
|
8976
9004
|
for (const ip of innerParts) {
|
|
@@ -9010,6 +9038,208 @@ function stringifyField(v) {
|
|
|
9010
9038
|
return v;
|
|
9011
9039
|
return '';
|
|
9012
9040
|
}
|
|
9041
|
+
/**
 * Decode bash ANSI-C `$'...'` escape sequences.
 *
 * Supported escapes inside `$'...'`:
 *   - `\\`            literal backslash
 *   - `\'` `\"`       literal quote
 *   - `\?`            literal question mark
 *   - `\a` `\b`       BEL / BS
 *   - `\e` `\E`       ESC
 *   - `\f` `\n`       FF / LF
 *   - `\r` `\t`       CR / TAB
 *   - `\v`            VT
 *   - `\NNN`          octal (1–3 digits, truncated to 8 bits)
 *   - `\xHH`          hex (1–2 digits)
 *   - `\uHHHH`        unicode codepoint (1–4 hex digits)
 *   - `\UHHHHHHHH`    unicode codepoint (1–8 hex digits)
 *   - `\cX`           control char: `toupper(X) & 0x1f`; `\c?` is DEL
 *
 * Fail-closed cases (return `null`):
 *   - any escape letter that is not modelled above;
 *   - any escape that decodes to NUL — bash's expansion result is a C string
 *     that ends at the first NUL, so the bytes after it never reach the
 *     command and the decoded word would not match what bash executes;
 *   - `\U` values above 0x10FFFF;
 *   - `\cX` where X is a backslash or a non-ASCII character.
 *
 * 0.26.1 helix-028 P1-1.
 *
 * @param {string} raw - Raw body of the `$'...'` word (escape source text,
 *   without the surrounding `$'` and `'`).
 * @returns {string|null} The decoded string, or `null` when the input cannot
 *   be decoded with certainty. Callers must fail closed on `null` (treat the
 *   word as dynamic so path matchers refuse on uncertainty).
 */
function decodeAnsiC(raw) {
    const n = raw.length;
    // Escapes that map one letter to one fixed character.
    const simple = new Map([
        ['\\', '\\'],
        ["'", "'"],
        ['"', '"'],
        ['?', '?'],
        ['a', '\x07'],
        ['b', '\x08'],
        ['e', '\x1b'],
        ['E', '\x1b'],
        ['f', '\x0c'],
        ['n', '\n'],
        ['r', '\r'],
        ['t', '\t'],
        ['v', '\x0b'],
    ]);
    const isHexDigit = (c) => /^[0-9a-fA-F]$/.test(c);
    const isOctDigit = (c) => c >= '0' && c <= '7';
    // Longest run of accepted characters starting at `start`, capped at `max`.
    const takeDigits = (start, max, accept) => {
        let end = start;
        while (end < n && end - start < max && accept(raw[end])) {
            end += 1;
        }
        return raw.slice(start, end);
    };
    let out = '';
    let i = 0;
    while (i < n) {
        const ch = raw[i];
        if (ch !== '\\') {
            out += ch;
            i += 1;
            continue;
        }
        // Lone trailing backslash — bash keeps it literal.
        if (i + 1 >= n) {
            out += '\\';
            i += 1;
            continue;
        }
        const next = raw[i + 1];
        if (simple.has(next)) {
            out += simple.get(next);
            i += 2;
            continue;
        }
        // \xHH, \uHHHH, \UHHHHHHHH — differ only in the digit cap.
        if (next === 'x' || next === 'u' || next === 'U') {
            const maxDigits = next === 'x' ? 2 : next === 'u' ? 4 : 8;
            const hex = takeDigits(i + 2, maxDigits, isHexDigit);
            if (hex.length === 0) {
                // No digits: bash keeps the backslash pair literally.
                out += '\\' + next;
                i += 2;
                continue;
            }
            const cp = Number.parseInt(hex, 16);
            // NUL truncates the word in bash; out-of-range code points have
            // no JS representation. Refuse both instead of guessing.
            if (cp === 0 || cp > 0x10ffff) {
                return null;
            }
            // NOTE(review): bash emits `\xHH` as a raw byte; for values
            // >= 0x80 this yields the same-numbered code unit instead, which
            // may differ from bash's output in a UTF-8 locale — confirm
            // whether those should fail closed as well.
            out += next === 'x' ? String.fromCharCode(cp) : String.fromCodePoint(cp);
            i += 2 + hex.length;
            continue;
        }
        // \NNN — 1 to 3 octal digits; `next` itself is the first digit.
        if (isOctDigit(next)) {
            const oct = takeDigits(i + 1, 3, isOctDigit);
            const byte = Number.parseInt(oct, 8) & 0xff;
            if (byte === 0) {
                // `\0`, `\000`, `\400`, … all yield NUL after truncation.
                return null;
            }
            out += String.fromCharCode(byte);
            i += 1 + oct.length;
            continue;
        }
        // \cX — control character.
        if (next === 'c') {
            if (i + 2 >= n) {
                // Lone `\c` at end — treat as literal.
                out += '\\c';
                i += 2;
                continue;
            }
            const x = raw[i + 2];
            if (x === '?') {
                out += '\x7f';
                i += 3;
                continue;
            }
            let code = x.charCodeAt(0);
            // Backslash after `\c` has extra escaping rules in bash, and a
            // non-ASCII X is operated on per byte there; refuse both.
            if (x === '\\' || code > 0x7f) {
                return null;
            }
            // bash: toupper(X) & 0x1f, so `\cj` and `\cJ` are both LF.
            if (code >= 0x61 && code <= 0x7a) {
                code -= 0x20;
            }
            code &= 0x1f;
            if (code === 0) {
                // `\c@` (and `\c` + space or backtick) decode to NUL.
                return null;
            }
            out += String.fromCharCode(code);
            i += 3;
            continue;
        }
        // Unknown escape: bash preserves `\X` literally for unknown X, but the
        // safer posture for a security scanner is to fail closed so payload
        // bytes cannot hide behind an escape that is not modelled here.
        return null;
    }
    return out;
}
|
|
9013
9243
|
function asArray(v) {
|
|
9014
9244
|
if (Array.isArray(v)) {
|
|
9015
9245
|
return v;
|
|
@@ -138,8 +138,32 @@ _rea_unwrap_at_depth() {
|
|
|
138
138
|
# \x03 ETX — replaces in-quote `;`
|
|
139
139
|
# \x05 ENQ — replaces in-quote `&`
|
|
140
140
|
# \x06 ACK — replaces in-quote `|`
|
|
141
|
+
#
|
|
142
|
+
# 0.26.1 helix-028 P1 fix: feed the entire (possibly multiline) `$cmd`
|
|
143
|
+
# to awk as a SINGLE record using a multi-byte record separator
|
|
144
|
+
# (`\x1c\x1d` = FS+GS, control bytes that cannot appear in real shell
|
|
145
|
+
# input). Pre-fix, awk's default RS=`\n` made the masking awk process
|
|
146
|
+
# each line independently, which (a) dropped the newlines from the
|
|
147
|
+
# masked output and (b) reset the in-quote `mode` state per-line — so
|
|
148
|
+
# `bash -lc "printf x > .rea/HALT\ntrue"` had its closing `"` on line 2
|
|
149
|
+
# treated as an opening quote in plain mode, scrambling the mask. macOS
|
|
150
|
+
# BSD awk does NOT support NUL as RS (truncates after first record);
|
|
151
|
+
# `\x1c\x1d` is a portable multi-byte sentinel that awk's RS handles
|
|
152
|
+
# uniformly across BSD/GNU awk implementations.
|
|
153
|
+
#
|
|
154
|
+
# 0.26.1 ANSI-C sibling: also recognize `$'...'` (mode 3) as a quoted
|
|
155
|
+
# span. Pre-fix, the masker treated `$` as plain text in mode 0, so
|
|
156
|
+
# the closing `'` of `$'...'` was the only `'` the masker saw and it
|
|
157
|
+
# entered mode 2 there — flipping the mask state for the rest of the
|
|
158
|
+
# input. Mode 3 honors `\\`-escape semantics (so `\'` and `\\` inside
|
|
159
|
+
# the body do not prematurely terminate the span); on exit the closing
|
|
160
|
+
# `'` is masked to `\x02` (same as mode 2's exit) so the wrapper-scan
|
|
161
|
+
# can no longer treat in-quote `'` as a payload-opening quote.
|
|
162
|
+
local _unwrap_sep
|
|
163
|
+
_unwrap_sep=$'\x1c\x1d'
|
|
141
164
|
local masked
|
|
142
|
-
masked=$(printf '%s' "$cmd" | awk '
|
|
165
|
+
masked=$(printf '%s%s' "$cmd" "$_unwrap_sep" | awk '
|
|
166
|
+
BEGIN { RS = "\034\035" }
|
|
143
167
|
{
|
|
144
168
|
line = $0
|
|
145
169
|
out = ""
|
|
@@ -149,12 +173,40 @@ _rea_unwrap_at_depth() {
|
|
|
149
173
|
while (i <= n) {
|
|
150
174
|
ch = substr(line, i, 1)
|
|
151
175
|
if (mode == 0) {
|
|
176
|
+
# ANSI-C `$'\''...'\''` introducer: emit `$` and opening quote
|
|
177
|
+
# literally (so the wrapper-scan can detect the introducer)
|
|
178
|
+
# and enter mode 3.
|
|
179
|
+
if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
|
|
180
|
+
mode = 3
|
|
181
|
+
out = out "$" "'\''"
|
|
182
|
+
i += 2
|
|
183
|
+
continue
|
|
184
|
+
}
|
|
152
185
|
if (ch == "\"") { mode = 1; out = out ch; i++; continue }
|
|
153
186
|
if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
|
|
154
187
|
out = out ch
|
|
155
188
|
i++
|
|
156
189
|
continue
|
|
157
190
|
}
|
|
191
|
+
if (mode == 3) {
|
|
192
|
+
# ANSI-C: `\\X` is a literal escape pair (`\\\''`, `\\\\`, `\\n`,
|
|
193
|
+
# etc.). Preserve the pair so the closing `'\''` detector below
|
|
194
|
+
# does not exit on `\\\''`.
|
|
195
|
+
if (ch == "\\" && i < n) {
|
|
196
|
+
nxt = substr(line, i + 1, 1)
|
|
197
|
+
out = out ch nxt
|
|
198
|
+
i += 2
|
|
199
|
+
continue
|
|
200
|
+
}
|
|
201
|
+
if (ch == "'\''") { mode = 0; out = out "\002"; i++; continue }
|
|
202
|
+
if (ch == ";") { out = out "\003"; i++; continue }
|
|
203
|
+
if (ch == "&") { out = out "\005"; i++; continue }
|
|
204
|
+
if (ch == "|") { out = out "\006"; i++; continue }
|
|
205
|
+
if (ch == "\"") { out = out "\001"; i++; continue }
|
|
206
|
+
out = out ch
|
|
207
|
+
i++
|
|
208
|
+
continue
|
|
209
|
+
}
|
|
158
210
|
if (mode == 2) {
|
|
159
211
|
if (ch == "'\''") { mode = 0; out = out "\002"; i++; continue }
|
|
160
212
|
if (ch == ";") { out = out "\003"; i++; continue }
|
|
@@ -183,15 +235,27 @@ _rea_unwrap_at_depth() {
|
|
|
183
235
|
}
|
|
184
236
|
printf "%s", out
|
|
185
237
|
}')
|
|
186
|
-
# Pass both raw and masked into awk
|
|
187
|
-
#
|
|
188
|
-
#
|
|
189
|
-
#
|
|
238
|
+
# Pass both raw and masked into awk via stdin as sentinel-separated
|
|
239
|
+
# records — `awk -v raw="$cmd" -v masked="$masked"` errors with
|
|
240
|
+
# `awk: newline in string` the moment either string contains a literal
|
|
241
|
+
# newline. RS=`\x1c\x1d` (FS+GS multi-byte sentinel) survives newlines
|
|
242
|
+
# in either record. (BSD awk does not support NUL-as-RS reliably.)
|
|
243
|
+
# Wrapper-regex matches against the masked form; payload extraction
|
|
244
|
+
# reads the raw form using the same offsets. Because the mask is
|
|
245
|
+
# byte-for-byte width-preserving, the same RSTART/RLENGTH applies to
|
|
246
|
+
# both.
|
|
190
247
|
#
|
|
191
248
|
# 0.21.2: capture payloads to a local var; iterate to recurse.
|
|
249
|
+
# 0.26.1 helix-028 P1: switch from `awk -v` to sentinel-separated stdin.
|
|
250
|
+
# 0.26.1 ANSI-C sibling: handle `$'\''...'\''` as a third quoted-body
|
|
251
|
+
# form alongside `'\''...'\''` and `"..."`. Decode common escape
|
|
252
|
+
# sequences (`\\n`, `\\t`, `\\r`, `\\\\`, `\\\''`, `\\"`) when emitting
|
|
253
|
+
# the payload so the downstream segment splitter sees real newlines
|
|
254
|
+
# and splits on them.
|
|
192
255
|
local _unwrap_payloads
|
|
193
|
-
_unwrap_payloads=$(printf ''
|
|
256
|
+
_unwrap_payloads=$(printf '%s%s%s%s' "$cmd" "$_unwrap_sep" "$masked" "$_unwrap_sep" | awk '
|
|
194
257
|
BEGIN {
|
|
258
|
+
RS = "\034\035"
|
|
195
259
|
# Wrapper-prefix regex: shell-name + optional flag tokens + -c-style flag.
|
|
196
260
|
# Each flag token is `-` followed by 1+ letters and trailing space.
|
|
197
261
|
# NOTE: matches only OUTSIDE outer quoted spans because in-quote
|
|
@@ -207,6 +271,10 @@ _rea_unwrap_at_depth() {
|
|
|
207
271
|
# NOT covered here. Adding pwsh requires a separate code path
|
|
208
272
|
# because EncodedCommand base64-decodes at runtime.
|
|
209
273
|
WRAP = "(^|[[:space:]&|;])(bash|sh|zsh|dash|ksh|mksh|oksh|posh|yash|csh|tcsh|fish)([[:space:]]+-[a-zA-Z]+)*[[:space:]]+-(c|lc|lic|ic|cl|cli|li|il)[[:space:]]+"
|
|
274
|
+
}
|
|
275
|
+
NR == 1 { raw = $0; next }
|
|
276
|
+
NR == 2 {
|
|
277
|
+
masked = $0
|
|
210
278
|
# Track the cursor in BOTH raw and masked. Because the mask is
|
|
211
279
|
# byte-for-byte width-preserving, the same RSTART/RLENGTH applies
|
|
212
280
|
# to both — but each iteration of the loop must SLICE both strings
|
|
@@ -225,8 +293,120 @@ _rea_unwrap_at_depth() {
|
|
|
225
293
|
# verbatim only when it was an outer quote.
|
|
226
294
|
first = substr(rtail, 1, 1)
|
|
227
295
|
mfirst = substr(mtail, 1, 1)
|
|
296
|
+
# ANSI-C: `$'\''...'\''` introducer (raw and masked must both
|
|
297
|
+
# carry `$` followed by literal `'\''` — the masker preserves the
|
|
298
|
+
# opening pair when it transitions into mode 3).
|
|
299
|
+
if (first == "$" && substr(rtail, 2, 1) == "'\''" \
|
|
300
|
+
&& mfirst == "$" && substr(mtail, 2, 1) == "'\''") {
|
|
301
|
+
body = substr(rtail, 3)
|
|
302
|
+
n = length(body)
|
|
303
|
+
j = 1
|
|
304
|
+
out = ""
|
|
305
|
+
closed = 0
|
|
306
|
+
while (j <= n) {
|
|
307
|
+
c = substr(body, j, 1)
|
|
308
|
+
if (c == "\\" && j < n) {
|
|
309
|
+
nxt = substr(body, j + 1, 1)
|
|
310
|
+
# Decode common ANSI-C escape sequences so the splitter
|
|
311
|
+
# downstream sees real bytes (e.g. `\n` → newline → segment
|
|
312
|
+
# boundary at protected/blocked-path detection time).
|
|
313
|
+
if (nxt == "n") { out = out "\n"; j += 2; continue }
|
|
314
|
+
if (nxt == "t") { out = out "\t"; j += 2; continue }
|
|
315
|
+
if (nxt == "r") { out = out "\r"; j += 2; continue }
|
|
316
|
+
if (nxt == "\\") { out = out "\\"; j += 2; continue }
|
|
317
|
+
if (nxt == "'\''") { out = out "'\''"; j += 2; continue }
|
|
318
|
+
if (nxt == "\"") { out = out "\""; j += 2; continue }
|
|
319
|
+
if (nxt == "a") { out = out "\007"; j += 2; continue }
|
|
320
|
+
if (nxt == "b") { out = out "\010"; j += 2; continue }
|
|
321
|
+
if (nxt == "e" || nxt == "E") { out = out "\033"; j += 2; continue }
|
|
322
|
+
if (nxt == "f") { out = out "\014"; j += 2; continue }
|
|
323
|
+
if (nxt == "v") { out = out "\013"; j += 2; continue }
|
|
324
|
+
if (nxt == "?") { out = out "?"; j += 2; continue }
|
|
325
|
+
# 0.26.1 helix-028 P1-2: `\xHH` (1–2 hex digits). Pre-fix
|
|
326
|
+
# `bash -lc $'\''echo > .rea/HALT\\x0Atrue'\''` had `\x0A`
|
|
327
|
+
# preserved as the literal pair `\x0A`, so the segment
|
|
328
|
+
# splitter never saw the real LF and the second statement
|
|
329
|
+
# (`true` / arbitrary attacker payload) was hidden in the
|
|
330
|
+
# same segment as the first. Decode here so the LF reaches
|
|
331
|
+
# the splitter.
|
|
332
|
+
if (nxt == "x") {
|
|
333
|
+
hex = ""
|
|
334
|
+
k = j + 2
|
|
335
|
+
while (k <= n && length(hex) < 2 \
|
|
336
|
+
&& index("0123456789abcdefABCDEF", substr(body, k, 1)) > 0) {
|
|
337
|
+
hex = hex substr(body, k, 1)
|
|
338
|
+
k++
|
|
339
|
+
}
|
|
340
|
+
if (length(hex) > 0) {
|
|
341
|
+
# awk has no native hex parser. Walk the digits.
|
|
342
|
+
hv = 0
|
|
343
|
+
for (h = 1; h <= length(hex); h++) {
|
|
344
|
+
hd = substr(hex, h, 1)
|
|
345
|
+
di = index("0123456789abcdef", tolower(hd)) - 1
|
|
346
|
+
hv = hv * 16 + di
|
|
347
|
+
}
|
|
348
|
+
out = out sprintf("%c", hv)
|
|
349
|
+
j = k
|
|
350
|
+
continue
|
|
351
|
+
}
|
|
352
|
+
# `\x` with no digits — preserve pair literally.
|
|
353
|
+
out = out c nxt
|
|
354
|
+
j += 2
|
|
355
|
+
continue
|
|
356
|
+
}
|
|
357
|
+
# 0.26.1 helix-028 P1-2: `\NNN` octal (1–3 digits). Pre-fix
|
|
358
|
+
# `\012` (= LF) was preserved as a literal pair, same bypass
|
|
359
|
+
# class as `\xHH`.
|
|
360
|
+
if (nxt >= "0" && nxt <= "7") {
|
|
361
|
+
oct = nxt
|
|
362
|
+
k = j + 2
|
|
363
|
+
while (k <= n && length(oct) < 3 \
|
|
364
|
+
&& substr(body, k, 1) >= "0" \
|
|
365
|
+
&& substr(body, k, 1) <= "7") {
|
|
366
|
+
oct = oct substr(body, k, 1)
|
|
367
|
+
k++
|
|
368
|
+
}
|
|
369
|
+
ov = 0
|
|
370
|
+
for (h = 1; h <= length(oct); h++) {
|
|
371
|
+
ov = ov * 8 + (substr(oct, h, 1) + 0)
|
|
372
|
+
}
|
|
373
|
+
# Bash truncates to 8 bits.
|
|
374
|
+
ov = ov % 256
|
|
375
|
+
out = out sprintf("%c", ov)
|
|
376
|
+
j = k
|
|
377
|
+
continue
|
|
378
|
+
}
|
|
379
|
+
# Default: preserve pair (covers `\u…`, `\U…`, `\cX` — rarer
|
|
380
|
+
# shapes; the literal pair is still safer than silent decoding
|
|
381
|
+
# for unsupported escapes in this legacy-gate layer. The Node
|
|
382
|
+
# scanner — primary enforcement for protected/blocked paths
|
|
383
|
+
# since 0.23.0 — fails closed on these via decodeAnsiC).
|
|
384
|
+
out = out c nxt
|
|
385
|
+
j += 2
|
|
386
|
+
continue
|
|
387
|
+
}
|
|
388
|
+
if (c == "'\''") { closed = j; break }
|
|
389
|
+
out = out c
|
|
390
|
+
j++
|
|
391
|
+
}
|
|
392
|
+
if (closed == 0) {
|
|
393
|
+
mrest = substr(mtail, 3)
|
|
394
|
+
rrest = substr(rtail, 3)
|
|
395
|
+
continue
|
|
396
|
+
}
|
|
397
|
+
print out
|
|
398
|
+
# Skip past `$` (1) + opening `'\''` (1) + body (closed-1) +
|
|
399
|
+
# closing `'\''` (1) = 2 + closed bytes from mtail/rtail start.
|
|
400
|
+
mrest = substr(mtail, 2 + closed + 1)
|
|
401
|
+
rrest = substr(rtail, 2 + closed + 1)
|
|
402
|
+
continue
|
|
403
|
+
}
|
|
228
404
|
if (first == "'\''" && mfirst == "'\''") {
|
|
229
405
|
# Single-quoted body: no escape semantics; runs to next `'\''`.
|
|
406
|
+
# NOTE: index against the RAW body — the masker replaces the
|
|
407
|
+
# closing `'\''` of an outer single-quoted span with `\002`, so
|
|
408
|
+
# `index(mbody, "'\''")` would never find it. The raw body
|
|
409
|
+
# carries the literal closing `'\''` byte verbatim.
|
|
230
410
|
body = substr(rtail, 2)
|
|
231
411
|
mbody = substr(mtail, 2)
|
|
232
412
|
end = index(body, "'\''")
|
|
@@ -278,10 +458,7 @@ _rea_unwrap_at_depth() {
|
|
|
278
458
|
mrest = mtail
|
|
279
459
|
rrest = rtail
|
|
280
460
|
}
|
|
281
|
-
}
|
|
282
|
-
# Empty action with no input rules — explicitly drive the loop from
|
|
283
|
-
# END so awk does not require any input records.
|
|
284
|
-
END {}')
|
|
461
|
+
}')
|
|
285
462
|
# Recurse on each extracted payload with depth+1.
|
|
286
463
|
if [[ -n "$_unwrap_payloads" ]]; then
|
|
287
464
|
while IFS= read -r _unwrap_p; do
|
|
@@ -363,16 +540,38 @@ _rea_split_segments() {
|
|
|
363
540
|
out = ""
|
|
364
541
|
i = 1
|
|
365
542
|
n = length(line)
|
|
366
|
-
mode = 0 # 0=plain, 1=double, 2=single
|
|
543
|
+
mode = 0 # 0=plain, 1=double, 2=single, 3=ANSI-C $'\''...'\''
|
|
367
544
|
while (i <= n) {
|
|
368
545
|
ch = substr(line, i, 1)
|
|
369
546
|
if (mode == 0) {
|
|
547
|
+
# 0.26.1 helix-028 sibling: ANSI-C `$'\''...'\''` introducer.
|
|
548
|
+
# Pre-fix `echo $'\''a;b'\''` had its in-quote `;` un-masked and
|
|
549
|
+
# the splitter broke the segment at the `;`. Mode 3 honors
|
|
550
|
+
# backslash-escape pairs so `\\\''` and `\\\\` do not exit early.
|
|
551
|
+
if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
|
|
552
|
+
mode = 3; out = out "$" "'\''"; i += 2; continue
|
|
553
|
+
}
|
|
370
554
|
if (ch == "\"") { mode = 1; out = out ch; i++; continue }
|
|
371
555
|
if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
|
|
372
556
|
out = out ch
|
|
373
557
|
i++
|
|
374
558
|
continue
|
|
375
559
|
}
|
|
560
|
+
if (mode == 3) {
|
|
561
|
+
if (ch == "\\" && i < n) {
|
|
562
|
+
nxt = substr(line, i + 1, 1)
|
|
563
|
+
out = out ch nxt
|
|
564
|
+
i += 2
|
|
565
|
+
continue
|
|
566
|
+
}
|
|
567
|
+
if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
|
|
568
|
+
if (ch == ";") { out = out SC; i++; continue }
|
|
569
|
+
if (ch == "&") { out = out AMP; i++; continue }
|
|
570
|
+
if (ch == "|") { out = out PIPE; i++; continue }
|
|
571
|
+
out = out ch
|
|
572
|
+
i++
|
|
573
|
+
continue
|
|
574
|
+
}
|
|
376
575
|
if (mode == 2) {
|
|
377
576
|
# Single quotes: no escape semantics. Only `'\''` ends.
|
|
378
577
|
if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
|
|
@@ -423,9 +622,24 @@ _rea_split_segments() {
|
|
|
423
622
|
# in-quote `|` characters.
|
|
424
623
|
quote_masked_cmd() {
|
|
425
624
|
local cmd="$1"
|
|
426
|
-
|
|
625
|
+
# 0.26.1 helix-028 sibling: feed the entire (possibly multiline) `$cmd`
|
|
626
|
+
# to awk as a SINGLE record using a multi-byte record separator
|
|
627
|
+
# (`\x1c\x1d` = FS+GS). Pre-fix, the default `RS=\n` split a multiline
|
|
628
|
+
# input across records and reset in-quote `mode` per-line, which both
|
|
629
|
+
# dropped the newlines AND scrambled the mask (the closing `"` on
|
|
630
|
+
# line 2 was treated as opening a new quoted span in plain mode).
|
|
631
|
+
# Also adds ANSI-C `$'\''...'\''` (mode 3) to mirror _rea_unwrap_at_depth's
|
|
632
|
+
# masker — same scope: in-quote `|`/`;`/`&` get masked, opening-pair
|
|
633
|
+
# `$'\''` is preserved for downstream detection, closing `'\''` is left
|
|
634
|
+
# literal here (this helper does not need a mode-exit-mask byte; the
|
|
635
|
+
# caller pattern-matches against literal `|`/`;`/`&` in the masked
|
|
636
|
+
# stream and benefits from preserving quote boundaries verbatim).
|
|
637
|
+
local _qm_sep
|
|
638
|
+
_qm_sep=$'\x1c\x1d'
|
|
639
|
+
printf '%s%s' "$cmd" "$_qm_sep" \
|
|
427
640
|
| awk '
|
|
428
641
|
BEGIN {
|
|
642
|
+
RS = "\034\035"
|
|
429
643
|
INQ_PIPE = "__REA_INQUOTE_PIPE_a8f2c1__"
|
|
430
644
|
INQ_SC = "__REA_INQUOTE_SC_a8f2c1__"
|
|
431
645
|
INQ_AMP = "__REA_INQUOTE_AMP_a8f2c1__"
|
|
@@ -439,12 +653,29 @@ quote_masked_cmd() {
|
|
|
439
653
|
while (i <= n) {
|
|
440
654
|
ch = substr(line, i, 1)
|
|
441
655
|
if (mode == 0) {
|
|
656
|
+
if (ch == "$" && i < n && substr(line, i + 1, 1) == "'\''") {
|
|
657
|
+
mode = 3; out = out "$" "'\''"; i += 2; continue
|
|
658
|
+
}
|
|
442
659
|
if (ch == "\"") { mode = 1; out = out ch; i++; continue }
|
|
443
660
|
if (ch == "'\''") { mode = 2; out = out ch; i++; continue }
|
|
444
661
|
out = out ch
|
|
445
662
|
i++
|
|
446
663
|
continue
|
|
447
664
|
}
|
|
665
|
+
if (mode == 3) {
|
|
666
|
+
if (ch == "\\" && i < n) {
|
|
667
|
+
out = out ch substr(line, i + 1, 1)
|
|
668
|
+
i += 2
|
|
669
|
+
continue
|
|
670
|
+
}
|
|
671
|
+
if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
|
|
672
|
+
if (ch == "|") { out = out INQ_PIPE; i++; continue }
|
|
673
|
+
if (ch == ";") { out = out INQ_SC; i++; continue }
|
|
674
|
+
if (ch == "&") { out = out INQ_AMP; i++; continue }
|
|
675
|
+
out = out ch
|
|
676
|
+
i++
|
|
677
|
+
continue
|
|
678
|
+
}
|
|
448
679
|
if (mode == 2) {
|
|
449
680
|
if (ch == "'\''") { mode = 0; out = out ch; i++; continue }
|
|
450
681
|
if (ch == "|") { out = out INQ_PIPE; i++; continue }
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bookedsolid/rea",
|
|
3
|
-
"version": "0.26.0",
|
|
3
|
+
"version": "0.26.1",
|
|
4
4
|
"description": "Agentic governance layer for Claude Code — policy enforcement, hook-based safety gates, audit logging, and Codex-integrated adversarial review for AI-assisted projects",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Booked Solid Technology <oss@bookedsolid.tech> (https://bookedsolid.tech)",
|