@bookedsolid/rea 0.31.0 → 0.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.husky/prepare-commit-msg +80 -6
- package/MIGRATING.md +24 -15
- package/dist/cli/hook.js +60 -22
- package/dist/hooks/_lib/halt-check.d.ts +78 -0
- package/dist/hooks/_lib/halt-check.js +106 -0
- package/dist/hooks/_lib/payload.d.ts +124 -0
- package/dist/hooks/_lib/payload.js +245 -0
- package/dist/hooks/_lib/segments.d.ts +125 -0
- package/dist/hooks/_lib/segments.js +766 -0
- package/dist/hooks/architecture-review-gate/index.d.ts +58 -0
- package/dist/hooks/architecture-review-gate/index.js +250 -0
- package/dist/hooks/attribution-advisory/index.d.ts +72 -0
- package/dist/hooks/attribution-advisory/index.js +233 -0
- package/dist/hooks/bash-scanner/protected-scan.js +14 -2
- package/dist/hooks/changeset-security-gate/index.d.ts +71 -0
- package/dist/hooks/changeset-security-gate/index.js +330 -0
- package/dist/hooks/dependency-audit-gate/index.d.ts +91 -0
- package/dist/hooks/dependency-audit-gate/index.js +294 -0
- package/dist/hooks/env-file-protection/index.d.ts +55 -0
- package/dist/hooks/env-file-protection/index.js +159 -0
- package/dist/hooks/pr-issue-link-gate/index.d.ts +91 -0
- package/dist/hooks/pr-issue-link-gate/index.js +127 -0
- package/dist/hooks/security-disclosure-gate/index.d.ts +91 -0
- package/dist/hooks/security-disclosure-gate/index.js +502 -0
- package/hooks/_lib/protected-paths.sh +10 -3
- package/hooks/architecture-review-gate.sh +92 -77
- package/hooks/attribution-advisory.sh +139 -131
- package/hooks/changeset-security-gate.sh +114 -149
- package/hooks/dependency-audit-gate.sh +115 -156
- package/hooks/env-file-protection.sh +130 -97
- package/hooks/pr-issue-link-gate.sh +114 -45
- package/hooks/security-disclosure-gate.sh +148 -316
- package/hooks/settings-protection.sh +13 -9
- package/package.json +1 -1
- package/templates/architecture-review-gate.dogfood-staged.sh +116 -0
- package/templates/attribution-advisory.dogfood-staged.sh +170 -0
- package/templates/changeset-security-gate.dogfood-staged.sh +137 -0
- package/templates/dependency-audit-gate.dogfood-staged.sh +138 -0
- package/templates/env-file-protection.dogfood-staged.sh +157 -0
- package/templates/pr-issue-link-gate.dogfood-staged.sh +134 -0
- package/templates/prepare-commit-msg.husky.sh +80 -6
- package/templates/security-disclosure-gate.dogfood-staged.sh +171 -0
- package/templates/settings-protection.dogfood.patch +58 -0
|
@@ -0,0 +1,766 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Quote-aware shell-segment splitter for the Node-binary hook tier.
|
|
3
|
+
*
|
|
4
|
+
* 0.32.0 — port of the relevant primitives in
|
|
5
|
+
* `hooks/_lib/cmd-segments.sh`. The bash helper is 1002 LOC of
|
|
6
|
+
* defense-in-depth (heredoc unwrapping, nested-shell recursion,
|
|
7
|
+
* env-var-assignment stripping, etc.) — most of those branches exist
|
|
8
|
+
* to defend against bypass attempts in WRITE-tier gates (`dangerous-
|
|
9
|
+
* bash-interceptor`, `dependency-audit-gate`). The Phase 1 pilots
|
|
10
|
+
* landing in 0.32.0 (`security-disclosure-gate`,
|
|
11
|
+
* `attribution-advisory`) only need the SUBSET of segment behavior
|
|
12
|
+
* those two hooks actually exercise:
|
|
13
|
+
*
|
|
14
|
+
* 1. Split the input on shell command separators (`;`, `&&`, `||`,
|
|
15
|
+
* `|`, `&`, newline) while masking separators that appear inside
|
|
16
|
+
* matched `"..."` and `'...'` quote spans.
|
|
17
|
+
* 2. For each segment, strip leading `sudo`, `exec`, `time`, `then`,
|
|
18
|
+
* `do`, `else`, `fi`, and `VAR=value` env-prefixes so the
|
|
19
|
+
* caller's regex can anchor at the segment's actual command head.
|
|
20
|
+
* 3. Expose two query primitives:
|
|
21
|
+
* - `anySegmentStartsWith(cmd, regexHead)`
|
|
22
|
+
* true if any segment's prefix-stripped head matches the
|
|
23
|
+
* head-anchored regex.
|
|
24
|
+
* - `anySegmentMatches(cmd, regex)`
|
|
25
|
+
* true if any segment's raw (non-stripped) text contains a
|
|
26
|
+
* match for the regex (used for content scans like
|
|
27
|
+
* `Co-Authored-By:` markers inside `git commit -m "..."`).
|
|
28
|
+
*
|
|
29
|
+
* Out-of-scope vs. the bash helper:
|
|
30
|
+
*
|
|
31
|
+
* - No heredoc body extraction. The pilots match on the command
|
|
32
|
+
* line, not on heredoc contents. (Body-file resolution in
|
|
33
|
+
* `security-disclosure-gate` is done separately by reading the
|
|
34
|
+
* file path off the command.)
|
|
35
|
+
* - Nested-shell unwrapping (`bash -c 'PAYLOAD'`) was out of scope in
* 0.32.0. Since 0.33.0 `splitSegments` unwraps quoted `-c` payloads
* itself and appends them as extra segments. The bash-scanner walker
* still handles nested shells for the WRITE gates, so the Phase 1
* pilots keep the SECURITY guarantee that any hostile nested shell
* would have been refused by the bash-scanner tier BEFORE this
* advisory tier ran.
|
|
40
|
+
* - No backtick/command-substitution recursion.
|
|
41
|
+
*
|
|
42
|
+
* If a future pilot needs those branches, port them here in a
|
|
43
|
+
* subsequent release. The CURRENT pilots' bash counterparts call only
|
|
44
|
+
* `any_segment_starts_with` and `any_segment_matches` against
|
|
45
|
+
* direct-stdin commands.
|
|
46
|
+
*
|
|
47
|
+
* Quote-handling parity with cmd-segments.sh:
|
|
48
|
+
*
|
|
49
|
+
* - Double-quoted spans (`"..."`): `\"` and `\\` are literal escapes;
|
|
50
|
+
* all other characters are literal.
|
|
51
|
+
* - Single-quoted spans (`'...'`): no escape semantics; every
|
|
52
|
+
* character is literal until the next `'`.
|
|
53
|
+
* - Unterminated quote spans extend to end-of-input (caller's bug —
|
|
54
|
+
* we still emit a single segment for it rather than throwing).
|
|
55
|
+
* - Backslash outside quotes escapes the following character (so
|
|
56
|
+
* `git commit \&\& foo` parses as a single segment, matching
|
|
57
|
+
* bash's behavior).
|
|
58
|
+
*/
|
|
59
|
+
/**
 * Sentinel strings used to mask separators that appear inside quote
 * spans before splitting.
 *
 * Every sentinel is framed by 0x1c (ASCII file separator) and 0x1d
 * (group separator); between them each tag letter (`S`, `A`, `P`, `N`)
 * is preceded by 0x10 (data-link escape). Doubled separators repeat the
 * 0x10-letter pair (`&&` → `\x10A\x10A`). Note that 0x10 is outside the
 * 0x1c – 0x1f range that the original comment attributed to
 * `cmd-segments.sh`.
 *
 * These control bytes are not expected in realistic shell input, so a
 * collision is treated as impossible in practice. The sentinels are
 * never exposed externally: they exist only during the split and are
 * restored to their literal separator by `unmask`, whose regex literals
 * hard-code the same byte sequences and must be kept in sync with this
 * table.
 */
const MASK = {
    SEMI: '\x1c\x10S\x1d', // `;`
    AMP_AMP: '\x1c\x10A\x10A\x1d', // `&&`
    PIPE_PIPE: '\x1c\x10P\x10P\x1d', // `||`
    PIPE: '\x1c\x10P\x1d', // `|`
    AMP: '\x1c\x10A\x1d', // `&`
    NEWLINE: '\x1c\x10N\x1d', // `\n`
};
|
|
78
|
+
/**
 * Replace separators that sit inside quote spans with `MASK` sentinels
 * so the split walker does not see them. After splitting, the sentinels
 * are turned back into their literal characters in each emitted segment.
 *
 * Quote modes tracked while walking `cmd` left to right:
 *
 *   - plain  — `\` escapes the next character (the pair is copied
 *              verbatim); `"`, `'` and `$'` open a quote span.
 *   - dquote — `\` escapes the next character; `"` closes the span.
 *   - squote — no escape semantics; the next `'` closes the span.
 *   - ansic  — `$'…'` ANSI-C quoting: `\` escapes the next character
 *              (so `\'` does NOT close the span); `'` closes it.
 *
 * The `ansic` mode is a security fix. Treating `$'…'` as a plain
 * single-quote span made `\'` close the span early, which desynchronised
 * quote tracking: `echo $'\'' ; npm install evil ; echo ''` came out as
 * ONE segment, hiding `npm install` from head-anchored checks.
 *
 * An unterminated span extends to end-of-input; nothing is thrown.
 *
 * @param {string} cmd Raw command line.
 * @returns {string} `cmd` with every in-quote `;`, `&&`, `||`, `|`, `&`
 *   and newline replaced by its sentinel; all other text is unchanged.
 */
function maskQuotedSeparators(cmd) {
    const n = cmd.length;
    let out = '';
    let i = 0;
    let mode = 'plain';
    while (i < n) {
        const ch = cmd[i];
        const next = cmd[i + 1]; // `undefined` on the last character
        if (mode === 'plain') {
            if (ch === '\\' && i + 1 < n) {
                // Escape pair: copy both; the split walker skips the pair
                // too, so `\&\&` etc. survive into the segment.
                out += ch + next;
                i += 2;
                continue;
            }
            if (ch === '$' && (next === '$' || next === "'")) {
                // `$'` opens an ANSI-C span. `$$` (the PID parameter) is
                // consumed as a unit so its second `$` cannot pair with a
                // following `'` — `$$'…'` is an ordinary single-quote span.
                if (next === "'") {
                    mode = 'ansic';
                }
                out += ch + next;
                i += 2;
                continue;
            }
            if (ch === '"') {
                mode = 'dquote';
            }
            else if (ch === "'") {
                mode = 'squote';
            }
            out += ch;
            i += 1;
            continue;
        }
        // Inside a quote span from here on.
        if (mode !== 'squote' && ch === '\\' && i + 1 < n) {
            // dquote / ansic: the escaped character is copied verbatim and
            // can neither close the span nor be masked.
            out += ch + next;
            i += 2;
            continue;
        }
        if (ch === (mode === 'dquote' ? '"' : "'")) {
            mode = 'plain';
            out += ch;
            i += 1;
            continue;
        }
        // Mask separators. The two-character forms are checked first so
        // `&&` / `||` are not masked as two single separators.
        let sentinel = null;
        let width = 1;
        if (ch === ';') {
            sentinel = MASK.SEMI;
        }
        else if (ch === '\n') {
            sentinel = MASK.NEWLINE;
        }
        else if (ch === '&' || ch === '|') {
            const doubled = next === ch;
            width = doubled ? 2 : 1;
            if (ch === '&') {
                sentinel = doubled ? MASK.AMP_AMP : MASK.AMP;
            }
            else {
                sentinel = doubled ? MASK.PIPE_PIPE : MASK.PIPE;
            }
        }
        out += sentinel ?? ch;
        i += width;
    }
    return out;
}
|
|
204
|
+
/**
 * Undo the quote-span masking: every sentinel is turned back into the
 * separator it stands for, so an emitted segment reads exactly as the
 * caller wrote it.
 *
 * @param {string} text Segment text that may contain mask sentinels.
 * @returns {string} The same text with sentinels restored to `;`, `&&`,
 *   `||`, `|`, `&` and newline.
 */
function unmask(text) {
    // Applied in this order; each pair is [sentinel pattern, literal].
    const restorations = [
        [/\x1c\x10S\x1d/g, ';'],
        [/\x1c\x10A\x10A\x1d/g, '&&'],
        [/\x1c\x10P\x10P\x1d/g, '||'],
        [/\x1c\x10P\x1d/g, '|'],
        [/\x1c\x10A\x1d/g, '&'],
        [/\x1c\x10N\x1d/g, '\n'],
    ];
    return restorations.reduce((restored, [sentinel, literal]) => restored.replace(sentinel, literal), text);
}
|
|
218
|
+
/**
 * Split an already-masked command on its UNQUOTED separators: `;`,
 * `&&`, `||`, `|`, `&` and newline. In-quote separators were replaced
 * by sentinels beforehand, so any separator character still present is
 * a real one unless it is backslash-escaped.
 *
 * The walk is character by character rather than a regex with a
 * one-character lookbehind (2026-05-15 codex round-3 P1 fix). A
 * lookbehind read `echo \\; npm install evil` as "`;` preceded by `\`,
 * do not split", but `\\` is an escape PAIR producing a literal `\`,
 * and the `;` after it is a real separator. Walking pairs fixes that:
 * a `\` consumes itself plus the next character, and only characters
 * outside a pair are tested for separator status.
 *
 * @param {string} masked Output of the quote-masking pass.
 * @returns {string[]} Trimmed, non-empty segments in source order.
 */
function splitOnUnquotedSeparators(masked) {
    const pieces = [];
    const total = masked.length;
    let pieceStart = 0;
    // Push the text between the current piece start and `end`, if any
    // non-whitespace remains after trimming.
    const emit = (end) => {
        const text = masked.slice(pieceStart, end).trim();
        if (text !== '') {
            pieces.push(text);
        }
    };
    let pos = 0;
    while (pos < total) {
        const cur = masked[pos];
        if (cur === '\\' && pos + 1 < total) {
            // Escape pair: the escaped character is never a separator.
            pos += 2;
            continue;
        }
        // Two-character separators win over their one-character prefixes.
        const doubled = (cur === '&' || cur === '|') && masked[pos + 1] === cur;
        let width = 0;
        if (doubled) {
            width = 2;
        }
        else if (';|&\n'.includes(cur)) {
            width = 1;
        }
        if (width === 0) {
            pos += 1;
            continue;
        }
        emit(pos);
        pos += width;
        pieceStart = pos;
    }
    // Whatever follows the last separator.
    emit(total);
    return pieces;
}
|
|
292
|
+
/**
 * Words that may precede the real command head in a segment. Mirrors
 * the catalog in `cmd-segments.sh#strip_segment_prefix`.
 *
 * `stripSegmentPrefix` removes one of these only when it is followed by
 * whitespace, and it tries the keywords before trying an env-var
 * assignment — `sudo VAR=x cmd` is a real shape, so `sudo` has to be
 * peeled first for the assignment to be seen.
 *
 * `--<flag>=<value>` is NOT stripped — those are part of the command.
 */
const LEADING_KEYWORDS = ['sudo', 'exec', 'time', 'then', 'do', 'else', 'fi'];
|
|
301
|
+
/**
 * Measure an env-var assignment (`NAME=value` plus the whitespace after
 * it) at the head of a segment, so the caller can slice it off and
 * expose the real command head.
 *
 * The value is parsed as ONE SHELL WORD: any concatenation of the
 * pieces below, ending at the first unquoted space or tab.
 *
 *   - `"…"`   double-quoted; `\` escapes the next character
 *   - `'…'`   single-quoted; no escapes
 *   - `$'…'`  ANSI-C quoted; `\` escapes the next character
 *   - `\x`    backslash-escaped character outside quotes (e.g. `a\ b`)
 *   - any other non-blank character
 *
 * Shapes covered:
 *
 *   1. `KEY="value with spaces" cmd`
 *   2. `KEY='value with spaces' cmd`
 *   3. `KEY=$'ANSI-C\nvalue' cmd`
 *   4. `KEY= cmd`                    (empty value)
 *   5. `KEY=value cmd`               (unquoted)
 *   6. `KEY=a"b c"'d' cmd`, `KEY=a\ b cmd`   (concatenated / escaped)
 *
 * Shape (6) is the fix in this version. Parsing only one quote form per
 * value left `X="a"'b' gh issue create` unstripped, and cut
 * `X=a"b c" gh issue create` in the middle of the word (head became
 * `c" gh issue create`). Either way the real command head never
 * reached the head-anchored matchers.
 *
 * @param {string} seg Segment text, already trimmed at the front.
 * @returns {number} Length of the consumed prefix (assignment plus
 *   trailing spaces/tabs), or 0 when `seg` does not start with an
 *   assignment, a quote is unterminated, or nothing follows the value.
 */
function matchEnvAssignLength(seg) {
    // Strict POSIX identifier followed by `=`; names starting with a
    // digit are rejected.
    const namePrefix = /^[A-Za-z_][A-Za-z0-9_]*=/.exec(seg);
    if (namePrefix === null) {
        return 0;
    }
    const n = seg.length;
    const isBlank = (c) => c === ' ' || c === '\t';
    let i = namePrefix[0].length;
    // Walk the value word piece by piece until an unquoted blank.
    while (i < n && !isBlank(seg[i])) {
        const ch = seg[i];
        if (ch === "'") {
            // Single quotes: literal up to the next `'`.
            const close = seg.indexOf("'", i + 1);
            if (close === -1) {
                return 0; // unterminated — not a clean prefix.
            }
            i = close + 1;
            continue;
        }
        const ansiC = ch === '$' && seg[i + 1] === "'";
        if (ch === '"' || ansiC) {
            // Double-quoted and ANSI-C spans both honour backslash pairs,
            // so an escaped closing quote does not end the span.
            const closer = ansiC ? "'" : '"';
            i += ansiC ? 2 : 1;
            while (i < n && seg[i] !== closer) {
                i += seg[i] === '\\' && i + 1 < n ? 2 : 1;
            }
            if (i >= n) {
                return 0; // unterminated — not a clean prefix.
            }
            i += 1; // consume the closing quote
            continue;
        }
        if (ch === '\\' && i + 1 < n) {
            // Escaped character outside quotes (`a\ b`) stays in the word.
            i += 2;
            continue;
        }
        i += 1;
    }
    // Blank required after the value: `FOO=barbaz` alone has no command
    // following it and must not be treated as a prefix.
    if (i >= n) {
        return 0;
    }
    while (i < n && isBlank(seg[i])) {
        i += 1;
    }
    return i;
}
|
|
388
|
+
/**
 * Peel leading shell keywords and env-var assignments off a segment so
 * a head-anchored regex sees the actual command first.
 *
 * Examples:
 *   `sudo gh pr create`                        → `gh pr create`
 *   `CI=1 pnpm add foo`                        → `pnpm add foo`
 *   `sudo CI=1 pnpm add foo`                   → `pnpm add foo`
 *   `REA_SKIP="urgent fix" gh issue create x`  → `gh issue create x`
 *   `then git push --force`                    → `git push --force`
 *
 * One prefix is removed per pass: a keyword from `LEADING_KEYWORDS`
 * (only when followed by whitespace) is tried first, then an env
 * assignment. The loop stops when a pass removes nothing, or after 32
 * passes. The cap was 8 and was raised because bash itself puts no
 * limit on stacked prefixes, which made 8 a bypass surface.
 *
 * @param {string} seg Raw segment text.
 * @returns {string} The segment with its leading prefixes removed.
 */
function stripSegmentPrefix(seg) {
    // One head-anchored pattern per keyword, tried in catalog order.
    const keywordHeads = LEADING_KEYWORDS.map((kw) => new RegExp(`^${kw}\\s+`));
    let rest = seg;
    let passesLeft = 32;
    while (passesLeft > 0) {
        passesLeft -= 1;
        const keywordHead = keywordHeads.find((re) => re.test(rest));
        if (keywordHead !== undefined) {
            rest = rest.replace(keywordHead, '');
            continue;
        }
        const envLen = matchEnvAssignLength(rest);
        if (envLen <= 0) {
            break; // nothing left to peel
        }
        rest = rest.slice(envLen);
    }
    return rest;
}
|
|
429
|
+
/**
 * Break `cmd` into shell segments through the quote-aware
 * mask → split → unmask pipeline.
 *
 * Since 0.33.0 nested shells are unwrapped as well: when a segment's
 * head is a shell wrapper such as `bash -c PAYLOAD` / `sh -lc PAYLOAD`,
 * the quoted PAYLOAD is split in turn and its segments are appended
 * right after the wrapper segment. This mirrors the bash counterpart's
 * `_rea_unwrap_nested_shells` (helix-017 #3 fix). Recursion is bounded
 * by `MAX_NESTED_DEPTH`.
 *
 * @param {string} cmd Full command line.
 * @returns {{raw: string, head: string}[]} Segments in source order;
 *   `raw` is the segment text, `head` the prefix-stripped form. Empty
 *   input yields an empty array.
 */
export function splitSegments(cmd) {
    return cmd.length === 0 ? [] : splitSegmentsRecursive(cmd, 0);
}
|
|
447
|
+
/** Deepest level of nested-shell payload that is still unwrapped. */
const MAX_NESTED_DEPTH = 8;
/**
 * Split one command string into `{ raw, head }` segments and, while
 * `depth` is below `MAX_NESTED_DEPTH`, unwrap nested-shell payloads.
 *
 * The wrapper segment is always emitted, so a hook that anchors on the
 * wrapper itself (e.g. on `bash`) still sees it. The payload's segments
 * follow immediately after it, preserving the bash hook's emit order.
 *
 * @param {string} cmd Command text to split at this level.
 * @param {number} depth Current nesting level; 0 for the outer command.
 * @returns {{raw: string, head: string}[]} Segments in emit order.
 */
function splitSegmentsRecursive(cmd, depth) {
    const maskedPieces = splitOnUnquotedSeparators(maskQuotedSeparators(cmd));
    return maskedPieces.flatMap((piece) => {
        const raw = unmask(piece);
        const head = stripSegmentPrefix(raw);
        const entry = { raw, head };
        if (depth >= MAX_NESTED_DEPTH) {
            return [entry];
        }
        const inner = extractNestedShellPayload(head);
        if (inner === null) {
            return [entry];
        }
        return [entry, ...splitSegmentsRecursive(inner, depth + 1)];
    });
}
|
|
470
|
+
/**
 * Single-character escapes decoded inside an ANSI-C (`$'…'`) payload.
 * `\x` is handled separately; anything not listed passes through as the
 * literal backslash pair.
 */
const ANSI_C_SIMPLE_ESCAPES = new Map([
    ['n', '\n'],
    ['t', '\t'],
    ['r', '\r'],
    ['\\', '\\'],
    ["'", "'"],
    ['"', '"'],
    ['a', '\x07'],
    ['b', '\x08'],
    ['e', '\x1b'],
    ['E', '\x1b'],
    ['f', '\x0c'],
    ['v', '\x0b'],
    ['0', '\x00'],
]);
/**
 * Recognize a nested-shell wrapper (`bash -c 'PAYLOAD'` and friends) and
 * return PAYLOAD with its quoting removed, or `null` when `head` is not
 * such a wrapper. Extends parity with `_rea_unwrap_nested_shells` in
 * `hooks/_lib/cmd-segments.sh` (2026-05-15 codex round-1/round-2 P1).
 *
 * Recognition rules:
 *
 *   1. Shell name (case-insensitive, word-bounded): bash, sh, zsh,
 *      dash, ksh, mksh, oksh, posh, yash, csh, tcsh, fish. pwsh is
 *      intentionally excluded — `-EncodedCommand` base64-decodes at
 *      runtime and would need its own code path.
 *   2. Flag tokens: every following token that starts with `-` must
 *      look like `-<letters>` or `--<letters>`; any other flag shape
 *      yields `null`. Split forms (`bash -l -c '…'`,
 *      `bash --noprofile -c '…'`) and combined forms (`-lc`, `-lic`,
 *      `-cl`, …) are therefore both accepted.
 *   3. A flag whose letters include `c` (either case) marks the `-c`
 *      introducer. Further flags after it are still tolerated; the
 *      first non-flag token is the payload argument.
 *   4. The payload must open with `'`, `"` or `$'` (ANSI-C). An
 *      unquoted payload yields `null`.
 *
 * Payload decoding:
 *
 *   - `'…'`  copied verbatim.
 *   - `"…"`  a backslash is dropped and the next character kept.
 *   - `$'…'` simple escapes per `ANSI_C_SIMPLE_ESCAPES`; `\xH`/`\xHH`
 *            become that character code; unknown escapes (and `\x`
 *            without hex digits) stay as the literal pair.
 *   - An unterminated quote yields everything collected so far ("rest
 *     of line is payload").
 *
 * @param {string} head Prefix-stripped segment head.
 * @returns {string|null} Unquoted payload, or `null` if not a wrapper.
 */
function extractNestedShellPayload(head) {
    const text = head.trimStart();
    if (text === '') {
        return null;
    }
    const shell = /^(bash|sh|zsh|dash|ksh|mksh|oksh|posh|yash|csh|tcsh|fish)\b/i.exec(text);
    if (shell === null) {
        return null;
    }
    const end = text.length;
    let pos = shell[0].length;
    // Walk flag tokens until the first token that is not a flag. Running
    // out of input at any point means there is no payload argument.
    let introducerSeen = false;
    for (;;) {
        while (pos < end && /\s/.test(text[pos])) {
            pos += 1;
        }
        if (pos >= end) {
            return null;
        }
        if (text[pos] !== '-') {
            break; // payload argument starts here
        }
        const flagToken = /^(\S+)/.exec(text.slice(pos));
        if (flagToken === null) {
            return null;
        }
        const flag = flagToken[0] ?? '';
        pos += flag.length;
        if (!/^--?[A-Za-z]+$/.test(flag)) {
            return null;
        }
        if (/c/i.test(flag.replace(/^--?/, ''))) {
            introducerSeen = true;
        }
    }
    if (!introducerSeen) {
        return null;
    }
    // `pos` now sits on the first character of the payload argument.
    let closer;
    let ansiC = false;
    if (text[pos] === '$' && text[pos + 1] === "'") {
        closer = "'";
        ansiC = true;
        pos += 2;
    }
    else if (text[pos] === "'" || text[pos] === '"') {
        closer = text[pos];
        pos += 1;
    }
    else {
        // Unquoted payload — the bash WRAP regex requires a quote too.
        return null;
    }
    const decoded = [];
    while (pos < end) {
        const cur = text[pos];
        if (cur === closer) {
            break;
        }
        const startsEscape = cur === '\\' && pos + 1 < end;
        if (startsEscape && ansiC) {
            const code = text[pos + 1];
            if (code === 'x') {
                // `\xH` or `\xHH`: take up to two hex digits.
                let hex = '';
                for (let k = pos + 2; k < end && hex.length < 2 && /[0-9a-fA-F]/.test(text[k]); k += 1) {
                    hex += text[k];
                }
                if (hex.length > 0) {
                    decoded.push(String.fromCharCode(parseInt(hex, 16)));
                    pos += 2 + hex.length;
                    continue;
                }
                decoded.push('\\x');
            }
            else {
                decoded.push(ANSI_C_SIMPLE_ESCAPES.get(code) ?? '\\' + code);
            }
            pos += 2;
            continue;
        }
        if (startsEscape && closer === '"') {
            // Double quotes: keep only the escaped character.
            decoded.push(text[pos + 1] ?? '');
            pos += 2;
            continue;
        }
        decoded.push(cur);
        pos += 1;
    }
    return decoded.join('');
}
|
|
700
|
+
/**
 * Report whether any segment's prefix-stripped head STARTS WITH a match
 * for the given pattern.
 *
 * The bash counterpart uses `grep -qiE PATTERN <<<"$head"`, so the
 * posture here is the same: case-INSENSITIVE, extended-regex style.
 *
 * The pattern is wrapped as `^(?:PATTERN)` before compiling. Plain
 * `^PATTERN` is wrong for a top-level alternation: in `^a|b` the anchor
 * binds to `a` only, so `b` would match anywhere in the head and the
 * check would stop being head-anchored. The non-capturing group keeps
 * every alternative anchored and leaves the caller's capture-group
 * numbering untouched.
 *
 * @param {string} cmd Full command line; split with `splitSegments`.
 * @param {string} regexSource Regex source WITHOUT a leading `^` — the
 *   same string the caller would pass to `any_segment_starts_with` in
 *   bash. It is anchored internally.
 * @returns {boolean} True if at least one segment head matches.
 * @throws {SyntaxError} If `regexSource` is not a valid regular
 *   expression.
 */
export function anySegmentStartsWith(cmd, regexSource) {
    // Compile once per call: head anchor + `i` flag.
    const anchored = new RegExp(`^(?:${regexSource})`, 'i');
    return splitSegments(cmd).some((seg) => anchored.test(seg.head));
}
|
|
726
|
+
/**
 * Report whether any segment's RAW text contains a match for the
 * pattern; no head anchoring is applied. Counterpart of
 * `any_segment_matches`, used for content scans such as
 * `Co-Authored-By:` markers inside quoted `git commit -m "..."`
 * arguments.
 *
 * Case-INSENSITIVE, same posture as the bash helper.
 *
 * @param {string} cmd Full command line; split with `splitSegments`.
 * @param {string} regexSource Regex source to search for.
 * @returns {boolean} True if at least one segment's raw text matches.
 */
export function anySegmentMatches(cmd, regexSource) {
    const pattern = new RegExp(regexSource, 'i');
    return splitSegments(cmd).some(({ raw }) => pattern.test(raw));
}
|
|
742
|
+
/**
 * Report whether some SINGLE segment's raw text matches both `regexA`
 * and `regexB`. Counterpart of `any_segment_matches_both`, used by
 * `env-file-protection` to require that a text-reading utility and an
 * `.env*` filename occur in the same shell segment. A command such as
 * `echo "log: cat .env stuff" ; touch foo.env` must not fire merely
 * because the two matches exist somewhere across different segments.
 *
 * Case-INSENSITIVE on both patterns, same posture as the bash helper.
 *
 * 0.33.0 port. The bash helper dates from 0.16.2, where it fixed the
 * helix-017 P2 false-positive class caused by AND-ing two independent
 * "any segment matches" booleans across segments.
 *
 * @param {string} cmd Full command line; split with `splitSegments`.
 * @param {string} regexA First regex source.
 * @param {string} regexB Second regex source.
 * @returns {boolean} True if one segment satisfies both patterns.
 */
export function anySegmentMatchesBoth(cmd, regexA, regexB) {
    const first = new RegExp(regexA, 'i');
    const second = new RegExp(regexB, 'i');
    return splitSegments(cmd).some(({ raw }) => first.test(raw) && second.test(raw));
}
|