@bookedsolid/rea 0.32.0 → 0.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/hook.js +49 -0
- package/dist/hooks/_lib/payload.d.ts +38 -0
- package/dist/hooks/_lib/payload.js +79 -0
- package/dist/hooks/_lib/segments.d.ts +127 -0
- package/dist/hooks/_lib/segments.js +628 -16
- package/dist/hooks/architecture-review-gate/index.d.ts +58 -0
- package/dist/hooks/architecture-review-gate/index.js +250 -0
- package/dist/hooks/changeset-security-gate/index.d.ts +71 -0
- package/dist/hooks/changeset-security-gate/index.js +330 -0
- package/dist/hooks/dangerous-bash-interceptor/index.d.ts +103 -0
- package/dist/hooks/dangerous-bash-interceptor/index.js +669 -0
- package/dist/hooks/dependency-audit-gate/index.d.ts +91 -0
- package/dist/hooks/dependency-audit-gate/index.js +294 -0
- package/dist/hooks/env-file-protection/index.d.ts +55 -0
- package/dist/hooks/env-file-protection/index.js +159 -0
- package/dist/hooks/local-review-gate/index.d.ts +145 -0
- package/dist/hooks/local-review-gate/index.js +374 -0
- package/dist/hooks/secret-scanner/index.d.ts +143 -0
- package/dist/hooks/secret-scanner/index.js +404 -0
- package/hooks/architecture-review-gate.sh +92 -77
- package/hooks/changeset-security-gate.sh +114 -149
- package/hooks/dangerous-bash-interceptor.sh +168 -386
- package/hooks/dependency-audit-gate.sh +115 -156
- package/hooks/env-file-protection.sh +130 -97
- package/hooks/local-review-gate.sh +523 -410
- package/hooks/secret-scanner.sh +210 -200
- package/package.json +1 -1
- package/templates/architecture-review-gate.dogfood-staged.sh +116 -0
- package/templates/changeset-security-gate.dogfood-staged.sh +137 -0
- package/templates/dangerous-bash-interceptor.dogfood-staged.sh +196 -0
- package/templates/dependency-audit-gate.dogfood-staged.sh +138 -0
- package/templates/env-file-protection.dogfood-staged.sh +157 -0
- package/templates/local-review-gate.dogfood-staged.sh +573 -0
- package/templates/secret-scanner.dogfood-staged.sh +240 -0
|
@@ -229,22 +229,63 @@ function unmask(text) {
|
|
|
229
229
|
* (lookbehind).
|
|
230
230
|
*/
|
|
231
231
|
function splitOnUnquotedSeparators(masked) {
    // Character-by-character scan (rather than a `(?<!\\)` lookbehind
    // regex) so that backslash escapes are honoured as PAIRS. A
    // one-character lookbehind reads `echo \\; npm install evil` as
    // "the `;` is escaped" and keeps it as one segment; in shell
    // semantics `\\` is a complete escape pair and the `;` after it is
    // a live separator.
    //
    // Recognised separators: `&&`, `||`, `;`, `|`, `&` and newline.
    // Each emitted segment is trimmed; empty segments are dropped.
    const pieces = [];
    const total = masked.length;
    let start = 0;
    // Push the trimmed text between `start` and `end` unless it is empty.
    const emit = (end) => {
        const text = masked.slice(start, end).trim();
        if (text.length > 0) {
            pieces.push(text);
        }
    };
    let pos = 0;
    while (pos < total) {
        const cur = masked[pos];
        // A backslash with a following character forms an escape pair:
        // skip both so the escaped character is never read as a separator.
        if (cur === '\\' && pos + 1 < total) {
            pos += 2;
            continue;
        }
        // Two-character separators are checked first so the one-character
        // forms cannot claim their leading character.
        const pair = masked.slice(pos, pos + 2);
        let width = 0;
        if (pair === '&&' || pair === '||') {
            width = 2;
        }
        else if (';|&\n'.includes(cur)) {
            width = 1;
        }
        if (width === 0) {
            pos += 1;
            continue;
        }
        emit(pos);
        pos += width;
        start = pos;
    }
    // Whatever follows the last separator is the final segment.
    if (start < total) {
        emit(total);
    }
    return pieces;
}
|
|
@@ -389,16 +430,272 @@ function stripSegmentPrefix(seg) {
|
|
|
389
430
|
* Split `cmd` into segments using the quote-aware masking → split →
|
|
390
431
|
* unmask pipeline. Returns an array of `{ raw, head }` tuples in the
|
|
391
432
|
* order they appeared in the original command.
|
|
433
|
+
*
|
|
434
|
+
* 0.33.0 — nested-shell unwrapping was added on top of the original
|
|
435
|
+
* 0.32.0 splitter. When a segment's head is `bash -c|-lc|--c PAYLOAD`
|
|
436
|
+
* or `sh -c|-lc|--c PAYLOAD` (any combination of `-l` and `-c` flags),
|
|
437
|
+
* the PAYLOAD inside the quoted arg becomes additional segments
|
|
438
|
+
* appended after the wrapper segment. Mirrors the bash counterpart's
|
|
439
|
+
* `_rea_unwrap_nested_shells` (helix-017 #3 fix). Recurses up to
|
|
440
|
+
* `MAX_NESTED_DEPTH` levels.
|
|
392
441
|
*/
|
|
393
442
|
export function splitSegments(cmd) {
    // An empty command has no segments; otherwise start the recursive
    // split at nesting depth 0.
    return cmd.length === 0 ? [] : splitSegmentsRecursive(cmd, 0);
}
|
|
447
|
+
// Upper bound on how many nested-shell wrapper levels are unwrapped.
const MAX_NESTED_DEPTH = 8;
/**
 * Split `cmd` into `{ raw, head }` entries and, while `depth` is below
 * `MAX_NESTED_DEPTH`, expand any nested-shell wrapper segment.
 *
 * Emit order: the wrapper segment itself comes first (so a consumer
 * anchoring on the wrapper still sees it), immediately followed by the
 * segments of its inner payload, then the next outer segment.
 *
 * @param {string} cmd - command text to split.
 * @param {number} depth - current nesting level (0 for the outer command).
 * @returns {{ raw: string, head: string }[]} segments in emit order.
 */
function splitSegmentsRecursive(cmd, depth) {
    const pieces = splitOnUnquotedSeparators(maskQuotedSeparators(cmd));
    return pieces.flatMap((piece) => {
        const raw = unmask(piece);
        const head = stripSegmentPrefix(raw);
        const entry = { raw, head };
        // At the depth limit the wrapper is still reported, just not expanded.
        if (depth >= MAX_NESTED_DEPTH) {
            return [entry];
        }
        const payload = extractNestedShellPayload(head);
        if (payload === null) {
            return [entry];
        }
        return [entry, ...splitSegmentsRecursive(payload, depth + 1)];
    });
}
|
|
470
|
+
/**
 * Recognize a nested-shell wrapper segment (`bash -c '…'` and friends)
 * and return the decoded payload string. Returns `null` when the
 * segment is not a wrapper.
 *
 * Recognition rules:
 *
 *   1. Shell names (case-insensitive, must be the first token):
 *      bash | sh | zsh | dash | ksh | mksh | oksh | posh | yash | csh |
 *      tcsh | fish. pwsh is intentionally NOT handled here.
 *   2. Flag tokens: any number of `-letters` / `--word[-word…]` tokens
 *      may precede the payload. At least one of them must contain the
 *      letter `c` (case-insensitive) — that is treated as the `-c`
 *      introducer. Split forms (`-l -c`), combined forms (`-lc`, `-lic`,
 *      `-cl`, …) and flags that follow `-c` are all accepted.
 *   3. Flags that take an argument: a short cluster ending in `o`/`O`
 *      (`-o pipefail`, `-euo pipefail`, `-O extglob`) and the long
 *      options `--rcfile` / `--init-file` consume the following token
 *      when it is a bare word (it does not start with `-`, `'`, `"` or
 *      `$`). Without this, `bash -euo pipefail -c '…'` ended the flag
 *      walk on `pipefail` and the payload was never unwrapped.
 *   4. Payload quoting: the payload must start with `'`, `"` or `$'`
 *      (ANSI-C). An unquoted payload returns `null`.
 *   5. Escapes inside the payload:
 *        - `'…'`  : none.
 *        - `"…"`  : a backslash is dropped and the next character kept.
 *        - `$'…'` : `\n \t \r \\ \' \" \? \a \b \e \E \f \v`, `\NNN`
 *                   (1-3 octal digits), `\xHH` (1-2 hex digits),
 *                   `\uHHHH`, `\UHHHHHHHH` and `\cX` are decoded.
 *                   Unknown escapes are kept as the literal pair.
 *      Decoding the octal / unicode / control forms matters because
 *      `$'\156pm install evil'` is `npm install evil` once the shell
 *      has read it.
 *   6. An unterminated quote yields everything up to the end of `head`.
 *
 * @param {string} head - prefix-stripped segment text.
 * @returns {string | null} decoded payload, or `null` if not a wrapper.
 */
function extractNestedShellPayload(head) {
    const trimmed = head.trimStart();
    if (trimmed.length === 0) {
        return null;
    }
    // 1. Shell-name token.
    const shellMatch = /^(bash|sh|zsh|dash|ksh|mksh|oksh|posh|yash|csh|tcsh|fish)\b/i.exec(trimmed);
    if (shellMatch === null) {
        return null;
    }
    let cursor = shellMatch[0].length;
    // 2. Walk flag tokens until the first non-flag token (the payload).
    let sawCFlag = false;
    while (cursor < trimmed.length) {
        while (cursor < trimmed.length && /\s/.test(trimmed[cursor])) {
            cursor += 1;
        }
        if (cursor >= trimmed.length) {
            return null;
        }
        const rest = trimmed.slice(cursor);
        if (rest[0] !== '-') {
            // Not a flag — must be the payload argument.
            break;
        }
        const flagMatch = /^(\S+)/.exec(rest);
        if (flagMatch === null) {
            return null;
        }
        const flag = flagMatch[0] ?? '';
        cursor += flag.length;
        // `-letters`, or `--word` optionally followed by `-word` parts
        // (needed so `--init-file` is accepted as a flag token).
        if (!/^(?:-[A-Za-z]+|--[A-Za-z]+(?:-[A-Za-z]+)*)$/.test(flag)) {
            return null;
        }
        if (/c/i.test(flag.replace(/^--?/, ''))) {
            // Keep walking: further flags may sit between `-c` and the payload.
            sawCFlag = true;
        }
        // 3. Option-with-argument flags swallow the next bare word. Quoted
        //    or dash-led tokens are left alone, so every input the walker
        //    accepted before this rule is still handled the same way.
        const takesArgument = /^-[A-Za-z]*[oO]$/.test(flag) || /^--(?:rcfile|init-file)$/.test(flag);
        if (takesArgument) {
            const argMatch = /^\s+[^\s'"$-]\S*/.exec(trimmed.slice(cursor));
            if (argMatch !== null) {
                cursor += argMatch[0].length;
            }
        }
    }
    if (!sawCFlag) {
        return null;
    }
    if (cursor >= trimmed.length) {
        return null;
    }
    while (cursor < trimmed.length && /\s/.test(trimmed[cursor])) {
        cursor += 1;
    }
    if (cursor >= trimmed.length) {
        return null;
    }
    // 4. Inspect the payload's introducer character.
    const first = trimmed[cursor];
    let quote;
    let isAnsiC = false;
    let payloadStart = cursor;
    if (first === '$' && trimmed[cursor + 1] === "'") {
        quote = "'";
        isAnsiC = true;
        payloadStart = cursor + 2;
    }
    else if (first === "'" || first === '"') {
        quote = first;
        payloadStart = cursor + 1;
    }
    else {
        // Unquoted payload — refuse to unwrap.
        return null;
    }
    // Read up to `maxDigits` characters matching `digitPattern` from `from`.
    const readDigits = (from, maxDigits, digitPattern) => {
        let end = from;
        while (end < trimmed.length && end - from < maxDigits && digitPattern.test(trimmed[end])) {
            end += 1;
        }
        return trimmed.slice(from, end);
    };
    // 5. Collect the payload up to the matching closing quote.
    let i = payloadStart;
    let payload = '';
    while (i < trimmed.length) {
        const ch = trimmed[i];
        if (ch === quote) {
            return payload;
        }
        if (isAnsiC && ch === '\\' && i + 1 < trimmed.length) {
            const nxt = trimmed[i + 1];
            // Default: unknown escape — keep the literal pair (`\z` → `\z`).
            let decoded = '\\' + nxt;
            // Source characters consumed, including the backslash.
            let consumed = 2;
            switch (nxt) {
                case 'n':
                    decoded = '\n';
                    break;
                case 't':
                    decoded = '\t';
                    break;
                case 'r':
                    decoded = '\r';
                    break;
                case '\\':
                    decoded = '\\';
                    break;
                case "'":
                    decoded = "'";
                    break;
                case '"':
                    decoded = '"';
                    break;
                case '?':
                    decoded = '?';
                    break;
                case 'a':
                    decoded = '\x07';
                    break;
                case 'b':
                    decoded = '\x08';
                    break;
                case 'e':
                case 'E':
                    decoded = '\x1b';
                    break;
                case 'f':
                    decoded = '\x0c';
                    break;
                case 'v':
                    decoded = '\x0b';
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7': {
                    // \NNN — one to three octal digits, eight-bit value.
                    // A lone `\0` still decodes to NUL.
                    const digits = readDigits(i + 1, 3, /[0-7]/);
                    decoded = String.fromCharCode(parseInt(digits, 8) & 0xff);
                    consumed = 1 + digits.length;
                    break;
                }
                case 'x': {
                    // \xHH or \xH; `\x` with no hex digit stays a literal pair.
                    const digits = readDigits(i + 2, 2, /[0-9a-fA-F]/);
                    if (digits.length > 0) {
                        decoded = String.fromCharCode(parseInt(digits, 16));
                        consumed = 2 + digits.length;
                    }
                    break;
                }
                case 'u':
                case 'U': {
                    // \uHHHH (≤4 hex digits) / \UHHHHHHHH (≤8 hex digits).
                    const digits = readDigits(i + 2, nxt === 'u' ? 4 : 8, /[0-9a-fA-F]/);
                    if (digits.length > 0) {
                        const codePoint = parseInt(digits, 16);
                        // Out-of-range values stay literal; fromCodePoint would throw.
                        if (codePoint <= 0x10ffff) {
                            decoded = String.fromCodePoint(codePoint);
                            consumed = 2 + digits.length;
                        }
                    }
                    break;
                }
                case 'c': {
                    // \cX — control character (`\cJ` is a newline, `\c?` is DEL).
                    // If the closing quote follows directly, keep `\c` literal.
                    const ctl = trimmed[i + 2];
                    if (ctl !== undefined && ctl !== quote) {
                        decoded = ctl === '?'
                            ? '\x7f'
                            : String.fromCharCode(ctl.toUpperCase().charCodeAt(0) & 0x1f);
                        consumed = 3;
                    }
                    break;
                }
                default:
                    break;
            }
            payload += decoded;
            i += consumed;
            continue;
        }
        if (!isAnsiC && quote === '"' && ch === '\\' && i + 1 < trimmed.length) {
            // Double-quote: the backslash is dropped, the next character kept.
            payload += trimmed[i + 1] ?? '';
            i += 2;
            continue;
        }
        payload += ch;
        i += 1;
    }
    // 6. Unterminated quote — the rest of the line is the payload.
    return payload;
}
|
|
403
700
|
/**
|
|
404
701
|
* Returns true if any segment's prefix-stripped head matches the
|
|
@@ -442,3 +739,318 @@ export function anySegmentMatches(cmd, regexSource) {
|
|
|
442
739
|
}
|
|
443
740
|
return false;
|
|
444
741
|
}
|
|
742
|
+
/**
 * Report whether some single segment of `cmd` has RAW text matched by
 * BOTH patterns.
 *
 * The two patterns must hit the same segment: a command whose
 * text-reading utility sits in one segment and whose `.env*` filename
 * sits in another (e.g. `echo "log: cat .env stuff" ; touch foo.env`)
 * does not count. Both patterns are compiled case-insensitively.
 *
 * @param {string} cmd - full command line.
 * @param {string} regexA - first regex source.
 * @param {string} regexB - second regex source.
 * @returns {boolean} true when one segment satisfies both patterns.
 */
export function anySegmentMatchesBoth(cmd, regexA, regexB) {
    const patterns = [new RegExp(regexA, 'i'), new RegExp(regexB, 'i')];
    return splitSegments(cmd).some(({ raw }) => patterns.every((pattern) => pattern.test(raw)));
}
|
|
767
|
+
/**
 * Report whether any segment's RAW text matches `regexSource`.
 *
 * "Raw" means the env-var prefix is left in place and only leading
 * whitespace is removed, which suits checks where the prefix itself is
 * the signal (`HUSKY=0 git`, `REA_BYPASS=`, …). The pattern is compiled
 * case-insensitively and is NOT anchored here; callers that want
 * "starts at the segment head" put `^` in the source themselves.
 *
 * @param {string} cmd - full command line.
 * @param {string} regexSource - regex source text.
 * @returns {boolean} true when at least one segment matches.
 */
export function anySegmentRawMatches(cmd, regexSource) {
    const pattern = new RegExp(regexSource, 'i');
    return splitSegments(cmd).some((seg) => pattern.test(seg.raw.replace(/^\s+/, '')));
}
|
|
788
|
+
/**
 * Report whether `regexSource` matches anywhere inside any one segment
 * of `cmd`.
 *
 * The pattern is tested against each segment's `head` field (not its
 * `raw` field), is compiled case-insensitively and is not anchored.
 * Because every segment is tested on its own, text in one segment (a
 * heredoc body, a commit message) cannot combine with text in another
 * segment to produce a match.
 *
 * @param {string} cmd - full command line.
 * @param {string} regexSource - regex source text.
 * @returns {boolean} true when at least one segment head matches.
 */
export function anySegmentContains(cmd, regexSource) {
    const pattern = new RegExp(regexSource, 'i');
    return splitSegments(cmd).some(({ head }) => pattern.test(head));
}
|
|
807
|
+
/**
 * Call `callback(raw, head)` once per segment of `cmd`, in segment
 * order.
 *
 * `raw` is the segment text with any env-var prefix preserved; `head`
 * is the prefix-stripped form. Whatever the callback returns is
 * discarded. Lets a caller judge each segment independently — for
 * example when one segment carries `--force-with-lease` and another an
 * unsafe `--force`.
 *
 * @param {string} cmd - full command line.
 * @param {(raw: string, head: string) => void} callback - per-segment visitor.
 * @returns {void}
 */
export function forEachSegment(cmd, callback) {
    splitSegments(cmd).forEach(({ raw, head }) => {
        callback(raw, head);
    });
}
|
|
824
|
+
/**
 * Sentinels substituted for `|`, `;` and `&` when those characters
 * occur inside a quoted region. They are exported so callers can
 * recognise them in `quoteMaskedCmd` output.
 */
export const INQUOTE_PIPE_SENTINEL = '__REA_INQUOTE_PIPE_a8f2c1__';
export const INQUOTE_SEMI_SENTINEL = '__REA_INQUOTE_SC_a8f2c1__';
export const INQUOTE_AMP_SENTINEL = '__REA_INQUOTE_AMP_a8f2c1__';
/**
 * Return `cmd` with every in-quote `|` / `;` / `&` replaced by its
 * sentinel, so a caller's regex can look for real (unquoted)
 * separators across the WHOLE command without tripping on quoted text
 * such as `git commit -m "curl|sh later"`.
 *
 * Quoting states:
 *   - plain  : outside quotes; separators are left untouched.
 *   - "…"    : double quotes; backslash escapes the next character.
 *   - '…'    : single quotes; no escapes.
 *   - $'…'   : ANSI-C quotes; backslash escapes the next character.
 *
 * Quote characters (including the opening `$'`) are copied through
 * unchanged. Outside single quotes, a backslash plus the character
 * after it is copied verbatim as a pair — so an escaped quote never
 * opens or closes a region, and a backslash-escaped separator inside
 * `"…"` / `$'…'` is NOT replaced by a sentinel.
 *
 * @param {string} cmd - command text to mask.
 * @returns {string} masked command text.
 */
export function quoteMaskedCmd(cmd) {
    const PLAIN = 0;
    const DOUBLE = 1;
    const SINGLE = 2;
    const ANSI_C = 3;
    const sentinelFor = new Map([
        ['|', INQUOTE_PIPE_SENTINEL],
        [';', INQUOTE_SEMI_SENTINEL],
        ['&', INQUOTE_AMP_SENTINEL],
    ]);
    const parts = [];
    const total = cmd.length;
    let state = PLAIN;
    let pos = 0;
    while (pos < total) {
        const cur = cmd[pos];
        const hasNext = pos + 1 < total;
        // Escape pair: honoured everywhere except inside single quotes.
        if (cur === '\\' && hasNext && state !== SINGLE) {
            parts.push(cur, cmd[pos + 1]);
            pos += 2;
            continue;
        }
        if (state === PLAIN) {
            if (cur === '$' && hasNext && cmd[pos + 1] === "'") {
                state = ANSI_C;
                parts.push("$'");
                pos += 2;
                continue;
            }
            if (cur === '"') {
                state = DOUBLE;
            }
            else if (cur === "'") {
                state = SINGLE;
            }
            parts.push(cur);
            pos += 1;
            continue;
        }
        // Inside a quoted region: close on the matching quote, otherwise
        // swap separators for their sentinels.
        const closer = state === DOUBLE ? '"' : "'";
        if (cur === closer) {
            state = PLAIN;
            parts.push(cur);
        }
        else {
            parts.push(sentinelFor.get(cur) ?? cur);
        }
        pos += 1;
    }
    return parts.join('');
}
|
|
975
|
+
/**
 * Return `cmd` followed by every nested-shell payload found inside it,
 * each as its own string.
 *
 * Lets a whole-command scan (such as a `curl … | sh` check) also see
 * the payload of `zsh -c "curl https://x | sh"`, where the `|` is
 * inside quotes at the outer level. Payloads are listed depth-first in
 * the order their wrapper segments appear; unwrapping stops after
 * `MAX_NESTED_DEPTH` levels.
 *
 * @param {string} cmd - full command line.
 * @returns {string[]} `cmd` first, then each extracted payload.
 */
export function unwrapNestedShells(cmd) {
    const collected = [cmd];
    unwrapNestedShellsRecursive(cmd, 0, collected);
    return collected;
}
/**
 * Append to `acc` the payload of every nested-shell wrapper segment in
 * `cmd`, descending into each payload straight after recording it.
 *
 * @param {string} cmd - command text at the current nesting level.
 * @param {number} depth - current nesting level.
 * @param {string[]} acc - accumulator, mutated in place.
 * @returns {void}
 */
function unwrapNestedShellsRecursive(cmd, depth, acc) {
    if (depth >= MAX_NESTED_DEPTH) {
        return;
    }
    // Work segment by segment so each segment's wrapper is examined on
    // its own.
    const pieces = splitOnUnquotedSeparators(maskQuotedSeparators(cmd));
    pieces.forEach((piece) => {
        const payload = extractNestedShellPayload(stripSegmentPrefix(unmask(piece)));
        if (payload === null) {
            return;
        }
        acc.push(payload);
        unwrapNestedShellsRecursive(payload, depth + 1, acc);
    });
}
|
|
1012
|
+
/**
 * Return every segment of `cmd` whose prefix-stripped head STARTS WITH
 * a match for `regexSource`.
 *
 * Each hit is returned as the `{ raw, head }` entry produced by
 * `splitSegments`, so callers can go on to inspect the raw
 * (env-prefix-intact) text of the same segment.
 *
 * The source is wrapped in a non-capturing group before `^` is
 * prepended. Without the group, a source with a top-level alternation
 * such as `git push|git commit` compiles to `^git push|git commit`,
 * where only the first alternative is anchored and the second matches
 * anywhere in the head. Sources that are already grouped or contain no
 * top-level `|` behave exactly as before; capture-group numbering is
 * unaffected.
 *
 * Case-INSENSITIVE. Empty array on no matches.
 *
 * @param {string} cmd - full command line.
 * @param {string} regexSource - regex source, anchored at the head.
 * @returns {{ raw: string, head: string }[]} matching segments in order.
 */
export function findAllSegmentsStartingWith(cmd, regexSource) {
    const re = new RegExp(`^(?:${regexSource})`, 'i');
    return splitSegments(cmd).filter((seg) => re.test(seg.head));
}
|
|
1034
|
+
/**
 * Return every segment of `cmd` whose RAW text matches `regexSource`.
 *
 * The raw text keeps its env-var prefix; only leading whitespace is
 * removed before testing. Intended for env-prefix shapes that a
 * head-based lookup would not see, such as a quoted-value assignment
 * like `REA_SKIP="urgent fix"`. The pattern is compiled
 * case-insensitively and is not anchored by this function.
 *
 * @param {string} cmd - full command line.
 * @param {string} regexSource - regex source text.
 * @returns {{ raw: string, head: string }[]} matching segments in
 *   order; empty when nothing matches.
 */
export function findAllSegmentsRawMatches(cmd, regexSource) {
    const pattern = new RegExp(regexSource, 'i');
    return splitSegments(cmd).filter((seg) => pattern.test(seg.raw.replace(/^\s+/, '')));
}
|