@bookedsolid/rea 0.32.0 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/dist/cli/hook.js +49 -0
  2. package/dist/hooks/_lib/payload.d.ts +38 -0
  3. package/dist/hooks/_lib/payload.js +79 -0
  4. package/dist/hooks/_lib/segments.d.ts +127 -0
  5. package/dist/hooks/_lib/segments.js +628 -16
  6. package/dist/hooks/architecture-review-gate/index.d.ts +58 -0
  7. package/dist/hooks/architecture-review-gate/index.js +250 -0
  8. package/dist/hooks/changeset-security-gate/index.d.ts +71 -0
  9. package/dist/hooks/changeset-security-gate/index.js +330 -0
  10. package/dist/hooks/dangerous-bash-interceptor/index.d.ts +103 -0
  11. package/dist/hooks/dangerous-bash-interceptor/index.js +669 -0
  12. package/dist/hooks/dependency-audit-gate/index.d.ts +91 -0
  13. package/dist/hooks/dependency-audit-gate/index.js +294 -0
  14. package/dist/hooks/env-file-protection/index.d.ts +55 -0
  15. package/dist/hooks/env-file-protection/index.js +159 -0
  16. package/dist/hooks/local-review-gate/index.d.ts +145 -0
  17. package/dist/hooks/local-review-gate/index.js +374 -0
  18. package/dist/hooks/secret-scanner/index.d.ts +143 -0
  19. package/dist/hooks/secret-scanner/index.js +404 -0
  20. package/hooks/architecture-review-gate.sh +92 -77
  21. package/hooks/changeset-security-gate.sh +114 -149
  22. package/hooks/dangerous-bash-interceptor.sh +168 -386
  23. package/hooks/dependency-audit-gate.sh +115 -156
  24. package/hooks/env-file-protection.sh +130 -97
  25. package/hooks/local-review-gate.sh +523 -410
  26. package/hooks/secret-scanner.sh +210 -200
  27. package/package.json +1 -1
  28. package/templates/architecture-review-gate.dogfood-staged.sh +116 -0
  29. package/templates/changeset-security-gate.dogfood-staged.sh +137 -0
  30. package/templates/dangerous-bash-interceptor.dogfood-staged.sh +196 -0
  31. package/templates/dependency-audit-gate.dogfood-staged.sh +138 -0
  32. package/templates/env-file-protection.dogfood-staged.sh +157 -0
  33. package/templates/local-review-gate.dogfood-staged.sh +573 -0
  34. package/templates/secret-scanner.dogfood-staged.sh +240 -0
@@ -229,22 +229,63 @@ function unmask(text) {
229
229
  * (lookbehind).
230
230
  */
231
231
  function splitOnUnquotedSeparators(masked) {
232
- // Negative lookbehind for `\` `git commit \; foo` shouldn't split.
233
- // JS regex supports lookbehind in V8 / Node 12+.
234
- const splitter = /(?<!\\)(\&\&|\|\||;|\||\&|\n)/g;
235
- // We split AND consume the separator (capture group above). The
236
- // result interleaves segment, separator, segment, separator, …; we
237
- // keep only the even-indexed entries (the segments).
238
- const parts = masked.split(splitter);
232
+ // 2026-05-15 codex round-3 P1 fix: walk char-by-char tracking
233
+ // backslash-escape state instead of using regex lookbehind. The
234
+ // pre-fix regex `(?<!\\)(...)` was a single-char negative lookbehind
235
+ // which treated `echo \\;` as "preceded by `\` → no split". But in
236
+ // bash semantics, `\\` is a literal `\` escape PAIR — the `;` that
237
+ // follows it is NOT escaped, so the command splits into two
238
+ // segments. The pre-fix splitter let `echo \\; npm install evil`
239
+ // pass as a single segment, defeating the dependency-audit-gate
240
+ // segment-anchor check and several other consumers.
241
+ //
242
+ // Strategy: walk left-to-right. When we encounter `\`, advance past
243
+ // the next character (the escape pair consumes 2 bytes). When we
244
+ // encounter a recognized separator at a non-pair position, emit a
245
+ // split. This matches bash's argv-tokenizer semantics for
246
+ // backslash-escape parity.
247
+ //
248
+ // The masker is byte-width-preserving so we can walk `masked`
249
+ // directly without re-syncing with the original.
239
250
  const segments = [];
240
- for (let i = 0; i < parts.length; i += 2) {
241
- const raw = parts[i];
242
- if (raw === undefined)
251
+ let segStart = 0;
252
+ let i = 0;
253
+ const n = masked.length;
254
+ while (i < n) {
255
+ const ch = masked[i];
256
+ if (ch === '\\' && i + 1 < n) {
257
+ // Escape pair — consume both, NEVER treat the next char as a
258
+ // separator. Bash `\\` is a literal `\`; the char following
259
+ // the pair is then evaluated for separator status.
260
+ i += 2;
243
261
  continue;
244
- const trimmed = raw.trim();
245
- if (trimmed.length === 0)
262
+ }
263
+ // Separator detection. Order matters: `&&` and `||` are 2-byte
264
+ // separators; the 1-byte forms must not steal their first byte.
265
+ let sepLen = 0;
266
+ if (ch === '&' && masked[i + 1] === '&')
267
+ sepLen = 2;
268
+ else if (ch === '|' && masked[i + 1] === '|')
269
+ sepLen = 2;
270
+ else if (ch === ';' || ch === '|' || ch === '&' || ch === '\n')
271
+ sepLen = 1;
272
+ if (sepLen > 0) {
273
+ const piece = masked.slice(segStart, i);
274
+ const trimmed = piece.trim();
275
+ if (trimmed.length > 0)
276
+ segments.push(trimmed);
277
+ i += sepLen;
278
+ segStart = i;
246
279
  continue;
247
- segments.push(trimmed);
280
+ }
281
+ i += 1;
282
+ }
283
+ // Tail.
284
+ if (segStart < n) {
285
+ const piece = masked.slice(segStart, n);
286
+ const trimmed = piece.trim();
287
+ if (trimmed.length > 0)
288
+ segments.push(trimmed);
248
289
  }
249
290
  return segments;
250
291
  }
@@ -389,16 +430,272 @@ function stripSegmentPrefix(seg) {
389
430
  * Split `cmd` into segments using the quote-aware masking → split →
390
431
  * unmask pipeline. Returns an array of `{ raw, head }` tuples in the
391
432
  * order they appeared in the original command.
433
+ *
434
+ * 0.33.0 — nested-shell unwrapping was added on top of the original
435
+ * 0.32.0 splitter. When a segment's head is `bash -c|-lc|--c PAYLOAD`
436
+ * or `sh -c|-lc|--c PAYLOAD` (any combination of `-l` and `-c` flags),
437
+ * the PAYLOAD inside the quoted arg becomes additional segments
438
+ * appended after the wrapper segment. Mirrors the bash counterpart's
439
+ * `_rea_unwrap_nested_shells` (helix-017 #3 fix). Recurses up to
440
+ * `MAX_NESTED_DEPTH` levels.
392
441
  */
393
442
  export function splitSegments(cmd) {
394
443
  if (cmd.length === 0)
395
444
  return [];
445
+ return splitSegmentsRecursive(cmd, 0);
446
+ }
447
+ const MAX_NESTED_DEPTH = 8;
448
+ function splitSegmentsRecursive(cmd, depth) {
396
449
  const masked = maskQuotedSeparators(cmd);
397
450
  const rawSegs = splitOnUnquotedSeparators(masked);
398
- return rawSegs.map((raw) => {
451
+ const out = [];
452
+ for (const raw of rawSegs) {
399
453
  const unmaskedRaw = unmask(raw);
400
- return { raw: unmaskedRaw, head: stripSegmentPrefix(unmaskedRaw) };
401
- });
454
+ const head = stripSegmentPrefix(unmaskedRaw);
455
+ out.push({ raw: unmaskedRaw, head });
456
+ // Try to unwrap a nested shell payload.
457
+ if (depth < MAX_NESTED_DEPTH) {
458
+ const inner = extractNestedShellPayload(head);
459
+ if (inner !== null) {
460
+ // Append the inner payload's segments AFTER the wrapper segment.
461
+ // This preserves the bash hook's emit-order: the wrapper IS a
462
+ // segment too (so a hook that anchors on `bash` for some other
463
+ // reason still sees it), and the inner segments follow.
464
+ out.push(...splitSegmentsRecursive(inner, depth + 1));
465
+ }
466
+ }
467
+ }
468
+ return out;
469
+ }
470
/**
 * Single-character ANSI-C (`$'…'`) escapes that decode to one fixed
 * character. Numeric and control escapes are handled separately in
 * `decodeAnsiCEscape`.
 */
const ANSI_C_SIMPLE_ESCAPES = new Map([
  ['n', '\n'],
  ['t', '\t'],
  ['r', '\r'],
  ['\\', '\\'],
  ["'", "'"],
  ['"', '"'],
  ['?', '?'],
  ['a', '\x07'],
  ['b', '\x08'],
  ['e', '\x1b'],
  ['E', '\x1b'],
  ['f', '\x0c'],
  ['v', '\x0b'],
]);

/**
 * Read up to `limit` consecutive characters matching `digitClass`,
 * starting at `from`.
 */
function readDigitRun(text, from, limit, digitClass) {
  let end = from;
  while (end < text.length && end - from < limit && digitClass.test(text[end])) {
    end += 1;
  }
  return text.slice(from, end);
}

/**
 * Decode the ANSI-C escape that starts at `text[at]` (a backslash with
 * at least one character after it).
 *
 * Beyond the simple table this decodes `\nnn` (1–3 octal digits, value
 * masked to 8 bits), `\xHH`, `\uHHHH`, `\UHHHHHHHH` and `\cX`. These
 * matter because each of them can spell a separator (`\073`, `\x3b`,
 * `\u003b` are `;`; `\cJ` is newline) that bash WILL honor when it runs
 * the payload. Unknown escapes are preserved as the literal pair.
 *
 * @returns {{ decoded: string, next: number }} Decoded text and the
 *   index of the first character after the escape.
 */
function decodeAnsiCEscape(text, at) {
  const marker = text[at + 1];
  const literalPair = { decoded: `\\${marker}`, next: at + 2 };

  const simple = ANSI_C_SIMPLE_ESCAPES.get(marker);
  if (simple !== undefined) {
    return { decoded: simple, next: at + 2 };
  }

  if (marker >= '0' && marker <= '7') {
    // `\nnn` — the marker itself is the first octal digit. A lone `\0`
    // therefore still decodes to NUL.
    const digits = readDigitRun(text, at + 1, 3, /[0-7]/);
    const value = Number.parseInt(digits, 8) & 0xff;
    return { decoded: String.fromCharCode(value), next: at + 1 + digits.length };
  }

  if (marker === 'x' || marker === 'u' || marker === 'U') {
    const limit = marker === 'x' ? 2 : marker === 'u' ? 4 : 8;
    const digits = readDigitRun(text, at + 2, limit, /[0-9a-fA-F]/);
    if (digits.length === 0) {
      return literalPair;
    }
    const codePoint = Number.parseInt(digits, 16);
    if (codePoint > 0x10ffff) {
      return literalPair;
    }
    return { decoded: String.fromCodePoint(codePoint), next: at + 2 + digits.length };
  }

  if (marker === 'c' && at + 2 < text.length) {
    const target = text[at + 2];
    // Never swallow a quote or backslash as the control target: that
    // would desynchronize closing-quote detection in the caller.
    if (target !== "'" && target !== '\\') {
      const value = target === '?' ? 0x7f : target.charCodeAt(0) & 0x1f;
      return { decoded: String.fromCharCode(value), next: at + 3 };
    }
  }

  return literalPair;
}

/**
 * Recognize a nested-shell wrapper segment and return its unquoted
 * payload string, or `null` when the segment is not a wrapper.
 *
 * Recognition rules:
 *
 *   1. The first token must be one of: bash, sh, zsh, dash, ksh, mksh,
 *      oksh, posh, yash, csh, tcsh, fish (case-insensitive). pwsh is
 *      deliberately not handled.
 *   2. Zero or more flag tokens follow, each shaped `-<letters>` or
 *      `--<letters>`. Any other flag shape aborts with `null`.
 *   3. At least one flag token must contain the letter `c` (`-c`,
 *      `-lc`, `-lic`, `--c`, …). Further flag tokens after it are
 *      tolerated.
 *   4. The first non-flag token is the payload and must begin with `'`,
 *      `"` or `$'`. An unquoted payload returns `null`.
 *
 * Payload decoding:
 *
 *   - `'…'`  — taken verbatim up to the closing quote.
 *   - `"…"`  — a backslash drops out and the following character is
 *              taken literally.
 *   - `$'…'` — ANSI-C escapes are decoded, including the octal,
 *              `\x`, `\u`, `\U` and `\cX` forms, so that a separator
 *              spelled as an escape is visible to the segment splitter.
 *   - An unterminated quote yields everything up to end of input.
 *
 * @param {string} head - Prefix-stripped segment head.
 * @returns {string | null} Decoded payload, or `null` if not a wrapper.
 */
function extractNestedShellPayload(head) {
  const trimmed = head.trimStart();
  if (trimmed.length === 0) {
    return null;
  }

  // 1. Shell-name token.
  const shellMatch = /^(bash|sh|zsh|dash|ksh|mksh|oksh|posh|yash|csh|tcsh|fish)\b/i.exec(trimmed);
  if (shellMatch === null) {
    return null;
  }
  let cursor = shellMatch[0].length;

  // 2. Flag tokens. The loop leaves `cursor` on the first character of
  //    the first non-flag token, or at end of input.
  let sawCFlag = false;
  while (cursor < trimmed.length) {
    while (cursor < trimmed.length && /\s/.test(trimmed[cursor])) {
      cursor += 1;
    }
    if (cursor >= trimmed.length) {
      return null;
    }
    if (trimmed[cursor] !== '-') {
      break;
    }
    const flagMatch = /^\S+/.exec(trimmed.slice(cursor));
    if (flagMatch === null) {
      return null;
    }
    const flag = flagMatch[0];
    cursor += flag.length;
    if (!/^--?[A-Za-z]+$/.test(flag)) {
      return null;
    }
    if (/c/i.test(flag.replace(/^--?/, ''))) {
      sawCFlag = true;
    }
  }
  if (!sawCFlag || cursor >= trimmed.length) {
    return null;
  }

  // 3. Quote introducer.
  const first = trimmed[cursor];
  let quote;
  let isAnsiC = false;
  let payloadStart;
  if (first === '$' && trimmed[cursor + 1] === "'") {
    quote = "'";
    isAnsiC = true;
    payloadStart = cursor + 2;
  } else if (first === "'" || first === '"') {
    quote = first;
    payloadStart = cursor + 1;
  } else {
    // Unquoted payload — refuse to unwrap.
    return null;
  }

  // 4. Collect the payload up to the matching closing quote.
  let i = payloadStart;
  let payload = '';
  while (i < trimmed.length) {
    const ch = trimmed[i];
    if (ch === quote) {
      return payload;
    }
    const startsEscape = ch === '\\' && i + 1 < trimmed.length;
    if (isAnsiC && startsEscape) {
      const { decoded, next } = decodeAnsiCEscape(trimmed, i);
      payload += decoded;
      i = next;
      continue;
    }
    if (!isAnsiC && quote === '"' && startsEscape) {
      payload += trimmed[i + 1];
      i += 2;
      continue;
    }
    payload += ch;
    i += 1;
  }

  // Unterminated quote — the rest of the line is the payload.
  return payload;
}
403
700
  /**
404
701
  * Returns true if any segment's prefix-stripped head matches the
@@ -442,3 +739,318 @@ export function anySegmentMatches(cmd, regexSource) {
442
739
  }
443
740
  return false;
444
741
  }
742
/**
 * Report whether some single segment of `cmd` has RAW text matching
 * both patterns at once.
 *
 * Both patterns must hit the SAME segment: a command whose first
 * segment matches only `regexA` and whose second segment matches only
 * `regexB` yields `false`.
 *
 * @param {string} cmd - Full command line.
 * @param {string} regexA - First regex source, compiled case-insensitively.
 * @param {string} regexB - Second regex source, compiled case-insensitively.
 * @returns {boolean} `true` when one segment satisfies both patterns.
 */
export function anySegmentMatchesBoth(cmd, regexA, regexB) {
  const required = [new RegExp(regexA, 'i'), new RegExp(regexB, 'i')];
  return splitSegments(cmd).some(({ raw }) =>
    required.every((pattern) => pattern.test(raw)),
  );
}
767
/**
 * Report whether any segment's RAW text matches `regexSource`.
 *
 * Only leading whitespace is removed before matching; whatever else the
 * segment starts with (for instance an env-var assignment) stays in
 * place. No `^` is added — include it in `regexSource` when the match
 * must start at the beginning of the segment.
 *
 * @param {string} cmd - Full command line.
 * @param {string} regexSource - Regex source, compiled case-insensitively.
 * @returns {boolean} `true` on the first matching segment.
 */
export function anySegmentRawMatches(cmd, regexSource) {
  const matcher = new RegExp(regexSource, 'i');
  return splitSegments(cmd).some((segment) =>
    matcher.test(segment.raw.trimStart()),
  );
}
788
/**
 * Report whether `regexSource` matches anywhere inside any single
 * segment of `cmd`.
 *
 * The pattern is tested against each segment's `head` (the value
 * produced by `stripSegmentPrefix`), not against the segment's raw
 * text, and no `^` anchor is added. Because each segment is tested on
 * its own, a match can never span two segments.
 *
 * @param {string} cmd - Full command line.
 * @param {string} regexSource - Regex source, compiled case-insensitively.
 * @returns {boolean} `true` on the first segment whose head matches.
 */
export function anySegmentContains(cmd, regexSource) {
  const matcher = new RegExp(regexSource, 'i');
  return splitSegments(cmd).some(({ head }) => matcher.test(head));
}
807
/**
 * Call `callback(raw, head)` once per segment of `cmd`, in order.
 *
 * @param {string} cmd - Full command line.
 * @param {(raw: string, head: string) => unknown} callback - Receives
 *   the segment's raw text and its prefix-stripped head. Its return
 *   value is discarded.
 * @returns {void}
 */
export function forEachSegment(cmd, callback) {
  splitSegments(cmd).forEach(({ raw, head }) => {
    callback(raw, head);
  });
}
824
/**
 * Sentinels substituted for `|`, `;` and `&` when those characters
 * occur inside a quoted region. They are exported so callers can
 * recognize them in masked output.
 */
export const INQUOTE_PIPE_SENTINEL = '__REA_INQUOTE_PIPE_a8f2c1__';
export const INQUOTE_SEMI_SENTINEL = '__REA_INQUOTE_SC_a8f2c1__';
export const INQUOTE_AMP_SENTINEL = '__REA_INQUOTE_AMP_a8f2c1__';

/**
 * Return a copy of `cmd` in which every `|`, `;` and `&` located inside
 * a quoted region is replaced by its sentinel. Everything else —
 * including the quote characters and the `$'` introducer — is copied
 * through unchanged.
 *
 * Quoting rules applied while scanning:
 *   - outside quotes, `\X` is copied as a pair and never opens a quote;
 *   - inside `"…"` and `$'…'`, `\X` is copied as a pair, so an escaped
 *     quote does not close the region and an escaped separator is NOT
 *     replaced;
 *   - inside `'…'`, backslash has no special meaning.
 *
 * @param {string} cmd - Command text to mask.
 * @returns {string} Masked command text.
 */
export function quoteMaskedCmd(cmd) {
  const PLAIN = 0;
  const DOUBLE_QUOTED = 1;
  const SINGLE_QUOTED = 2;
  const ANSI_C_QUOTED = 3;

  const sentinelFor = new Map([
    ['|', INQUOTE_PIPE_SENTINEL],
    [';', INQUOTE_SEMI_SENTINEL],
    ['&', INQUOTE_AMP_SENTINEL],
  ]);

  const pieces = [];
  const total = cmd.length;
  let state = PLAIN;
  let pos = 0;

  while (pos < total) {
    const ch = cmd[pos];
    const hasNext = pos + 1 < total;

    if (state === PLAIN) {
      if (ch === '$' && hasNext && cmd[pos + 1] === "'") {
        state = ANSI_C_QUOTED;
        pieces.push("$'");
        pos += 2;
        continue;
      }
      if (ch === '\\' && hasNext) {
        pieces.push(ch, cmd[pos + 1]);
        pos += 2;
        continue;
      }
      if (ch === '"') {
        state = DOUBLE_QUOTED;
      } else if (ch === "'") {
        state = SINGLE_QUOTED;
      }
      pieces.push(ch);
      pos += 1;
      continue;
    }

    // Inside a quoted region. Only '…' ignores backslash escapes.
    if (state !== SINGLE_QUOTED && ch === '\\' && hasNext) {
      pieces.push(ch, cmd[pos + 1]);
      pos += 2;
      continue;
    }

    const closer = state === DOUBLE_QUOTED ? '"' : "'";
    if (ch === closer) {
      state = PLAIN;
      pieces.push(ch);
    } else {
      pieces.push(sentinelFor.get(ch) ?? ch);
    }
    pos += 1;
  }

  return pieces.join('');
}
975
/**
 * Return `cmd` followed by every nested-shell payload found inside it.
 *
 * The first element is always `cmd` itself. Each payload extracted from
 * a wrapper segment is appended, immediately followed by any payloads
 * nested inside it. Unwrapping stops after `MAX_NESTED_DEPTH` levels.
 *
 * @param {string} cmd - Full command line.
 * @returns {string[]} `cmd` plus all unwrapped payload strings.
 */
export function unwrapNestedShells(cmd) {
  const collected = [cmd];
  unwrapNestedShellsRecursive(cmd, 0, collected);
  return collected;
}

/**
 * Append to `acc` the payload of every wrapper segment in `cmd`,
 * descending into each payload right after it is appended.
 *
 * @param {string} cmd - Command text to inspect at this level.
 * @param {number} depth - Current nesting level.
 * @param {string[]} acc - Output array, mutated in place.
 * @returns {void}
 */
function unwrapNestedShellsRecursive(cmd, depth, acc) {
  if (depth >= MAX_NESTED_DEPTH) {
    return;
  }
  // Work per segment so each wrapper in a multi-segment command is
  // examined on its own.
  const maskedPieces = splitOnUnquotedSeparators(maskQuotedSeparators(cmd));
  for (const maskedPiece of maskedPieces) {
    const payload = extractNestedShellPayload(
      stripSegmentPrefix(unmask(maskedPiece)),
    );
    if (payload === null) {
      continue;
    }
    acc.push(payload);
    unwrapNestedShellsRecursive(payload, depth + 1, acc);
  }
}
1012
/**
 * Return every segment of `cmd` whose prefix-stripped head matches
 * `regexSource` at its very start.
 *
 * The source is wrapped in a non-capturing group before `^` is
 * prepended. Without the group, a source with top-level alternation
 * such as `git push|gh pr` would compile to `^git push|gh pr`, which
 * anchors only the first alternative and lets `gh pr` match anywhere in
 * the head. Sources without top-level alternation behave exactly as
 * before, and capture-group numbering is unaffected.
 *
 * @param {string} cmd - Full command line.
 * @param {string} regexSource - Regex source, compiled case-insensitively
 *   and anchored at the start of each head.
 * @returns {{ raw: string, head: string }[]} Matching segments in
 *   order; empty when nothing matches.
 */
export function findAllSegmentsStartingWith(cmd, regexSource) {
  const anchored = new RegExp(`^(?:${regexSource})`, 'i');
  return splitSegments(cmd).filter((segment) => anchored.test(segment.head));
}
1034
/**
 * Return every segment of `cmd` whose RAW text matches `regexSource`.
 *
 * Leading whitespace is removed from the raw text before matching;
 * nothing else is stripped and no `^` anchor is added. The returned
 * entries are the original `{ raw, head }` segment objects.
 *
 * @param {string} cmd - Full command line.
 * @param {string} regexSource - Regex source, compiled case-insensitively.
 * @returns {{ raw: string, head: string }[]} Matching segments in
 *   order; empty when nothing matches.
 */
export function findAllSegmentsRawMatches(cmd, regexSource) {
  const matcher = new RegExp(regexSource, 'i');
  return splitSegments(cmd).filter((segment) =>
    matcher.test(segment.raw.trimStart()),
  );
}