@bookedsolid/rea 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/README.md +15 -0
  2. package/THREAT_MODEL.md +582 -0
  3. package/dist/audit/append.js +1 -1
  4. package/dist/cli/doctor.js +11 -12
  5. package/dist/cli/hook.d.ts +37 -3
  6. package/dist/cli/hook.js +167 -5
  7. package/dist/cli/init.js +14 -26
  8. package/dist/cli/install/canonical.js +18 -3
  9. package/dist/cli/install/commit-msg.js +1 -2
  10. package/dist/cli/install/copy.js +4 -13
  11. package/dist/cli/install/fs-safe.js +5 -16
  12. package/dist/cli/install/gitignore.js +1 -5
  13. package/dist/cli/install/pre-push.js +3 -8
  14. package/dist/cli/install/settings-merge.js +79 -16
  15. package/dist/cli/upgrade.js +14 -10
  16. package/dist/gateway/downstream.js +1 -2
  17. package/dist/gateway/live-state.js +3 -1
  18. package/dist/gateway/log.js +1 -3
  19. package/dist/gateway/middleware/audit.js +1 -1
  20. package/dist/gateway/middleware/injection.js +3 -9
  21. package/dist/gateway/middleware/policy.js +3 -1
  22. package/dist/gateway/middleware/redact.js +1 -1
  23. package/dist/gateway/observability/codex-telemetry.js +1 -2
  24. package/dist/gateway/reviewers/claude-self.js +10 -6
  25. package/dist/hooks/bash-scanner/blocked-scan.d.ts +26 -0
  26. package/dist/hooks/bash-scanner/blocked-scan.js +467 -0
  27. package/dist/hooks/bash-scanner/index.d.ts +41 -0
  28. package/dist/hooks/bash-scanner/index.js +62 -0
  29. package/dist/hooks/bash-scanner/parse-fail-closed.d.ts +31 -0
  30. package/dist/hooks/bash-scanner/parse-fail-closed.js +27 -0
  31. package/dist/hooks/bash-scanner/parser.d.ts +42 -0
  32. package/dist/hooks/bash-scanner/parser.js +92 -0
  33. package/dist/hooks/bash-scanner/protected-scan.d.ts +76 -0
  34. package/dist/hooks/bash-scanner/protected-scan.js +815 -0
  35. package/dist/hooks/bash-scanner/verdict.d.ts +80 -0
  36. package/dist/hooks/bash-scanner/verdict.js +49 -0
  37. package/dist/hooks/bash-scanner/walker.d.ts +165 -0
  38. package/dist/hooks/bash-scanner/walker.js +7954 -0
  39. package/dist/hooks/push-gate/base.js +2 -6
  40. package/dist/hooks/push-gate/codex-runner.js +3 -1
  41. package/dist/hooks/push-gate/index.js +9 -10
  42. package/dist/policy/loader.js +4 -1
  43. package/dist/registry/tofu-gate.js +2 -2
  44. package/hooks/blocked-paths-bash-gate.sh +142 -272
  45. package/hooks/protected-paths-bash-gate.sh +227 -511
  46. package/package.json +3 -2
  47. package/profiles/bst-internal-no-codex.yaml +1 -1
  48. package/profiles/bst-internal.yaml +1 -1
  49. package/profiles/client-engagement.yaml +1 -1
  50. package/profiles/lit-wc.yaml +1 -1
  51. package/profiles/minimal.yaml +1 -1
  52. package/profiles/open-source-no-codex.yaml +1 -1
  53. package/profiles/open-source.yaml +1 -1
  54. package/scripts/postinstall.mjs +1 -2
  55. package/scripts/run-vitest.mjs +117 -0
@@ -1,536 +1,252 @@
1
- #!/bin/bash
1
+ #!/usr/bin/env bash
2
2
  # PreToolUse hook: protected-paths-bash-gate.sh
3
- # Fires BEFORE every Bash tool call.
4
- # Refuses Bash commands that write to PROTECTED_PATTERNS via shell
5
- # redirection or write-flag utilities — the kill-switch and policy
6
- # files MUST be unreachable via any tool surface, including Bash.
7
3
  #
8
- # Pre-0.15.0, settings-protection.sh §6 protected `.rea/HALT`,
9
- # `.rea/policy.yaml`, `.claude/settings.json`, `.husky/*` against
10
- # Write/Edit/MultiEdit tool calls. But shell redirects bypassed it
11
- # entirely:
4
+ # 0.23.0+ — thin shim. Forwards stdin (Claude Code's tool_input JSON)
5
+ # to `rea hook scan-bash --mode protected`, parses the verdict, exits
6
+ # 0/2 accordingly.
12
7
  #
13
- # printf '...' > .rea/HALT # bypass Bash matcher only
14
- # tee .rea/policy.yaml < new.yaml # bypass
15
- # cp new-settings.json .claude/settings.json
16
- # sed -i '' '/foo/d' .husky/pre-push
17
- # dd of=.rea/HALT
8
+ # Pre-0.23.0 this hook was a 536-line bash regex pipeline. The rewrite
9
+ # moved every detection rule into a parser-backed AST walker at
10
+ # `src/hooks/bash-scanner/`. helix-023 and discord-ops Round 13 closed
11
+ # 9 bypass classes that lived in the old segmenter; the new scanner
12
+ # closes them definitionally — there is no segmenter to bypass.
18
13
  #
19
- # This hook closes that gap by detecting redirect/write patterns
20
- # whose target matches the same `_lib/protected-paths.sh` allowlist.
14
+ # Failure mode: if the rea CLI cannot be located in a SANDBOXED tier
15
+ # (consumer's `node_modules/@bookedsolid/rea/dist/cli/index.js` or the
16
+ # rea repo's own `dist/cli/index.js`), we REFUSE the command. NEVER
17
+ # ALLOW on uncertainty. Operators need `@bookedsolid/rea` installed for
18
+ # the gate to work; `rea doctor` flags missing CLI as a P0.
19
+ #
20
+ # Defense in depth: the shim does NOT trust the CLI exit code alone.
21
+ # After capturing stdout we re-parse the verdict with `node -e` to
22
+ # confirm:
23
+ # 1. it is valid JSON,
24
+ # 2. the top-level shape has `.verdict == "allow"|"block"`,
25
+ # 3. the verdict matches the exit code (allow→0, block→2).
26
+ # Any disagreement → exit 2.
27
+ #
28
+ # Codex round 4 Finding 2: dropped tier 1 (PATH `command -v rea`) and
29
+ # tier 2 (`node_modules/.bin/rea` symlink). Both are workspace-attacker
30
+ # controllable: an attacker who can write a file at
31
+ # `node_modules/.bin/rea` (or set PATH to a directory they own) can
32
+ # stage a fake `rea` binary that exits 0 with `{"verdict":"allow"}` and
33
+ # subvert the gate.
34
+ #
35
+ # Trust-boundary HONESTY (codex round 5 F5): the shim's structural
36
+ # defense is the realpath sandbox (round 4 #2 + round 5 F2). It defeats:
37
+ # - PATH-attacker hijack via fake `rea` binary
38
+ # - node_modules/.bin/rea symlink-bin hijack
39
+ # - node_modules/@bookedsolid/rea -> /tmp/sym-attacker symlink-out
40
+ # - intra-project hijack without a matching package.json
41
+ # It does NOT defeat an attacker who writes a forged dist/cli/index.js
42
+ # AND a matching package.json directly into node_modules/. At that level
43
+ # the attacker has already compromised the package install pipeline (npm
44
+ # registry, lockfile, dependency confusion) and any dependency the agent
45
+ # imports is also forgeable — hook-tier defense is past. The trust
46
+ # boundary is package-tier integrity (npm provenance + manifest
47
+ # verification), not the bash gate. See THREAT_MODEL §8.3 + docs/
48
+ # architecture/bash-scanner.md for the full rationale.
49
+ #
50
+ # Tier defense: realpath the resolved CLI before exec. Two complementary
51
+ # checks:
52
+ # PRIMARY (codex round 5 F2): realpath(cli) MUST live INSIDE
53
+ # realpath(CLAUDE_PROJECT_DIR). Catches symlink-out-of-project attacks
54
+ # where the attacker writes `node_modules/@bookedsolid/rea` as a
55
+ # symlink to a tree under `/tmp/sym-attacker` containing a forged
56
+ # `package.json` with name `@bookedsolid/rea` and a forged
57
+ # `dist/cli/index.js` that exits 0 with `{"verdict":"allow"}`. Pre-fix
58
+ # the secondary check (package.json walk-up) was the ONLY guard, and
59
+ # the attacker satisfies it by placing a forged package.json in their
60
+ # own tree.
61
+ # SECONDARY: walk up from the resolved CLI looking for an ancestor
62
+ # `package.json` whose `name` is `@bookedsolid/rea`. This guards
63
+ # against intra-project symlinks where the realpath stays inside
64
+ # the project (e.g. accidentally pointing dist/ at node_modules/).
65
+ #
66
+ # Codex round 2 R2-3 (preserved): REA_NODE_CLI env-var honoring REMOVED.
67
+ # Test harnesses must set CLAUDE_PROJECT_DIR to a directory whose
68
+ # `dist/cli/index.js` (or `node_modules/@bookedsolid/rea/...`) holds
69
+ # the trusted CLI build. The shim NEVER reads REA_NODE_CLI.
21
70
  #
22
71
  # Exit codes:
23
- # 0 = no protected-path write detected — allow
24
- # 2 = protected-path write via Bash detected — block
72
+ # 0 = allow (verdict.verdict == "allow")
73
+ # 2 = block (verdict.verdict == "block", or any failure path)
25
74
 
26
75
  set -uo pipefail
27
76
 
28
- # shellcheck source=_lib/protected-paths.sh
29
- source "$(dirname "$0")/_lib/protected-paths.sh"
30
- # shellcheck source=_lib/path-normalize.sh
31
- source "$(dirname "$0")/_lib/path-normalize.sh"
32
- # shellcheck source=_lib/cmd-segments.sh
33
- source "$(dirname "$0")/_lib/cmd-segments.sh"
34
- # shellcheck source=_lib/interpreter-scanner.sh
35
- source "$(dirname "$0")/_lib/interpreter-scanner.sh"
77
+ proj="${CLAUDE_PROJECT_DIR:-$(pwd)}"
36
78
 
37
- INPUT=$(cat)
79
+ # Resolve the rea CLI through a fixed 2-tier sandboxed order.
80
+ # 1. node_modules/@bookedsolid/rea/dist/cli/index.js (consumer-side
81
+ # direct dist execution — the published artifact)
82
+ # 2. dist/cli/index.js under CLAUDE_PROJECT_DIR (the rea repo's own
83
+ # dogfood install, where `rea` is the package itself)
84
+ #
85
+ # We build an `argv` array rather than a string so paths containing
86
+ # whitespace round-trip safely.
87
+ REA_ARGV=()
88
+ RESOLVED_CLI_PATH=""
89
+ if [ -f "$proj/node_modules/@bookedsolid/rea/dist/cli/index.js" ]; then
90
+ REA_ARGV=(node "$proj/node_modules/@bookedsolid/rea/dist/cli/index.js")
91
+ RESOLVED_CLI_PATH="$proj/node_modules/@bookedsolid/rea/dist/cli/index.js"
92
+ elif [ -f "$proj/dist/cli/index.js" ]; then
93
+ # rea repo dogfood: the project IS @bookedsolid/rea.
94
+ REA_ARGV=(node "$proj/dist/cli/index.js")
95
+ RESOLVED_CLI_PATH="$proj/dist/cli/index.js"
96
+ fi
38
97
 
39
- if ! command -v jq >/dev/null 2>&1; then
40
- printf 'REA ERROR: jq is required but not installed.\n' >&2
98
+ if [ "${#REA_ARGV[@]}" -eq 0 ]; then
99
+ printf 'rea: CLI not found at sandboxed tiers (node_modules/@bookedsolid/rea/dist or dist/).\n' >&2
100
+ printf 'Install @bookedsolid/rea via npm/pnpm and run `rea doctor`.\n' >&2
101
+ printf 'Refusing the Bash command on uncertainty.\n' >&2
41
102
  exit 2
42
103
  fi
43
104
 
44
- REA_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}"
45
-
46
- # HALT check — uniform with other hooks.
47
- HALT_FILE="${REA_ROOT}/.rea/HALT"
48
- if [ -f "$HALT_FILE" ]; then
49
- printf 'REA HALT: %s\nAll agent operations suspended. Run: rea unfreeze\n' \
50
- "$(head -c 1024 "$HALT_FILE" 2>/dev/null || echo 'Reason unknown')" >&2
105
+ # Codex round 4 Finding 2 tier defense: realpath the resolved CLI and
106
+ # verify it lives inside the sandboxed dirs. Catches symlink games.
107
+ # We require Node for the verifier anyway (below) — use it here too.
108
+ if ! command -v node >/dev/null 2>&1; then
109
+ printf 'rea: node not on PATH (required to realpath verify scan-bash CLI). Refusing.\n' >&2
110
+ exit 2
111
+ fi
112
+ sandbox_check=$(node -e '
113
+ const fs = require("fs");
114
+ const path = require("path");
115
+ const cli = process.argv[1];
116
+ const projDir = process.argv[2];
117
+ let real;
118
+ try { real = fs.realpathSync(cli); } catch (e) {
119
+ process.stdout.write("bad:realpath:" + (e && e.message ? e.message : String(e)));
120
+ process.exit(1);
121
+ }
122
+ // Codex round 5 F2 (P0) — PRIMARY check: realpath(cli) must live
123
+ // INSIDE realpath(CLAUDE_PROJECT_DIR). Pre-fix an attacker who
124
+ // writes a symlink at node_modules/@bookedsolid/rea pointing to
125
+ // /tmp/sym-attacker (containing a forged package.json + dist/cli/index.js
126
+ // that exits 0 with verdict:"allow") completely defeated the
127
+ // sandbox — the package.json walk-up was satisfied by the forged
128
+ // file in the attacker tree. Containing realCli to realProj closes
129
+ // every symlink-out attack class because the attacker no longer
130
+ // controls a path inside the project tree.
131
+ let realProj;
132
+ try { realProj = fs.realpathSync(projDir); } catch (e) {
133
+ process.stdout.write("bad:realpath-proj:" + (e && e.message ? e.message : String(e)));
134
+ process.exit(1);
135
+ }
136
+ const projWithSep = realProj.endsWith(path.sep) ? realProj : realProj + path.sep;
137
+ if (!(real === realProj || real.startsWith(projWithSep))) {
138
+ process.stdout.write("bad:cli-escapes-project:" + real + ":proj=" + realProj);
139
+ process.exit(1);
140
+ }
141
+ // Codex round 4 Finding 2 (now SECONDARY) — shape + ancestor pkg.json.
142
+ //
143
+ // Acceptance: the resolved CLI must end in `.../dist/cli/index.js`
144
+ // and have an ancestor `package.json` whose `name` is `@bookedsolid/rea`.
145
+ // This guards against intra-project hijack where an attacker writes
146
+ // a symlink at node_modules/@bookedsolid/rea pointing to a sibling
147
+ // tree INSIDE the project (e.g. ./scratch/) — the PRIMARY check
148
+ // accepts it (still inside project root) but the package.json walk-up
149
+ // refuses unless that tree contains the canonical package metadata.
150
+ const expectedEnd = path.join("dist", "cli", "index.js");
151
+ if (!real.endsWith(path.sep + expectedEnd) && real !== "/" + expectedEnd) {
152
+ process.stdout.write("bad:cli-shape:" + real);
153
+ process.exit(1);
154
+ }
155
+ // Walk up looking for package.json with the protected name.
156
+ let cur = path.dirname(path.dirname(path.dirname(real))); // pkg root
157
+ let found = false;
158
+ for (let i = 0; i < 20 && cur && cur !== path.dirname(cur); i += 1) {
159
+ const pj = path.join(cur, "package.json");
160
+ if (fs.existsSync(pj)) {
161
+ try {
162
+ const data = JSON.parse(fs.readFileSync(pj, "utf8"));
163
+ if (data && data.name === "@bookedsolid/rea") {
164
+ found = true;
165
+ break;
166
+ }
167
+ } catch (e) {
168
+ // Continue walking up.
169
+ }
170
+ }
171
+ cur = path.dirname(cur);
172
+ }
173
+ if (!found) {
174
+ process.stdout.write("bad:no-rea-pkg:" + real);
175
+ process.exit(1);
176
+ }
177
+ process.stdout.write("ok");
178
+ process.exit(0);
179
+ ' "$RESOLVED_CLI_PATH" "$proj" 2>&1)
180
+ sandbox_status=$?
181
+ if [ "$sandbox_status" -ne 0 ] || [ "$sandbox_check" != "ok" ]; then
182
+ printf 'rea: scan-bash CLI realpath escapes sandbox (%s). Refusing.\n' "$sandbox_check" >&2
51
183
  exit 2
52
184
  fi
53
185
 
54
- CMD=$(printf '%s' "$INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null)
55
- if [[ -z "$CMD" ]]; then
186
+ # Capture stdin once and forward it to the CLI.
187
+ payload=$(cat)
188
+ if [ -z "$payload" ]; then
56
189
  exit 0
57
190
  fi
58
191
 
59
- # Normalize a path token. 0.16.0 codex P1 fixes (helix Findings 015):
60
- # - resolve `..` segments via realpath when the path exists, OR reject
61
- # them outright when it doesn't (`.claude/hooks/../settings.json`
62
- # writes to `.claude/settings.json` but the literal-string match
63
- # missed it pre-fix)
64
- # - lowercase the result so case-insensitive matchers (macOS APFS,
65
- # `.ClAuDe/settings.json`) still match the canonical lowercase
66
- # pattern (`.claude/settings.json`)
67
- # - apply shared `_lib/path-normalize.sh::normalize_path` for backslash
68
- # translation + URL decode + leading-`./` strip
69
- # shellcheck source=_lib/path-normalize.sh
70
- source "$(dirname "$0")/_lib/path-normalize.sh"
71
-
72
- _normalize_target() {
73
- local t="$1"
74
- # Strip matching surrounding quotes.
75
- if [[ "$t" =~ ^\"(.*)\"$ ]]; then t="${BASH_REMATCH[1]}"; fi
76
- if [[ "$t" =~ ^\'(.*)\'$ ]]; then t="${BASH_REMATCH[1]}"; fi
77
- # 0.21.2 helix-022 #5: fail closed on shell parameter/command
78
- # substitution in the target. `printf x > "$p"` (where p was set
79
- # earlier in the segment to `.rea/HALT`) bypassed the gate because
80
- # neither the logical nor resolved-form check matched the literal
81
- # string `$p`. We DO NOT try to resolve `$NAME=value` assignments
82
- # in the same segment — that's a partial-execution semantic this
83
- # static analyzer cannot guarantee. Refuse with a clear sentinel
84
- # so the caller emits the actionable error message.
85
- case "$t" in
86
- *'$'*|*'`'*)
87
- printf '__rea_unresolved_expansion__:%s' "$t"
88
- return 0
89
- ;;
90
- esac
91
- # If the path contains `..` segments, resolve them aggressively. We
92
- # cannot rely on `realpath` being installed; do a manual resolution
93
- # by walking segments. This is the helix-015 P1 fix: pre-fix, the
94
- # literal `.claude/hooks/../settings.json` did not match the
95
- # `.claude/settings.json` pattern even though the OS would resolve
96
- # the write to that target.
97
- case "/$t/" in
98
- */../*)
99
- # Build absolute then walk and normalize segments.
100
- # 0.16.0 codex P1-1 fix: use `read -ra` with IFS=/ instead of an
101
- # unquoted `for part in $abs` loop. The unquoted `for` was subject
102
- # to pathname expansion — `.claude/*/../settings.json` would glob
103
- # `*` against the agent's CWD, mangling the resolved path and
104
- # bypassing the protected-paths matcher. `read -ra` with an
105
- # explicit delimiter disables both word-splitting (via IFS) AND
106
- # pathname expansion (read does not glob).
107
- local abs="$t"
108
- [[ "$abs" != /* ]] && abs="$REA_ROOT/$abs"
109
- local -a raw_parts parts=()
110
- IFS='/' read -ra raw_parts <<<"$abs"
111
- for part in "${raw_parts[@]}"; do
112
- case "$part" in
113
- ''|.) continue ;;
114
- ..) [[ "${#parts[@]}" -gt 0 ]] && unset 'parts[${#parts[@]}-1]' ;;
115
- *) parts+=("$part") ;;
116
- esac
117
- done
118
- t="/$(IFS=/; printf '%s' "${parts[*]}")"
119
- # 0.16.0 codex P2-3 fix: if the resolved absolute path escapes
120
- # REA_ROOT, emit a sentinel so the caller refuses outright.
121
- # `exit 2` here would only exit the `$()` subshell, not the parent
122
- # hook process — sentinel + caller-side handling is the only
123
- # cross-shell-portable way.
124
- if [[ "$t" != "$REA_ROOT" && "$t" != "$REA_ROOT"/* ]]; then
125
- printf '__rea_outside_root__:%s' "$t"
126
- return 0
127
- fi
128
- ;;
129
- esac
130
- # Hand off to shared normalize_path (strips $REA_ROOT, URL-decodes,
131
- # translates `\` → `/`, strips leading `./`).
132
- t=$(normalize_path "$t")
133
- # Lowercase for case-insensitive matching (helix-015 P1 fix #2 —
134
- # macOS APFS allows `.ClAuDe/settings.json` to land on the same
135
- # file as `.claude/settings.json`, so the matcher must compare
136
- # lowercased forms).
137
- printf '%s' "$t" | tr '[:upper:]' '[:lower:]'
138
- }
139
-
140
- # 0.21.2 helix-022 #4: cp/mv destination extractor. Walks the segment
141
- # token-by-token, skips flags (single-dash, double-dash, `--` end-of-
142
- # options separator), returns the LAST positional argument — which is
143
- # the destination per POSIX cp/mv semantic.
144
- #
145
- # Handles:
146
- # cp src dst → dst
147
- # cp -f src dst → dst
148
- # cp --force src dst → dst
149
- # cp a b c dst → dst (multi-source: last is destination)
150
- # cp -- -src dst → dst (-- ends option processing)
151
- # cp -t dir src → src is the source after -t flag (-t SOURCE_FIRST)
152
- # but we don't try to follow -t semantics; we
153
- # conservatively treat the LAST positional as
154
- # the destination, which over-blocks `-t dir src`
155
- # (destination becomes `src`) — the caller's
156
- # rea_path_is_protected check then determines
157
- # if that's actually protected. False-positive
158
- # case is narrow.
159
- #
160
- # Flag-with-value awareness: short flag clusters that take a value
161
- # (cp -t TARGET_DIR, mv -S SUFFIX, install -m MODE, etc.) consume the
162
- # next token. Conservative heuristic: known short-options-with-values
163
- # get the next token consumed.
164
- _extract_cpmv_destination() {
165
- local segment="$1"
166
- local stripped="${segment#"${segment%%[![:space:]]*}"}"
167
- # Word-split on whitespace. `set --` is intentional; downstream
168
- # iteration consumes positional args.
169
- local positionals=()
170
- local found_cmd=""
171
- local end_of_options=0
172
- # shellcheck disable=SC2086
173
- set -- $stripped
174
- while [ "$#" -gt 0 ]; do
175
- local tok="$1"
176
- shift
177
- if [[ -z "$found_cmd" ]]; then
178
- case "$tok" in
179
- cp|mv) found_cmd="$tok" ;;
180
- esac
181
- continue
192
+ # Run the scanner.
193
+ verdict=$(printf '%s' "$payload" | "${REA_ARGV[@]}" hook scan-bash --mode protected)
194
+ status=$?
195
+
196
+ # Defense in depth — verify the verdict JSON matches the exit code.
197
+ verifier='try {
198
+ const raw = require("fs").readFileSync(0, "utf8");
199
+ if (raw.trim().length === 0) { process.stdout.write("bad:empty"); process.exit(1); }
200
+ const v = JSON.parse(raw);
201
+ if (typeof v !== "object" || v === null || Array.isArray(v)) {
202
+ process.stdout.write("bad:non-object"); process.exit(1);
203
+ }
204
+ if (v.verdict !== "allow" && v.verdict !== "block") {
205
+ process.stdout.write("bad:verdict-shape:" + String(v.verdict)); process.exit(1);
206
+ }
207
+ process.stdout.write("ok:" + v.verdict); process.exit(0);
208
+ } catch (e) {
209
+ process.stdout.write("bad:" + (e && e.message ? e.message : String(e))); process.exit(1);
210
+ }'
211
+
212
+ verdict_check=$(printf '%s' "$verdict" | node -e "$verifier" 2>&1)
213
+ verdict_check_status=$?
214
+
215
+ case "$status" in
216
+ 0)
217
+ if [ "$verdict_check_status" -ne 0 ]; then
218
+ printf 'rea: scan-bash exited 0 but verdict JSON is malformed (%s). Refusing on uncertainty.\n' "$verdict_check" >&2
219
+ exit 2
182
220
  fi
183
- if [[ "$end_of_options" -eq 1 ]]; then
184
- positionals+=("$tok")
185
- continue
221
+ if [ "$verdict_check" != "ok:allow" ]; then
222
+ printf 'rea: scan-bash exit 0 but verdict says %s. Refusing on uncertainty.\n' "$verdict_check" >&2
223
+ exit 2
186
224
  fi
187
- case "$tok" in
188
- --) end_of_options=1; continue ;;
189
- --*=*) continue ;;
190
- --*)
191
- # Long flags that take a value as the next token.
192
- case "$tok" in
193
- --target-directory|--reply|--suffix|--backup|--reflink|--strip-trailing-slashes)
194
- shift 2>/dev/null || true
195
- ;;
196
- esac
197
- continue
198
- ;;
199
- -*)
200
- # Short flag cluster. Check the LAST char — if it's a known
201
- # value-taking flag, consume the next token.
202
- case "$tok" in
203
- *-t|*-S|*-Z|*-T) shift 2>/dev/null || true ;;
204
- esac
205
- continue
206
- ;;
207
- *)
208
- positionals+=("$tok")
209
- ;;
210
- esac
211
- done
212
- if [[ ${#positionals[@]} -ge 2 ]]; then
213
- printf '%s' "${positionals[$((${#positionals[@]} - 1))]}"
214
- fi
215
- }
216
-
217
- # Refuse and exit 2 with a uniform error message.
218
- _refuse() {
219
- local pattern="$1" target="$2" segment="$3"
220
- {
221
- printf 'PROTECTED PATH (bash): write to a package-managed file blocked\n'
222
- printf '\n'
223
- printf ' Pattern matched: %s\n' "$pattern"
224
- printf ' Resolved target: %s\n' "$target"
225
- printf ' Segment: %s\n' "$segment"
226
- printf '\n'
227
- printf ' Rule: protected paths (kill-switch, policy.yaml, settings.json,\n'
228
- printf ' .husky/*) are unreachable via Bash redirects too — not just\n'
229
- printf ' Write/Edit/MultiEdit. To modify, a human must edit directly.\n'
230
- } >&2
231
- exit 2
232
- }
233
-
234
- # Inspect one segment for redirect / write patterns and refuse if the
235
- # target matches any protected pattern.
236
- _check_segment() {
237
- local _raw="$1" segment="$2"
238
- [[ -z "$segment" ]] && return 0
239
-
240
- local target_token=""
241
- local detected_form=""
242
-
243
- # bash `[[ =~ ]]` regex literals with `|` and `(...)` parsed inline
244
- # confuse some bash versions on macOS. Use named variables for each
245
- # pattern so the literal stays in a string context only.
246
- # 0.16.0 codex P1 fix (helix-015 #3): widened redirect regex. Pre-fix
247
- # only matched `>`, `>>`, `2>`, `2>>`, `&>`. Missed:
248
- # - `1>` / `1>>` (explicit stdout fd)
249
- # - `>|` (noclobber-override redirect)
250
- # - `[0-9]+>` / `[0-9]+>>` (any fd prefix — `9>file`, `42>>file`)
251
- # All of these write to the target and bypassed the gate. The new
252
- # pattern accepts: optional fd-prefix, then `>` or `>>` or `>|`, with
253
- # optional `&` for stderr-merge variants.
254
- local re_redirect='(^|[[:space:]])(&>>|&>|[0-9]+>>|[0-9]+>\||[0-9]+>|>>|>\||>)[[:space:]]*([^[:space:]&|;<>]+)'
255
- # 0.21.2 helix-022 #4: cp/mv detection now uses an explicit argv-walk
256
- # (`_extract_cpmv_destination`) instead of regex-with-backtracking so
257
- # every shape is handled — `cp -f src dst`, multi-source `cp a b dst`,
258
- # `cp --no-clobber src dst`, `cp -- src dst`. The walker treats the
259
- # LAST positional as the destination (POSIX cp/mv semantic). The
260
- # sentinel `re_cpmv` regex below is retained ONLY as a cheap pre-screen
261
- # — it matches the command name to avoid running the walker on every
262
- # segment, but never returns the destination (the walker does).
263
- local re_cpmv_screen='(^|[[:space:]])(cp|mv)[[:space:]]+'
264
- local re_sed='(^|[[:space:]])sed[[:space:]]+(-[a-zA-Z]*i[a-zA-Z]*[^[:space:]]*)[[:space:]]+[^&|;<>]+[[:space:]]([^[:space:]&|;<>]+)[[:space:]]*$'
265
- local re_dd='(^|[[:space:]])dd[[:space:]]+[^&|;<>]*of=([^[:space:]&|;<>]+)'
266
- # 0.15.0 codex P1 fix: replaced the bash-3.2-broken `(...)*` pattern
267
- # for tee/truncate flag-skipping with a token-walk approach that
268
- # works across BSD bash 3.2 and GNU bash 4+. Walks every token after
269
- # the command, skips flags (single-dash short, double-dash long with
270
- # optional =value), returns the first non-flag token as the target.
271
-
272
- if [[ "$segment" =~ $re_redirect ]]; then
273
- target_token="${BASH_REMATCH[3]}"
274
- detected_form="redirect ${BASH_REMATCH[2]}"
275
- elif [[ "$segment" =~ $re_cpmv_screen ]]; then
276
- # 0.21.2 helix-022 #4: extract destination via argv-walk; LAST
277
- # positional is the destination per POSIX cp/mv semantic.
278
- local _cpmv_cmd="${BASH_REMATCH[2]}"
279
- target_token=$(_extract_cpmv_destination "$segment")
280
- detected_form="$_cpmv_cmd"
281
- if [[ -z "$target_token" ]]; then
282
- # No positional destination found — segment isn't actually a
283
- # valid cp/mv invocation. Fall through.
284
- :
285
- fi
286
- elif [[ "$segment" =~ $re_sed ]]; then
287
- target_token="${BASH_REMATCH[3]}"
288
- detected_form="sed -i"
289
- elif [[ "$segment" =~ $re_dd ]]; then
290
- target_token="${BASH_REMATCH[2]}"
291
- detected_form="dd of="
292
- else
293
- # tee / truncate / install / ln — token-walk for cross-bash safety.
294
- # Read tokens, find the command, then return the first non-flag arg.
295
- local prev_word="" found_cmd=""
296
- local _seg_for_walk="$segment"
297
- # Strip leading whitespace.
298
- _seg_for_walk="${_seg_for_walk#"${_seg_for_walk%%[![:space:]]*}"}"
299
- # shellcheck disable=SC2086
300
- set -- $_seg_for_walk
301
- while [ "$#" -gt 0 ]; do
302
- local tok="$1"
303
- shift
304
- if [[ -z "$found_cmd" ]]; then
305
- case "$tok" in
306
- tee|truncate|install|ln)
307
- found_cmd="$tok"
308
- ;;
309
- esac
310
- prev_word="$tok"
311
- continue
312
- fi
313
- # We're inside the command's argv. Skip flags.
314
- case "$tok" in
315
- --) continue ;;
316
- --*=*) continue ;;
317
- --*)
318
- # Long flag — may take a value as the NEXT token (we don't
319
- # know which long options take values). For safety, skip
320
- # only known no-value long flags; otherwise consume the
321
- # next token too if it looks like a value.
322
- case "$tok" in
323
- --append|--ignore-interrupts|--no-clobber|--force|--no-target-directory|--symbolic|--no-dereference|--reference=*) continue ;;
324
- *) shift 2>/dev/null || true; continue ;;
325
- esac
326
- ;;
327
- -*)
328
- # Short flag cluster. Skip. truncate -s SIZE — `-s` is a flag,
329
- # SIZE is its arg. We're conservative: skip the next token if
330
- # the flag cluster's last char is one of the size-bearing
331
- # flags (truncate -s, install -m, ln -t).
332
- case "$tok" in
333
- -s*|-m*|-o*|-g*|-t*) shift 2>/dev/null || true ;;
334
- esac
335
- continue
336
- ;;
337
- *)
338
- # First non-flag token — this is the target (or, for cp/mv-
339
- # like commands, the first source; the cpmv detector above
340
- # handles those separately). We treat ALL non-flag args as
341
- # potential targets and check each — that catches
342
- # `tee a b c` where any of a/b/c could be a protected file.
343
- target_token="$tok"
344
- detected_form="$found_cmd"
345
- # Check this token immediately; if not protected, keep
346
- # walking — there may be more positional args.
347
- local _t
348
- _t=$(_normalize_target "$target_token")
349
- # 0.16.0 codex P2-3: outside-REA_ROOT sentinel handling (logical).
350
- if [[ "$_t" == __rea_outside_root__:* ]]; then
351
- local resolved="${_t#__rea_outside_root__:}"
352
- {
353
- printf 'PROTECTED PATH (bash): path traversal escapes project root\n'
354
- printf ' Logical: %s\n Resolved: %s\n' "$target_token" "$resolved"
355
- } >&2
356
- exit 2
357
- fi
358
- # 0.21.2 helix-022 #5: shell expansion in target — refuse.
359
- if [[ "$_t" == __rea_unresolved_expansion__:* ]]; then
360
- local raw="${_t#__rea_unresolved_expansion__:}"
361
- {
362
- printf 'PROTECTED PATH (bash): unresolved shell expansion in target\n'
363
- printf ' Token: %s\n Segment: %s\n' "$raw" "$segment"
364
- printf ' Rule: $-substitution and `command-substitution` in redirect\n'
365
- printf ' targets are refused at static-analysis time. Resolve\n'
366
- printf ' the variable to a literal path before the redirect.\n'
367
- } >&2
368
- exit 2
369
- fi
370
- # 0.20.1 helix-021 #1: resolve intermediate symlinks via
371
- # `cd -P / pwd -P` parent-canonicalization (Write-tier parity).
372
- # `ln -s ../ .husky/pre-push.d/linkdir; printf x > .husky/pre-push.d/linkdir/pre-push`
373
- # had a logical form of `.husky/pre-push.d/linkdir/pre-push`
374
- # that didn't match any protected pattern; the resolved form
375
- # is `.husky/pre-push` which DOES match. Refuse on either.
376
- local _t_resolved
377
- _t_resolved=$(rea_resolved_relative_form "$target_token")
378
- if [[ "$_t_resolved" == __rea_outside_root__:* ]]; then
379
- local resolved="${_t_resolved#__rea_outside_root__:}"
380
- {
381
- printf 'PROTECTED PATH (bash): symlink resolves outside project root\n'
382
- printf ' Logical: %s\n Resolved: %s\n' "$target_token" "$resolved"
383
- } >&2
384
- exit 2
385
- fi
386
- if rea_path_is_protected "$_t" \
387
- || ([[ -n "$_t_resolved" ]] && rea_path_is_protected "$_t_resolved"); then
388
- local matched=""
389
- local pattern_lc
390
- local hit_form="$_t"
391
- if [[ -n "$_t_resolved" ]] && rea_path_is_protected "$_t_resolved" \
392
- && ! rea_path_is_protected "$_t"; then
393
- hit_form="$_t_resolved"
394
- fi
395
- for pattern in "${REA_PROTECTED_PATTERNS[@]}"; do
396
- pattern_lc=$(printf '%s' "$pattern" | tr '[:upper:]' '[:lower:]')
397
- if [[ "$hit_form" == "$pattern_lc" ]]; then matched="$pattern"; break; fi
398
- if [[ "$pattern_lc" == */ && "$hit_form" == "$pattern_lc"* ]]; then matched="$pattern"; break; fi
399
- done
400
- _refuse "$matched" "$hit_form" "$segment"
401
- fi
402
- # Reset target_token so the post-loop check doesn't double-check.
403
- target_token=""
404
- ;;
405
- esac
406
- done
407
- fi
408
-
409
- # 0.21.2 helix-022 #2: when no shell-redirect target was found,
410
- # interpreter-scanner pass before returning. `node -e
411
- # "fs.writeFileSync('.rea/HALT','x')"` has NO redirect or cp/mv
412
- # token but still writes a protected path. Run the scanner on the
413
- # raw segment; refuse if any extracted target is protected.
414
- if [[ -z "$target_token" ]]; then
415
- _interpreter_scan_and_refuse_protected "$segment"
416
- return 0
417
- fi
418
-
419
- local target
420
- target=$(_normalize_target "$target_token")
421
- # 0.16.0 codex P2-3 fix: outside-REA_ROOT sentinel from _normalize_target.
422
- if [[ "$target" == __rea_outside_root__:* ]]; then
423
- local resolved="${target#__rea_outside_root__:}"
424
- {
425
- printf 'PROTECTED PATH (bash): path traversal escapes project root\n'
426
- printf '\n'
427
- printf ' Logical: %s\n' "$target_token"
428
- printf ' Resolved: %s\n' "$resolved"
429
- printf ' Segment: %s\n' "$segment"
430
- printf '\n'
431
- printf ' Rule: bash redirects whose target resolves outside REA_ROOT\n'
432
- printf ' are refused. Use a project-relative path without `..`\n'
433
- printf ' segments.\n'
434
- } >&2
435
- exit 2
436
- fi
437
- # 0.21.2 helix-022 #5: shell expansion in target — refuse.
438
- if [[ "$target" == __rea_unresolved_expansion__:* ]]; then
439
- local raw="${target#__rea_unresolved_expansion__:}"
440
- {
441
- printf 'PROTECTED PATH (bash): unresolved shell expansion in target\n'
442
- printf '\n'
443
- printf ' Token: %s\n' "$raw"
444
- printf ' Segment: %s\n' "$segment"
445
- printf '\n'
446
- printf ' Rule: $-substitution and `command-substitution` in redirect\n'
447
- printf ' targets are refused at static-analysis time. Resolve\n'
448
- printf ' the variable to a literal path before the redirect.\n'
449
- } >&2
450
- exit 2
451
- fi
452
- # 0.20.1 helix-021 #1: resolve intermediate symlinks. See parallel
453
- # block in the multi-target loop above for the rationale.
454
- local target_resolved
455
- target_resolved=$(rea_resolved_relative_form "$target_token")
456
- if [[ "$target_resolved" == __rea_outside_root__:* ]]; then
457
- local resolved="${target_resolved#__rea_outside_root__:}"
458
- {
459
- printf 'PROTECTED PATH (bash): symlink resolves outside project root\n'
460
- printf '\n'
461
- printf ' Logical: %s\n' "$target_token"
462
- printf ' Resolved: %s\n' "$resolved"
463
- printf ' Segment: %s\n' "$segment"
464
- } >&2
465
- exit 2
466
- fi
467
- if rea_path_is_protected "$target" \
468
- || ([[ -n "$target_resolved" ]] && rea_path_is_protected "$target_resolved"); then
469
- # Find the matching pattern for the error message. Both `target`
470
- # and `pattern` lowercased to match `_normalize_target`'s case-
471
- # insensitive output (helix-015 P1 fix).
472
- local matched="" pattern_lc
473
- local hit_form="$target"
474
- if [[ -n "$target_resolved" ]] && rea_path_is_protected "$target_resolved" \
475
- && ! rea_path_is_protected "$target"; then
476
- hit_form="$target_resolved"
225
+ exit 0
226
+ ;;
227
+ 2)
228
+ # Block path — the CLI has already emitted the operator-facing
229
+ # reason on stderr. We additionally verify the JSON shape so a
230
+ # forged `/bin/true` (which would never reach here, but be defensive)
231
+ # cannot bypass.
232
+ if [ "$verdict_check_status" -ne 0 ]; then
233
+ # Malformed stdout under exit 2 is unusual but harmless — the
234
+ # block path is still honored.
235
+ exit 2
477
236
  fi
478
- for pattern in "${REA_PROTECTED_PATTERNS[@]}"; do
479
- pattern_lc=$(printf '%s' "$pattern" | tr '[:upper:]' '[:lower:]')
480
- if [[ "$hit_form" == "$pattern_lc" ]]; then matched="$pattern"; break; fi
481
- if [[ "$pattern_lc" == */ && "$hit_form" == "$pattern_lc"* ]]; then matched="$pattern"; break; fi
482
- done
483
- _refuse "$matched" "$hit_form" "$segment"
484
- fi
485
-
486
- # 0.21.2 helix-022 #2: interpreter-scanner pass even when a
487
- # shell-redirect target was already found. A single segment can
488
- # have BOTH a shell redirect AND a node -e fs.write*; both must
489
- # be checked.
490
- _interpreter_scan_and_refuse_protected "$segment"
491
-
492
- return 0
493
- }
494
-
495
- # 0.21.2 helix-022 #2: interpreter-scanner pass. Catches
496
- # `node -e "fs.writeFileSync('.rea/HALT','x')"` and equivalents in
497
- # python/ruby/perl. The blocked-paths sibling has had this since
498
- # 0.16.3 F3; this is parity. Each extracted target runs through
499
- # `_normalize_target` + `rea_path_is_protected` so the existing
500
- # logical-form + symlink-resolved-form checks both apply.
501
- _interpreter_scan_and_refuse_protected() {
502
- local segment="$1"
503
- local _interp_targets
504
- _interp_targets=$(rea_interpreter_write_targets "$segment")
505
- [[ -z "$_interp_targets" ]] && return 0
506
- while IFS= read -r _interp_t; do
507
- [[ -z "$_interp_t" ]] && continue
508
- local _norm
509
- _norm=$(_normalize_target "$_interp_t")
510
- if [[ "$_norm" == __rea_outside_root__:* || "$_norm" == __rea_unresolved_expansion__:* ]]; then
511
- continue
237
+ if [ "$verdict_check" != "ok:block" ]; then
238
+ printf 'rea: scan-bash exit 2 but verdict says %s. Refusing on uncertainty.\n' "$verdict_check" >&2
239
+ exit 2
512
240
  fi
513
- local _norm_resolved
514
- _norm_resolved=$(rea_resolved_relative_form "$_interp_t")
515
- if rea_path_is_protected "$_norm" \
516
- || ([[ -n "$_norm_resolved" && "$_norm_resolved" != __rea_outside_root__:* ]] \
517
- && rea_path_is_protected "$_norm_resolved"); then
518
- local matched_interp="" pattern_lc
519
- local hit_form="$_norm"
520
- if [[ -n "$_norm_resolved" ]] && rea_path_is_protected "$_norm_resolved" \
521
- && ! rea_path_is_protected "$_norm"; then
522
- hit_form="$_norm_resolved"
523
- fi
524
- for pattern in "${REA_PROTECTED_PATTERNS[@]}"; do
525
- pattern_lc=$(printf '%s' "$pattern" | tr '[:upper:]' '[:lower:]')
526
- if [[ "$hit_form" == "$pattern_lc" ]]; then matched_interp="$pattern"; break; fi
527
- if [[ "$pattern_lc" == */ && "$hit_form" == "$pattern_lc"* ]]; then matched_interp="$pattern"; break; fi
528
- done
529
- _refuse "$matched_interp" "$hit_form" "$segment"
241
+ exit 2
242
+ ;;
243
+ *)
244
+ # Unexpected exit code — treat as block on uncertainty. The CLI
245
+ # writes its own diagnostic; we add an explicit refusal.
246
+ printf 'rea: scan-bash exited %d (expected 0/2). Refusing on uncertainty.\n' "$status" >&2
247
+ if [ -n "$verdict" ]; then
248
+ printf 'rea: scan-bash stdout was: %s\n' "$verdict" >&2
530
249
  fi
531
- done <<<"$_interp_targets"
532
- }
533
-
534
- for_each_segment "$CMD" _check_segment
535
-
536
- exit 0
250
+ exit 2
251
+ ;;
252
+ esac