@bookedsolid/rea 0.33.0 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,404 @@
1
+ /**
2
+ * Node-binary port of `hooks/secret-scanner.sh`.
3
+ *
4
+ * 0.34.0 Phase 2 port #3 (tier-2 medium-complexity hooks with enforcer
5
+ * logic).
6
+ *
7
+ * Detects credential patterns in content about to be written via the
8
+ * Write/Edit/MultiEdit/NotebookEdit Claude Code tools and blocks (exit
9
+ * 2) when a HIGH-severity pattern matches a non-placeholder substring.
10
+ * Last-resort pre-write guard — gitleaks (pre-commit) is the primary
11
+ * gate; this hook stops the obvious credential-in-source-file shapes
12
+ * before they ever touch disk.
13
+ *
14
+ * Behavioral contract preserves the bash hook byte-for-byte:
15
+ *
16
+ * 1. HALT check → exit 2 with shared banner.
17
+ * 2. Read stdin via `parseWriteHookPayload`. Extracts `file_path` /
18
+ * `notebook_path` and the canonical content priority:
19
+ * content > new_string > edits[].new_string joined > new_source.
20
+ * Empty content → exit 0.
21
+ * 3. Suffix-based file_path exclusion: `*.env.example` / `*.env.sample`
22
+ * pass through silently. Test files are NOT excluded — the
23
+ * placeholder filter handles legitimate test fixtures.
24
+ * 4. Apply the bash hook's awk line filter:
25
+ * - Strip lines whose trimmed form starts with `#` (shell comment).
26
+ * - Strip lines where `process.env.VAR` is the RHS of an
27
+ * assignment (`= process.env.SOMETHING`).
28
+ * - Strip lines mentioning `os.environ[`.
29
+ * Anything left is the corpus the patterns run against.
30
+ * 5. Run each of the 17 patterns (12 HIGH + 5 MEDIUM) against the
31
+ * filtered corpus. For each match:
32
+ * - Apply `isPlaceholder()` filter (matches the bash hook's
33
+ * `is_placeholder` shell function — placeholder forms like
34
+ * `<your_key>`, `your_api_key`, `example_token`,
35
+ * `aaaaaaa...`, etc. are dropped).
36
+ * - Truncate the matching substring at 60 chars for display.
37
+ * - Cap collected matches at 5 per pattern.
38
+ * 6. If ANY HIGH match remains → exit 2 with the "SECRET DETECTED"
39
+ * banner. Else if MEDIUM matches → emit advisory + exit 0. No
40
+ * matches → exit 0.
41
+ *
42
+ * MultiEdit handling: `parseWriteHookPayload` joins every `edits[i].
43
+ * new_string` with `\n`. This intentionally folds the fragments into
44
+ * one corpus for scanning; the joined newline boundary preserves
45
+ * line-anchored patterns. The bash counterpart used the same join
46
+ * shape via `extract_write_content` in `_lib/payload-read.sh`.
47
+ *
48
+ * 0.14.0 hardening — type-guard against malformed payloads (non-string
49
+ * `new_string`, non-array `edits`, etc.) lives in the shared
50
+ * `parseWriteHookPayload`. Defensive coercion means a crafted
51
+ * `{"edits":42}` payload doesn't throw at the boundary; it's treated as
52
+ * missing.
53
+ */
54
+ import path from 'node:path';
55
+ import { checkHalt, formatHaltBanner } from '../_lib/halt-check.js';
56
+ import { parseWriteHookPayload, MalformedPayloadError, TypePayloadError, readStdinWithTimeout, } from '../_lib/payload.js';
/**
 * Credential-pattern catalog for the write-tier secret scanner.
 *
 * Each entry is `{ severity, label, regex }`. Patterns are evaluated, and
 * their matches reported, in the order listed here, which the bash parity
 * test relies on.
 *
 * This catalog is intentionally NOT shared with the `SECRET_PATTERNS` of
 * `src/gateway/middleware/redact.ts`, even though the two overlap. The
 * bash hook carried its own extended set (12 HIGH + 5 MEDIUM, including
 * Stripe live/test keys, Supabase JWTs and database URLs). Merging the
 * two catalogs is a deliberate non-goal of 0.34.0: the write-tier hook's
 * coverage stays stable until both sites are Node-binary.
 */
const SECRET_PATTERNS = [
    // ── HIGH severity (blocking) ─────────────────────────────────────
    ['HIGH', 'AWS Access Key ID', /AKIA[0-9A-Z]{16}/g],
    ['HIGH', 'AWS Secret Access Key', /[Aa][Ww][Ss]_SECRET_ACCESS_KEY\s*=\s*[A-Za-z0-9/+]{40}/g],
    ['HIGH', 'Private key block', /-----BEGIN (RSA|EC|OPENSSH|PGP) PRIVATE KEY-----/g],
    ['HIGH', 'Anthropic API key', /sk-ant-api03-[A-Za-z0-9_-]{93}/g],
    ['HIGH', 'Anthropic OAuth token', /sk-ant-oat01-[A-Za-z0-9_-]{86}/g],
    ['HIGH', 'GitHub classic Personal Access Token', /gh[puors]_[A-Za-z0-9]{36}/g],
    ['HIGH', 'GitHub fine-grained Personal Access Token', /github_pat_[A-Za-z0-9_]{82}/g],
    ['HIGH', 'Stripe live secret/restricted key', /(sk|rk)_live_[A-Za-z0-9]{24,}/g],
    ['HIGH', 'Stripe webhook signing secret', /whsec_[A-Za-z0-9+/]{40,}/g],
    ['HIGH', 'Generic secret assignment (double-quoted)', /(SECRET|PASSWORD|PRIVATE_KEY|API_SECRET)\s*=\s*"[^"]{20,}"/g],
    ['HIGH', 'Generic secret assignment (single-quoted)', /(SECRET|PASSWORD|PRIVATE_KEY|API_SECRET)\s*=\s*'[^']{20,}'/g],
    ['HIGH', 'Supabase service role key (JWT)', /SUPABASE_SERVICE_ROLE_KEY\s*=\s*["']?eyJ[A-Za-z0-9._-]{50,}/g],
    // ── MEDIUM severity (advisory) ───────────────────────────────────
    // The `m` flag makes `^` anchor at every line start of the joined
    // corpus, giving this pattern per-line semantics.
    ['MEDIUM', '.env credential assignment', /^(ANTHROPIC_API_KEY|SUPABASE_SERVICE_ROLE_KEY|DATABASE_URL|STRIPE_SECRET)\s*=\s*\S+/gm],
    ['MEDIUM', 'Stripe test API key (real credential, test env)', /(sk|pk|rk)_test_[A-Za-z0-9]{24,}/g],
    ['MEDIUM', 'Stripe live publishable key', /pk_live_[A-Za-z0-9]{24,}/g],
    ['MEDIUM', 'Hardcoded DB connection string with password', /postgresql:\/\/[^:]+:[^@]{8,}@/g],
    ['MEDIUM', 'Supabase anon key in non-client context', /SUPABASE_ANON_KEY\s*=\s*["']?eyJ[A-Za-z0-9._-]{50,}/g],
].map(([severity, label, regex]) => ({ severity, label, regex }));
/**
 * Longest match excerpt shown in a banner, in characters. Anything longer
 * is cut at this length and suffixed with `...`, the same shape as the
 * bash hook's `${MATCH:0:60}...`.
 */
const MAX_SNIPPET_LEN = 60;
/**
 * Cap on the number of accepted matches recorded for a single pattern,
 * mirroring the bash hook's `head -5` on `MATCHES`. Keeps the banner
 * bounded on pathological input (e.g. a file with 100 AWS keys).
 */
const MAX_MATCHES_PER_PATTERN = 5;
/**
 * Drop the lines that the bash hook's awk preprocessor ignored, keeping
 * everything else verbatim (original indentation included).
 *
 * A line is dropped when, after removing its leading whitespace, it:
 *   - starts with `#`;
 *   - has `process.env.VAR` as the right-hand side of an assignment,
 *     either ending the line (optionally followed by one non-letter
 *     character) or followed by `;`, `,` or `)`;
 *   - mentions `os.environ[`.
 *
 * Surviving lines are re-joined with `\n`, so line-anchored patterns
 * still see line boundaries in the result.
 *
 * @param {string} content Raw content about to be written.
 * @returns {string} The content with ignorable lines removed.
 */
export function filterContent(content) {
    if (content.length === 0) {
        return '';
    }
    // `= process.env.VAR` at end of line, allowing one trailing non-letter.
    const envRhsAtLineEnd = /=\s*process\.env\.[A-Z_]+[^a-zA-Z]?$/;
    // `= process.env.VAR` followed by terminator punctuation.
    const envRhsBeforePunctuation = /=\s*process\.env\.[A-Z_]+\s*[;,)]/;
    // Python-style environment lookup.
    const pythonEnviron = /os\.environ\[/;
    const isIgnorable = (body) => body.startsWith('#')
        || envRhsAtLineEnd.test(body)
        || envRhsBeforePunctuation.test(body)
        || pythonEnviron.test(body);
    return content
        .split('\n')
        .filter((rawLine) => !isIgnorable(rawLine.trimStart()))
        .join('\n');
}
/**
 * Decide whether a matched substring is a recognisable placeholder rather
 * than a real credential (parity with the bash `is_placeholder` helper).
 *
 * The match is lower-cased once, then tested against each placeholder
 * shape; the first shape that fits wins.
 *
 * @param {string} match The substring a secret pattern matched.
 * @returns {boolean} `true` when the match should NOT count as a secret.
 */
export function isPlaceholder(match) {
    const candidate = match.toLowerCase();
    const placeholderShapes = [
        /<[a-z_]+>/,
        /your_key_here/,
        /your_api_key/,
        /your_secret/,
        /placeholder/,
        /changeme/,
        /insert.*here/,
        // Prefix-pair compounds such as `test_key` or `fake_api` (whole match).
        /^(test|fake|mock|demo|example)_(key|token|secret|credential|api)$/,
        // `test_<word>_key` (whole match).
        /^test_[a-z_]+_key$/,
        // One character repeated 8 or more times (whole match).
        /^(.)\1{7,}$/,
    ];
    return placeholderShapes.some((shape) => shape.test(candidate));
}
/**
 * Run every catalog pattern over already-filtered content and collect the
 * matches that survive the placeholder filter.
 *
 * Results come back in catalog order. Each pattern contributes at most
 * `MAX_MATCHES_PER_PATTERN` accepted matches, and each snippet is cut to
 * `MAX_SNIPPET_LEN` characters plus `...` when longer.
 *
 * @param {string} filtered Output of `filterContent`.
 * @returns {{severity: string, label: string, snippet: string}[]}
 *   Accepted matches; empty when nothing matched.
 */
export function scanContent(filtered) {
    const findings = [];
    if (filtered.length === 0) {
        return findings;
    }
    for (const { severity, label, regex } of SECRET_PATTERNS) {
        // A global regex keeps its scan position in `lastIndex`; work on a
        // private copy so the shared catalog entry is never left mid-scan.
        const scanner = new RegExp(regex.source, regex.flags);
        let acceptedForPattern = 0;
        for (let hit = scanner.exec(filtered); hit !== null; hit = scanner.exec(filtered)) {
            const text = hit[0];
            if (text.length === 0) {
                // Defensive: an empty match would never advance on its own.
                scanner.lastIndex += 1;
                continue;
            }
            if (isPlaceholder(text)) {
                continue;
            }
            let snippet = text;
            if (text.length > MAX_SNIPPET_LEN) {
                snippet = text.slice(0, MAX_SNIPPET_LEN) + '...';
            }
            findings.push({ severity, label, snippet });
            acceptedForPattern += 1;
            if (acceptedForPattern >= MAX_MATCHES_PER_PATTERN) {
                break;
            }
        }
    }
    return findings;
}
/**
 * Tell whether a path is exempt from scanning because of its suffix.
 *
 * Only `*.env.example` and `*.env.sample` are exempt: they are
 * documentation files that deliberately contain credential-shaped
 * placeholders. Test files are not exempt; genuine dummy keys in
 * fixtures are handled by the placeholder filter instead.
 *
 * @param {string} filePath Target path from the hook payload (may be empty).
 * @returns {boolean} `true` when the scan should be skipped for this path.
 */
export function isExcludedSuffix(filePath) {
    if (filePath.length === 0) {
        return false;
    }
    const exemptSuffixes = ['.env.example', '.env.sample'];
    return exemptSuffixes.some((suffix) => filePath.endsWith(suffix));
}
/**
 * Compose the blocking "SECRET DETECTED" banner.
 *
 * Names the target by basename (`unknown` when no path was supplied),
 * lists at most the first `MAX_MATCHES_PER_PATTERN` matches, then appends
 * the block reason and the suggested fix.
 *
 * @param {string} filePath Target path from the hook payload (may be empty).
 * @param {{severity: string, label: string, snippet: string}[]} matches
 *   Accepted matches in report order.
 * @returns {string} The complete banner text, newline-terminated.
 */
function buildBlockBanner(filePath, matches) {
    const displayName = filePath.length > 0 ? path.basename(filePath) : 'unknown';
    const rows = matches
        .slice(0, MAX_MATCHES_PER_PATTERN)
        .map((entry) => ` ${entry.severity}: ${entry.label} — '${entry.snippet}'\n`);
    return [
        `SECRET DETECTED: Potential credential in ${displayName}\n`,
        ...rows,
        'Block reason: Writing credentials to disk risks exposure via git history.\n',
        'Fix: Load credentials from environment variables — never hardcode secrets.\n',
    ].join('');
}
/**
 * Compose the non-blocking advisory banner.
 *
 * Names the target by basename (`unknown` when no path was supplied),
 * lists every supplied match, and closes with a false-positive note.
 *
 * @param {string} filePath Target path from the hook payload (may be empty).
 * @param {{severity: string, label: string, snippet: string}[]} matches
 *   Accepted matches in report order.
 * @returns {string} The complete banner text, newline-terminated.
 */
function buildAdvisoryBanner(filePath, matches) {
    const displayName = filePath.length > 0 ? path.basename(filePath) : 'unknown';
    const rows = matches.map((entry) => ` ${entry.severity}: ${entry.label} — '${entry.snippet}'\n`);
    return [
        `SECRET-SCAN WARN: Low-confidence credential pattern in ${displayName} (advisory — not blocking)\n`,
        ...rows,
        'Note: Heuristic match — may be a false positive. If real, load from environment.\n',
    ].join('');
}
322
+ /**
323
+ * Pure executor. Returns `{ exitCode, stderr, matches }`; the CLI
324
+ * wrapper translates them into `process.stderr.write` + `process.exit`.
325
+ */
326
+ export async function runSecretScanner(options = {}) {
327
+ const reaRoot = options.reaRoot ?? process.env['CLAUDE_PROJECT_DIR'] ?? process.cwd();
328
+ let stderr = '';
329
+ const writeStderr = (s) => {
330
+ stderr += s;
331
+ if (options.stderrWrite)
332
+ options.stderrWrite(s);
333
+ };
334
+ // 1. HALT check — fail-closed (exit 2).
335
+ const halt = checkHalt(reaRoot);
336
+ if (halt.halted) {
337
+ writeStderr(formatHaltBanner(halt.reason));
338
+ return { exitCode: 2, stderr, matches: [] };
339
+ }
340
+ // 2. Read + parse stdin via the write-tier payload helper.
341
+ const stdinRaw = options.stdinOverride !== undefined
342
+ ? options.stdinOverride
343
+ : await readStdinWithTimeout(5_000);
344
+ let filePath = '';
345
+ let content = '';
346
+ try {
347
+ const payload = parseWriteHookPayload(stdinRaw);
348
+ filePath = payload.filePath;
349
+ content = payload.content;
350
+ }
351
+ catch (err) {
352
+ if (err instanceof MalformedPayloadError || err instanceof TypePayloadError) {
353
+ // Fail-closed on uncertainty. The bash hook ran with
354
+ // `set -uo pipefail` and its awk/grep would have processed even
355
+ // a malformed payload defensively — but a TypePayloadError
356
+ // signals an outright protocol mismatch we should not silently
357
+ // pass through.
358
+ writeStderr(`secret-scanner: ${err.message} — refusing on uncertainty.\n`);
359
+ return { exitCode: 2, stderr, matches: [] };
360
+ }
361
+ throw err;
362
+ }
363
+ // 3. Empty content → exit 0.
364
+ if (content.length === 0) {
365
+ return { exitCode: 0, stderr, matches: [] };
366
+ }
367
+ // 4. Suffix-based file exclusions.
368
+ if (isExcludedSuffix(filePath)) {
369
+ return { exitCode: 0, stderr, matches: [] };
370
+ }
371
+ // 5. Filter + scan.
372
+ const filtered = filterContent(content);
373
+ if (filtered.length === 0) {
374
+ return { exitCode: 0, stderr, matches: [] };
375
+ }
376
+ const accepted = scanContent(filtered);
377
+ if (accepted.length === 0) {
378
+ return { exitCode: 0, stderr, matches: [] };
379
+ }
380
+ const highCount = accepted.filter((m) => m.severity === 'HIGH').length;
381
+ if (highCount > 0) {
382
+ writeStderr(buildBlockBanner(filePath, accepted));
383
+ return { exitCode: 2, stderr, matches: accepted };
384
+ }
385
+ // Medium-only — advisory.
386
+ writeStderr(buildAdvisoryBanner(filePath, accepted));
387
+ return { exitCode: 0, stderr, matches: accepted };
388
+ }
389
+ /**
390
+ * CLI entry point — `rea hook secret-scanner`.
391
+ */
392
+ export async function runHookSecretScanner(options = {}) {
393
+ const result = await runSecretScanner({
394
+ ...options,
395
+ stderrWrite: (s) => process.stderr.write(s),
396
+ });
397
+ process.exit(result.exitCode);
398
+ }
/**
 * Module internals exposed solely so byte-fidelity / banner-drift tests
 * can inspect the catalog and the display limits.
 */
export const __INTERNAL_FOR_TESTS = { SECRET_PATTERNS, MAX_SNIPPET_LEN, MAX_MATCHES_PER_PATTERN };