@bookedsolid/rea 0.35.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,386 @@
1
+ #!/usr/bin/env node
2
+ // G — Static lint for `awk '...'` blocks embedded in bash hooks.
3
+ //
4
+ // 0.36.0 charter item 3 / 0.34.0 round-4 + round-6 regression class.
5
+ //
6
+ // # The class
7
+ //
8
+ // Bash hooks frequently embed an awk script inside a bash-single-quoted
9
+ // argument:
10
+ //
11
+ // awk '
12
+ // # awk comment
13
+ // { print $1 }
14
+ // '
15
+ //
16
+ // Bash single-quoted strings have one rule: NO escape sequences inside.
17
+ // The string ends at the next unescaped `'`. If any character inside the
18
+ // awk body is a literal `'`, bash terminates the string THERE — the rest
19
+ // of the awk body is then re-parsed as bash, almost always producing a
20
+ // `syntax error near unexpected token` or worse, silently shelling out
21
+ // to whatever follows.
22
+ //
23
+ // The 0.34.0 marathon hit this twice — once at round-4, once at round-6.
24
+ // The round-6 instance locked the entire repo (every Bash refused at
25
+ // hook parse time because every hook sourced `_lib/cmd-segments.sh`,
26
+ // which crashed at parse). Repair required out-of-session `git apply`.
27
+ //
28
+ // # The lint
29
+ //
30
+ // For each shell script in `hooks/`, `.claude/hooks/` (dogfood mirror),
31
+ // their `_lib/` subdirectories, `.husky/` and `templates/`, find every
32
+ // multi-line `awk '` block opening (the shape the marathon class triggers
33
+ // in), scan inward to the closing `'`, and flag any line inside that:
34
+ //
35
+ // - Is awk comment OR awk code (both are checked), and
36
+ // - Contains an apostrophe-in-word shape such as `can't` (a `'` between ASCII letters).
37
+ //
38
+ // We deliberately do NOT lint inline awk one-liners (`awk '{ print $1 }'`
39
+ // on one line) because those have no comment lines by construction —
40
+ // the bug class only manifests in multi-line awk bodies.
41
+ //
42
+ // # Wired into `pnpm lint`
43
+ //
44
+ // `package.json#scripts.lint` chains `lint:awk-quotes` before eslint, in
45
+ // the same posture as `lint:regex`. A failure here means a `'` ended up
46
+ // in an awk comment in a shipped hook body; the diff that introduced it
47
+ // would have parse-failed the hook at runtime (the way 0.34.0 round-6
48
+ // did). CI catches it before it ships.
49
+ //
50
+ // Mirrors-coverage rationale: `.claude/hooks/` is rea's own dogfood
51
+ // mirror. `tools/check-dogfood-drift.mjs` already enforces byte-equality
52
+ // between `hooks/*.sh` and `.claude/hooks/*.sh`, but this lint runs
53
+ // BEFORE that gate during a typical edit cycle, and a drifted mirror
54
+ // could still ship if the drift gate is bypassed. Lint both for
55
+ // defense-in-depth.
56
+
57
+ import { readdirSync, readFileSync, existsSync } from 'node:fs';
58
+ import { fileURLToPath } from 'node:url';
59
+ import path from 'node:path';
60
+
// Directory that contains this script, derived from the module URL.
const here = path.dirname(fileURLToPath(import.meta.url));
// Repository root: the parent of the directory this script lives in.
const repoRoot = path.resolve(here, '..');

/**
 * Absolute directories whose shell scripts are linted (each one is listed
 * non-recursively, which is why the `_lib` subdirectories appear as their
 * own entries).
 *
 * 0.36.0 codex round-5 P2 #1: coverage originally stopped at `hooks/` and
 * `.claude/hooks/`; `.husky/` and `templates/` were added so that a
 * bare-apostrophe regression in those shipped scripts is caught by the same
 * gate. Directories that do not exist are tolerated by `listShellFiles`.
 *
 * The array is frozen because it is a shared module-level constant that is
 * only ever iterated and measured.
 */
const SCAN_DIRS = Object.freeze(
  [
    ['hooks'],
    ['hooks', '_lib'],
    ['.claude', 'hooks'],
    ['.claude', 'hooks', '_lib'],
    // 0.36.0 codex round-5 P2 #1 additions.
    ['.husky'],
    ['templates'],
  ].map((segments) => path.join(repoRoot, ...segments)),
);
82
+
// Matches a first line such as `#!/bin/sh` or `#!/usr/bin/env bash`.
// Unanchored `^` (no `m` flag) plus `.` (no newline) confine it to line 1.
const SHELL_SHEBANG = /^#!.*\b(sh|bash|zsh|dash|ksh)\b/;

/**
 * List the shell-script files directly inside `dir` (non-recursive).
 *
 * A regular file qualifies when its name ends in `.sh`, or when it has a
 * different/no extension and its first line is a shell shebang (this is
 * what picks up extensionless husky hooks such as `.husky/pre-push`).
 *
 * `.patch` files are always skipped: they are unified diffs (hunk-prefixed
 * lines), not raw shell, and `scanFile` only understands shell syntax. The
 * script a patch ships to is linted directly once applied. Previously this
 * skip was only described in a comment and happened implicitly.
 *
 * 0.36.0 codex round-5 P2 #1: an earlier version required the `.sh`
 * extension and therefore skipped every extensionless `.husky/` file.
 *
 * @param {string} dir Directory to list.
 * @returns {string[]} Paths (`dir` joined with the entry name) of the
 *   qualifying files; an empty array when `dir` does not exist.
 */
function listShellFiles(dir) {
  if (!existsSync(dir)) return [];

  const out = [];
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    if (!entry.isFile()) continue;
    if (entry.name.endsWith('.patch')) continue;

    const full = path.join(dir, entry.name);
    if (entry.name.endsWith('.sh')) {
      out.push(full);
      continue;
    }

    // No `.sh` suffix: decide by shebang. Only the first 64 characters are
    // inspected.
    let head;
    try {
      head = readFileSync(full, 'utf8').slice(0, 64);
    } catch {
      // Deliberate best-effort: an unreadable file is simply not linted.
      continue;
    }
    if (SHELL_SHEBANG.test(head)) out.push(full);
  }
  return out;
}
124
+
// Bash idioms that bridge a single-quoted string without ending it for good:
//   '"'"'  close-quote, double-quoted apostrophe, reopen-quote
//   '\''   close-quote, backslash-escaped apostrophe, reopen-quote
//   ''     close + reopen, injects no byte
// Applied in this order (longest first): stripping `''` first would tear the
// tail off `'\''` and leave a stray `'` behind.
const BENIGN_QUOTE_ESCAPES = [/'"'"'/g, /'\\''/g, /''/g];

// A bash double-quoted span, honouring backslash escapes such as `\"`.
const DOUBLE_QUOTED_SPAN = /"(?:[^"\\]|\\[\s\S])*"/g;

// The historical bug shape: `can't`, `isn't`, ... — a `'` with an ASCII
// letter on each side. Not global, so `exec` is stateless.
const APOSTROPHE_IN_WORD = /\b[A-Za-z][A-Za-z]*'[A-Za-z]/;

// Same-length filler, so indices in a masked string match the raw line.
const blankOut = (span) => ' '.repeat(span.length);

// Remove the benign quote idioms entirely (used for awk-body lines).
function removeBenignQuoteEscapes(text) {
  return BENIGN_QUOTE_ESCAPES.reduce((acc, re) => acc.replace(re, ''), text);
}

// Blank the benign quote idioms while preserving every character offset.
function maskBenignQuoteEscapes(text) {
  return BENIGN_QUOTE_ESCAPES.reduce((acc, re) => acc.replace(re, blankOut), text);
}

// Build one finding record; `subject` is the leading phrase of the reason.
function apostropheFinding({ file, lineNo, content, awkStartLine, subject, shape }) {
  return {
    file,
    line: lineNo,
    content,
    reason:
      `${subject} contains an apostrophe-in-word shape (${shape}). ` +
      `Bash terminates the \`awk '...'\` single-quoted argument at the ` +
      `embedded \`'\`, splicing the rest of the body into bash context; ` +
      `the hook parse-fails at runtime (0.34.0 round-4 + round-6 class). ` +
      `Rewrite without the apostrophe (e.g. \`cannot\` for ` +
      `\`can't\`) or escape as \`'\\''\`.`,
    awkStartLine,
  };
}

/**
 * Scan one shell file for multi-line `awk '...'` bodies and report every
 * apostrophe-in-word shape (`can't`, `isn't`, ...) found inside one.
 *
 * Opener detection (lines outside a block):
 *   1. Shell-comment lines (leading `#`) are skipped, so prose such as
 *      `# Example: awk '...'` never opens a block.
 *   2. The line must contain the `awk` keyword.
 *   3. Benign quote idioms and bash double-quoted spans are masked, then the
 *      remaining `'` are counted. An odd count means a single-quoted argument
 *      is still open at end of line (multi-line body); an even count means a
 *      one-liner or no quote at all.
 *   4. The opening quote is taken to be the LAST `'` that survives masking
 *      (earlier ones belong to things like `printf '%s'` or `-F':'`). The
 *      text after it is awk body and is checked too. That check deliberately
 *      uses the line with double-quoted spans left intact, because inside the
 *      awk argument bash gives `"` no meaning: `awk 'BEGIN { print "can't"`
 *      is the bug. (The previous implementation searched the text after the
 *      last `'` for another `'`, which can never match, so this case was
 *      silently missed.)
 *
 * Inside a block, a line with no `'` left after removing the benign idioms is
 * ordinary body text. Any other line either matches the apostrophe-in-word
 * shape (finding) or is assumed to be the legitimate closing `'` followed by
 * bash continuation; both outcomes leave the block. After a finding the block
 * is left because bash has already stopped single-quoting at that point.
 *
 * Known limitation: an opener line whose awk body holds a bare apostrophe
 * outside any `"..."` span (e.g. `awk '{ x = 1 } # can't`) has an even quote
 * count and is not recognised as an opener.
 *
 * @param {string} file Path of the shell script to read (as UTF-8).
 * @returns {{file: string, line: number, content: string, reason: string,
 *   awkStartLine: number}[]} Findings; `line` and `awkStartLine` are 1-indexed.
 */
function scanFile(file) {
  const lines = readFileSync(file, 'utf8').split('\n');
  const findings = [];

  let inAwkBlock = false;
  let awkStartLine = -1;

  for (let i = 0; i < lines.length; i += 1) {
    const line = lines[i];
    const lineNo = i + 1; // 1-indexed for human-readable errors

    if (!inAwkBlock) {
      if (line.trimStart().startsWith('#')) continue;
      if (!/\bawk\b/.test(line)) continue;

      const escapesMasked = maskBenignQuoteEscapes(line);
      const fullyMasked = escapesMasked.replace(DOUBLE_QUOTED_SPAN, blankOut);
      const quoteCount = fullyMasked.split("'").length - 1;
      if (quoteCount % 2 === 0) continue;

      // Masking preserved offsets, so this index is valid in `escapesMasked`.
      const openerIdx = fullyMasked.lastIndexOf("'");
      const bodyOnOpenerLine = escapesMasked.slice(openerIdx + 1);
      const hit = APOSTROPHE_IN_WORD.exec(bodyOnOpenerLine);

      if (hit === null) {
        inAwkBlock = true;
        awkStartLine = lineNo;
        continue;
      }

      // Bare apostrophe already on the opener line: report it and do not
      // enter block mode.
      const kind = bodyOnOpenerLine.trimStart().startsWith('#') ? 'comment' : 'code';
      findings.push(
        apostropheFinding({
          file,
          lineNo,
          content: line,
          awkStartLine: lineNo,
          subject: `awk-body ${kind} content on the OPENER line`,
          shape: hit[0],
        }),
      );
      continue;
    }

    // Inside an awk block.
    const sanitized = removeBenignQuoteEscapes(line);
    if (!sanitized.includes("'")) continue; // no close, no bug

    const hit = APOSTROPHE_IN_WORD.exec(sanitized);
    if (hit !== null) {
      const kind = line.trimStart().startsWith('#') ? 'comment' : 'code';
      findings.push(
        apostropheFinding({
          file,
          lineNo,
          content: line,
          awkStartLine,
          subject: `awk-body ${kind} line`,
          shape: hit[0],
        }),
      );
    }

    // Either a finding or the legitimate closing quote: leave the block.
    inAwkBlock = false;
    awkStartLine = -1;
  }

  return findings;
}
360
+
361
+ const allFindings = [];
362
+ for (const dir of SCAN_DIRS) {
363
+ for (const file of listShellFiles(dir)) {
364
+ allFindings.push(...scanFile(file));
365
+ }
366
+ }
367
+
368
+ if (allFindings.length === 0) {
369
+ // Quiet success — matches the posture of lint:regex.
370
+ process.exit(0);
371
+ }
372
+
373
+ console.error(
374
+ '[lint:awk-quotes] FAIL — bare single-quote in awk comment line ' +
375
+ '(0.34.0 round-4 + round-6 regression class):\n',
376
+ );
377
+ for (const f of allFindings) {
378
+ const rel = path.relative(repoRoot, f.file);
379
+ console.error(` ${rel}:${f.line} (awk block opened at line ${f.awkStartLine})`);
380
+ console.error(` ${f.content.trim()}`);
381
+ console.error(` → ${f.reason}\n`);
382
+ }
383
+ console.error(
384
+ `[lint:awk-quotes] ${allFindings.length} finding(s) across ${SCAN_DIRS.length} scan path(s).`,
385
+ );
386
+ process.exit(1);