@blamejs/exceptd-skills 0.15.48 → 0.15.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,337 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ /**
4
+ * check-codebase-patterns.js — grep-gate enforcement for code-shape bug
5
+ * classes that have recurred across exceptd releases. One run surfaces every
6
+ * class as a single numbered report instead of dying on the first hit.
7
+ *
8
+ * Shipped v1 classes:
9
+ * - process-exit-after-stdout-write : a library-callable function writes to
10
+ * the result channel (process.stdout.write / console.log) and then calls
11
+ * process.exit(), which truncates the buffered write when stdout is
12
+ * piped. Route through `safeExit(EXIT_CODES.X); return;` (lib/exit-codes).
13
+ * This is the stdout-flush-truncation class the validate-cves fix closed by hand.
14
+ * - dynamic-regex : `new RegExp(<non-literal>)` — a ReDoS sink when the
15
+ * pattern derives from operator input. Use a static literal, or anchor +
16
+ * length-cap the input, or mark the site `// allow:dynamic-regex —
17
+ * <reason>` when the source is a trusted bundled schema.
18
+ * - orphan-allow-class : an `// allow:<class>` marker whose class is not in
19
+ * VALID_ALLOW_CLASSES, or is missing the `— <reason>` tail. A typo'd
20
+ * marker suppresses nothing, so the underlying violation would ship
21
+ * unflagged — this meta-guard keeps the marker mechanism trustworthy.
22
+ *
23
+ * Exceptions live at the violation site, not in this file:
24
+ * - file-level, in the first 50 lines: // codebase-patterns:allow-file <class> — <reason>
25
+ * - per-line, on the same line or up to 2 lines above: // allow:<class> — <reason>
26
+ *
27
+ * NOT covered here (owned elsewhere — do not duplicate):
28
+ * - internal phase/version vocabulary in comments -> scripts/check-version-tags.js
29
+ * - process.exit on the top-level CLI dispatch -> tests/safe-exit-grep.test.js
30
+ * - anti-coincidence test assertions -> scripts/check-test-coverage.js
31
+ * - internal-path leaks in operator output -> tests/operator-leak-grep.test.js
32
+ */
33
+
34
+ const fs = require("node:fs");
35
+ const path = require("node:path");
36
+
37
+ const ROOT = path.resolve(__dirname, "..");
38
+
39
+ // The classes that accept an `// allow:<class>` marker. orphan-allow-class is
40
+ // the meta-guard itself and is intentionally NOT a markable class.
41
+ const VALID_ALLOW_CLASSES = Object.freeze({
42
+ "process-exit-after-stdout-write": true,
43
+ "dynamic-regex": true,
44
+ "bidi-codepoint-literal": true,
45
+ });
46
+
47
+ const EXCLUDE_DIRS = new Set([
48
+ "node_modules", "vendor", ".git", ".cache", ".scratch",
49
+ "data", ".test-output", ".keys", "keys", "coverage",
50
+ ]);
51
+
52
+ // ---- file walk -----------------------------------------------------------
53
+
54
/**
 * Express an absolute path relative to ROOT, using "/" as the separator
 * regardless of the host platform.
 *
 * @param {string} abs - absolute filesystem path
 * @returns {string} ROOT-relative path with POSIX separators
 */
function relPath(abs) {
  const relative = path.relative(ROOT, abs);
  const segments = relative.split(path.sep);
  return segments.join("/");
}
57
+
58
/**
 * Recursively collect source files below `dir` into `out`.
 *
 * Collected: regular files ending in .js / .cjs / .mjs, except names ending
 * in `.test.js`. Directories whose name is in EXCLUDE_DIRS are not entered.
 * A directory that cannot be read contributes nothing.
 *
 * @param {string} dir - directory to scan
 * @param {string[]} out - accumulator of absolute paths (mutated)
 * @returns {string[]} the same `out` array
 */
function walk(dir, out) {
  let dirents;
  try {
    dirents = fs.readdirSync(dir, { withFileTypes: true });
  } catch (_e) {
    return out;
  }
  for (const dirent of dirents) {
    const { name } = dirent;
    const full = path.join(dir, name);
    if (dirent.isDirectory()) {
      if (!EXCLUDE_DIRS.has(name)) walk(full, out);
      continue;
    }
    if (!dirent.isFile()) continue;
    const isSource = /\.(c|m)?js$/.test(name);
    const isTest = /\.test\.js$/.test(name);
    if (isSource && !isTest) out.push(full);
  }
  return out;
}
73
+
74
/**
 * List the source files reachable from the given roots.
 *
 * Each root is resolved against ROOT. A directory root is walked
 * recursively; a file root is taken as-is; a root that cannot be stat'ed is
 * skipped.
 *
 * @param {string[]} roots - ROOT-relative files or directories
 * @returns {string[]} sorted ROOT-relative POSIX paths
 */
function filesUnder(roots) {
  const found = [];
  for (const root of roots) {
    const target = path.join(ROOT, root);
    try {
      const stat = fs.statSync(target);
      if (stat.isDirectory()) {
        walk(target, found);
      } else if (stat.isFile()) {
        found.push(target);
      }
    } catch (_e) {
      // missing root — skip
    }
  }
  const relative = found.map((abs) => relPath(abs));
  relative.sort();
  return relative;
}
87
+
88
// Per-process memo of file contents, keyed by the path string as passed in.
const _lineCache = new Map();

/**
 * Read a file as an array of lines (split on LF or CRLF), memoized.
 *
 * @param {string} rel - ROOT-relative path, or an absolute path
 * @returns {string[]} the file's lines; an empty array when the file cannot
 *   be read. Repeated calls with the same argument return the same array.
 */
function readLines(rel) {
  const cached = _lineCache.get(rel);
  if (cached !== undefined) return cached;

  const target = path.isAbsolute(rel) ? rel : path.join(ROOT, rel);
  let lines = [];
  try {
    lines = fs.readFileSync(target, "utf8").split(/\r?\n/);
  } catch (_e) {
    lines = [];
  }
  _lineCache.set(rel, lines);
  return lines;
}
98
+
99
/**
 * Drop everything from the first `//` onward so that text inside a trailing
 * line comment cannot arm a detector.
 *
 * This is a coarse, line-level cut: it is not string-aware, so a `//` inside
 * a string literal (for example a URL) also truncates the line.
 *
 * @param {string} line - one source line
 * @returns {string} the part of the line before the first `//`
 */
function stripLineComment(line) {
  const [beforeComment] = line.split("//", 1);
  return beforeComment;
}
106
+
107
+ // ---- allow-marker engine -------------------------------------------------
108
+
109
/**
 * Report whether a file carries a file-level exemption for `cls`, i.e. a
 * `codebase-patterns:allow-file <cls>` marker somewhere in its first 50 lines.
 *
 * @param {string} rel - path of the file to inspect
 * @param {string} cls - class id; concatenated into a RegExp unescaped
 * @returns {boolean}
 */
function hasFileAllow(rel, cls) {
  const head = readLines(rel).slice(0, 50);
  const marker = new RegExp("codebase-patterns:allow-file\\s+" + cls + "\\b");
  for (const line of head) {
    if (marker.test(line)) return true;
  }
  return false;
}
114
+
115
/**
 * Report whether a `// ... allow:<cls>` marker sits on the given line or on
 * one of the two lines directly above it.
 *
 * @param {string} rel - path of the file to inspect
 * @param {number} lineNo - 1-based line number of the violation
 * @param {string} cls - class id; concatenated into a RegExp unescaped
 * @returns {boolean}
 */
function hasLineAllow(rel, lineNo /* 1-based */, cls) {
  const lines = readLines(rel);
  const marker = new RegExp("//.*\\ballow:" + cls + "\\b");
  for (let offset = 0; offset <= 2; offset++) {
    const candidate = lineNo - offset;
    if (candidate < 1) break;
    if (marker.test(lines[candidate - 1] || "")) return true;
  }
  return false;
}
123
+
124
/**
 * Remove hits that are exempted by a file-level or per-line allow marker.
 *
 * @param {{file: string, line: number}[]} hits - raw detector hits
 * @param {string} cls - class id the markers must name
 * @returns {object[]} the hits that no marker covers, in original order
 */
function filterMarkers(hits, cls) {
  const unexempted = [];
  for (const hit of hits) {
    if (hasFileAllow(hit.file, cls)) continue;
    if (hasLineAllow(hit.file, hit.line, cls)) continue;
    unexempted.push(hit);
  }
  return unexempted;
}
127
+
128
+ // ---- require.main block ranges -------------------------------------------
129
+
130
/**
 * Line ranges (1-based, inclusive) of `if (require.main === module) { ... }`
 * blocks — the dual-mode CLI-entry section where synchronous-print-then-exit
 * is correct. process.exit there is owned by tests/safe-exit-grep.test.js and
 * is not a library-surface concern.
 *
 * Matching is coarse and character-level: braces inside strings or comments
 * are counted too. Two rules keep a guard from claiming code it does not own:
 *   - Only text AFTER the `require.main === module` match is considered on the
 *     guard line, so `} else if (require.main === module) {` is balanced from
 *     its own opening brace rather than from the preceding `}`.
 *   - The opening brace must appear before any `;` and no later than two
 *     lines below the guard. A brace-less guard such as
 *     `if (require.main === module) main();` therefore yields no range
 *     instead of swallowing the next unrelated block in the file.
 * A block that never closes is treated as running to the last line.
 *
 * @param {string[]} lines - the file's lines
 * @returns {Array<[number, number]>} one [start, end] pair per guarded block
 */
function requireMainRanges(lines) {
  const GUARD = /\brequire\.main\s*===\s*module\b/;
  // "Same line or next few": how many lines below the guard may hold the `{`.
  const MAX_BRACE_LOOKAHEAD = 2;
  const ranges = [];

  for (let i = 0; i < lines.length; i++) {
    const guard = GUARD.exec(lines[i]);
    if (guard === null) continue;

    let depth = 0;
    let started = false;
    let terminated = false; // statement ended before any block opened
    let end = lines.length; // default for a block that never closes

    for (let j = i; j < lines.length; j++) {
      if (!started && j - i > MAX_BRACE_LOOKAHEAD) break;
      const text = j === i ? lines[j].slice(guard.index + guard[0].length) : lines[j];
      for (const ch of text) {
        if (ch === "{") {
          depth++;
          started = true;
        } else if (ch === "}") {
          if (started) depth--;
        } else if (ch === ";" && !started) {
          terminated = true;
          break;
        }
      }
      if (terminated) break;
      if (started && depth <= 0) {
        end = j + 1;
        break;
      }
    }

    if (started) ranges.push([i + 1, end]);
  }
  return ranges;
}
154
+
155
/**
 * Test whether a line number falls inside any of the given inclusive ranges.
 *
 * @param {Array<[number, number]>} ranges - [start, end] pairs
 * @param {number} lineNo - line number to test
 * @returns {boolean}
 */
function inRanges(ranges, lineNo) {
  for (const [start, end] of ranges) {
    if (start <= lineNo && lineNo <= end) return true;
  }
  return false;
}
158
+
159
+ // ---- detectors -----------------------------------------------------------
160
+
161
// Heuristic for "this line begins a function body". The backward scan in
// detectProcessExitAfterStdout stops here so that a stdout write belonging to
// an earlier, unrelated function cannot arm a later process.exit().
const FUNCTION_START = /(^|[^.\w])function\b|=>\s*\{?\s*$|^\s*(async\s+)?[A-Za-z_$][\w$]*\s*\([^)]*\)\s*\{/;

/**
 * Find process.exit() calls that follow a result-channel write
 * (process.stdout.write / console.log) in the same function.
 *
 * For each process.exit( line outside a `require.main === module` block, up
 * to 60 preceding lines are scanned upward. The scan arms on the first
 * stdout write and gives up at the first FUNCTION_START line. Trailing `//`
 * comments are stripped before every match.
 *
 * @param {string[]} [files] - paths to scan; defaults to the source files
 *   under lib/ and orchestrator/
 * @returns {{file: string, line: number, content: string}[]} hits not covered
 *   by a process-exit-after-stdout-write allow marker
 */
function detectProcessExitAfterStdout(files) {
  const hits = [];
  const targets = files || filesUnder(["lib", "orchestrator"]);
  for (const rel of targets) {
    const lines = readLines(rel);
    const mainRanges = requireMainRanges(lines);
    for (let idx = 0; idx < lines.length; idx++) {
      const exits = /\bprocess\.exit\s*\(/.test(stripLineComment(lines[idx]));
      if (!exits) continue;
      const lineNo = idx + 1;
      // CLI-entry block: print-then-exit is legitimate there.
      if (inRanges(mainRanges, lineNo)) continue;

      const floor = Math.max(0, idx - 60);
      let armed = false;
      for (let k = idx - 1; k >= floor; k--) {
        const prev = stripLineComment(lines[k]);
        const writesStdout =
          /\bprocess\.stdout\.write\s*\(/.test(prev) || /\bconsole\.log\s*\(/.test(prev);
        if (writesStdout) {
          armed = true;
          break;
        }
        // Reached the top of the enclosing function without seeing a write.
        if (FUNCTION_START.test(prev)) break;
      }
      if (armed) {
        hits.push({ file: rel, line: lineNo, content: lines[idx].trim() });
      }
    }
  }
  return filterMarkers(hits, "process-exit-after-stdout-write");
}
191
+
192
/**
 * Find `new RegExp(...)` calls whose first argument does not start with a
 * string quote or a `/` regex literal.
 *
 * Only the first `new RegExp(` on a line is inspected, after any trailing
 * `//` comment has been stripped, and only the first non-space character of
 * the argument decides the verdict.
 *
 * @param {string[]} [files] - paths to scan; defaults to the source files
 *   under lib/, orchestrator/ and bin/exceptd.js
 * @returns {{file: string, line: number, content: string}[]} hits not covered
 *   by a dynamic-regex allow marker
 */
function detectDynamicRegex(files) {
  const literalOpeners = ['"', "'", "/"];
  const hits = [];
  const targets = files || filesUnder(["lib", "orchestrator", "bin/exceptd.js"]);
  for (const rel of targets) {
    readLines(rel).forEach((raw, idx) => {
      const found = /\bnew RegExp\s*\(\s*(.)/.exec(stripLineComment(raw));
      if (found === null) return;
      // Literal first arg => a quote or a `/` regex literal => static, safe.
      if (literalOpeners.includes(found[1])) return;
      hits.push({ file: rel, line: idx + 1, content: raw.trim() });
    });
  }
  return filterMarkers(hits, "dynamic-regex");
}
208
+
209
// Trojan-Source class (CVE-2021-42574): bidi-override, zero-width, invisible
// and null codepoints typed literally into source. Such a character cannot be
// seen in review and can reorder or hide code, so source should produce it
// programmatically (vendor/blamejs/codepoint-class) or as a \uXXXX escape.
// The table below is numeric only and the regex is assembled from escapes, so
// this detector's own source holds none of the characters it looks for (and
// the file self-skips in the detector regardless).
// Entries are either a single codepoint or an inclusive [low, high] pair.
const _BIDI_LITERAL_RANGES = [
  // bidi overrides + isolates
  [0x202A, 0x202E],
  [0x2066, 0x2069],
  0x200E,
  0x200F,
  0x061C,
  // zero-width / invisible
  0x200B,
  0x200C,
  0x200D,
  0x00AD,
  0x2060,
  0xFEFF,
  // null
  0x0000,
];

/**
 * Build a character-class RegExp matching any codepoint in
 * _BIDI_LITERAL_RANGES.
 *
 * @returns {RegExp} a non-global regex, so `.test()` carries no state
 */
function _bidiLiteralRe() {
  const escapeOf = (codepoint) => "\\u" + codepoint.toString(16).padStart(4, "0");
  let body = "";
  for (const entry of _BIDI_LITERAL_RANGES) {
    body += Array.isArray(entry)
      ? escapeOf(entry[0]) + "-" + escapeOf(entry[1])
      : escapeOf(entry);
  }
  return new RegExp("[" + body + "]"); // allow:dynamic-regex — codepoints from a static literal range table, not operator input
}
229
/**
 * Find lines containing a raw bidi / zero-width / invisible / null codepoint.
 *
 * Whole lines are tested, comments and strings included.
 * scripts/check-codebase-patterns.js is skipped because it holds the range
 * table itself.
 *
 * @param {string[]} [files] - paths to scan; defaults to the source files
 *   under bin/exceptd.js, lib/, orchestrator/ and scripts/
 * @returns {{file: string, line: number, content: string}[]} hits not covered
 *   by a bidi-codepoint-literal allow marker
 */
function detectBidiCodepointLiteral(files) {
  const suspicious = _bidiLiteralRe();
  const selfPath = "scripts/check-codebase-patterns.js";
  const targets = files || filesUnder(["bin/exceptd.js", "lib", "orchestrator", "scripts"]);
  const hits = [];
  for (const rel of targets) {
    if (rel === selfPath) continue;
    readLines(rel).forEach((text, idx) => {
      if (suspicious.test(text)) {
        hits.push({ file: rel, line: idx + 1, content: text.trim() });
      }
    });
  }
  return filterMarkers(hits, "bidi-codepoint-literal");
}
241
+
242
/**
 * Meta-guard: find allow markers that name an unknown class or lack the
 * `— <reason>` tail.
 *
 * Both marker forms are validated with the same class + reason rules:
 *   per-line:   allow:<class> — <reason>
 *   file-level: codebase-patterns:allow-file <class> — <reason>
 * The file-level form is the broadest exemption (it suppresses every hit of
 * its class in the file), so a reason-less or unknown-class file-level marker
 * must be caught here too. When a comment contains both forms, only the
 * file-level one is checked.
 *
 * Two checks are deliberately strict:
 *   - Class membership uses an own-property test. VALID_ALLOW_CLASSES is a
 *     plain object, so a bare `VALID_ALLOW_CLASSES[cls]` would also accept
 *     inherited keys such as `constructor`.
 *   - The reason tail is anchored directly after the class id. An unanchored
 *     match would accept any hyphen later in the comment ("built-in") as if
 *     it were the `— <reason>` separator.
 *
 * scripts/check-codebase-patterns.js is skipped because it holds the registry
 * and the marker regexes.
 *
 * @param {string[]} [files] - paths to scan; defaults to the source files
 *   under bin/exceptd.js, lib/, orchestrator/ and scripts/
 * @returns {{file: string, line: number, content: string, why: string}[]}
 *   one hit per offending marker line; allow markers do not filter these
 */
function detectOrphanAllowClass(files) {
  const isKnownClass = (cls) =>
    Object.prototype.hasOwnProperty.call(VALID_ALLOW_CLASSES, cls) &&
    Boolean(VALID_ALLOW_CLASSES[cls]);
  const hits = [];
  for (const rel of (files || filesUnder(["bin/exceptd.js", "lib", "orchestrator", "scripts"]))) {
    if (rel === "scripts/check-codebase-patterns.js") continue; // holds the registry + regexes
    const lines = readLines(rel);
    for (let i = 0; i < lines.length; i++) {
      const cmt = lines[i].indexOf("//");
      if (cmt === -1) continue;
      const comment = lines[i].slice(cmt);
      const fileLevel = comment.match(/\bcodebase-patterns:allow-file\s+([a-z0-9-]+)\b(.*)$/);
      const perLine = comment.match(/\ballow:([a-z0-9-]+)\b(.*)$/);
      const m = fileLevel || perLine;
      if (!m) continue;
      const cls = m[1];
      const tail = m[2];
      const label = fileLevel ? `allow-file ${cls}` : `allow:${cls}`;
      if (!isKnownClass(cls)) {
        hits.push({ file: rel, line: i + 1, content: lines[i].trim(), why: `unknown allow-class "${cls}"` });
      } else if (!/^\s*[—-]\s*\S/.test(tail)) {
        hits.push({ file: rel, line: i + 1, content: lines[i].trim(), why: `${label} is missing the "— <reason>" tail` });
      }
    }
  }
  return hits;
}
274
+
275
// Registry of pattern classes, in the order main() runs and reports them.
// Fields of each entry:
//   id       - class name printed in the report
//   run      - detector; main() calls it with no arguments, so it scans its
//              default file set, and it returns an array of hits
//   warnOnly - true: hits are counted as warnings; false: hits are counted as
//              blocking violations and make main() set exit code 1
//   hint     - remediation text printed once after the class's hits
const CLASSES = [
  {
    id: "process-exit-after-stdout-write",
    run: detectProcessExitAfterStdout,
    warnOnly: false,
    hint: "use `safeExit(EXIT_CODES.X); return;` (lib/exit-codes.js) — process.exit() truncates buffered stdout when piped",
  },
  {
    id: "dynamic-regex",
    run: detectDynamicRegex,
    warnOnly: true, // flip to false next release once the known sites carry markers
    hint: "RegExp from operator input is a ReDoS sink — anchor + length-cap, or `// allow:dynamic-regex — <reason>` when the pattern is a trusted bundled schema",
  },
  {
    id: "bidi-codepoint-literal",
    run: detectBidiCodepointLiteral,
    warnOnly: false,
    hint: "raw bidi/zero-width/null codepoint in source — emit it via vendor/blamejs/codepoint-class tables or a \\uXXXX escape, or `// allow:bidi-codepoint-literal — <reason>` if the literal is load-bearing test/illustrative data",
  },
  {
    // Meta-guard over the allow markers themselves.
    id: "orphan-allow-class",
    run: detectOrphanAllowClass,
    warnOnly: false,
    hint: "a typo'd or reason-less `// allow:<class>` suppresses nothing — fix the class id or add `— <reason>`",
  },
];
301
+
302
/**
 * CLI entry: run every class in CLASSES and print one numbered report.
 *
 * Clean classes and the final "ok" summary go to stdout; hits, hints and the
 * FAIL summary go to stderr. The outcome is reported through
 * process.exitCode: 0 when no blocking class has hits, 1 otherwise.
 */
function main() {
  const totals = { blocking: 0, warnings: 0 };
  let ordinal = 0;

  for (const cls of CLASSES) {
    const hits = cls.run();
    if (hits.length === 0) {
      console.log(` ok ${cls.id}: clean`);
      continue;
    }

    const tag = cls.warnOnly ? "[warn]" : "FAIL";
    for (const hit of hits) {
      ordinal += 1;
      const extra = hit.why ? ` (${hit.why})` : "";
      const snippet = String(hit.content).slice(0, 110);
      console.error(` ${ordinal}. ${tag} ${cls.id} ${hit.file}:${hit.line}: ${snippet}${extra}`);
    }
    console.error(` -> ${cls.hint}`);

    if (cls.warnOnly) {
      totals.warnings += hits.length;
    } else {
      totals.blocking += hits.length;
    }
  }

  if (totals.blocking > 0) {
    console.error(`[check-codebase-patterns] FAIL — ${totals.blocking} blocking violation(s).`);
    process.exitCode = 1;
    return;
  }

  const warningNote = totals.warnings ? ` (${totals.warnings} warning(s))` : "";
  console.log(`[check-codebase-patterns] ok${warningNote}`);
  process.exitCode = 0;
}
326
+
327
+ module.exports = {
328
+ VALID_ALLOW_CLASSES,
329
+ CLASSES,
330
+ detectProcessExitAfterStdout,
331
+ detectDynamicRegex,
332
+ detectBidiCodepointLiteral,
333
+ detectOrphanAllowClass,
334
+ filesUnder,
335
+ };
336
+
337
+ if (require.main === module) main();
@@ -236,6 +236,19 @@ const GATES = [
236
236
  args: [path.join(ROOT, "scripts", "check-agents-md-collectors.js")],
237
237
  ciJobName: "Data integrity (catalog + manifest snapshot)",
238
238
  },
239
+ {
240
+ // Codebase-pattern gate. Blocks the code-shape bug classes that
241
+ // recurred across releases: a library-callable function that writes to
242
+ // stdout then calls process.exit() (truncates the buffered write when
243
+ // piped — the stdout-flush-truncation class), a raw bidi/zero-width/null codepoint literal in source, and a stale/typo'd `// allow:` marker.
244
+ // dynamic-RegExp construction is surfaced warn-only this release. The
245
+ // exception mechanism + the "owned elsewhere" boundary are documented in
246
+ // the script header.
247
+ name: "Codebase-pattern gates (stdout-flush, dynamic RegExp, bidi codepoints, orphan markers)",
248
+ command: process.execPath,
249
+ args: [path.join(ROOT, "scripts", "check-codebase-patterns.js")],
250
+ ciJobName: "Data integrity (catalog + manifest snapshot)",
251
+ },
239
252
  ];
240
253
 
241
254
  function runGate(gate) {
@@ -274,6 +274,13 @@ function cmdPrepare(opts) {
274
274
  // the canonical-count guard meaningful when a release adds test files.
275
275
  _run("node", ["scripts/check-test-count.js", "--update-baseline"]);
276
276
 
277
+ _section("codebase-patterns currency (advisory)");
278
+ // Flag when the upstream pattern catalog (the sibling blamejs codebase-
279
+ // patterns test) has grown a class exceptd hasn't triaged yet — the same
280
+ // forcing function the actions/vendor currency checks give those surfaces.
281
+ // Advisory: never blocks; skips cleanly when the sibling repo is absent.
282
+ _run("node", ["scripts/check-codebase-patterns-currency.js"], { allowFail: true });
283
+
277
284
  console.log("\nnext: node scripts/release.js gates");
278
285
  }
279
286
 
@@ -368,6 +375,22 @@ function cmdWatch() {
368
375
  // run doesn't throw before we get to inspect + rerun it.
369
376
  _run("gh", ["pr", "checks", prNum, "--watch"], { allowFail: true });
370
377
 
378
+ // Gate on check CONCLUSIONS, not only review threads. A red required check
379
+ // leaves the PR BLOCKED at merge, so surfacing failures here (the whole
380
+ // point of the watch phase) beats advancing to "next: merge" and letting
381
+ // cmdMerge reject it. Bucket is gh's normalized verdict: pass / fail /
382
+ // pending / skipping / cancel.
383
+ var checksRaw = _capture("gh", ["pr", "checks", prNum, "--json", "name,bucket,link"]).stdout;
384
+ var checks = [];
385
+ try { checks = JSON.parse(checksRaw || "[]"); } catch (_e) { checks = []; }
386
+ var failed = checks.filter(function (c) { return c.bucket === "fail" || c.bucket === "cancel"; });
387
+ if (failed.length > 0) {
388
+ console.log("\nfailed checks (" + failed.length + "):");
389
+ failed.forEach(function (c) { console.log(" ✗ " + c.name + " " + (c.link || "")); });
390
+ console.log("\nFix in code, push, then re-run: node scripts/release.js watch");
391
+ process.exit(3);
392
+ }
393
+
371
394
  var unresolved = _unresolvedThreads(prNum);
372
395
  if (unresolved.length > 0) {
373
396
  console.log("\nunresolved review threads (" + unresolved.length + "):");
@@ -9,6 +9,7 @@ upstream commit [`1442f17`](https://github.com/blamejs/blamejs/commit/1442f17758
9
9
  |---|---|---|
10
10
  | `retry.js` | `lib/retry.js` | Battle-tested exponential backoff + crypto jitter + AbortSignal + circuit-breaker. Used by `lib/job-queue.js` and `lib/refresh-external.js` for HTTP retry semantics on KEV/EPSS/NVD/IETF/GitHub fetches. |
11
11
  | `worker-pool.js` | `lib/worker-pool.js` | Generic worker_threads pool with bounded queue, per-task timeout, worker recycle. Used by `scripts/build-indexes.js --parallel` and any future CPU-bound fan-out work. |
12
+ | `codepoint-class.js` | `lib/codepoint-class.js` | Trojan-Source (CVE-2021-42574) codepoint threat tables — bidi-override / C0-control / zero-width / null ranges + compiled regexes + `applyCharStripPolicies`. Used by the `--operator` reject path (`bin/exceptd.js`) and `sanitizeOperatorText` (`lib/playbook-runner.js`) to classify which family an offending codepoint belongs to; `\p{C}` remains the category backstop. |
12
13
  | `LICENSE` | `LICENSE` | Apache-2.0 license text (identical to exceptd's). |
13
14
  | `_PROVENANCE.json` | — | sha256 of each vendored file + upstream file at pin, plus the strip rules applied. `lib/validate-vendor.js` re-hashes on every predeploy run. |
14
15
 
@@ -52,6 +52,22 @@
52
52
  "exceptd_deltas": [
53
53
  "scriptPath validator rejects Windows UNC + extended-path prefixes (\\\\?\\, \\\\.\\, \\\\<server>\\) — defense-in-depth against worker-spawn from network shares on win32 platforms"
54
54
  ]
55
+ },
56
+ "codepoint-class.js": {
57
+ "vendored_path": "vendor/blamejs/codepoint-class.js",
58
+ "vendored_sha256": "2be79cf25de87f46b608aec98ee790f4cf1035ffee48fe70ff082d3cf6f324ba",
59
+ "upstream_path": "lib/codepoint-class.js",
60
+ "upstream_sha256_at_pin": "2be79cf25de87f46b608aec98ee790f4cf1035ffee48fe70ff082d3cf6f324ba",
61
+ "stripped": [],
62
+ "surface_preserved": [
63
+ "BIDI_RE / C0_CTRL_RE / ZERO_WIDTH_RE / NULL_BYTE (classification regexes)",
64
+ "BIDI_RE_G / C0_CTRL_RE_G / ZW_RE_G / NULL_RE_G (global strip regexes)",
65
+ "applyCharStripPolicies(text, opts)",
66
+ "hex4 / charClass / fromCp / range tables"
67
+ ],
68
+ "exceptd_deltas": [
69
+ "Used for codepoint-family CLASSIFICATION + family-strip only. The BIDI|C0|ZERO_WIDTH|NULL union is a strict subset of Unicode General Category C, so \\p{C} remains the reject/strip backstop at both consumer sites (bin/exceptd.js --operator validation, lib/playbook-runner.js sanitizeOperatorText) — it catches the divergent remainder (U+007F, U+0080-009F, U+FFF9-FFFB, private-use, unassigned) the named-family regexes miss."
70
+ ]
55
71
  }
56
72
  }
57
73
  }