@blamejs/exceptd-skills 0.13.125 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17696,5 +17696,55 @@
17696
17696
  ],
17697
17697
  "_auto_imported": false,
17698
17698
  "_intake_method": "manual-verified-curation"
17699
+ },
17700
+ "CVE-2026-21877": {
17701
+ "name": "n8n Git Node Arbitrary File Write Authenticated RCE",
17702
+ "lesson_date": "2026-05-26",
17703
+ "attack_vector": {
17704
+ "description": "n8n's Git node lets an authenticated user write a dangerous file to an arbitrary path, which is then executed, yielding full instance compromise on self-hosted and Cloud.",
17705
+ "privileges_required": "low (authenticated user who can configure the Git node)",
17706
+ "complexity": "low",
17707
+ "ai_factor": "The abused surface is n8n's Git node, in an AI-workflow / automation builder. The lesson: a workflow node that writes files is a code-execution sink - constrain the file types and paths it can write, and scope workflow-edit permission tightly."
17708
+ },
17709
+ "framework_coverage": {
17710
+ "NIST-800-53-AC-3": {
17711
+ "covered": true,
17712
+ "adequate": false,
17713
+ "gap": "Access enforcement does not stop an authenticated user from writing an executable file via the Git node."
17714
+ },
17715
+ "NIST-800-53-SI-3": {
17716
+ "covered": true,
17717
+ "adequate": false,
17718
+ "gap": "Malicious-code protection does not stop an arbitrary file write that becomes code execution."
17719
+ },
17720
+ "ALL-AI-PIPELINE-INTEGRITY": {
17721
+ "covered": false,
17722
+ "adequate": false,
17723
+ "gap": "No framework treats a workflow builder's file-writing node as a code-execution sink requiring type/path constraint."
17724
+ }
17725
+ },
17726
+ "compliance_exposure_score": {
17727
+ "percent_audit_passing_orgs_still_exposed": 81,
17728
+ "basis": "Workflow builders ship nodes (Git, filesystem) that write files on trusted assumptions; file-type/path constraints on these sinks are rarely audited.",
17729
+ "theater_pattern": "ai_app_builder_code_node_sandbox_escape"
17730
+ },
17731
+ "ai_discovered_zeroday": false,
17732
+ "ai_discovery_source": "human_researcher",
17733
+ "ai_assist_factor": "none",
17734
+ "new_control_requirements": [
17735
+ {
17736
+ "id": "NEW-CTRL-103",
17737
+ "name": "AI-APP-BUILDER-EXECUTION-ENDPOINT-AUTH-AND-SANDBOX",
17738
+ "description": "A visual LLM app/agent/workflow builder (Langflow, Flowise, Dify, n8n, and similar) must authenticate every endpoint that can reach a code-execution path and must never let a workflow-supplied node write files of executable/dangerous types to arbitrary paths or run code with host privileges. Sandbox any code the platform executes on a user's behalf in a non-bypassable, host-isolated environment, constrain file-writing nodes to safe types/paths, and scope workflow-edit permission tightly. The distinguishing test: configure a file-writing or code node to drop an executable to a startup/cron path on a staging instance and confirm it is refused.",
17739
+ "evidence": "https://github.com/n8n-io/n8n/security/advisories/GHSA-v364-rw7m-3263",
17740
+ "gap_closes": [
17741
+ "NIST-800-53-AC-3",
17742
+ "NIST-800-53-SI-3",
17743
+ "ALL-AI-PIPELINE-INTEGRITY"
17744
+ ]
17745
+ }
17746
+ ],
17747
+ "_auto_imported": false,
17748
+ "_intake_method": "manual-verified-curation"
17699
17749
  }
17700
17750
  }
@@ -21,6 +21,7 @@
21
21
 
22
22
  const fs = require("node:fs");
23
23
  const path = require("node:path");
24
+ const { isLinkedWorktreeDir } = require("./scan-excludes");
24
25
 
25
26
  const COLLECTOR_ID = "cicd-pipeline-compromise";
26
27
 
@@ -219,7 +220,15 @@ function scanOidcPolicies(root) {
219
220
  for (const e of entries) {
220
221
  if (e.name === "node_modules" || e.name === ".git") continue;
221
222
  const full = path.join(dir, e.name);
222
- if (e.isDirectory()) { walk(full, depth + 1); continue; }
223
+ if (e.isDirectory()) {
224
+ // Skip linked git worktrees (gitdir-pointer `.git` file), e.g.
225
+ // agent-created repo copies under `.claude/worktrees/<id>/`
226
+ // nested below a scanned policy/infra dir — rescanning them
227
+ // double-counts the same OIDC trust documents.
228
+ if (isLinkedWorktreeDir(full)) continue;
229
+ walk(full, depth + 1);
230
+ continue;
231
+ }
223
232
  if (!e.isFile() || !/\.json$/i.test(e.name)) continue;
224
233
  const text = readSafe(full);
225
234
  if (!text) continue;
@@ -0,0 +1,465 @@
1
+ "use strict";
2
+
3
+ /**
4
+ * lib/collectors/citation-hygiene.js
5
+ *
6
+ * Companion collector for the `citation-hygiene` playbook. Walks the cwd
7
+ * tree (source, comments, docstrings, and security documentation) and
8
+ * extracts every CVE and RFC citation, then cross-references each against
9
+ * the shipped CVE catalog (data/cve-catalog.json) and RFC index
10
+ * (data/rfc-references.json).
11
+ *
12
+ * It flips signal_overrides only for verdicts determinable offline from
13
+ * the catalogs:
14
+ * - fabricated-cve-id: a citation whose tail is not the canonical
15
+ * all-numeric CVE form (CVE-2024-XXXX, CVE-2024-zlib). Deterministic.
16
+ * - rejected-or-disputed-cve: a well-formed citation that resolves to a
17
+ * catalog entry whose analyst notes mark it rejected / disputed.
18
+ * - rfc-number-title-mismatch: a citation pairing a number with a title
19
+ * that conflicts with the index title for that number.
20
+ *
21
+ * Indicators that need an out-of-band lookup or human judgement
22
+ * (cve-citation-needs-external-verification, draft-mislabeled-as-rfc) are
23
+ * surfaced in the artifacts text and left UNFLIPPED so the runner returns
24
+ * inconclusive rather than a forced miss — the catalog is curated, not
25
+ * exhaustive, so absence is never a clean clear or a false fabrication.
26
+ *
27
+ * Interface: see lib/collectors/README.md
28
+ */
29
+
30
+ const fs = require("node:fs");
31
+ const path = require("node:path");
32
+
33
+ const { codeExcludeSet, isLinkedWorktreeDir } = require("./scan-excludes");
34
+
35
+ const COLLECTOR_ID = "citation-hygiene";
36
+
37
+ const DEFAULT_MAX_DEPTH = 8;
38
+ const EXCLUDES = codeExcludeSet();
39
+
40
+ // File extensions whose contents are worth scanning for citations: source,
41
+ // markup/docs, config that carries security prose. Citations live in
42
+ // comments and docstrings (source) and in docs (md / rst / txt / adoc).
43
+ const SCAN_EXTS = new Set([
44
+ ".js", ".mjs", ".cjs", ".jsx", ".ts", ".tsx", ".mts", ".cts",
45
+ ".py", ".pyi",
46
+ ".go",
47
+ ".rs",
48
+ ".java", ".kt", ".kts", ".scala",
49
+ ".rb",
50
+ ".php",
51
+ ".c", ".h", ".cc", ".cpp", ".hpp", ".cxx",
52
+ ".cs",
53
+ ".swift",
54
+ ".m", ".mm",
55
+ ".md", ".mdx", ".rst", ".txt", ".adoc", ".asciidoc",
56
+ ".yaml", ".yml", ".toml", ".cfg", ".ini",
57
+ ]);
58
+
59
+ const MAX_FILE_BYTES = 2 * 1024 * 1024;
60
+
61
+ // Paths whose citations are illustrative (templates / fixtures / the
62
+ // scanner's own pattern catalogue), not real self-citations.
63
+ const ILLUSTRATIVE_PATH_SEGMENTS = [
64
+ "/test/", "/tests/", "/spec/", "/specs/", "/__tests__/",
65
+ "/fixtures/", "/fixture/",
66
+ "/.github/issue_template/", "/.github/pull_request_template/",
67
+ "/issue_template/", "/pull_request_template/",
68
+ // The collectors and the playbooks directory literally contain CVE /
69
+ // RFC patterns and example citations; scanning them would flag the
70
+ // scanner itself. The playbook's intent is the consumer's source.
71
+ "/lib/collectors/", "/data/playbooks/", "/lib/schemas/",
72
+ ];
73
+
74
+ function isIllustrativePath(rel) {
75
+ const norm = "/" + rel.replace(/\\/g, "/").toLowerCase() + "/";
76
+ for (const seg of ILLUSTRATIVE_PATH_SEGMENTS) {
77
+ if (norm.includes(seg)) return true;
78
+ }
79
+ if (/\.template($|\.)/i.test(rel)) return true;
80
+ if (/(?:^|[\\/])[^\\/]+\.(test|spec)\.[a-z]+$/i.test(rel)) return true;
81
+ return false;
82
+ }
83
+
84
+ function walkTree(root, opts = {}) {
85
+ const maxDepth = opts.maxDepth ?? DEFAULT_MAX_DEPTH;
86
+ const excludes = opts.excludes ?? EXCLUDES;
87
+ const out = [];
88
+ const seen = new Set();
89
+
90
+ function walk(dir, depth) {
91
+ if (depth > maxDepth) return;
92
+ let entries;
93
+ try { entries = fs.readdirSync(dir, { withFileTypes: true }); }
94
+ catch { return; }
95
+ for (const entry of entries) {
96
+ if (excludes.has(entry.name)) continue;
97
+ const full = path.join(dir, entry.name);
98
+ let real;
99
+ try { real = fs.realpathSync(full); } catch { continue; }
100
+ if (seen.has(real)) continue;
101
+ seen.add(real);
102
+ if (entry.isDirectory()) {
103
+ // Skip linked git worktrees (agent scratch copies) — descending
104
+ // into them rescans unrelated repo state.
105
+ if (isLinkedWorktreeDir(full)) continue;
106
+ walk(full, depth + 1);
107
+ } else if (entry.isFile()) {
108
+ out.push({ full, rel: path.relative(root, full), name: entry.name });
109
+ }
110
+ }
111
+ }
112
+ walk(root, 0);
113
+ return out;
114
+ }
115
+
116
+ function readSafe(full) {
117
+ try {
118
+ const s = fs.statSync(full);
119
+ if (s.size > MAX_FILE_BYTES) return null;
120
+ return fs.readFileSync(full, "utf8");
121
+ } catch { return null; }
122
+ }
123
+
124
+ // Permissive CVE matcher: 4-digit year, then a tail of digits OR letters
125
+ // (so malformed citations like CVE-2024-XXXX / CVE-2024-zlib are captured,
126
+ // not silently skipped). The canonical-form test is applied afterwards.
127
+ const CVE_CITATION_RE = /CVE-(\d{4})-([0-9A-Za-z]+)/g;
128
+ const CVE_CANONICAL_RE = /^CVE-\d{4}-\d{4,}$/;
129
+
130
+ // RFC citation: `RFC 9404`, `RFC9404`, `RFC-9404`. Capture the number.
131
+ const RFC_CITATION_RE = /RFC[\s-]?(\d{1,5})\b/gi;
132
+
133
+ // Words that mark a catalog note as recording a rejected / disputed status.
134
+ const REJECT_DISPUTE_RE = /\b(reject(?:ed|s|ion)?|disputed?|withdrawn)\b/i;
135
+
136
+ // Draft-language proximity for the (unflipped) draft-as-RFC heuristic.
137
+ const DRAFT_LANGUAGE_RE = /\b(draft-[a-z0-9-]+|internet[- ]draft|work[- ]in[- ]progress|i-d\b)\b/i;
138
+
139
+ /**
140
+ * Load the shipped CVE catalog and RFC index. The catalogs ship in the
141
+ * package tarball under data/; resolve relative to this module so the
142
+ * collector works whether run from the source tree or a node_modules
143
+ * install. Returns { cveKeys:Set, cveNotes:Map<id,string>, rfcTitles:Map<number,string>, errors:[] }.
144
+ */
145
+ function loadCatalogs() {
146
+ const errors = [];
147
+ const dataDir = path.resolve(__dirname, "..", "..", "data");
148
+ const cveKeys = new Set();
149
+ const cveNotes = new Map();
150
+ const rfcTitles = new Map();
151
+
152
+ try {
153
+ const cve = JSON.parse(fs.readFileSync(path.join(dataDir, "cve-catalog.json"), "utf8"));
154
+ for (const [k, v] of Object.entries(cve)) {
155
+ if (k.startsWith("_")) continue;
156
+ cveKeys.add(k);
157
+ if (v && typeof v === "object") {
158
+ // Concatenate the analyst-note fields that carry rejected /
159
+ // disputed status. Matching the cited key's OWN notes (not a
160
+ // neighbour's) is enforced by per-entry concatenation.
161
+ const noteParts = [
162
+ v.cvss_note, v.active_exploitation_notes, v.vector,
163
+ v.discovery_attribution_note, v.ai_discovery_notes,
164
+ v._kev_short_description,
165
+ ].filter((s) => typeof s === "string");
166
+ cveNotes.set(k, noteParts.join(" • "));
167
+ }
168
+ }
169
+ } catch (e) {
170
+ errors.push({ artifact_id: "cve-catalog", kind: "catalog_load_failed", reason: e.message });
171
+ }
172
+
173
+ try {
174
+ const rfc = JSON.parse(fs.readFileSync(path.join(dataDir, "rfc-references.json"), "utf8"));
175
+ for (const [k, v] of Object.entries(rfc)) {
176
+ if (k.startsWith("_")) continue;
177
+ if (v && typeof v === "object" && typeof v.number === "number" && typeof v.title === "string") {
178
+ rfcTitles.set(v.number, v.title);
179
+ }
180
+ }
181
+ } catch (e) {
182
+ errors.push({ artifact_id: "rfc-index", kind: "catalog_load_failed", reason: e.message });
183
+ }
184
+
185
+ return { cveKeys, cveNotes, rfcTitles, errors };
186
+ }
187
+
188
+ // Normalise a title for comparison: lowercase, drop punctuation, collapse
189
+ // whitespace, and strip a leading "the".
190
+ function normalizeTitle(s) {
191
+ return s
192
+ .toLowerCase()
193
+ .replace(/[^a-z0-9\s]/g, " ")
194
+ .replace(/\s+/g, " ")
195
+ .replace(/^the\s+/, "")
196
+ .trim();
197
+ }
198
+
199
+ const TITLE_STOPWORDS = new Set([
200
+ "the", "a", "an", "of", "for", "and", "to", "in", "on", "with",
201
+ "protocol", "version", "extension", "specification", "spec", "rfc",
202
+ ]);
203
+
204
+ function titleTokens(s) {
205
+ return new Set(
206
+ normalizeTitle(s).split(" ").filter((t) => t.length >= 3 && !/^\d+$/.test(t) && !TITLE_STOPWORDS.has(t)),
207
+ );
208
+ }
209
+
210
+ // Ordered list of meaningful (post-stopword, non-numeric) tokens in a
211
+ // title — used both for overlap and for acronym construction.
212
+ function orderedTitleTokens(s) {
213
+ return normalizeTitle(s)
214
+ .split(" ")
215
+ .filter((t) => t.length >= 3 && !/^\d+$/.test(t) && !TITLE_STOPWORDS.has(t));
216
+ }
217
+
218
+ // Build the lowercase acronym from the title's meaningful words
219
+ // (Transport Layer Security Protocol -> "tls", since protocol/version are
220
+ // stopwords). Lets a nickname / abbreviation in the adjacent text be
221
+ // recognised as the same document, not a wrong title.
222
+ function titleAcronym(realTitle) {
223
+ return orderedTitleTokens(realTitle).map((w) => w[0]).join("");
224
+ }
225
+
226
+ /**
227
+ * Decide whether an adjacent text fragment makes a TITLE CLAIM that
228
+ * conflicts with the real index title. Conservative by design — the cost
229
+ * of a false positive (telling an author their correct citation is wrong)
230
+ * is high, so the bar to flag a mismatch is deliberately strict:
231
+ * - the cited RFC number must be in the index (checked by the caller),
232
+ * - the adjacent text must carry at least THREE meaningful tokens — a
233
+ * bare nickname / abbreviation ("TLS 1.3", "(HTTP)") reduces below
234
+ * this and is treated as no-title-claim, never a mismatch,
235
+ * - if the adjacent tokens contain the title's acronym, it is the same
236
+ * document (TLS for Transport Layer Security); not a mismatch,
237
+ * - only ZERO overlap between the meaningful adjacent tokens and the
238
+ * real-title tokens flags a mismatch. Any shared content word means
239
+ * the author is describing the right document (paraphrase) — demote.
240
+ * Returns "mismatch" | "match" | "no-title-claim".
241
+ */
242
+ function classifyRfcTitle(adjacentText, realTitle) {
243
+ const adjTokens = titleTokens(adjacentText);
244
+ // Require a substantive title claim. Fewer than three content tokens is
245
+ // a nickname / abbreviation, not a stated title — stay conservative.
246
+ if (adjTokens.size < 3) return "no-title-claim";
247
+ const realTokens = titleTokens(realTitle);
248
+ if (realTokens.size === 0) return "no-title-claim";
249
+ // Acronym recognition: "tls" in the adjacent text matches "Transport
250
+ // Layer Security". Same document, not a wrong title.
251
+ const acronym = titleAcronym(realTitle);
252
+ if (acronym.length >= 2 && adjTokens.has(acronym)) return "match";
253
+ let overlap = 0;
254
+ for (const t of adjTokens) {
255
+ if (realTokens.has(t)) overlap++;
256
+ }
257
+ // Any shared content word -> the author is describing the right
258
+ // document. Only a stated title with ZERO overlap is a conflicting
259
+ // claim. This trades recall for precision intentionally.
260
+ return overlap === 0 ? "mismatch" : "match";
261
+ }
262
+
263
+ // Pull the text on the same line as the match, used as the "adjacent text"
264
+ // for the RFC title comparison.
265
+ function lineAround(content, index) {
266
+ const start = content.lastIndexOf("\n", index) + 1;
267
+ let end = content.indexOf("\n", index);
268
+ if (end === -1) end = content.length;
269
+ return content.slice(start, end);
270
+ }
271
+
272
+ function collect({ cwd = process.cwd() } = {}) {
273
+ const errors = [];
274
+ const startTime = Date.now();
275
+ const root = path.resolve(cwd);
276
+
277
+ const { cveKeys, cveNotes, rfcTitles, errors: catErrors } = loadCatalogs();
278
+ for (const e of catErrors) errors.push(e);
279
+ const catalogsLoaded = cveKeys.size > 0 && rfcTitles.size > 0;
280
+
281
+ let files;
282
+ try {
283
+ files = walkTree(root);
284
+ } catch (e) {
285
+ errors.push({ kind: "walk_failed", reason: e.message });
286
+ files = [];
287
+ }
288
+ if (files.length > 50000) {
289
+ errors.push({
290
+ kind: "file_count_capped",
291
+ reason: `walked ${files.length} files; capping content scan at 50000.`,
292
+ });
293
+ files = files.slice(0, 50000);
294
+ }
295
+
296
+ const scanFiles = files.filter((f) => SCAN_EXTS.has(path.extname(f.name).toLowerCase()));
297
+
298
+ // Hit collectors. Each entry keeps the file + the citation text so the
299
+ // artifact summary is auditable. CVE / RFC literals are references, not
300
+ // secrets, so they are safe to retain in the value text.
301
+ const hits = {
302
+ "fabricated-cve-id": [],
303
+ "rejected-or-disputed-cve": [],
304
+ "rfc-number-title-mismatch": [],
305
+ };
306
+ // Inconclusive / needs-verification buckets — surfaced in artifacts,
307
+ // never flipped to a deterministic verdict.
308
+ const needsVerify = {
309
+ cve_not_in_catalog: [],
310
+ rfc_not_in_index: [],
311
+ draft_as_rfc_candidates: [],
312
+ };
313
+
314
+ let totalCveCitations = 0;
315
+ let totalRfcCitations = 0;
316
+
317
+ for (const f of scanFiles) {
318
+ const content = readSafe(f.full);
319
+ if (content == null) {
320
+ errors.push({ artifact_id: "source-files", kind: "read_failed", reason: f.rel });
321
+ continue;
322
+ }
323
+ const illustrative = isIllustrativePath(f.rel);
324
+
325
+ // ---- CVE citations ----
326
+ for (const m of content.matchAll(CVE_CITATION_RE)) {
327
+ const full = m[0];
328
+ totalCveCitations++;
329
+ const canonical = CVE_CANONICAL_RE.test(full);
330
+ if (!canonical) {
331
+ // Fabricated / malformed. Illustrative surfaces (templates,
332
+ // fixtures, the format-explaining docs) are demoted.
333
+ if (!illustrative) {
334
+ hits["fabricated-cve-id"].push({ file: f.rel, citation: full });
335
+ }
336
+ continue;
337
+ }
338
+ // Well-formed. Cross-reference the catalog.
339
+ if (cveKeys.has(full)) {
340
+ const note = cveNotes.get(full) || "";
341
+ if (REJECT_DISPUTE_RE.test(note) && !illustrative) {
342
+ hits["rejected-or-disputed-cve"].push({ file: f.rel, citation: full });
343
+ }
344
+ } else if (catalogsLoaded && !illustrative) {
345
+ // Absent from the curated catalog: needs an external lookup.
346
+ // NOT a fabrication — inconclusive by design.
347
+ needsVerify.cve_not_in_catalog.push({ file: f.rel, citation: full });
348
+ }
349
+ }
350
+
351
+ // ---- RFC citations ----
352
+ for (const m of content.matchAll(RFC_CITATION_RE)) {
353
+ totalRfcCitations++;
354
+ const num = Number(m[1]);
355
+ if (!Number.isFinite(num)) continue;
356
+ const line = lineAround(content, m.index);
357
+ if (rfcTitles.has(num)) {
358
+ const verdict = classifyRfcTitle(line, rfcTitles.get(num));
359
+ if (verdict === "mismatch" && !illustrative) {
360
+ hits["rfc-number-title-mismatch"].push({
361
+ file: f.rel,
362
+ citation: `RFC ${num}`,
363
+ real_title: rfcTitles.get(num),
364
+ });
365
+ }
366
+ } else if (catalogsLoaded && !illustrative) {
367
+ // Number not in the published index. Needs verification; if draft
368
+ // language is adjacent, record it as a draft-as-RFC candidate
369
+ // (still inconclusive — left unflipped).
370
+ needsVerify.rfc_not_in_index.push({ file: f.rel, citation: `RFC ${num}` });
371
+ if (DRAFT_LANGUAGE_RE.test(line)) {
372
+ needsVerify.draft_as_rfc_candidates.push({ file: f.rel, citation: `RFC ${num}` });
373
+ }
374
+ }
375
+ }
376
+ }
377
+
378
+ // signal_overrides: only the deterministically-decidable indicators are
379
+ // flipped to hit/miss. The needs-verification indicators never get a
380
+ // hit/miss verdict: they stay absent or are set to "inconclusive" below.
381
+ const signal_overrides = {
382
+ "fabricated-cve-id": hits["fabricated-cve-id"].length > 0 ? "hit" : "miss",
383
+ "rfc-number-title-mismatch": hits["rfc-number-title-mismatch"].length > 0 ? "hit" : "miss",
384
+ };
385
+ // rejected-or-disputed-cve is high-confidence (not deterministic) — flip
386
+ // on a catalog-backed match, otherwise miss. Only assert a verdict when
387
+ // the catalog actually loaded; without it the check could not run.
388
+ if (cveKeys.size > 0) {
389
+ signal_overrides["rejected-or-disputed-cve"] =
390
+ hits["rejected-or-disputed-cve"].length > 0 ? "hit" : "miss";
391
+ } else {
392
+ signal_overrides["rejected-or-disputed-cve"] = "inconclusive";
393
+ }
394
+ // The needs-verification CVE indicator: hit means "found citations the
395
+ // offline catalog cannot confirm" — itself an inconclusive state, so it
396
+ // maps to inconclusive (not a clean miss) when such citations exist.
397
+ if (needsVerify.cve_not_in_catalog.length > 0) {
398
+ signal_overrides["cve-citation-needs-external-verification"] = "inconclusive";
399
+ }
400
+
401
+ const summarize = (list) => {
402
+ if (list.length === 0) return "0 hits";
403
+ const head = list.slice(0, 5).map((h) => {
404
+ let s = `${h.file}: ${h.citation}`;
405
+ if (h.real_title) s += ` (index title: "${h.real_title}")`;
406
+ return s;
407
+ }).join("; ");
408
+ return `${list.length} hit(s): ${head}` + (list.length > 5 ? "; …" : "");
409
+ };
410
+
411
+ const artifacts = {
412
+ "cve-citations-in-source": {
413
+ value: `${totalCveCitations} CVE citation(s) found. ` +
414
+ `fabricated: ${summarize(hits["fabricated-cve-id"])}. ` +
415
+ `rejected/disputed: ${summarize(hits["rejected-or-disputed-cve"])}. ` +
416
+ `needs-external-verification (well-formed, absent from catalog): ${summarize(needsVerify.cve_not_in_catalog)}.`,
417
+ captured: true,
418
+ },
419
+ "rfc-citations-in-source": {
420
+ value: `${totalRfcCitations} RFC citation(s) found. ` +
421
+ `title-mismatch: ${summarize(hits["rfc-number-title-mismatch"])}. ` +
422
+ `not-in-index (needs verification): ${summarize(needsVerify.rfc_not_in_index)}. ` +
423
+ `draft-as-rfc candidates: ${summarize(needsVerify.draft_as_rfc_candidates)}.`,
424
+ captured: true,
425
+ },
426
+ "cve-catalog": {
427
+ value: cveKeys.size > 0
428
+ ? `loaded ${cveKeys.size} catalog entries for cross-reference`
429
+ : "catalog unavailable — CVE cross-reference could not run",
430
+ captured: cveKeys.size > 0,
431
+ ...(cveKeys.size === 0 ? { reason: "cve-catalog.json failed to load" } : {}),
432
+ },
433
+ "rfc-index": {
434
+ value: rfcTitles.size > 0
435
+ ? `loaded ${rfcTitles.size} RFC titles for cross-reference`
436
+ : "RFC index unavailable — RFC cross-reference could not run",
437
+ captured: rfcTitles.size > 0,
438
+ ...(rfcTitles.size === 0 ? { reason: "rfc-references.json failed to load" } : {}),
439
+ },
440
+ };
441
+
442
+ return {
443
+ precondition_checks: {
444
+ "repo-cites-security-references": totalCveCitations > 0 || totalRfcCitations > 0,
445
+ },
446
+ artifacts,
447
+ signal_overrides,
448
+ collector_meta: {
449
+ collector_id: COLLECTOR_ID,
450
+ collector_version: "2026-05-26",
451
+ platform: process.platform,
452
+ captured_at: new Date().toISOString(),
453
+ cwd: root,
454
+ duration_ms: Date.now() - startTime,
455
+ files_walked: files.length,
456
+ scan_files_scanned: scanFiles.length,
457
+ cve_citations: totalCveCitations,
458
+ rfc_citations: totalRfcCitations,
459
+ catalogs_loaded: catalogsLoaded,
460
+ },
461
+ collector_errors: errors,
462
+ };
463
+ }
464
+
465
+ module.exports = { playbook_id: COLLECTOR_ID, collect };
@@ -20,15 +20,14 @@
20
20
 
21
21
  const fs = require("node:fs");
22
22
  const path = require("node:path");
23
+ const { codeExcludeSet, isLinkedWorktreeDir } = require("./scan-excludes");
23
24
 
24
25
  const COLLECTOR_ID = "containers";
25
26
 
26
27
  const DEFAULT_MAX_DEPTH = 6;
27
- const DEFAULT_EXCLUDES = new Set([
28
- "node_modules", ".git", "dist", "build", "out",
29
- ".venv", "venv", "__pycache__", ".pytest_cache",
30
- "target", ".idea", ".vscode",
31
- ]);
28
+ // Shared code-scope exclusions (dependency caches, build output, VCS +
29
+ // agent/editor scratch including `.claude/`); no container-specific extras.
30
+ const DEFAULT_EXCLUDES = codeExcludeSet();
32
31
 
33
32
  const DOCKERFILE_NAMES = new Set(["Dockerfile", "Containerfile"]);
34
33
  const DOCKERFILE_EXTS = new Set([".dockerfile", ".containerfile"]);
@@ -54,8 +53,14 @@ function walkTree(root, opts = {}) {
54
53
  try { real = fs.realpathSync(full); } catch { continue; }
55
54
  if (seen.has(real)) continue;
56
55
  seen.add(real);
57
- if (entry.isDirectory()) walk(full, depth + 1);
58
- else if (entry.isFile()) out.push({ full, rel: path.relative(root, full), name: entry.name });
56
+ if (entry.isDirectory()) {
57
+ // Skip linked git worktrees (their `.git` is a gitdir pointer
58
+ // file) — e.g. agent-created repo copies under
59
+ // `.claude/worktrees/<id>/`. Walking them rescans the same
60
+ // Dockerfiles / compose / k8s manifests as the host tree.
61
+ if (isLinkedWorktreeDir(full)) continue;
62
+ walk(full, depth + 1);
63
+ } else if (entry.isFile()) out.push({ full, rel: path.relative(root, full), name: entry.name });
59
64
  }
60
65
  }
61
66
  walk(root, 0);
@@ -16,15 +16,16 @@
16
16
 
17
17
  const fs = require("node:fs");
18
18
  const path = require("node:path");
19
+ const { codeExcludeSet, isLinkedWorktreeDir } = require("./scan-excludes");
19
20
 
20
21
  const COLLECTOR_ID = "crypto-codebase";
21
22
 
22
23
  const DEFAULT_MAX_DEPTH = 6;
23
- const DEFAULT_EXCLUDES = new Set([
24
- "node_modules", ".git", "dist", "build", "out",
25
- ".venv", "venv", "__pycache__", ".pytest_cache",
26
- "target", ".idea", ".vscode",
27
- ]);
24
+ // Shared code-scope exclusions: dependency caches, build output, VCS +
25
+ // agent/editor scratch (including `.claude/`). No crypto-codebase-specific
26
+ // extras — the shared defaults already cover every directory this scan
27
+ // should never descend into.
28
+ const DEFAULT_EXCLUDES = codeExcludeSet();
28
29
 
29
30
  const SOURCE_EXTS = new Set([
30
31
  ".js", ".mjs", ".cjs", ".jsx", ".ts", ".tsx", ".mts", ".cts",
@@ -85,6 +86,11 @@ function walkTree(root, opts = {}) {
85
86
  if (seen.has(real)) continue;
86
87
  seen.add(real);
87
88
  if (entry.isDirectory()) {
89
+ // Never descend into a linked git worktree (its `.git` is a
90
+ // gitdir pointer file). Agent tooling stamps full repo copies
91
+ // under `.claude/worktrees/<id>/`; walking them rescans the same
92
+ // source as the host tree and multiplies every hit.
93
+ if (isLinkedWorktreeDir(full)) continue;
88
94
  walk(full, depth + 1);
89
95
  } else if (entry.isFile()) {
90
96
  out.push({ full, rel: path.relative(root, full), name: entry.name });