@blamejs/exceptd-skills 0.13.125 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +2 -2
- package/CHANGELOG.md +24 -0
- package/README.md +12 -9
- package/bin/exceptd.js +142 -35
- package/data/_indexes/_meta.json +9 -9
- package/data/_indexes/activity-feed.json +2 -2
- package/data/_indexes/catalog-summaries.json +2 -2
- package/data/_indexes/chains.json +375 -0
- package/data/atlas-ttps.json +2 -1
- package/data/attack-techniques.json +4 -2
- package/data/cve-catalog.json +104 -0
- package/data/cwe-catalog.json +4 -2
- package/data/framework-control-gaps.json +18 -9
- package/data/playbooks/citation-hygiene.json +820 -0
- package/data/zeroday-lessons.json +50 -0
- package/lib/collectors/cicd-pipeline-compromise.js +10 -1
- package/lib/collectors/citation-hygiene.js +465 -0
- package/lib/collectors/containers.js +12 -7
- package/lib/collectors/crypto-codebase.js +11 -5
- package/lib/collectors/library-author.js +82 -10
- package/lib/collectors/scan-excludes.js +85 -0
- package/lib/collectors/secrets.js +10 -6
- package/lib/flag-suggest.js +2 -2
- package/lib/refresh-external.js +15 -0
- package/manifest.json +44 -44
- package/package.json +1 -1
- package/sbom.cdx.json +91 -46
- package/scripts/check-agents-md-collectors.js +8 -0
|
@@ -17696,5 +17696,55 @@
|
|
|
17696
17696
|
],
|
|
17697
17697
|
"_auto_imported": false,
|
|
17698
17698
|
"_intake_method": "manual-verified-curation"
|
|
17699
|
+
},
|
|
17700
|
+
"CVE-2026-21877": {
|
|
17701
|
+
"name": "n8n Git Node Arbitrary File Write Authenticated RCE",
|
|
17702
|
+
"lesson_date": "2026-05-26",
|
|
17703
|
+
"attack_vector": {
|
|
17704
|
+
"description": "n8n's Git node lets an authenticated user write a dangerous file to an arbitrary path, which is then executed, yielding full instance compromise on self-hosted and Cloud.",
|
|
17705
|
+
"privileges_required": "low (authenticated user who can configure the Git node)",
|
|
17706
|
+
"complexity": "low",
|
|
17707
|
+
"ai_factor": "The abused surface is n8n's Git node, in an AI-workflow / automation builder. The lesson: a workflow node that writes files is a code-execution sink - constrain the file types and paths it can write, and scope workflow-edit permission tightly."
|
|
17708
|
+
},
|
|
17709
|
+
"framework_coverage": {
|
|
17710
|
+
"NIST-800-53-AC-3": {
|
|
17711
|
+
"covered": true,
|
|
17712
|
+
"adequate": false,
|
|
17713
|
+
"gap": "Access enforcement does not stop an authenticated user from writing an executable file via the Git node."
|
|
17714
|
+
},
|
|
17715
|
+
"NIST-800-53-SI-3": {
|
|
17716
|
+
"covered": true,
|
|
17717
|
+
"adequate": false,
|
|
17718
|
+
"gap": "Malicious-code protection does not stop an arbitrary file write that becomes code execution."
|
|
17719
|
+
},
|
|
17720
|
+
"ALL-AI-PIPELINE-INTEGRITY": {
|
|
17721
|
+
"covered": false,
|
|
17722
|
+
"adequate": false,
|
|
17723
|
+
"gap": "No framework treats a workflow builder's file-writing node as a code-execution sink requiring type/path constraint."
|
|
17724
|
+
}
|
|
17725
|
+
},
|
|
17726
|
+
"compliance_exposure_score": {
|
|
17727
|
+
"percent_audit_passing_orgs_still_exposed": 81,
|
|
17728
|
+
"basis": "Workflow builders ship nodes (Git, filesystem) that write files on trusted assumptions; file-type/path constraints on these sinks are rarely audited.",
|
|
17729
|
+
"theater_pattern": "ai_app_builder_code_node_sandbox_escape"
|
|
17730
|
+
},
|
|
17731
|
+
"ai_discovered_zeroday": false,
|
|
17732
|
+
"ai_discovery_source": "human_researcher",
|
|
17733
|
+
"ai_assist_factor": "none",
|
|
17734
|
+
"new_control_requirements": [
|
|
17735
|
+
{
|
|
17736
|
+
"id": "NEW-CTRL-103",
|
|
17737
|
+
"name": "AI-APP-BUILDER-EXECUTION-ENDPOINT-AUTH-AND-SANDBOX",
|
|
17738
|
+
"description": "A visual LLM app/agent/workflow builder (Langflow, Flowise, Dify, n8n, and similar) must authenticate every endpoint that can reach a code-execution path and must never let a workflow-supplied node write files of executable/dangerous types to arbitrary paths or run code with host privileges. Sandbox any code the platform executes on a user's behalf in a non-bypassable, host-isolated environment, constrain file-writing nodes to safe types/paths, and scope workflow-edit permission tightly. The distinguishing test: configure a file-writing or code node to drop an executable to a startup/cron path on a staging instance and confirm it is refused.",
|
|
17739
|
+
"evidence": "https://github.com/n8n-io/n8n/security/advisories/GHSA-v364-rw7m-3263",
|
|
17740
|
+
"gap_closes": [
|
|
17741
|
+
"NIST-800-53-AC-3",
|
|
17742
|
+
"NIST-800-53-SI-3",
|
|
17743
|
+
"ALL-AI-PIPELINE-INTEGRITY"
|
|
17744
|
+
]
|
|
17745
|
+
}
|
|
17746
|
+
],
|
|
17747
|
+
"_auto_imported": false,
|
|
17748
|
+
"_intake_method": "manual-verified-curation"
|
|
17699
17749
|
}
|
|
17700
17750
|
}
|
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
|
|
22
22
|
const fs = require("node:fs");
|
|
23
23
|
const path = require("node:path");
|
|
24
|
+
const { isLinkedWorktreeDir } = require("./scan-excludes");
|
|
24
25
|
|
|
25
26
|
const COLLECTOR_ID = "cicd-pipeline-compromise";
|
|
26
27
|
|
|
@@ -219,7 +220,15 @@ function scanOidcPolicies(root) {
|
|
|
219
220
|
for (const e of entries) {
|
|
220
221
|
if (e.name === "node_modules" || e.name === ".git") continue;
|
|
221
222
|
const full = path.join(dir, e.name);
|
|
222
|
-
if (e.isDirectory()) {
|
|
223
|
+
if (e.isDirectory()) {
|
|
224
|
+
// Skip linked git worktrees (gitdir-pointer `.git` file), e.g.
|
|
225
|
+
// agent-created repo copies under `.claude/worktrees/<id>/`
|
|
226
|
+
// nested below a scanned policy/infra dir — rescanning them
|
|
227
|
+
// double-counts the same OIDC trust documents.
|
|
228
|
+
if (isLinkedWorktreeDir(full)) continue;
|
|
229
|
+
walk(full, depth + 1);
|
|
230
|
+
continue;
|
|
231
|
+
}
|
|
223
232
|
if (!e.isFile() || !/\.json$/i.test(e.name)) continue;
|
|
224
233
|
const text = readSafe(full);
|
|
225
234
|
if (!text) continue;
|
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* lib/collectors/citation-hygiene.js
|
|
5
|
+
*
|
|
6
|
+
* Companion collector for the `citation-hygiene` playbook. Walks the cwd
|
|
7
|
+
* tree (source, comments, docstrings, and security documentation) and
|
|
8
|
+
* extracts every CVE and RFC citation, then cross-references each against
|
|
9
|
+
* the shipped CVE catalog (data/cve-catalog.json) and RFC index
|
|
10
|
+
* (data/rfc-references.json).
|
|
11
|
+
*
|
|
12
|
+
* It flips signal_overrides only for verdicts determinable offline from
|
|
13
|
+
* the catalogs:
|
|
14
|
+
* - fabricated-cve-id: a citation whose tail is not the canonical
|
|
15
|
+
* all-numeric CVE form (CVE-2024-XXXX, CVE-2024-zlib). Deterministic.
|
|
16
|
+
* - rejected-or-disputed-cve: a well-formed citation that resolves to a
|
|
17
|
+
* catalog entry whose analyst notes mark it rejected / disputed.
|
|
18
|
+
* - rfc-number-title-mismatch: a citation pairing a number with a title
|
|
19
|
+
* that conflicts with the index title for that number.
|
|
20
|
+
*
|
|
21
|
+
* Indicators that need an out-of-band lookup or human judgement
|
|
22
|
+
* (cve-citation-needs-external-verification, draft-mislabeled-as-rfc) are
|
|
23
|
+
* surfaced in the artifacts text and left UNFLIPPED so the runner returns
|
|
24
|
+
* inconclusive rather than a forced miss — the catalog is curated, not
|
|
25
|
+
* exhaustive, so absence is never a clean clear or a false fabrication.
|
|
26
|
+
*
|
|
27
|
+
* Interface: see lib/collectors/README.md
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
const fs = require("node:fs");
|
|
31
|
+
const path = require("node:path");
|
|
32
|
+
|
|
33
|
+
const { codeExcludeSet, isLinkedWorktreeDir } = require("./scan-excludes");
|
|
34
|
+
|
|
35
|
+
const COLLECTOR_ID = "citation-hygiene";
|
|
36
|
+
|
|
37
|
+
const DEFAULT_MAX_DEPTH = 8;
|
|
38
|
+
const EXCLUDES = codeExcludeSet();
|
|
39
|
+
|
|
40
|
+
// File extensions whose contents are worth scanning for citations: source,
|
|
41
|
+
// markup/docs, config that carries security prose. Citations live in
|
|
42
|
+
// comments and docstrings (source) and in docs (md / rst / txt / adoc).
|
|
43
|
+
const SCAN_EXTS = new Set([
|
|
44
|
+
".js", ".mjs", ".cjs", ".jsx", ".ts", ".tsx", ".mts", ".cts",
|
|
45
|
+
".py", ".pyi",
|
|
46
|
+
".go",
|
|
47
|
+
".rs",
|
|
48
|
+
".java", ".kt", ".kts", ".scala",
|
|
49
|
+
".rb",
|
|
50
|
+
".php",
|
|
51
|
+
".c", ".h", ".cc", ".cpp", ".hpp", ".cxx",
|
|
52
|
+
".cs",
|
|
53
|
+
".swift",
|
|
54
|
+
".m", ".mm",
|
|
55
|
+
".md", ".mdx", ".rst", ".txt", ".adoc", ".asciidoc",
|
|
56
|
+
".yaml", ".yml", ".toml", ".cfg", ".ini",
|
|
57
|
+
]);
|
|
58
|
+
|
|
59
|
+
const MAX_FILE_BYTES = 2 * 1024 * 1024;
|
|
60
|
+
|
|
61
|
+
// Paths whose citations are illustrative (templates / fixtures / the
|
|
62
|
+
// scanner's own pattern catalogue), not real self-citations.
|
|
63
|
+
const ILLUSTRATIVE_PATH_SEGMENTS = [
|
|
64
|
+
"/test/", "/tests/", "/spec/", "/specs/", "/__tests__/",
|
|
65
|
+
"/fixtures/", "/fixture/",
|
|
66
|
+
"/.github/issue_template/", "/.github/pull_request_template/",
|
|
67
|
+
"/issue_template/", "/pull_request_template/",
|
|
68
|
+
// The collectors and the playbooks directory literally contain CVE /
|
|
69
|
+
// RFC patterns and example citations; scanning them would flag the
|
|
70
|
+
// scanner itself. The playbook's intent is the consumer's source.
|
|
71
|
+
"/lib/collectors/", "/data/playbooks/", "/lib/schemas/",
|
|
72
|
+
];
|
|
73
|
+
|
|
74
|
+
/**
 * Report whether a repo-relative path is an illustrative surface (tests,
 * fixtures, templates, the scanner's own pattern sources) rather than a
 * place where citations are real self-citations.
 *
 * @param {string} rel - Path relative to the scan root; either separator style.
 * @returns {boolean} true when the path matches a listed segment, a
 *   `.template` marker, or a `<name>.test.<ext>` / `<name>.spec.<ext>` file.
 */
function isIllustrativePath(rel) {
  // Wrap in slashes so a segment such as "/test/" also matches when the
  // directory is the first or last path component.
  const normalized = `/${rel.replace(/\\/g, "/").toLowerCase()}/`;
  const inIllustrativeDir = ILLUSTRATIVE_PATH_SEGMENTS.some((segment) => normalized.includes(segment));
  return (
    inIllustrativeDir ||
    /\.template($|\.)/i.test(rel) ||
    /(?:^|[\\/])[^\\/]+\.(test|spec)\.[a-z]+$/i.test(rel)
  );
}
|
|
83
|
+
|
|
84
|
+
/**
 * Recursively list the regular files under `root`.
 *
 * @param {string} root - Directory to start from.
 * @param {{maxDepth?: number, excludes?: {has(name: string): boolean}}} [opts]
 *   maxDepth defaults to DEFAULT_MAX_DEPTH; excludes (entry names to skip)
 *   defaults to EXCLUDES.
 * @returns {Array<{full: string, rel: string, name: string}>} One record per
 *   file: joined path, path relative to `root`, and the entry name.
 */
function walkTree(root, opts = {}) {
  const depthLimit = opts.maxDepth ?? DEFAULT_MAX_DEPTH;
  const skipNames = opts.excludes ?? EXCLUDES;
  const found = [];
  // Real paths already visited, so the same target is never recorded twice.
  const visited = new Set();

  // Unreadable directories and unresolvable entries are skipped, not fatal.
  const listDir = (dir) => {
    try { return fs.readdirSync(dir, { withFileTypes: true }); }
    catch { return null; }
  };
  const resolveReal = (target) => {
    try { return fs.realpathSync(target); }
    catch { return null; }
  };

  const descend = (dir, depth) => {
    if (depth > depthLimit) return;
    const listing = listDir(dir);
    if (listing === null) return;
    for (const dirent of listing) {
      if (skipNames.has(dirent.name)) continue;
      const full = path.join(dir, dirent.name);
      const real = resolveReal(full);
      if (real === null || visited.has(real)) continue;
      visited.add(real);
      if (dirent.isFile()) {
        found.push({ full, rel: path.relative(root, full), name: dirent.name });
        continue;
      }
      if (!dirent.isDirectory()) continue;
      // Skip linked git worktrees (agent scratch copies) — descending
      // into them rescans unrelated repo state.
      if (isLinkedWorktreeDir(full)) continue;
      descend(full, depth + 1);
    }
  };

  descend(root, 0);
  return found;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Read a file as UTF-8 text, refusing anything larger than MAX_FILE_BYTES.
 *
 * @param {string} full - Path of the file to read.
 * @returns {string|null} File contents, or null when the file is over the
 *   size limit or the stat/read throws.
 */
function readSafe(full) {
  let text = null;
  try {
    const { size } = fs.statSync(full);
    if (size <= MAX_FILE_BYTES) {
      text = fs.readFileSync(full, "utf8");
    }
  } catch {
    text = null;
  }
  return text;
}
|
|
123
|
+
|
|
124
|
+
// Permissive CVE matcher: 4-digit year, then a tail of digits OR letters
|
|
125
|
+
// (so malformed citations like CVE-2024-XXXX / CVE-2024-zlib are captured,
|
|
126
|
+
// not silently skipped). The canonical-form test is applied afterwards.
|
|
127
|
+
const CVE_CITATION_RE = /CVE-(\d{4})-([0-9A-Za-z]+)/g;
|
|
128
|
+
const CVE_CANONICAL_RE = /^CVE-\d{4}-\d{4,}$/;
|
|
129
|
+
|
|
130
|
+
// RFC citation: `RFC 9404`, `RFC9404`, `RFC-9404`. Capture the number.
|
|
131
|
+
const RFC_CITATION_RE = /RFC[\s-]?(\d{1,5})\b/gi;
|
|
132
|
+
|
|
133
|
+
// Words that mark a catalog note as recording a rejected / disputed status.
|
|
134
|
+
const REJECT_DISPUTE_RE = /\b(reject(?:ed|s|ion)?|disputed?|withdrawn)\b/i;
|
|
135
|
+
|
|
136
|
+
// Draft-language proximity for the (unflipped) draft-as-RFC heuristic.
|
|
137
|
+
const DRAFT_LANGUAGE_RE = /\b(draft-[a-z0-9-]+|internet[- ]draft|work[- ]in[- ]progress|i-d\b)\b/i;
|
|
138
|
+
|
|
139
|
+
/**
 * Read the shipped CVE catalog and RFC index from the `data/` directory two
 * levels above this module, so the lookup works from a source checkout and
 * from a node_modules install alike.
 *
 * A file that cannot be read or parsed does not throw; the failure is
 * recorded in `errors` and that catalog's collections stay as far as they
 * were populated.
 *
 * @returns {{cveKeys: Set<string>, cveNotes: Map<string, string>,
 *   rfcTitles: Map<number, string>, errors: Array<object>}}
 */
function loadCatalogs() {
  const dataDir = path.resolve(__dirname, "..", "..", "data");
  const readJson = (file) => JSON.parse(fs.readFileSync(path.join(dataDir, file), "utf8"));
  const isRecord = (value) => Boolean(value) && typeof value === "object";
  // Analyst-note fields that can carry a rejected / disputed status.
  const noteFields = [
    "cvss_note", "active_exploitation_notes", "vector",
    "discovery_attribution_note", "ai_discovery_notes",
    "_kev_short_description",
  ];

  const cveKeys = new Set();
  const cveNotes = new Map();
  const rfcTitles = new Map();
  const errors = [];

  try {
    const catalog = readJson("cve-catalog.json");
    Object.entries(catalog).forEach(([id, entry]) => {
      // Underscore-prefixed keys are skipped rather than treated as entries.
      if (id.startsWith("_")) return;
      cveKeys.add(id);
      if (!isRecord(entry)) return;
      // Notes are joined per entry, so a status word can only ever be
      // attributed to the CVE it was written against.
      const notes = noteFields
        .map((field) => entry[field])
        .filter((text) => typeof text === "string");
      cveNotes.set(id, notes.join(" • "));
    });
  } catch (e) {
    errors.push({ artifact_id: "cve-catalog", kind: "catalog_load_failed", reason: e.message });
  }

  try {
    const index = readJson("rfc-references.json");
    Object.entries(index).forEach(([key, entry]) => {
      if (key.startsWith("_")) return;
      if (!isRecord(entry)) return;
      // Only entries with a numeric `number` and a string `title` are usable.
      if (typeof entry.number !== "number" || typeof entry.title !== "string") return;
      rfcTitles.set(entry.number, entry.title);
    });
  } catch (e) {
    errors.push({ artifact_id: "rfc-index", kind: "catalog_load_failed", reason: e.message });
  }

  return { cveKeys, cveNotes, rfcTitles, errors };
}
|
|
187
|
+
|
|
188
|
+
/**
 * Normalise a title for comparison: lowercase, replace punctuation with
 * spaces, collapse whitespace, trim, and strip a leading "the".
 *
 * @param {string} s - Raw title or adjacent text.
 * @returns {string} Space-separated lowercase words with no leading or
 *   trailing whitespace.
 */
function normalizeTitle(s) {
  return s
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, " ")
    .replace(/\s+/g, " ")
    // Trim BEFORE stripping the article: leading punctuation (an opening
    // quote or bracket) becomes a leading space, which would otherwise stop
    // the anchored pattern below from ever matching.
    .trim()
    .replace(/^the\s+/, "");
}
|
|
198
|
+
|
|
199
|
+
const TITLE_STOPWORDS = new Set([
|
|
200
|
+
"the", "a", "an", "of", "for", "and", "to", "in", "on", "with",
|
|
201
|
+
"protocol", "version", "extension", "specification", "spec", "rfc",
|
|
202
|
+
]);
|
|
203
|
+
|
|
204
|
+
/**
 * Reduce a title to its set of meaningful words.
 *
 * @param {string} s - Raw title or adjacent text.
 * @returns {Set<string>} Normalised words that are at least three
 *   characters long, not purely numeric, and not in TITLE_STOPWORDS.
 */
function titleTokens(s) {
  const unique = new Set();
  for (const word of normalizeTitle(s).split(" ")) {
    if (word.length < 3) continue;
    if (/^\d+$/.test(word)) continue;
    if (TITLE_STOPWORDS.has(word)) continue;
    unique.add(word);
  }
  return unique;
}
|
|
209
|
+
|
|
210
|
+
/**
 * List a title's meaningful words in their original order, keeping
 * duplicates. Used for overlap checks and for building an acronym.
 *
 * @param {string} s - Raw title.
 * @returns {string[]} Normalised words that are at least three characters
 *   long, not purely numeric, and not in TITLE_STOPWORDS.
 */
function orderedTitleTokens(s) {
  const isMeaningful = (word) =>
    !(word.length < 3 || /^\d+$/.test(word) || TITLE_STOPWORDS.has(word));
  const kept = [];
  for (const word of normalizeTitle(s).split(" ")) {
    if (isMeaningful(word)) kept.push(word);
  }
  return kept;
}
|
|
217
|
+
|
|
218
|
+
/**
 * Build the lowercase acronym of a title from the first letter of each
 * meaningful word (Transport Layer Security Protocol -> "tls", because
 * "protocol" is a stopword). This lets an abbreviation in nearby text be
 * recognised as the same document rather than as a wrong title.
 *
 * @param {string} realTitle - Title as recorded in the index.
 * @returns {string} Concatenated initials; empty when no word qualifies.
 */
function titleAcronym(realTitle) {
  let initials = "";
  for (const word of orderedTitleTokens(realTitle)) {
    initials += word.charAt(0);
  }
  return initials;
}
|
|
225
|
+
|
|
226
|
+
/**
 * Judge whether the text next to an RFC citation states a title that
 * conflicts with the index title for that number.
 *
 * The bar for "mismatch" is deliberately high, because telling an author
 * that a correct citation is wrong is costly:
 *   - the caller must already have confirmed the number is in the index;
 *   - fewer than THREE meaningful words nearby ("TLS 1.3", "(HTTP)") is a
 *     nickname, not a stated title, and yields "no-title-claim";
 *   - the title's acronym appearing nearby means the same document;
 *   - any shared content word is treated as a paraphrase of the right
 *     document. Only ZERO overlap is a mismatch.
 *
 * @param {string} adjacentText - Text surrounding the citation.
 * @param {string} realTitle - Title recorded in the index.
 * @returns {"mismatch"|"match"|"no-title-claim"}
 */
function classifyRfcTitle(adjacentText, realTitle) {
  const claimed = titleTokens(adjacentText);
  // Too few content words to count as a title claim — stay conservative.
  if (claimed.size < 3) return "no-title-claim";

  const actual = titleTokens(realTitle);
  // Nothing meaningful to compare against.
  if (actual.size === 0) return "no-title-claim";

  // "tls" nearby matches "Transport Layer Security": same document.
  const shorthand = titleAcronym(realTitle);
  if (shorthand.length >= 2 && claimed.has(shorthand)) return "match";

  // Precision over recall: one shared word is enough to demote.
  const sharesWord = [...claimed].some((word) => actual.has(word));
  return sharesWord ? "match" : "mismatch";
}
|
|
262
|
+
|
|
263
|
+
/**
 * Return the full line of `content` that contains position `index`, without
 * its terminating newline. Used as the "adjacent text" for the RFC title
 * comparison.
 *
 * @param {string} content - Whole file text.
 * @param {number} index - Offset of the match within `content`.
 * @returns {string} The line containing `index`.
 */
function lineAround(content, index) {
  const prevBreak = content.lastIndexOf("\n", index);
  const nextBreak = content.indexOf("\n", index);
  // prevBreak is -1 on the first line, which makes the slice start at 0.
  return content.slice(prevBreak + 1, nextBreak === -1 ? content.length : nextBreak);
}
|
|
271
|
+
|
|
272
|
+
/**
 * Scan the tree under `cwd` for CVE and RFC citations and classify each one
 * against the shipped catalogs.
 *
 * Verdict policy for `signal_overrides`:
 *   - fabricated-cve-id is decided from the citation's form alone, so it is
 *     always "hit" or "miss".
 *   - rejected-or-disputed-cve and rfc-number-title-mismatch depend on a
 *     catalog. Each is "hit" / "miss" only when its own catalog loaded;
 *     otherwise it is "inconclusive", because the check could not run and a
 *     "miss" would be a false all-clear.
 *   - cve-citation-needs-external-verification is set to "inconclusive"
 *     when well-formed citations absent from the catalog were found, and is
 *     otherwise left unset.
 *
 * @param {{cwd?: string}} [options] - cwd defaults to process.cwd().
 * @returns {{precondition_checks: object, artifacts: object,
 *   signal_overrides: object, collector_meta: object,
 *   collector_errors: Array<object>}}
 */
function collect({ cwd = process.cwd() } = {}) {
  const errors = [];
  const startTime = Date.now();
  const root = path.resolve(cwd);

  const { cveKeys, cveNotes, rfcTitles, errors: catErrors } = loadCatalogs();
  for (const e of catErrors) errors.push(e);
  const catalogsLoaded = cveKeys.size > 0 && rfcTitles.size > 0;

  let files;
  try {
    files = walkTree(root);
  } catch (e) {
    errors.push({ kind: "walk_failed", reason: e.message });
    files = [];
  }
  if (files.length > 50000) {
    errors.push({
      kind: "file_count_capped",
      reason: `walked ${files.length} files; capping content scan at 50000.`,
    });
    files = files.slice(0, 50000);
  }

  const scanFiles = files.filter((f) => SCAN_EXTS.has(path.extname(f.name).toLowerCase()));

  // Hit collectors. Each entry keeps the file + the citation text so the
  // artifact summary is auditable. CVE / RFC literals are references, not
  // secrets, so they are safe to retain in the value text.
  const hits = {
    "fabricated-cve-id": [],
    "rejected-or-disputed-cve": [],
    "rfc-number-title-mismatch": [],
  };
  // Inconclusive / needs-verification buckets — surfaced in artifacts,
  // never flipped to a deterministic verdict.
  const needsVerify = {
    cve_not_in_catalog: [],
    rfc_not_in_index: [],
    draft_as_rfc_candidates: [],
  };

  let totalCveCitations = 0;
  let totalRfcCitations = 0;

  for (const f of scanFiles) {
    const content = readSafe(f.full);
    if (content == null) {
      errors.push({ artifact_id: "source-files", kind: "read_failed", reason: f.rel });
      continue;
    }
    // Illustrative files still count toward the totals but never produce
    // hits or needs-verification entries.
    const illustrative = isIllustrativePath(f.rel);

    // ---- CVE citations ----
    for (const m of content.matchAll(CVE_CITATION_RE)) {
      const full = m[0];
      totalCveCitations++;
      const canonical = CVE_CANONICAL_RE.test(full);
      if (!canonical) {
        // Fabricated / malformed. Illustrative surfaces (templates,
        // fixtures, the format-explaining docs) are demoted.
        if (!illustrative) {
          hits["fabricated-cve-id"].push({ file: f.rel, citation: full });
        }
        continue;
      }
      // Well-formed. Cross-reference the catalog.
      if (cveKeys.has(full)) {
        const note = cveNotes.get(full) || "";
        if (REJECT_DISPUTE_RE.test(note) && !illustrative) {
          hits["rejected-or-disputed-cve"].push({ file: f.rel, citation: full });
        }
      } else if (catalogsLoaded && !illustrative) {
        // Absent from the curated catalog: needs an external lookup.
        // NOT a fabrication — inconclusive by design.
        needsVerify.cve_not_in_catalog.push({ file: f.rel, citation: full });
      }
    }

    // ---- RFC citations ----
    for (const m of content.matchAll(RFC_CITATION_RE)) {
      totalRfcCitations++;
      const num = Number(m[1]);
      if (!Number.isFinite(num)) continue;
      const line = lineAround(content, m.index);
      if (rfcTitles.has(num)) {
        const verdict = classifyRfcTitle(line, rfcTitles.get(num));
        if (verdict === "mismatch" && !illustrative) {
          hits["rfc-number-title-mismatch"].push({
            file: f.rel,
            citation: `RFC ${num}`,
            real_title: rfcTitles.get(num),
          });
        }
      } else if (catalogsLoaded && !illustrative) {
        // Number not in the published index. Needs verification; if draft
        // language is adjacent, record it as a draft-as-RFC candidate
        // (still inconclusive — left unflipped).
        needsVerify.rfc_not_in_index.push({ file: f.rel, citation: `RFC ${num}` });
        if (DRAFT_LANGUAGE_RE.test(line)) {
          needsVerify.draft_as_rfc_candidates.push({ file: f.rel, citation: `RFC ${num}` });
        }
      }
    }
  }

  // signal_overrides: only the deterministically-decidable indicators are
  // flipped. The needs-verification indicators stay absent so the runner
  // returns inconclusive for them.
  const signal_overrides = {
    "fabricated-cve-id": hits["fabricated-cve-id"].length > 0 ? "hit" : "miss",
  };
  // rfc-number-title-mismatch can only be decided against the RFC index.
  // With no index no citation could be compared, so "miss" would be a false
  // all-clear — report inconclusive, as the CVE check below does.
  if (rfcTitles.size > 0) {
    signal_overrides["rfc-number-title-mismatch"] =
      hits["rfc-number-title-mismatch"].length > 0 ? "hit" : "miss";
  } else {
    signal_overrides["rfc-number-title-mismatch"] = "inconclusive";
  }
  // rejected-or-disputed-cve is high-confidence (not deterministic) — flip
  // on a catalog-backed match, otherwise miss. Only assert a verdict when
  // the catalog actually loaded; without it the check could not run.
  if (cveKeys.size > 0) {
    signal_overrides["rejected-or-disputed-cve"] =
      hits["rejected-or-disputed-cve"].length > 0 ? "hit" : "miss";
  } else {
    signal_overrides["rejected-or-disputed-cve"] = "inconclusive";
  }
  // The needs-verification CVE indicator: hit means "found citations the
  // offline catalog cannot confirm" — itself an inconclusive state, so it
  // maps to inconclusive (not a clean miss) when such citations exist.
  if (needsVerify.cve_not_in_catalog.length > 0) {
    signal_overrides["cve-citation-needs-external-verification"] = "inconclusive";
  }

  // Render at most the first five entries of a bucket as one readable line.
  const summarize = (list) => {
    if (list.length === 0) return "0 hits";
    const head = list.slice(0, 5).map((h) => {
      let s = `${h.file}: ${h.citation}`;
      if (h.real_title) s += ` (index title: "${h.real_title}")`;
      return s;
    }).join("; ");
    return `${list.length} hit(s): ${head}` + (list.length > 5 ? "; …" : "");
  };

  const artifacts = {
    "cve-citations-in-source": {
      value: `${totalCveCitations} CVE citation(s) found. ` +
        `fabricated: ${summarize(hits["fabricated-cve-id"])}. ` +
        `rejected/disputed: ${summarize(hits["rejected-or-disputed-cve"])}. ` +
        `needs-external-verification (well-formed, absent from catalog): ${summarize(needsVerify.cve_not_in_catalog)}.`,
      captured: true,
    },
    "rfc-citations-in-source": {
      value: `${totalRfcCitations} RFC citation(s) found. ` +
        `title-mismatch: ${summarize(hits["rfc-number-title-mismatch"])}. ` +
        `not-in-index (needs verification): ${summarize(needsVerify.rfc_not_in_index)}. ` +
        `draft-as-rfc candidates: ${summarize(needsVerify.draft_as_rfc_candidates)}.`,
      captured: true,
    },
    "cve-catalog": {
      value: cveKeys.size > 0
        ? `loaded ${cveKeys.size} catalog entries for cross-reference`
        : "catalog unavailable — CVE cross-reference could not run",
      captured: cveKeys.size > 0,
      ...(cveKeys.size === 0 ? { reason: "cve-catalog.json failed to load" } : {}),
    },
    "rfc-index": {
      value: rfcTitles.size > 0
        ? `loaded ${rfcTitles.size} RFC titles for cross-reference`
        : "RFC index unavailable — RFC cross-reference could not run",
      captured: rfcTitles.size > 0,
      ...(rfcTitles.size === 0 ? { reason: "rfc-references.json failed to load" } : {}),
    },
  };

  return {
    precondition_checks: {
      "repo-cites-security-references": totalCveCitations > 0 || totalRfcCitations > 0,
    },
    artifacts,
    signal_overrides,
    collector_meta: {
      collector_id: COLLECTOR_ID,
      collector_version: "2026-05-26",
      platform: process.platform,
      captured_at: new Date().toISOString(),
      cwd: root,
      duration_ms: Date.now() - startTime,
      files_walked: files.length,
      scan_files_scanned: scanFiles.length,
      cve_citations: totalCveCitations,
      rfc_citations: totalRfcCitations,
      catalogs_loaded: catalogsLoaded,
    },
    collector_errors: errors,
  };
}
|
|
464
|
+
|
|
465
|
+
module.exports = { playbook_id: COLLECTOR_ID, collect };
|
|
@@ -20,15 +20,14 @@
|
|
|
20
20
|
|
|
21
21
|
const fs = require("node:fs");
|
|
22
22
|
const path = require("node:path");
|
|
23
|
+
const { codeExcludeSet, isLinkedWorktreeDir } = require("./scan-excludes");
|
|
23
24
|
|
|
24
25
|
const COLLECTOR_ID = "containers";
|
|
25
26
|
|
|
26
27
|
const DEFAULT_MAX_DEPTH = 6;
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
"target", ".idea", ".vscode",
|
|
31
|
-
]);
|
|
28
|
+
// Shared code-scope exclusions (dependency caches, build output, VCS +
|
|
29
|
+
// agent/editor scratch including `.claude/`); no container-specific extras.
|
|
30
|
+
const DEFAULT_EXCLUDES = codeExcludeSet();
|
|
32
31
|
|
|
33
32
|
const DOCKERFILE_NAMES = new Set(["Dockerfile", "Containerfile"]);
|
|
34
33
|
const DOCKERFILE_EXTS = new Set([".dockerfile", ".containerfile"]);
|
|
@@ -54,8 +53,14 @@ function walkTree(root, opts = {}) {
|
|
|
54
53
|
try { real = fs.realpathSync(full); } catch { continue; }
|
|
55
54
|
if (seen.has(real)) continue;
|
|
56
55
|
seen.add(real);
|
|
57
|
-
if (entry.isDirectory())
|
|
58
|
-
|
|
56
|
+
if (entry.isDirectory()) {
|
|
57
|
+
// Skip linked git worktrees (their `.git` is a gitdir pointer
|
|
58
|
+
// file) — e.g. agent-created repo copies under
|
|
59
|
+
// `.claude/worktrees/<id>/`. Walking them rescans the same
|
|
60
|
+
// Dockerfiles / compose / k8s manifests as the host tree.
|
|
61
|
+
if (isLinkedWorktreeDir(full)) continue;
|
|
62
|
+
walk(full, depth + 1);
|
|
63
|
+
} else if (entry.isFile()) out.push({ full, rel: path.relative(root, full), name: entry.name });
|
|
59
64
|
}
|
|
60
65
|
}
|
|
61
66
|
walk(root, 0);
|
|
@@ -16,15 +16,16 @@
|
|
|
16
16
|
|
|
17
17
|
const fs = require("node:fs");
|
|
18
18
|
const path = require("node:path");
|
|
19
|
+
const { codeExcludeSet, isLinkedWorktreeDir } = require("./scan-excludes");
|
|
19
20
|
|
|
20
21
|
const COLLECTOR_ID = "crypto-codebase";
|
|
21
22
|
|
|
22
23
|
const DEFAULT_MAX_DEPTH = 6;
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
// Shared code-scope exclusions: dependency caches, build output, VCS +
|
|
25
|
+
// agent/editor scratch (including `.claude/`). No crypto-codebase-specific
|
|
26
|
+
// extras — the shared defaults already cover every directory this scan
|
|
27
|
+
// should never descend into.
|
|
28
|
+
const DEFAULT_EXCLUDES = codeExcludeSet();
|
|
28
29
|
|
|
29
30
|
const SOURCE_EXTS = new Set([
|
|
30
31
|
".js", ".mjs", ".cjs", ".jsx", ".ts", ".tsx", ".mts", ".cts",
|
|
@@ -85,6 +86,11 @@ function walkTree(root, opts = {}) {
|
|
|
85
86
|
if (seen.has(real)) continue;
|
|
86
87
|
seen.add(real);
|
|
87
88
|
if (entry.isDirectory()) {
|
|
89
|
+
// Never descend into a linked git worktree (its `.git` is a
|
|
90
|
+
// gitdir pointer file). Agent tooling stamps full repo copies
|
|
91
|
+
// under `.claude/worktrees/<id>/`; walking them rescans the same
|
|
92
|
+
// source as the host tree and multiplies every hit.
|
|
93
|
+
if (isLinkedWorktreeDir(full)) continue;
|
|
88
94
|
walk(full, depth + 1);
|
|
89
95
|
} else if (entry.isFile()) {
|
|
90
96
|
out.push({ full, rel: path.relative(root, full), name: entry.name });
|