@blamejs/exceptd-skills 0.13.126 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +5 -3
- package/CHANGELOG.md +30 -0
- package/README.md +43 -9
- package/bin/exceptd.js +148 -35
- package/data/_indexes/_meta.json +2 -2
- package/data/playbooks/citation-hygiene.json +820 -0
- package/lib/citation-resolve.js +226 -0
- package/lib/collectors/cicd-pipeline-compromise.js +10 -1
- package/lib/collectors/citation-hygiene.js +465 -0
- package/lib/collectors/containers.js +12 -7
- package/lib/collectors/crypto-codebase.js +11 -5
- package/lib/collectors/library-author.js +82 -10
- package/lib/collectors/scan-excludes.js +85 -0
- package/lib/collectors/secrets.js +10 -6
- package/lib/cve-cli.js +51 -0
- package/lib/flag-suggest.js +2 -2
- package/lib/refresh-external.js +15 -0
- package/lib/rfc-cli.js +68 -0
- package/lib/schemas/cve-catalog.schema.json +13 -0
- package/lib/source-ghsa.js +3 -0
- package/lib/source-osv.js +4 -0
- package/lib/validate-package.js +7 -2
- package/manifest.json +44 -44
- package/package.json +1 -1
- package/sbom.cdx.json +134 -44
- package/scripts/check-agents-md-collectors.js +8 -0
- package/sources/validators/cve-validator.js +46 -1
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* lib/collectors/citation-hygiene.js
|
|
5
|
+
*
|
|
6
|
+
* Companion collector for the `citation-hygiene` playbook. Walks the cwd
|
|
7
|
+
* tree (source, comments, docstrings, and security documentation) and
|
|
8
|
+
* extracts every CVE and RFC citation, then cross-references each against
|
|
9
|
+
* the shipped CVE catalog (data/cve-catalog.json) and RFC index
|
|
10
|
+
* (data/rfc-references.json).
|
|
11
|
+
*
|
|
12
|
+
* It flips signal_overrides only for verdicts determinable offline from
|
|
13
|
+
* the catalogs:
|
|
14
|
+
* - fabricated-cve-id: a citation whose tail is not the canonical
|
|
15
|
+
* all-numeric CVE form (malformed examples: CVE-2024-XXXX, CVE-2024-zlib). Deterministic.
|
|
16
|
+
* - rejected-or-disputed-cve: a well-formed citation that resolves to a
|
|
17
|
+
* catalog entry whose analyst notes mark it rejected / disputed.
|
|
18
|
+
* - rfc-number-title-mismatch: a citation pairing a number with a title
|
|
19
|
+
* that conflicts with the index title for that number.
|
|
20
|
+
*
|
|
21
|
+
* Indicators that need an out-of-band lookup or human judgement
|
|
22
|
+
* (cve-citation-needs-external-verification, draft-mislabeled-as-rfc) are
|
|
23
|
+
* surfaced in the artifacts text and left UNFLIPPED so the runner returns
|
|
24
|
+
* inconclusive rather than a forced miss — the catalog is curated, not
|
|
25
|
+
* exhaustive, so absence is never a clean clear or a false fabrication.
|
|
26
|
+
*
|
|
27
|
+
* Interface: see lib/collectors/README.md
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
const fs = require("node:fs");
|
|
31
|
+
const path = require("node:path");
|
|
32
|
+
|
|
33
|
+
const { codeExcludeSet, isLinkedWorktreeDir } = require("./scan-excludes");
|
|
34
|
+
|
|
35
|
+
// Identifier reported in collector_meta and exported as this module's playbook_id.
const COLLECTOR_ID = "citation-hygiene";

// Default depth limit for the tree walk (the root directory is depth 0).
const DEFAULT_MAX_DEPTH = 8;
// Entry names the walk skips by default, taken from the shared scan-excludes helper.
const EXCLUDES = codeExcludeSet();
|
|
39
|
+
|
|
40
|
+
// Extensions (lowercase, with the leading dot) of files whose contents are
// scanned for citations. Source files carry citations in comments and
// docstrings; docs and config files carry them in security prose.
const SCAN_EXTS = new Set([
  // JavaScript / TypeScript
  ".js", ".mjs", ".cjs", ".jsx", ".ts", ".tsx", ".mts", ".cts",
  // Python
  ".py", ".pyi",
  // Go
  ".go",
  // Rust
  ".rs",
  // JVM languages
  ".java", ".kt", ".kts", ".scala",
  // Ruby
  ".rb",
  // PHP
  ".php",
  // C / C++
  ".c", ".h", ".cc", ".cpp", ".hpp", ".cxx",
  // C#
  ".cs",
  // Swift
  ".swift",
  // Objective-C / Objective-C++
  ".m", ".mm",
  // Documentation
  ".md", ".mdx", ".rst", ".txt", ".adoc", ".asciidoc",
  // Configuration
  ".yaml", ".yml", ".toml", ".cfg", ".ini",
]);

// Size ceiling (2 MiB) above which a file is not read at all.
const MAX_FILE_BYTES = 2 * 2 ** 20;
|
|
60
|
+
|
|
61
|
+
// Directory fragments that mark a file as illustrative: its citations are
// examples (tests, fixtures, issue / PR templates), not claims the project
// itself makes. Compared against a lowercased, forward-slash path.
const ILLUSTRATIVE_PATH_SEGMENTS = [
  "/test/", "/tests/", "/spec/", "/specs/", "/__tests__/",
  "/fixtures/", "/fixture/",
  "/.github/issue_template/", "/.github/pull_request_template/",
  "/issue_template/", "/pull_request_template/",
  // Collectors, playbooks and schemas contain CVE / RFC patterns and sample
  // citations by construction; scanning them would make the scanner flag
  // itself instead of the consumer's source.
  "/lib/collectors/", "/data/playbooks/", "/lib/schemas/",
];

/**
 * Tell whether a relative path points at an illustrative surface.
 *
 * @param {string} rel - Path relative to the scan root (either separator style).
 * @returns {boolean} true when the path contains one of the illustrative
 *   segments, has a `.template` name component, or is a `*.test.*` /
 *   `*.spec.*` file.
 */
function isIllustrativePath(rel) {
  // Wrap in slashes so a segment can match at the very start or end of the path.
  const wrapped = `/${rel.replace(/\\/g, "/").toLowerCase()}/`;
  return (
    ILLUSTRATIVE_PATH_SEGMENTS.some((segment) => wrapped.includes(segment)) ||
    /\.template($|\.)/i.test(rel) ||
    /(?:^|[\\/])[^\\/]+\.(test|spec)\.[a-z]+$/i.test(rel)
  );
}
|
|
83
|
+
|
|
84
|
+
/**
 * Recursively list the regular files under `root`.
 *
 * @param {string} root - Directory to start from (depth 0).
 * @param {{maxDepth?: number, excludes?: Set<string>}} [opts] - `maxDepth`
 *   limits how deep the walk descends (default DEFAULT_MAX_DEPTH);
 *   `excludes` is a set of entry names to skip (default EXCLUDES).
 * @returns {Array<{full: string, rel: string, name: string}>} One record per
 *   file: joined path, path relative to `root`, and base name.
 */
function walkTree(root, opts = {}) {
  const depthLimit = opts.maxDepth ?? DEFAULT_MAX_DEPTH;
  const skipNames = opts.excludes ?? EXCLUDES;
  const collected = [];
  // Real paths already visited, so an entry reachable twice is handled once.
  const visitedRealPaths = new Set();

  const descend = (dir, level) => {
    if (level > depthLimit) return;

    let dirents;
    try {
      dirents = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory: skip it rather than abort the whole walk.
      return;
    }

    for (const dirent of dirents) {
      if (skipNames.has(dirent.name)) continue;

      const full = path.join(dir, dirent.name);
      let resolved;
      try {
        resolved = fs.realpathSync(full);
      } catch {
        continue;
      }
      if (visitedRealPaths.has(resolved)) continue;
      visitedRealPaths.add(resolved);

      if (dirent.isFile()) {
        collected.push({ full, rel: path.relative(root, full), name: dirent.name });
        continue;
      }
      if (!dirent.isDirectory()) continue;
      // Do not descend into linked git worktrees (agent scratch copies):
      // walking them rescans unrelated repo state.
      if (isLinkedWorktreeDir(full)) continue;
      descend(full, level + 1);
    }
  };

  descend(root, 0);
  return collected;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Read a file as UTF-8 text without ever throwing.
 *
 * @param {string} full - Path of the file to read.
 * @returns {string|null} The file contents, or null when the file is larger
 *   than MAX_FILE_BYTES or when stat / read fails for any reason.
 */
function readSafe(full) {
  try {
    const { size } = fs.statSync(full);
    return size > MAX_FILE_BYTES ? null : fs.readFileSync(full, "utf8");
  } catch {
    return null;
  }
}
|
|
123
|
+
|
|
124
|
+
// Deliberately loose CVE matcher: a 4-digit year followed by any run of
// digits or letters, so malformed ids such as CVE-2024-XXXX or
// CVE-2024-zlib are captured instead of being skipped. Each match is then
// checked against the strict canonical form below.
const CVE_CITATION_RE = /CVE-(\d{4})-([0-9A-Za-z]+)/g;
// Canonical form: 4-digit year and an all-digit tail of at least four digits.
const CVE_CANONICAL_RE = /^CVE-\d{4}-\d{4,}$/;

// RFC citation in any of the spellings `RFC 9404`, `RFC9404`, `RFC-9404`
// (case-insensitive). Group 1 is the number.
const RFC_CITATION_RE = /RFC[\s-]?(\d{1,5})\b/gi;

// Vocabulary that marks a catalog note as recording rejected / disputed /
// withdrawn status.
const REJECT_DISPUTE_RE = /\b(reject(?:ed|s|ion)?|disputed?|withdrawn)\b/i;

// Draft wording near an RFC citation; feeds the draft-as-RFC candidate list,
// which is reported but never flipped to a verdict.
const DRAFT_LANGUAGE_RE = /\b(draft-[a-z0-9-]+|internet[- ]draft|work[- ]in[- ]progress|i-d\b)\b/i;
|
|
138
|
+
|
|
139
|
+
/**
 * Read the CVE catalog and the RFC index that ship under data/.
 *
 * The data directory is resolved relative to this module (two levels up),
 * so the lookup does not depend on the process working directory. Keys
 * starting with "_" are skipped in both files.
 *
 * A failure to read or parse either file is not thrown; it is recorded in
 * `errors` and the corresponding collections stay empty.
 *
 * @returns {{cveKeys: Set<string>, cveNotes: Map<string, string>,
 *   rfcTitles: Map<number, string>, errors: Array<object>}}
 *   `cveKeys` holds every catalog id; `cveNotes` maps an id to that entry's
 *   own note fields joined with " • "; `rfcTitles` maps RFC number to title.
 */
function loadCatalogs() {
  const dataDir = path.resolve(__dirname, "..", "..", "data");
  const readJson = (fileName) =>
    JSON.parse(fs.readFileSync(path.join(dataDir, fileName), "utf8"));

  // Per-entry note fields searched later for rejected / disputed wording.
  // Joining only the entry's OWN fields keeps a neighbour's status from
  // leaking onto the cited id.
  const noteFields = [
    "cvss_note", "active_exploitation_notes", "vector",
    "discovery_attribution_note", "ai_discovery_notes",
    "_kev_short_description",
  ];

  const errors = [];
  const cveKeys = new Set();
  const cveNotes = new Map();
  const rfcTitles = new Map();

  try {
    const catalog = readJson("cve-catalog.json");
    Object.entries(catalog).forEach(([id, entry]) => {
      if (id.startsWith("_")) return;
      cveKeys.add(id);
      if (!entry || typeof entry !== "object") return;
      const notes = noteFields
        .map((field) => entry[field])
        .filter((text) => typeof text === "string");
      cveNotes.set(id, notes.join(" • "));
    });
  } catch (e) {
    errors.push({ artifact_id: "cve-catalog", kind: "catalog_load_failed", reason: e.message });
  }

  try {
    const index = readJson("rfc-references.json");
    Object.entries(index).forEach(([key, entry]) => {
      if (key.startsWith("_")) return;
      const usable =
        entry &&
        typeof entry === "object" &&
        typeof entry.number === "number" &&
        typeof entry.title === "string";
      if (usable) rfcTitles.set(entry.number, entry.title);
    });
  } catch (e) {
    errors.push({ artifact_id: "rfc-index", kind: "catalog_load_failed", reason: e.message });
  }

  return { cveKeys, cveNotes, rfcTitles, errors };
}
|
|
187
|
+
|
|
188
|
+
/**
 * Normalise a title for comparison: lowercase, replace every character that
 * is not a-z, 0-9 or whitespace with a space, collapse whitespace runs,
 * trim, and strip a leading "the".
 *
 * The trim happens BEFORE the leading-"the" strip. A title that starts with
 * punctuation or whitespace (`"The Foo"`, `  The Foo`) has a leading space
 * after the replacement steps, and the `^the` anchor would otherwise never
 * match it.
 *
 * @param {string} s - Raw title or adjacent text.
 * @returns {string} The normalised, single-space-separated form.
 */
function normalizeTitle(s) {
  return s
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, " ")
    .replace(/\s+/g, " ")
    .trim()
    .replace(/^the\s+/, "");
}
|
|
198
|
+
|
|
199
|
+
// Words dropped before titles are compared: articles / prepositions plus
// generic document vocabulary ("protocol", "version", "rfc", ...).
const TITLE_STOPWORDS = new Set([
  "the", "a", "an", "of", "for", "and", "to", "in", "on", "with",
  "protocol", "version", "extension", "specification", "spec", "rfc",
]);

/**
 * Set of meaningful tokens in a piece of text: normalised words that are at
 * least three characters long, not purely numeric, and not stopwords.
 *
 * @param {string} s - Title or adjacent text.
 * @returns {Set<string>} The distinct meaningful tokens.
 */
function titleTokens(s) {
  // Same filter as orderedTitleTokens; only the container differs.
  return new Set(orderedTitleTokens(s));
}
|
|
209
|
+
|
|
210
|
+
/**
 * Meaningful tokens of a title in their original order (duplicates kept).
 * A token is meaningful when it has at least three characters, is not
 * purely numeric and is not a stopword. Used for acronym construction.
 *
 * @param {string} s - Title or adjacent text.
 * @returns {string[]} The meaningful tokens, in order of appearance.
 */
function orderedTitleTokens(s) {
  const isMeaningful = (word) =>
    word.length >= 3 && !/^\d+$/.test(word) && !TITLE_STOPWORDS.has(word);
  return normalizeTitle(s).split(" ").filter(isMeaningful);
}
|
|
217
|
+
|
|
218
|
+
/**
 * Lowercase acronym formed from the first letter of each meaningful word of
 * a title (stopwords such as "protocol" / "version" do not contribute).
 * Lets an abbreviation in the adjacent text be recognised as naming the same
 * document rather than a different title.
 *
 * @param {string} realTitle - Title as recorded in the index.
 * @returns {string} The acronym; empty when the title has no meaningful words.
 */
function titleAcronym(realTitle) {
  let acronym = "";
  for (const word of orderedTitleTokens(realTitle)) {
    acronym += word[0];
  }
  return acronym;
}
|
|
225
|
+
|
|
226
|
+
/**
 * Classify whether text next to an RFC citation states a title that
 * conflicts with the indexed title for that RFC.
 *
 * The rule is intentionally strict, because wrongly telling an author that a
 * correct citation is wrong is costly:
 *   - fewer than THREE meaningful tokens in the adjacent text is treated as
 *     a nickname / abbreviation, i.e. no title claim at all;
 *   - an indexed title with no meaningful tokens cannot be compared, so it
 *     is also no title claim;
 *   - the title's acronym appearing among the adjacent tokens means the same
 *     document is meant;
 *   - otherwise any shared content word counts as a paraphrase of the right
 *     document, and only ZERO overlap is reported as a mismatch.
 *
 * The caller is expected to have confirmed the cited number is in the index.
 *
 * NOTE(review): the caller passes the whole source line as `adjacentText`,
 * so ordinary prose around a citation (three or more content words, none of
 * them in the real title) is classified as "mismatch" — worth confirming
 * this is the intended precision.
 * NOTE(review): titleTokens drops tokens shorter than three characters, so a
 * two-letter acronym can never be found in the adjacent tokens even though
 * the length check admits it.
 *
 * @param {string} adjacentText - Text surrounding the citation.
 * @param {string} realTitle - Title recorded in the index for the cited RFC.
 * @returns {"mismatch"|"match"|"no-title-claim"}
 */
function classifyRfcTitle(adjacentText, realTitle) {
  const claimed = titleTokens(adjacentText);
  if (claimed.size < 3) return "no-title-claim";

  const actual = titleTokens(realTitle);
  if (actual.size === 0) return "no-title-claim";

  // Abbreviation of the real title present in the text: same document.
  const acronym = titleAcronym(realTitle);
  if (acronym.length >= 2 && claimed.has(acronym)) return "match";

  // Precision over recall: a single shared content word is enough to match.
  const sharesContentWord = [...claimed].some((token) => actual.has(token));
  return sharesContentWord ? "match" : "mismatch";
}
|
|
262
|
+
|
|
263
|
+
/**
 * Return the line of `content` that contains position `index`, without its
 * terminating "\n". Used as the adjacent text for RFC title comparison.
 *
 * @param {string} content - Full file text.
 * @param {number} index - Character offset inside the wanted line.
 * @returns {string} The text between the surrounding newlines.
 */
function lineAround(content, index) {
  const lineStart = content.lastIndexOf("\n", index) + 1;
  const newlineAt = content.indexOf("\n", index);
  // No newline after `index`: the line runs to the end of the content.
  return newlineAt === -1
    ? content.slice(lineStart)
    : content.slice(lineStart, newlineAt);
}
|
|
271
|
+
|
|
272
|
+
/**
 * Scan the tree under `cwd` for CVE and RFC citations and cross-reference
 * them against the shipped catalogs.
 *
 * Verdict policy:
 *   - fabricated-cve-id is purely syntactic, so it is always hit / miss.
 *   - rejected-or-disputed-cve and rfc-number-title-mismatch each depend on
 *     ONE catalog. When that catalog failed to load the check could not run,
 *     so the indicator is reported as "inconclusive" instead of a false
 *     clean "miss".
 *   - citations absent from a catalog are listed for external verification;
 *     each list is gated on its OWN catalog having loaded, so a failure of
 *     the RFC index does not hide unverified CVE citations (and vice versa).
 *   - files on illustrative paths still count toward the citation totals but
 *     never contribute hits or needs-verification entries.
 *
 * @param {{cwd?: string}} [options] - `cwd` is the scan root (defaults to
 *   process.cwd()).
 * @returns {{precondition_checks: object, artifacts: object,
 *   signal_overrides: object, collector_meta: object,
 *   collector_errors: Array<object>}} The collector result.
 */
function collect({ cwd = process.cwd() } = {}) {
  const errors = [];
  const startTime = Date.now();
  const root = path.resolve(cwd);

  const { cveKeys, cveNotes, rfcTitles, errors: catErrors } = loadCatalogs();
  for (const e of catErrors) errors.push(e);
  const cveCatalogLoaded = cveKeys.size > 0;
  const rfcIndexLoaded = rfcTitles.size > 0;
  const catalogsLoaded = cveCatalogLoaded && rfcIndexLoaded;

  let files;
  try {
    files = walkTree(root);
  } catch (e) {
    errors.push({ kind: "walk_failed", reason: e.message });
    files = [];
  }
  if (files.length > 50000) {
    errors.push({
      kind: "file_count_capped",
      reason: `walked ${files.length} files; capping content scan at 50000.`,
    });
    files = files.slice(0, 50000);
  }

  const scanFiles = files.filter((f) => SCAN_EXTS.has(path.extname(f.name).toLowerCase()));

  // Hit lists. Each entry keeps the file and the citation text so the
  // artifact summary is auditable. CVE / RFC literals are references, not
  // secrets, so retaining them in the value text is safe.
  const hits = {
    "fabricated-cve-id": [],
    "rejected-or-disputed-cve": [],
    "rfc-number-title-mismatch": [],
  };
  // Needs-verification lists: surfaced in artifacts, never turned into a
  // hit / miss verdict.
  const needsVerify = {
    cve_not_in_catalog: [],
    rfc_not_in_index: [],
    draft_as_rfc_candidates: [],
  };

  let totalCveCitations = 0;
  let totalRfcCitations = 0;

  for (const f of scanFiles) {
    const content = readSafe(f.full);
    if (content == null) {
      errors.push({ artifact_id: "source-files", kind: "read_failed", reason: f.rel });
      continue;
    }
    const illustrative = isIllustrativePath(f.rel);

    // ---- CVE citations ----
    for (const m of content.matchAll(CVE_CITATION_RE)) {
      const full = m[0];
      totalCveCitations++;
      if (!CVE_CANONICAL_RE.test(full)) {
        // Fabricated / malformed id. Illustrative surfaces are demoted.
        if (!illustrative) {
          hits["fabricated-cve-id"].push({ file: f.rel, citation: full });
        }
        continue;
      }
      // Well-formed: cross-reference the catalog.
      if (cveKeys.has(full)) {
        const note = cveNotes.get(full) || "";
        if (REJECT_DISPUTE_RE.test(note) && !illustrative) {
          hits["rejected-or-disputed-cve"].push({ file: f.rel, citation: full });
        }
      } else if (cveCatalogLoaded && !illustrative) {
        // Absent from the curated catalog: needs an external lookup. This
        // is NOT a fabrication. Gated on the CVE catalog only, because the
        // RFC index has no bearing on this check.
        needsVerify.cve_not_in_catalog.push({ file: f.rel, citation: full });
      }
    }

    // ---- RFC citations ----
    for (const m of content.matchAll(RFC_CITATION_RE)) {
      totalRfcCitations++;
      const num = Number(m[1]);
      if (!Number.isFinite(num)) continue;
      const line = lineAround(content, m.index);
      if (rfcTitles.has(num)) {
        const verdict = classifyRfcTitle(line, rfcTitles.get(num));
        if (verdict === "mismatch" && !illustrative) {
          hits["rfc-number-title-mismatch"].push({
            file: f.rel,
            citation: `RFC ${num}`,
            real_title: rfcTitles.get(num),
          });
        }
      } else if (rfcIndexLoaded && !illustrative) {
        // Number not in the index: needs verification. With draft wording on
        // the same line it is also recorded as a draft-as-RFC candidate
        // (still inconclusive). Gated on the RFC index only.
        needsVerify.rfc_not_in_index.push({ file: f.rel, citation: `RFC ${num}` });
        if (DRAFT_LANGUAGE_RE.test(line)) {
          needsVerify.draft_as_rfc_candidates.push({ file: f.rel, citation: `RFC ${num}` });
        }
      }
    }
  }

  // A check that could not run (its catalog is missing) is inconclusive;
  // otherwise the verdict follows from whether anything was found.
  const verdictFor = (list, checkRan) => {
    if (!checkRan) return "inconclusive";
    return list.length > 0 ? "hit" : "miss";
  };

  const signal_overrides = {
    "fabricated-cve-id": verdictFor(hits["fabricated-cve-id"], true),
    "rfc-number-title-mismatch": verdictFor(hits["rfc-number-title-mismatch"], rfcIndexLoaded),
    "rejected-or-disputed-cve": verdictFor(hits["rejected-or-disputed-cve"], cveCatalogLoaded),
  };
  // Citations the offline catalog cannot confirm are themselves an
  // inconclusive state, so the indicator is set to inconclusive (not a clean
  // miss) whenever such citations exist, and left absent otherwise.
  if (needsVerify.cve_not_in_catalog.length > 0) {
    signal_overrides["cve-citation-needs-external-verification"] = "inconclusive";
  }

  // Human-readable digest of a list: a count plus at most five entries.
  const summarize = (list) => {
    if (list.length === 0) return "0 hits";
    const head = list.slice(0, 5).map((h) => {
      let s = `${h.file}: ${h.citation}`;
      if (h.real_title) s += ` (index title: "${h.real_title}")`;
      return s;
    }).join("; ");
    return `${list.length} hit(s): ${head}` + (list.length > 5 ? "; …" : "");
  };

  const artifacts = {
    "cve-citations-in-source": {
      value: `${totalCveCitations} CVE citation(s) found. ` +
        `fabricated: ${summarize(hits["fabricated-cve-id"])}. ` +
        `rejected/disputed: ${summarize(hits["rejected-or-disputed-cve"])}. ` +
        `needs-external-verification (well-formed, absent from catalog): ${summarize(needsVerify.cve_not_in_catalog)}.`,
      captured: true,
    },
    "rfc-citations-in-source": {
      value: `${totalRfcCitations} RFC citation(s) found. ` +
        `title-mismatch: ${summarize(hits["rfc-number-title-mismatch"])}. ` +
        `not-in-index (needs verification): ${summarize(needsVerify.rfc_not_in_index)}. ` +
        `draft-as-rfc candidates: ${summarize(needsVerify.draft_as_rfc_candidates)}.`,
      captured: true,
    },
    "cve-catalog": {
      value: cveCatalogLoaded
        ? `loaded ${cveKeys.size} catalog entries for cross-reference`
        : "catalog unavailable — CVE cross-reference could not run",
      captured: cveCatalogLoaded,
      ...(cveCatalogLoaded ? {} : { reason: "cve-catalog.json failed to load" }),
    },
    "rfc-index": {
      value: rfcIndexLoaded
        ? `loaded ${rfcTitles.size} RFC titles for cross-reference`
        : "RFC index unavailable — RFC cross-reference could not run",
      captured: rfcIndexLoaded,
      ...(rfcIndexLoaded ? {} : { reason: "rfc-references.json failed to load" }),
    },
  };

  return {
    precondition_checks: {
      "repo-cites-security-references": totalCveCitations > 0 || totalRfcCitations > 0,
    },
    artifacts,
    signal_overrides,
    collector_meta: {
      collector_id: COLLECTOR_ID,
      collector_version: "2026-05-26",
      platform: process.platform,
      captured_at: new Date().toISOString(),
      cwd: root,
      duration_ms: Date.now() - startTime,
      files_walked: files.length,
      scan_files_scanned: scanFiles.length,
      cve_citations: totalCveCitations,
      rfc_citations: totalRfcCitations,
      catalogs_loaded: catalogsLoaded,
    },
    collector_errors: errors,
  };
}
|
|
464
|
+
|
|
465
|
+
// Module interface: `playbook_id` carries the collector id ("citation-hygiene"),
// `collect` is the scan entry point.
module.exports = { playbook_id: COLLECTOR_ID, collect };
|
|
@@ -20,15 +20,14 @@
|
|
|
20
20
|
|
|
21
21
|
const fs = require("node:fs");
|
|
22
22
|
const path = require("node:path");
|
|
23
|
+
const { codeExcludeSet, isLinkedWorktreeDir } = require("./scan-excludes");
|
|
23
24
|
|
|
24
25
|
const COLLECTOR_ID = "containers";
|
|
25
26
|
|
|
26
27
|
const DEFAULT_MAX_DEPTH = 6;
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
"target", ".idea", ".vscode",
|
|
31
|
-
]);
|
|
28
|
+
// Shared code-scope exclusions (dependency caches, build output, VCS +
|
|
29
|
+
// agent/editor scratch including `.claude/`); no container-specific extras.
|
|
30
|
+
const DEFAULT_EXCLUDES = codeExcludeSet();
|
|
32
31
|
|
|
33
32
|
const DOCKERFILE_NAMES = new Set(["Dockerfile", "Containerfile"]);
|
|
34
33
|
const DOCKERFILE_EXTS = new Set([".dockerfile", ".containerfile"]);
|
|
@@ -54,8 +53,14 @@ function walkTree(root, opts = {}) {
|
|
|
54
53
|
try { real = fs.realpathSync(full); } catch { continue; }
|
|
55
54
|
if (seen.has(real)) continue;
|
|
56
55
|
seen.add(real);
|
|
57
|
-
if (entry.isDirectory())
|
|
58
|
-
|
|
56
|
+
if (entry.isDirectory()) {
|
|
57
|
+
// Skip linked git worktrees (their `.git` is a gitdir pointer
|
|
58
|
+
// file) — e.g. agent-created repo copies under
|
|
59
|
+
// `.claude/worktrees/<id>/`. Walking them rescans the same
|
|
60
|
+
// Dockerfiles / compose / k8s manifests as the host tree.
|
|
61
|
+
if (isLinkedWorktreeDir(full)) continue;
|
|
62
|
+
walk(full, depth + 1);
|
|
63
|
+
} else if (entry.isFile()) out.push({ full, rel: path.relative(root, full), name: entry.name });
|
|
59
64
|
}
|
|
60
65
|
}
|
|
61
66
|
walk(root, 0);
|
|
@@ -16,15 +16,16 @@
|
|
|
16
16
|
|
|
17
17
|
const fs = require("node:fs");
|
|
18
18
|
const path = require("node:path");
|
|
19
|
+
const { codeExcludeSet, isLinkedWorktreeDir } = require("./scan-excludes");
|
|
19
20
|
|
|
20
21
|
const COLLECTOR_ID = "crypto-codebase";
|
|
21
22
|
|
|
22
23
|
const DEFAULT_MAX_DEPTH = 6;
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
// Shared code-scope exclusions: dependency caches, build output, VCS +
|
|
25
|
+
// agent/editor scratch (including `.claude/`). No crypto-codebase-specific
|
|
26
|
+
// extras — the shared defaults already cover every directory this scan
|
|
27
|
+
// should never descend into.
|
|
28
|
+
const DEFAULT_EXCLUDES = codeExcludeSet();
|
|
28
29
|
|
|
29
30
|
const SOURCE_EXTS = new Set([
|
|
30
31
|
".js", ".mjs", ".cjs", ".jsx", ".ts", ".tsx", ".mts", ".cts",
|
|
@@ -85,6 +86,11 @@ function walkTree(root, opts = {}) {
|
|
|
85
86
|
if (seen.has(real)) continue;
|
|
86
87
|
seen.add(real);
|
|
87
88
|
if (entry.isDirectory()) {
|
|
89
|
+
// Never descend into a linked git worktree (its `.git` is a
|
|
90
|
+
// gitdir pointer file). Agent tooling stamps full repo copies
|
|
91
|
+
// under `.claude/worktrees/<id>/`; walking them rescans the same
|
|
92
|
+
// source as the host tree and multiplies every hit.
|
|
93
|
+
if (isLinkedWorktreeDir(full)) continue;
|
|
88
94
|
walk(full, depth + 1);
|
|
89
95
|
} else if (entry.isFile()) {
|
|
90
96
|
out.push({ full, rel: path.relative(root, full), name: entry.name });
|