@blamejs/exceptd-skills 0.12.10 → 0.12.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/source-osv.js CHANGED
@@ -39,17 +39,24 @@
39
39
  const https = require("https");
40
40
  const fs = require("fs");
41
41
 
42
+ // OSV_HOST_OVERRIDE lets tests redirect the network call to a local HTTP
43
+ // server bound on 127.0.0.1:<port>. The override accepts either a bare
44
+ // `host:port` string or a full `http://host:port` URL. When set, the
45
+ // underlying request switches from `https` to `http` so the test server
46
+ // doesn't need a TLS cert. Production callers never set this.
42
47
  const OSV_HOST = "api.osv.dev";
43
48
  const REQUEST_TIMEOUT_MS = 10000;
44
49
  const USER_AGENT = "exceptd-security/source-osv (+https://exceptd.com)";
45
50
 
46
- // Identifier namespaces OSV uses as PRIMARY keys (i.e. that route through
47
- // this module rather than GHSA's CVE-search path). Keep this list in sync
48
- // with the dispatcher in lib/refresh-external.js — adding a new prefix
49
- // here is not enough; the dispatcher's --advisory regex must also accept it.
51
+ // Identifier namespaces OSV uses as PRIMARY keys. GHSA-* is intentionally
52
+ // NOT in this list — `seedSingleAdvisory` in lib/refresh-external.js routes
53
+ // CVE-* and GHSA-* through `source-ghsa` because GHSA carries richer field
54
+ // coverage (cvss object, vulnerable_version_range string, ghsa_id linkage)
55
+ // than OSV's import of the same advisories. Keep this list in sync with the
56
+ // dispatcher in lib/refresh-external.js — adding a new prefix here is not
57
+ // enough; the dispatcher's --advisory regex must also accept it.
50
58
  const OSV_ID_PREFIXES = [
51
59
  "MAL-", // OSSF Malicious Packages
52
- "GHSA-", // GitHub Security Advisories (OSV import)
53
60
  "SNYK-", // Snyk
54
61
  "RUSTSEC-", // RustSec
55
62
  "GO-", // Go vuln DB
@@ -72,24 +79,47 @@ const OSV_ID_PREFIXES = [
72
79
 
73
80
  /**
74
81
  * Return true when `id` looks like an OSV-native primary key (i.e. NOT a
75
- * CVE-* identifier). CVE-* identifiers continue to route through the GHSA
76
- * source because GHSA carries richer field coverage for CVE-keyed records.
82
+ * CVE-* identifier and NOT a GHSA-* identifier). Both CVE-* and GHSA-*
83
+ * route through `source-ghsa` for richer field coverage.
77
84
  */
78
85
  function isOsvId(id) {
79
86
  if (!id || typeof id !== "string") return false;
80
87
  const up = id.toUpperCase();
81
88
  if (/^CVE-\d{4}-\d+$/.test(up)) return false;
89
+ if (up.startsWith("GHSA-")) return false;
82
90
  return OSV_ID_PREFIXES.some((p) => up.startsWith(p));
83
91
  }
84
92
 
85
93
  /**
86
- * Low-level HTTPS GET against OSV. Resolves to { ok, record|error, source }.
94
+ * Resolve the OSV transport target. When OSV_HOST_OVERRIDE is set the
95
+ * request switches to plain HTTP on the override host:port so test
96
+ * harnesses can stand up a local server without TLS. Production omits the
97
+ * override entirely and lands on api.osv.dev over HTTPS.
87
98
  */
88
- function osvGet(path, timeoutMs = REQUEST_TIMEOUT_MS) {
99
+ function osvTransport() {
100
+ const override = process.env.OSV_HOST_OVERRIDE;
101
+ if (!override) return { mod: https, host: OSV_HOST, port: 443 };
102
+ // Accept either "host:port" or a full URL.
103
+ let raw = override.trim();
104
+ if (/^https?:\/\//i.test(raw)) {
105
+ const u = new URL(raw);
106
+ return { mod: require("http"), host: u.hostname, port: parseInt(u.port, 10) || 80 };
107
+ }
108
+ const [h, p] = raw.split(":");
109
+ return { mod: require("http"), host: h || "127.0.0.1", port: parseInt(p, 10) || 80 };
110
+ }
111
+
112
+ /**
113
+ * Low-level GET against OSV. Resolves to { ok, record|error, source }.
114
+ * Honors OSV_HOST_OVERRIDE for offline tests.
115
+ */
116
+ function osvGet(reqPath, timeoutMs = REQUEST_TIMEOUT_MS) {
89
117
  return new Promise((resolve) => {
90
- const req = https.get({
91
- host: OSV_HOST,
92
- path,
118
+ const { mod, host, port } = osvTransport();
119
+ const req = mod.get({
120
+ host,
121
+ port,
122
+ path: reqPath,
93
123
  headers: {
94
124
  "Accept": "application/json",
95
125
  "User-Agent": USER_AGENT,
@@ -98,7 +128,11 @@ function osvGet(path, timeoutMs = REQUEST_TIMEOUT_MS) {
98
128
  }, (res) => {
99
129
  if (res.statusCode !== 200) {
100
130
  res.resume();
101
- return resolve({ ok: false, error: `OSV returned HTTP ${res.statusCode}`, source: "offline" });
131
+ const status = res.statusCode;
132
+ const error = status === 429
133
+ ? `OSV rate-limited (HTTP 429)`
134
+ : `OSV returned HTTP ${status}`;
135
+ return resolve({ ok: false, error, status, source: "offline" });
102
136
  }
103
137
  const chunks = [];
104
138
  res.on("data", (c) => chunks.push(c));
@@ -111,20 +145,22 @@ function osvGet(path, timeoutMs = REQUEST_TIMEOUT_MS) {
111
145
  }
112
146
  });
113
147
  });
114
- req.on("timeout", () => req.destroy(new Error("timeout")));
148
+ req.on("timeout", () => req.destroy(new Error("OSV request timed out")));
115
149
  req.on("error", (e) => resolve({ ok: false, error: e.message, source: "offline" }));
116
150
  });
117
151
  }
118
152
 
119
153
  /**
120
- * Low-level HTTPS POST against OSV. Body is JSON-stringified.
154
+ * Low-level POST against OSV. Body is JSON-stringified.
121
155
  */
122
- function osvPost(path, body, timeoutMs = REQUEST_TIMEOUT_MS) {
156
+ function osvPost(reqPath, body, timeoutMs = REQUEST_TIMEOUT_MS) {
123
157
  return new Promise((resolve) => {
124
158
  const payload = Buffer.from(JSON.stringify(body), "utf8");
125
- const req = https.request({
126
- host: OSV_HOST,
127
- path,
159
+ const { mod, host, port } = osvTransport();
160
+ const req = mod.request({
161
+ host,
162
+ port,
163
+ path: reqPath,
128
164
  method: "POST",
129
165
  headers: {
130
166
  "Content-Type": "application/json",
@@ -136,7 +172,11 @@ function osvPost(path, body, timeoutMs = REQUEST_TIMEOUT_MS) {
136
172
  }, (res) => {
137
173
  if (res.statusCode !== 200) {
138
174
  res.resume();
139
- return resolve({ ok: false, error: `OSV returned HTTP ${res.statusCode}`, source: "offline" });
175
+ const status = res.statusCode;
176
+ const error = status === 429
177
+ ? `OSV rate-limited (HTTP 429)`
178
+ : `OSV returned HTTP ${status}`;
179
+ return resolve({ ok: false, error, status, source: "offline" });
140
180
  }
141
181
  const chunks = [];
142
182
  res.on("data", (c) => chunks.push(c));
@@ -149,7 +189,7 @@ function osvPost(path, body, timeoutMs = REQUEST_TIMEOUT_MS) {
149
189
  }
150
190
  });
151
191
  });
152
- req.on("timeout", () => req.destroy(new Error("timeout")));
192
+ req.on("timeout", () => req.destroy(new Error("OSV request timed out")));
153
193
  req.on("error", (e) => resolve({ ok: false, error: e.message, source: "offline" }));
154
194
  req.write(payload);
155
195
  req.end();
@@ -157,14 +197,36 @@ function osvPost(path, body, timeoutMs = REQUEST_TIMEOUT_MS) {
157
197
  }
158
198
 
159
199
  /**
160
- * Read EXCEPTD_OSV_FIXTURE and return an array of OSV records. Accepts
161
- * either a single object or an array on disk.
200
+ * Read EXCEPTD_OSV_FIXTURE and return a structured envelope. Matches the
201
+ * GHSA-source convention: on any failure (missing file, malformed JSON,
202
+ * root not object/array) return `{ ok: false, error, source: "offline" }`
203
+ * rather than throw — operators on the CLI surface get a structured error
204
+ * instead of a Node stack trace.
205
+ *
206
+ * Returns:
207
+ * null when env var is unset
208
+ * { ok: true, advisories: [...], source } on success
209
+ * { ok: false, error, source: "offline" } on any failure
162
210
  */
163
211
  function readFixture() {
164
212
  const fp = process.env.EXCEPTD_OSV_FIXTURE;
165
213
  if (!fp) return null;
166
- const raw = JSON.parse(fs.readFileSync(fp, "utf8"));
167
- return Array.isArray(raw) ? raw : [raw];
214
+ let raw;
215
+ try {
216
+ raw = fs.readFileSync(fp, "utf8");
217
+ } catch (e) {
218
+ return { ok: false, error: `fixture: ${e.message}`, source: "offline" };
219
+ }
220
+ let parsed;
221
+ try {
222
+ parsed = JSON.parse(raw);
223
+ } catch (e) {
224
+ return { ok: false, error: `fixture: ${e.message}`, source: "offline" };
225
+ }
226
+ if (parsed == null || (typeof parsed !== "object")) {
227
+ return { ok: false, error: `fixture: root must be an OSV record object or array (got ${typeof parsed})`, source: "offline" };
228
+ }
229
+ return { ok: true, advisories: Array.isArray(parsed) ? parsed : [parsed], source: "fixture" };
168
230
  }
169
231
 
170
232
  /**
@@ -176,12 +238,19 @@ function readFixture() {
176
238
  */
177
239
  async function fetchAdvisoryById(id, opts = {}) {
178
240
  if (!id || typeof id !== "string") {
179
- return { ok: false, error: "id is required (MAL-*, GHSA-*, SNYK-*, etc.)", source: "offline" };
241
+ return { ok: false, error: "id is required (MAL-*, SNYK-*, RUSTSEC-*, etc.)", source: "offline" };
180
242
  }
243
+ // OSV.dev's /v1/vulns/{id} is case-sensitive — `mal-2026-3083` 404s while
244
+ // `MAL-2026-3083` resolves. Uppercase at entry so operators piping
245
+ // lowercase ids from grep/jq don't get a surprising 404 from the network
246
+ // path. Fixture lookup already case-folds, so this normalization is a
247
+ // no-op there but harmless.
248
+ id = id.toUpperCase();
181
249
  const fixture = readFixture();
182
250
  if (fixture) {
183
- const want = id.toUpperCase();
184
- const match = fixture.find((rec) => {
251
+ if (!fixture.ok) return fixture; // F1: structured error envelope
252
+ const want = id;
253
+ const match = fixture.advisories.find((rec) => {
185
254
  const recId = (rec && rec.id) ? String(rec.id).toUpperCase() : null;
186
255
  if (recId === want) return true;
187
256
  const aliases = Array.isArray(rec?.aliases) ? rec.aliases.map((a) => String(a).toUpperCase()) : [];
@@ -205,9 +274,10 @@ async function fetchAdvisoriesForPackage(name, ecosystem, version, opts = {}) {
205
274
  }
206
275
  const fixture = readFixture();
207
276
  if (fixture) {
277
+ if (!fixture.ok) return fixture; // F1: structured error envelope
208
278
  // Best-effort fixture filtering: match any record whose `affected[]`
209
279
  // contains the requested package + ecosystem (+ version when set).
210
- const matches = fixture.filter((rec) => {
280
+ const matches = fixture.advisories.filter((rec) => {
211
281
  const affected = Array.isArray(rec?.affected) ? rec.affected : [];
212
282
  return affected.some((a) => {
213
283
  const pkg = a?.package || {};
@@ -241,37 +311,112 @@ function pickCatalogKey(rec) {
241
311
  }
242
312
 
243
313
  /**
244
- * Pull a numeric CVSS score out of an OSV severity[] entry (CVSS v3 / v4
245
- * vector strings start with "CVSS:3.x/" or "CVSS:4.0/"). Returns null if
246
- * no parseable score is present.
314
+ * CVSS 3.1 base-score computation from a vector string. Implements Table 6
315
+ * of the FIRST CVSS 3.1 specification. Used when an OSV record carries a
316
+ * vector but no embedded numeric score (the common case for MAL-* records).
317
+ * Returns null on malformed input.
318
+ *
319
+ * Reference: https://www.first.org/cvss/v3.1/specification-document
320
+ */
321
+ function cvss3BaseScore(vector) {
322
+ if (typeof vector !== "string") return null;
323
+ const m = vector.match(/^CVSS:3\.\d\/(.+)$/);
324
+ if (!m) return null;
325
+ const parts = m[1].split("/");
326
+ const metrics = {};
327
+ for (const p of parts) {
328
+ const [k, v] = p.split(":");
329
+ if (!k || !v) return null;
330
+ metrics[k] = v;
331
+ }
332
+ // Required metrics — bail if any are missing.
333
+ for (const k of ["AV", "AC", "PR", "UI", "S", "C", "I", "A"]) {
334
+ if (!metrics[k]) return null;
335
+ }
336
+ const AV_W = { N: 0.85, A: 0.62, L: 0.55, P: 0.2 };
337
+ const AC_W = { L: 0.77, H: 0.44 };
338
+ const UI_W = { N: 0.85, R: 0.62 };
339
+ const CIA_W = { H: 0.56, L: 0.22, N: 0 };
340
+ // PR weights depend on Scope.
341
+ const PR_W_U = { N: 0.85, L: 0.62, H: 0.27 };
342
+ const PR_W_C = { N: 0.85, L: 0.68, H: 0.5 };
343
+ const scope = metrics.S;
344
+ if (scope !== "U" && scope !== "C") return null;
345
+ const av = AV_W[metrics.AV];
346
+ const ac = AC_W[metrics.AC];
347
+ const ui = UI_W[metrics.UI];
348
+ const pr = (scope === "C" ? PR_W_C : PR_W_U)[metrics.PR];
349
+ const c = CIA_W[metrics.C];
350
+ const i = CIA_W[metrics.I];
351
+ const a = CIA_W[metrics.A];
352
+ if ([av, ac, ui, pr, c, i, a].some((x) => x == null)) return null;
353
+ const iss = 1 - ((1 - c) * (1 - i) * (1 - a));
354
+ let impact;
355
+ if (scope === "U") {
356
+ impact = 6.42 * iss;
357
+ } else {
358
+ impact = 7.52 * (iss - 0.029) - 3.25 * Math.pow(iss - 0.02, 15);
359
+ }
360
+ if (impact <= 0) return 0.0;
361
+ const exploitability = 8.22 * av * ac * pr * ui;
362
+ let base;
363
+ if (scope === "U") {
364
+ base = Math.min(impact + exploitability, 10);
365
+ } else {
366
+ base = Math.min(1.08 * (impact + exploitability), 10);
367
+ }
368
+ // roundUp1: round up to one decimal (CVSS 3.1 §7.1).
369
+ const rounded = Math.ceil(base * 10) / 10;
370
+ if (!Number.isFinite(rounded) || rounded < 0 || rounded > 10) return null;
371
+ return rounded;
372
+ }
373
+
374
+ /**
375
+ * Pull a numeric CVSS score + vector out of an OSV severity[] entry. CVSS
376
+ * vectors start with "CVSS:3.x/" or "CVSS:4.0/". When multiple vectors are
377
+ * present (e.g. both V3 and V4), the highest version wins regardless of
378
+ * array order. When the OSV record has no embedded numeric tail, the score
379
+ * is computed from the vector itself via cvss3BaseScore(). Returns null
380
+ * components when nothing parseable is present.
247
381
  */
248
382
  function extractCvss(rec) {
249
383
  const sev = Array.isArray(rec?.severity) ? rec.severity : [];
250
384
  let score = null;
251
- let vector = null;
385
+ let bestVector = null;
386
+ let bestVersion = 0;
252
387
  for (const s of sev) {
253
388
  if (typeof s?.score !== "string") continue;
254
389
  const v = s.score.trim();
255
- // Bare numeric score
390
+ // Bare numeric score (no vector prefix).
256
391
  const num = parseFloat(v);
257
392
  if (!Number.isNaN(num) && num >= 0 && num <= 10 && !v.includes("/")) {
258
393
  if (score == null) score = num;
259
394
  continue;
260
395
  }
261
- // CVSS vector — accept the highest-version vector we see.
262
- if (/^CVSS:[34]/.test(v)) {
263
- vector = v;
264
- // Try to parse the score out of the trailing fragment if encoded
265
- // as "CVSS:3.1/AV:.../9.3" — most OSV records don't embed it here,
266
- // but some Snyk-imported records do.
267
- const m = v.match(/\/(\d+(?:\.\d+)?)$/);
268
- if (m && score == null) {
269
- const candidate = parseFloat(m[1]);
270
- if (candidate >= 0 && candidate <= 10) score = candidate;
271
- }
396
+ const m = v.match(/^CVSS:(\d+\.\d+)/);
397
+ if (!m) continue;
398
+ const ver = parseFloat(m[1]);
399
+ if (ver > bestVersion) {
400
+ bestVersion = ver;
401
+ bestVector = v;
272
402
  }
273
403
  }
274
- return { score, vector };
404
+ // If we picked a vector, try to read an embedded score from the trailing
405
+ // fragment (some Snyk records carry it as ".../9.3"). Otherwise compute
406
+ // it from the vector for CVSS 3.x. CVSS 4.0 base-score derivation is
407
+ // intentionally not implemented here — that's a v0.13 follow-up.
408
+ if (bestVector && score == null) {
409
+ const tail = bestVector.match(/\/(\d+(?:\.\d+)?)$/);
410
+ if (tail) {
411
+ const candidate = parseFloat(tail[1]);
412
+ if (candidate >= 0 && candidate <= 10) score = candidate;
413
+ }
414
+ if (score == null && /^CVSS:3\./.test(bestVector)) {
415
+ const computed = cvss3BaseScore(bestVector);
416
+ if (computed != null) score = computed;
417
+ }
418
+ }
419
+ return { score, vector: bestVector };
275
420
  }
276
421
 
277
422
  /**
@@ -373,6 +518,23 @@ function normalizeAdvisory(rec) {
373
518
  // OSV.dev canonical advisory URL — used as the primary vendor advisory.
374
519
  const osvUrl = `https://osv.dev/vulnerability/${encodeURIComponent(rec.id)}`;
375
520
 
521
+ // F6: dedupe verification_sources. OSV records frequently carry the
522
+ // canonical osv.dev URL in references[] as well, which would otherwise
523
+ // produce a duplicate alongside the prepended `osvUrl`.
524
+ const verification_sources = Array.from(new Set([
525
+ osvUrl,
526
+ ...(/^CVE-/i.test(catalogKey) ? [`https://nvd.nist.gov/vuln/detail/${catalogKey}`] : []),
527
+ ...refUrls.slice(0, 10),
528
+ ]));
529
+
530
+ // F5: EPSS coverage does not extend to non-CVE identifiers. Surface this
531
+ // explicitly so curators know to re-query if MITRE later assigns a CVE
532
+ // id to the entry. Wording mirrors the MAL-2026-3083 catalog entry.
533
+ const isCveKey = /^CVE-/i.test(catalogKey);
534
+ const epss_note = isCveKey
535
+ ? null
536
+ : "EPSS coverage does not extend to non-CVE identifiers. FIRST EPSS API only indexes CVE keys; MAL-* / SNYK-* / GHSA-* / RUSTSEC-* / etc. return no data. Re-query and populate epss_score when MITRE assigns a CVE id and the entry is renamed.";
537
+
376
538
  return {
377
539
  [catalogKey]: {
378
540
  name: rec.summary || rec.id,
@@ -407,15 +569,12 @@ function normalizeAdvisory(rec) {
407
569
  epss_score: null,
408
570
  epss_percentile: null,
409
571
  epss_date: null,
410
- epss_source: /^CVE-/i.test(catalogKey)
572
+ epss_note,
573
+ epss_source: isCveKey
411
574
  ? `https://api.first.org/data/v1/epss?cve=${catalogKey}`
412
575
  : null,
413
576
  source_verified: published || today,
414
- verification_sources: [
415
- osvUrl,
416
- ...(/^CVE-/i.test(catalogKey) ? [`https://nvd.nist.gov/vuln/detail/${catalogKey}`] : []),
417
- ...refUrls.slice(0, 10),
418
- ],
577
+ verification_sources,
419
578
  vendor_advisories: [
420
579
  {
421
580
  vendor: "OSV.dev",
@@ -451,19 +610,26 @@ async function buildDiff(ctx) {
451
610
  status: "ok",
452
611
  diffs: [],
453
612
  errors: 0,
613
+ unreachable_count: 0,
614
+ normalize_error_count: 0,
454
615
  summary: "OSV: no ids requested (set ctx.osv_ids to seed a draft, or pass --advisory <MAL-...> for one-shot import).",
455
616
  };
456
617
  }
457
618
  const existingKeys = new Set(Object.keys(ctx.cveCatalog || {}));
458
619
  const diffs = [];
459
- let errors = 0;
620
+ // F7: distinguish unreachable (fetch failed, network or 5xx) from
621
+ // normalize-rejected (record fetched but normalization produced null).
622
+ // Operators triaging a refresh-report want to know whether to chase a
623
+ // network outage or a malformed upstream record.
624
+ let unreachable = 0;
625
+ let normalizeErrors = 0;
460
626
  for (const id of ids) {
461
627
  const r = await fetchAdvisoryById(id);
462
- if (!r.ok) { errors++; continue; }
628
+ if (!r.ok) { unreachable++; continue; }
463
629
  const rec = r.advisories[0];
464
- if (!rec) { errors++; continue; }
630
+ if (!rec) { unreachable++; continue; }
465
631
  const normalized = normalizeAdvisory(rec);
466
- if (!normalized) { errors++; continue; }
632
+ if (!normalized) { normalizeErrors++; continue; }
467
633
  const key = Object.keys(normalized)[0];
468
634
  if (existingKeys.has(key)) continue;
469
635
  diffs.push({
@@ -475,11 +641,14 @@ async function buildDiff(ctx) {
475
641
  source: "osv",
476
642
  });
477
643
  }
644
+ const errors = unreachable + normalizeErrors;
478
645
  return {
479
646
  status: errors === 0 ? "ok" : errors === ids.length ? "unreachable" : "partial",
480
647
  diffs,
481
648
  errors,
482
- summary: `OSV fetched ${ids.length} id(s); ${diffs.length} new entry diff(s), ${errors} failure(s).`,
649
+ unreachable_count: unreachable,
650
+ normalize_error_count: normalizeErrors,
651
+ summary: `OSV fetched ${ids.length} id(s); ${diffs.length} new entry diff(s), ${unreachable} unreachable, ${normalizeErrors} normalize-rejected.`,
483
652
  };
484
653
  }
485
654
 
@@ -489,5 +658,7 @@ module.exports = {
489
658
  normalizeAdvisory,
490
659
  buildDiff,
491
660
  isOsvId,
661
+ extractCvss,
662
+ cvss3BaseScore,
492
663
  OSV_ID_PREFIXES,
493
664
  };
@@ -31,6 +31,40 @@ const REPO_ROOT = path.resolve(__dirname, '..');
31
31
  const SCHEMA_PATH = path.join(REPO_ROOT, 'lib', 'schemas', 'cve-catalog.schema.json');
32
32
  const CATALOG_PATH = path.join(REPO_ROOT, 'data', 'cve-catalog.json');
33
33
  const LESSONS_PATH = path.join(REPO_ROOT, 'data', 'zeroday-lessons.json');
34
+ const ATLAS_PATH = path.join(REPO_ROOT, 'data', 'atlas-ttps.json');
35
+ const CWE_PATH = path.join(REPO_ROOT, 'data', 'cwe-catalog.json');
36
+
37
+ // v0.12.12 — patterns that mark a verification_sources URL as a public exploit
38
+ // or PoC location. When poc_available: true AND a verification source matches
39
+ // one of these, the entry must carry an `iocs` block per AGENTS.md Hard Rule
40
+ // #14. Surfaced as WARNING-only for v0.12.12 so drafts and pre-IoC entries
41
+ // don't break patch-class compatibility; v0.13.0 will tighten to error.
42
+ const PUBLIC_EXPLOIT_URL_PATTERNS = [
43
+ /github\.com\/.+\/(exploits?|poc|pocs)\b/i,
44
+ /\bexploit-?db\.com\b/i,
45
+ /\bpacketstormsecurity\.com\b/i,
46
+ /\bmetasploit\b/i,
47
+ /\/poc\//i,
48
+ /-poc\b/i,
49
+ ];
50
+
51
+ // v0.12.12 — Tightened CVSS-vector prefix. Schema's existing pattern accepts
52
+ // any "CVSS:<digits>/"; the strict pattern below admits only known CVSS
53
+ // versions (2.0 / 3.0 / 3.1 / 4.0). Emitted as WARNING for v0.12.12; v0.13.0
54
+ // will tighten the schema itself.
55
+ const STRICT_CVSS_PATTERN = /^CVSS:(2\.0|3\.[01]|4\.0)\//;
56
+
57
+ // v0.12.12 — Impossible-date guard. Reject obviously bogus year ranges
58
+ // (typos like 1014 or 20262) without rejecting legitimate ISO dates.
59
+ const MIN_VALID_YEAR = 1990;
60
+ const MAX_VALID_YEAR = 2100;
61
+ const DATE_FIELDS = [
62
+ 'last_updated',
63
+ 'source_verified',
64
+ 'cisa_kev_date',
65
+ 'cisa_kev_due_date',
66
+ 'epss_date',
67
+ ];
34
68
 
35
69
  function parseArgs(argv) {
36
70
  const opts = { quiet: false };
@@ -162,17 +196,126 @@ function validate(value, schema, schemaName, pathStr) {
162
196
  return errors;
163
197
  }
164
198
 
199
+ function looksLikePublicExploitSource(url) {
200
+ if (typeof url !== 'string') return false;
201
+ return PUBLIC_EXPLOIT_URL_PATTERNS.some((re) => re.test(url));
202
+ }
203
+
204
+ function isUsableDate(value) {
205
+ if (typeof value !== 'string' || !/^\d{4}-\d{2}-\d{2}$/.test(value)) {
206
+ return { ok: false, reason: 'not in YYYY-MM-DD shape' };
207
+ }
208
+ const d = new Date(value + 'T00:00:00Z');
209
+ if (Number.isNaN(d.getTime())) return { ok: false, reason: 'unparseable' };
210
+ const year = Number(value.slice(0, 4));
211
+ if (year < MIN_VALID_YEAR || year > MAX_VALID_YEAR) {
212
+ return {
213
+ ok: false,
214
+ reason: `year ${year} outside ${MIN_VALID_YEAR}..${MAX_VALID_YEAR}`,
215
+ };
216
+ }
217
+ return { ok: true };
218
+ }
219
+
220
+ function additionalChecks(key, entry, ctx) {
221
+ const warnings = [];
222
+
223
+ // V1 — Hard Rule #14 conditional: poc + public-exploit URL → iocs required.
224
+ if (entry.poc_available === true) {
225
+ const sources = Array.isArray(entry.verification_sources)
226
+ ? entry.verification_sources
227
+ : [];
228
+ const hasPublicExploitSource = sources.some(looksLikePublicExploitSource);
229
+ if (hasPublicExploitSource) {
230
+ const iocs = entry.iocs;
231
+ const iocsPopulated =
232
+ iocs && typeof iocs === 'object' && !Array.isArray(iocs) && Object.keys(iocs).length > 0;
233
+ if (!iocsPopulated) {
234
+ warnings.push(
235
+ `${key}: poc_available=true and verification_sources includes a public-exploit URL, but iocs is missing or empty (AGENTS.md Hard Rule #14)`,
236
+ );
237
+ }
238
+ }
239
+ }
240
+
241
+ // V2 — Cross-catalog reference resolution. Unresolved refs are warnings
242
+ // for v0.12.12; v0.13.0 will flip to hard failures.
243
+ for (const ref of entry.atlas_refs || []) {
244
+ if (!ctx.atlasKeys.has(ref)) {
245
+ warnings.push(
246
+ `${key}: atlas_refs entry "${ref}" not in data/atlas-ttps.json (will hard-fail in v0.13.0)`,
247
+ );
248
+ }
249
+ }
250
+ for (const ref of entry.cwe_refs || []) {
251
+ if (!ctx.cweKeys.has(ref)) {
252
+ warnings.push(
253
+ `${key}: cwe_refs entry "${ref}" not in data/cwe-catalog.json (will hard-fail in v0.13.0)`,
254
+ );
255
+ }
256
+ }
257
+
258
+ // V4 — Impossible-date guard.
259
+ for (const f of DATE_FIELDS) {
260
+ const v = entry[f];
261
+ if (v === undefined || v === null) continue;
262
+ const r = isUsableDate(v);
263
+ if (!r.ok) {
264
+ warnings.push(`${key}: ${f} value ${JSON.stringify(v)} is invalid (${r.reason})`);
265
+ }
266
+ }
267
+
268
+ // Sch1 — strict CVSS-vector prefix (warning-only for v0.12.12). The schema
269
+ // pattern stays loose; this admits only known CVSS versions.
270
+ if (typeof entry.cvss_vector === 'string' && entry.cvss_vector.length > 0) {
271
+ if (!STRICT_CVSS_PATTERN.test(entry.cvss_vector)) {
272
+ warnings.push(
273
+ `${key}: cvss_vector ${JSON.stringify(entry.cvss_vector)} does not match the strict prefix /^CVSS:(2.0|3.0|3.1|4.0)\\//. Schema tolerates this in v0.12.12; v0.13.0 will tighten the schema.`,
274
+ );
275
+ }
276
+ }
277
+
278
+ return warnings;
279
+ }
280
+
165
281
  function main() {
166
282
  const opts = parseArgs(process.argv);
167
283
  const schema = readJson(SCHEMA_PATH);
168
284
  const catalog = readJson(CATALOG_PATH);
169
285
  const lessons = readJson(LESSONS_PATH);
286
+ const atlas = fs.existsSync(ATLAS_PATH) ? readJson(ATLAS_PATH) : {};
287
+ const cwe = fs.existsSync(CWE_PATH) ? readJson(CWE_PATH) : {};
288
+
289
+ const ctx = {
290
+ atlasKeys: new Set(Object.keys(atlas).filter((k) => !k.startsWith('_'))),
291
+ cweKeys: new Set(Object.keys(cwe).filter((k) => !k.startsWith('_'))),
292
+ };
170
293
 
171
294
  const cveKeys = Object.keys(catalog).filter((k) => !k.startsWith('_'));
172
295
  const lessonKeys = new Set(Object.keys(lessons).filter((k) => !k.startsWith('_')));
173
296
 
174
297
  let failed = 0;
175
298
  let drafts = 0;
299
+ let warned = 0;
300
+
301
+ // V3 — Duplicate-name detection across all non-_meta entries.
302
+ const nameToKeys = new Map();
303
+ for (const k of cveKeys) {
304
+ const n = catalog[k] && catalog[k].name;
305
+ if (typeof n === 'string' && n.length > 0) {
306
+ if (!nameToKeys.has(n)) nameToKeys.set(n, []);
307
+ nameToKeys.get(n).push(k);
308
+ }
309
+ }
310
+ const dupNameWarnings = [];
311
+ for (const [n, ks] of nameToKeys) {
312
+ if (ks.length > 1) {
313
+ dupNameWarnings.push(
314
+ `duplicate CVE name ${JSON.stringify(n)} across keys: ${ks.join(', ')}`,
315
+ );
316
+ }
317
+ }
318
+
176
319
  for (const key of cveKeys) {
177
320
  const entry = catalog[key];
178
321
  // v0.12.0: GHSA-imported drafts are flagged `_auto_imported: true` +
@@ -182,6 +325,7 @@ function main() {
182
325
  // `exceptd run cve-curation --advisory <id>`.
183
326
  const isDraft = entry && (entry._auto_imported === true || entry._draft === true);
184
327
  const errors = validate(entry, schema, 'cve', key);
328
+ const warnings = additionalChecks(key, entry, ctx);
185
329
  if (!lessonKeys.has(key) && !isDraft) {
186
330
  errors.push(
187
331
  `${key}: missing matching entry in data/zeroday-lessons.json (rule #6: zero-day learning is live)`,
@@ -192,16 +336,22 @@ function main() {
192
336
  if (!opts.quiet) {
193
337
  console.log(`DRAFT ${key} (auto-imported — needs editorial review)`);
194
338
  for (const e of errors) console.log(` - [warn] ${e}`);
339
+ for (const w of warnings) console.log(` - [warn] ${w}`);
195
340
  }
196
341
  // Drafts don't increment `failed` — they're warnings, not errors.
197
342
  continue;
198
343
  }
199
- if (errors.length === 0) {
344
+ if (errors.length === 0 && warnings.length === 0) {
200
345
  if (!opts.quiet) console.log(`PASS ${key}`);
346
+ } else if (errors.length === 0) {
347
+ warned++;
348
+ if (!opts.quiet) console.log(`WARN ${key}`);
349
+ for (const w of warnings) console.log(` - [warn] ${w}`);
201
350
  } else {
202
351
  failed++;
203
352
  console.log(`FAIL ${key}`);
204
353
  for (const e of errors) console.log(` - ${e}`);
354
+ for (const w of warnings) console.log(` - [warn] ${w}`);
205
355
  }
206
356
  }
207
357
 
@@ -218,13 +368,31 @@ function main() {
218
368
  }
219
369
  }
220
370
 
371
+ // V3 — emit duplicate-name warnings as a catalog-wide tail block.
372
+ for (const w of dupNameWarnings) {
373
+ console.log(`WARN catalog`);
374
+ console.log(` - [warn] ${w}`);
375
+ }
376
+
221
377
  const total = cveKeys.length;
222
- const passed = total - failed - drafts;
378
+ const passed = total - failed - drafts - warned;
223
379
  const summary = `\n${passed}/${total} CVE entries validated` +
224
380
  (drafts ? `, ${drafts} draft(s) (auto-imported)` : '') +
381
+ (warned ? `, ${warned} with warnings` : '') +
225
382
  (failed ? `, ${failed} failed` : '') + '.';
226
383
  console.log(summary);
227
384
  process.exit(failed === 0 ? 0 : 1);
228
385
  }
229
386
 
230
- main();
387
+ module.exports = {
388
+ validate,
389
+ looksLikePublicExploitSource,
390
+ isUsableDate,
391
+ additionalChecks,
392
+ PUBLIC_EXPLOIT_URL_PATTERNS,
393
+ STRICT_CVSS_PATTERN,
394
+ };
395
+
396
+ if (require.main === module) {
397
+ main();
398
+ }