@blamejs/exceptd-skills 0.13.19 → 0.13.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,555 @@
1
+ "use strict";
2
+ /**
3
+ * lib/gap-detectors.js
4
+ *
5
+ * v0.13.21 — Catalog gap detection beyond the v0.13.19 missing-context /
6
+ * dangling-ref / draft-debt classes. The audit-catalog-gaps detector
7
+ * surfaced field-presence holes; this module adds seven cross-cutting
8
+ * detection classes the prior detector did not cover.
9
+ *
10
+ * Each detector is a pure function: takes the loaded catalogs + options,
11
+ * returns an array of findings. The audit-catalog-gaps CLI composes them
12
+ * into a unified report; the integrity test exercises them against the
13
+ * shipped catalogs; --class filters select between them.
14
+ *
15
+ * Detection classes:
16
+ *
17
+ * 1. content-quality — fields present but content weak
18
+ * (short, placeholder-language, name-as-
19
+ * description, KEV-listed but no advisories)
20
+ *
21
+ * 2. temporal-staleness — last_verified > 180d, last_updated > 365d,
22
+ * CISA-KEV due-date passed, EPSS stale
23
+ *
24
+ * 3. logical-consistency — internal-state contradictions
25
+ * (cisa_kev:true + date:null, etc.)
26
+ *
27
+ * 4. cross-ref-completeness — bidirectional references
28
+ * (CVE→CWE present but CWE.evidence_cves
29
+ * missing the back-ref)
30
+ *
31
+ * 5. schema-evolution — required-since-version fields missing
32
+ * on older entries
33
+ *
34
+ * 6. operator-action-sla — auto-imported entries older than the
35
+ * curation-SLA without operator action
36
+ *
37
+ * 7. unused-orphan — catalog entries no skill / playbook /
38
+ * CVE references — dead-weight content
39
+ *
40
+ * Why pure functions: each detector is independently testable against
41
+ * synthetic catalog inputs, and the integration is just `Array.concat`
42
+ * over the seven results. Composing in audit-catalog-gaps.js stays
43
+ * thin.
44
+ */
45
+
46
+ // Sentinel strings that indicate placeholder / curation-pending content.
47
+ // Adding new sentinels here makes them findable across every text-heavy
48
+ // field without changing the call sites.
49
const PLACEHOLDER_SENTINELS = [
  /pending operator curation/i,
  /refer to vendor advisory for IOC list/i,
  /bulk-imported KEV entry, IOCs not extracted/i,
  /\bTBD\b/,
  /\bTKTK\b/,
  /\bcoming soon\b/i,
  /^\s*\[\s*\]\s*$/,
  /\bplaceholder\b/i
];

/**
 * Tell whether a text value matches any entry of PLACEHOLDER_SENTINELS.
 *
 * @param {*} str - candidate text; anything that is not a non-empty string
 *   is treated as "no placeholder".
 * @returns {boolean} true when at least one sentinel pattern matches.
 */
function hasPlaceholderLanguage(str) {
  const isUsableText = typeof str === "string" && str.length > 0;
  // None of the sentinels carry the g/y flag, so .test() is stateless here.
  return isUsableText && PLACEHOLDER_SENTINELS.some((pattern) => pattern.test(str));
}
67
+
68
+ // ---------- 1. content-quality ----------
69
+ //
70
+ // Fields present but content weak. Each rule is per-catalog + per-field
71
+ // because the "what's weak" depends on the field's semantic role.
72
+
73
/**
 * Detector 1 (content-quality): CVE entries whose fields are present but
 * whose content is weak.
 *
 * Checks applied to every entry of loaded["cve-catalog"] except `_meta`
 * and falsy entries, in this order:
 *   - vector is a non-empty string shorter than 50 characters
 *   - vector matches a placeholder sentinel
 *   - poc_available is true but poc_description matches a sentinel
 *   - cisa_kev is true but vendor_advisories is not a non-empty array
 *   - description is exactly the (non-empty) name
 *
 * @param {object} loaded - catalogs keyed by catalog name.
 * @returns {Array<object>} findings shaped
 *   { class, catalog, id, field, reason }; empty when the CVE catalog
 *   is absent.
 */
function contentQualityFindings(loaded) {
  const catalog = loaded["cve-catalog"];
  if (!catalog) return [];

  const findings = [];
  for (const [id, entry] of Object.entries(catalog)) {
    if (id === "_meta" || !entry) continue;

    const flag = (field, reason) => {
      findings.push({ class: "content-quality", catalog: "cve-catalog", id, field, reason });
    };

    const vector = entry.vector;
    if (typeof vector === "string") {
      const chars = vector.length;
      // An empty vector is a field-presence problem, not a quality one,
      // so only 1..49 characters counts as a stub here.
      if (chars > 0 && chars < 50) {
        flag("vector", `vector is ${chars} chars (< 50 threshold) — likely a stub`);
      }
      if (hasPlaceholderLanguage(vector)) {
        flag("vector", "vector contains placeholder-language sentinel");
      }
    }

    // Claiming a PoC exists while the description is still a placeholder
    // is a contradiction.
    if (entry.poc_available === true && hasPlaceholderLanguage(entry.poc_description)) {
      flag("poc_description", "poc_available:true but description carries placeholder sentinel");
    }

    const advisories = entry.vendor_advisories;
    const hasAdvisories = Array.isArray(advisories) && advisories.length > 0;
    if (entry.cisa_kev === true && !hasAdvisories) {
      flag("vendor_advisories", "cisa_kev:true but vendor_advisories is empty");
    }

    // Strict equality with a non-empty string name implies the description
    // is that very same string.
    const name = entry.name;
    if (typeof name === "string" && name.length > 0 && name === entry.description) {
      flag("description", "description is just the name repeated");
    }
  }
  return findings;
}
121
+
122
+ // ---------- 2. temporal-staleness ----------
123
+ //
124
+ // Time-based decay. Catalog entries get stale as the threat-intelligence
125
+ // landscape shifts. Surfacing stale entries gives operators a re-verify
126
+ // work-queue.
127
+
128
/**
 * Whole days elapsed between an ISO-style date string and a reference
 * instant (floor of the millisecond difference divided by one day).
 *
 * @param {*} iso - date text; must be a string starting with YYYY-MM-DD
 *   that Date.parse accepts, otherwise the result is null.
 * @param {Date|number} [now] - reference instant: a Date (or any object
 *   exposing getTime()) or an epoch-millisecond number. When omitted or
 *   null the current time is used.
 * @returns {number|null} day count (negative when `iso` lies after the
 *   reference), or null when either side cannot be resolved to a time.
 */
function daysSince(iso, now) {
  if (typeof iso !== "string" || !/^\d{4}-\d{2}-\d{2}/.test(iso)) return null;
  const then = Date.parse(iso);
  if (Number.isNaN(then)) return null;

  // Resolve the reference to epoch ms without assuming the caller passed a
  // Date: the function is exported, so `now` can legitimately be absent.
  let reference;
  if (now == null) {
    reference = Date.now();
  } else if (typeof now.getTime === "function") {
    reference = now.getTime();
  } else {
    reference = Number(now);
  }
  // An invalid Date (or non-numeric value) would otherwise leak NaN to
  // callers that only guard against null.
  if (Number.isNaN(reference)) return null;

  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  return Math.floor((reference - then) / MS_PER_DAY);
}
134
+
135
/**
 * Detector 2 (temporal-staleness): CVE entries whose dated fields have aged
 * past a threshold.
 *
 * Per entry of loaded["cve-catalog"] (skipping `_meta` and falsy entries):
 *   - verification date (source_verified, else last_verified) older than
 *     the verified threshold
 *   - last_updated older than the updated threshold
 *   - cisa_kev:true with a cisa_kev_due_date that is at least one whole
 *     day in the past
 *   - numeric epss_score whose epss_date is older than the EPSS threshold
 *
 * @param {object} loaded - catalogs keyed by catalog name.
 * @param {object} [opts]
 * @param {Date} [opts.now] - reference instant (defaults to the current time).
 * @param {number} [opts.stale_verified_days=180]
 * @param {number} [opts.stale_updated_days=365]
 * @param {number} [opts.stale_epss_days=90]
 * @returns {Array<object>} findings shaped
 *   { class, catalog, id, field, reason }.
 */
function temporalStalenessFindings(loaded, opts = {}) {
  // An explicit 0 is a meaningful threshold ("flag anything at least a day
  // old"), so only absent / blank / NaN options fall back to the default.
  const threshold = (value, fallback) => (value === 0 || value ? value : fallback);

  const now = opts.now || new Date();
  const STALE_VERIFIED_DAYS = threshold(opts.stale_verified_days, 180);
  const STALE_UPDATED_DAYS = threshold(opts.stale_updated_days, 365);
  const STALE_EPSS_DAYS = threshold(opts.stale_epss_days, 90);

  const out = [];
  const cve = loaded["cve-catalog"];
  if (!cve) return out;

  for (const id of Object.keys(cve)) {
    if (id === "_meta") continue;
    const e = cve[id];
    if (!e) continue;

    const report = (field, reason) => {
      out.push({ class: "temporal-staleness", catalog: "cve-catalog", id, field, reason });
    };

    // Name the field the date actually came from, so the finding points the
    // operator at something that exists on the entry.
    const verifiedField = e.source_verified ? "source_verified" : "last_verified";
    const sinceVerified = daysSince(e[verifiedField], now);
    if (sinceVerified !== null && sinceVerified > STALE_VERIFIED_DAYS) {
      report(verifiedField, `${verifiedField} is ${sinceVerified}d old (threshold ${STALE_VERIFIED_DAYS}d)`);
    }

    const sinceUpdated = daysSince(e.last_updated, now);
    if (sinceUpdated !== null && sinceUpdated > STALE_UPDATED_DAYS) {
      report("last_updated", `last_updated is ${sinceUpdated}d old (threshold ${STALE_UPDATED_DAYS}d)`);
    }

    // KEV due date already passed: surface it so remediation status gets
    // re-verified.
    if (e.cisa_kev === true && typeof e.cisa_kev_due_date === "string") {
      const sinceDue = daysSince(e.cisa_kev_due_date, now);
      if (sinceDue !== null && sinceDue > 0) {
        report("cisa_kev_due_date", `CISA KEV due date passed ${sinceDue}d ago; verify remediation status`);
      }
    }

    // The EPSS score is only checked when both the score and its date exist.
    if (typeof e.epss_score === "number" && typeof e.epss_date === "string") {
      const sinceEpss = daysSince(e.epss_date, now);
      if (sinceEpss !== null && sinceEpss > STALE_EPSS_DAYS) {
        report("epss_date", `epss_date is ${sinceEpss}d old (threshold ${STALE_EPSS_DAYS}d); refresh via 'exceptd refresh --source epss'`);
      }
    }
  }
  return out;
}
181
+
182
+ // ---------- 3. logical-consistency ----------
183
+ //
184
+ // Internal-state rules that must hold across multiple fields. These are
185
+ // the bugs that pass schema validation (every required field is present)
186
+ // but the field combinations don't make sense.
187
+
188
/**
 * Detector 3 (logical-consistency): CVE entries whose field combinations
 * contradict each other even though each field is individually present.
 *
 * Rules, evaluated in this order for every entry of loaded["cve-catalog"]
 * (skipping `_meta` and falsy entries):
 *   - cisa_kev_date_present_when_kev_true
 *   - live_patch_tools_required_when_available
 *   - ai_discovery_attribution_text_required (attribution text < 30 chars)
 *   - confirmed_exploitation_needs_sources (< 2 verification_sources)
 *   - rwep_factors_required_when_score_set
 *
 * @param {object} loaded - catalogs keyed by catalog name.
 * @returns {Array<object>} findings shaped
 *   { class, catalog, id, rule, reason }.
 */
function logicalConsistencyFindings(loaded) {
  const out = [];
  const cve = loaded["cve-catalog"];
  if (!cve) return out;

  const isEmptyList = (value) => !Array.isArray(value) || value.length === 0;

  for (const id of Object.keys(cve)) {
    if (id === "_meta") continue;
    const e = cve[id];
    if (!e) continue;

    const violate = (rule, reason) => {
      out.push({ class: "logical-consistency", catalog: "cve-catalog", id, rule, reason });
    };

    // A KEV listing without its listing date means the date was not
    // recorded on the entry.
    if (e.cisa_kev === true && (e.cisa_kev_date == null || e.cisa_kev_date === "")) {
      violate("cisa_kev_date_present_when_kev_true",
        "cisa_kev:true requires cisa_kev_date (CISA's dateAdded)");
    }

    // The boolean alone is not evidence; at least one tool must be named.
    if (e.live_patch_available === true && isEmptyList(e.live_patch_tools)) {
      violate("live_patch_tools_required_when_available",
        "live_patch_available:true but live_patch_tools is empty — RWEP factor would mis-fire");
    }

    // The first truthy of the two note fields is the attribution text;
    // under 30 characters is treated as too short to name a tool.
    if (e.ai_discovered === true) {
      const note = e.ai_discovery_notes || e.discovery_attribution_note || "";
      if (typeof note !== "string" || note.length < 30) {
        violate("ai_discovery_attribution_text_required",
          "ai_discovered:true but attribution text is missing or too short to name the AI tool");
      }
    }

    // Normalise once so the condition and the reported count agree: a
    // non-array value (string, object) counts as zero sources instead of
    // reporting its character count or "undefined".
    const sources = Array.isArray(e.verification_sources) ? e.verification_sources : [];
    if (e.active_exploitation === "confirmed" && sources.length < 2) {
      violate("confirmed_exploitation_needs_sources",
        `active_exploitation:"confirmed" requires >= 2 verification_sources; have ${sources.length}`);
    }

    // A numeric score with no factor breakdown cannot be justified.
    if (typeof e.rwep_score === "number"
        && (!e.rwep_factors || Object.keys(e.rwep_factors).length === 0)) {
      violate("rwep_factors_required_when_score_set",
        "rwep_score declared but rwep_factors is empty — score is unjustified");
    }
  }
  return out;
}
248
+
249
+ // ---------- 4. cross-ref-completeness ----------
250
+ //
251
+ // Bidirectional reference checks. Pre-v0.13.21, the dangling-ref class
252
+ // only verified the forward direction (CVE.cwe_refs[] resolves into
253
+ // cwe-catalog). This class verifies the BACK-reference is present too
254
+ // (CWE.evidence_cves[] includes the CVE that cited it).
255
+
256
/**
 * Detector 4 (cross-ref-completeness): verify that every forward reference
 * from a curated CVE entry is mirrored by a back-reference on the target.
 *
 *   CVE.cwe_refs[]                   -> cwe-catalog[id].evidence_cves
 *   CVE.attack_refs[]                -> attack-techniques[id].cve_refs
 *   keys of CVE.framework_control_gaps -> framework-control-gaps[id].evidence_cves
 *
 * Entries with a truthy `_auto_imported` are excluded from the forward
 * index. Targets that do not exist in their catalog are skipped here (the
 * forward direction belongs to the dangling-ref class).
 *
 * @param {object} loaded - catalogs keyed by catalog name.
 * @returns {Array<object>} findings shaped
 *   { class, source, target, target_id, reason }; the reason lists at most
 *   the first three missing CVE ids.
 */
function crossRefCompletenessFindings(loaded) {
  const out = [];
  const cve = loaded["cve-catalog"];

  // target-id -> Set of CVE ids citing it, one index per target catalog.
  const cveByCwe = new Map();
  const cveByAttack = new Map();
  const cveByFwc = new Map();

  // Record that `cid` cites every id in `targetIds`.
  const cite = (index, targetIds, cid) => {
    for (const targetId of targetIds) {
      if (!index.has(targetId)) index.set(targetId, new Set());
      index.get(targetId).add(cid);
    }
  };
  // A ref list that is not an array (e.g. a bare string) is ignored rather
  // than iterated element-by-element as characters.
  const asList = (value) => (Array.isArray(value) ? value : []);

  for (const cid of Object.keys(cve || {})) {
    if (cid === "_meta") continue;
    const e = cve[cid];
    if (!e) continue;
    // Auto-imported drafts do not have curated refs yet.
    if (e._auto_imported) continue;
    cite(cveByCwe, asList(e.cwe_refs), cid);
    cite(cveByAttack, asList(e.attack_refs), cid);
    cite(cveByFwc, Object.keys(e.framework_control_gaps || {}), cid);
  }

  // Emit one finding per target entry whose back-ref list omits a citing CVE.
  const checkBackRefs = (index, targetName, backRefField, label) => {
    const targetCatalog = loaded[targetName];
    for (const [targetId, citingSet] of index.entries()) {
      const entry = targetCatalog && targetCatalog[targetId];
      if (!entry) continue;
      const recorded = new Set(asList(entry[backRefField]));
      const missing = [...citingSet].filter((cid) => !recorded.has(cid));
      if (missing.length === 0) continue;
      out.push({ class: "cross-ref-completeness", source: "cve-catalog", target: targetName,
        target_id: targetId,
        reason: `${label} entry's ${backRefField} missing ${missing.length} CVE(s) that cite it: ${missing.slice(0, 3).join(", ")}` });
    }
  };

  checkBackRefs(cveByCwe, "cwe-catalog", "evidence_cves", "CWE");
  checkBackRefs(cveByAttack, "attack-techniques", "cve_refs", "ATT&CK");
  checkBackRefs(cveByFwc, "framework-control-gaps", "evidence_cves", "framework-gap");
  return out;
}
327
+
328
+ // ---------- 5. schema-evolution ----------
329
+ //
330
+ // Required-since-version checks. Fields the schema requires today were
331
+ // optional on entries added in older releases. The audit surfaces those
332
+ // pre-existing entries so operator-curation can backfill.
333
+
334
// Per-catalog list of fields that became required in a given release.
// `check` receives the field's current value and returns truthy when the
// entry satisfies the requirement.
const REQUIRED_SINCE = {
  "cve-catalog": [
    { field: "ai_discovered", since: "0.12.36", check: (v) => typeof v === "boolean" },
    { field: "ai_assisted_weaponization", since: "0.12.36", check: (v) => typeof v === "boolean" },
    { field: "rwep_factors", since: "0.12.36", check: (v) => v && Object.keys(v).length > 0 }
  ]
};

/**
 * Detector 5 (schema-evolution): list entries that fail a REQUIRED_SINCE
 * rule for their catalog.
 *
 * @param {object} loaded - catalogs keyed by catalog name; catalogs named in
 *   REQUIRED_SINCE but absent from `loaded` are skipped.
 * @returns {Array<object>} findings shaped
 *   { class, catalog, id, field, since, reason }, one per failed rule.
 */
function schemaEvolutionFindings(loaded) {
  const findings = [];
  for (const [catalogKey, rules] of Object.entries(REQUIRED_SINCE)) {
    const catalog = loaded[catalogKey];
    if (!catalog) continue;

    for (const [id, entry] of Object.entries(catalog)) {
      if (id === "_meta" || !entry) continue;

      for (const rule of rules) {
        if (rule.check(entry[rule.field])) continue;
        findings.push({
          class: "schema-evolution",
          catalog: catalogKey,
          id,
          field: rule.field,
          since: rule.since,
          reason: `${rule.field} required since v${rule.since}; missing on this entry`
        });
      }
    }
  }
  return findings;
}
362
+
363
+ // ---------- 6. operator-action-sla ----------
364
+ //
365
+ // Auto-imported entries are intake-class events. The catalog allows them
366
+ // to ship un-curated (operators add detail later) but past a threshold
367
+ // the un-curated state IS the problem.
368
+
369
/**
 * Detector 6 (operator-action-sla): auto-imported or draft CVE entries whose
 * last_updated date is older than the curation SLA.
 *
 * An entry flagged both `_auto_imported:true` and `_draft:true` can yield
 * two findings (auto-import first, then draft).
 *
 * @param {object} loaded - catalogs keyed by catalog name.
 * @param {object} [opts]
 * @param {Date} [opts.now] - reference instant (defaults to the current time).
 * @param {number} [opts.auto_import_sla_days=60]
 * @param {number} [opts.draft_sla_days=90]
 * @returns {Array<object>} findings shaped { class, catalog, id, reason }.
 */
function operatorActionSlaFindings(loaded, opts = {}) {
  // An explicit 0 is a valid SLA; only absent / blank / NaN values fall
  // back to the default.
  const threshold = (value, fallback) => (value === 0 || value ? value : fallback);

  const now = opts.now || new Date();
  const AUTO_IMPORT_SLA_DAYS = threshold(opts.auto_import_sla_days, 60);
  const DRAFT_SLA_DAYS = threshold(opts.draft_sla_days, 90);

  const out = [];
  const cve = loaded["cve-catalog"];
  if (!cve) return out;

  for (const id of Object.keys(cve)) {
    if (id === "_meta") continue;
    const e = cve[id];
    if (!e) continue;

    const isAutoImported = e._auto_imported === true;
    const isDraft = e._draft === true;
    if (!isAutoImported && !isDraft) continue;

    // Both checks age against the same field, so compute it once.
    const age = daysSince(e.last_updated, now);
    if (age === null) continue;

    if (isAutoImported && age > AUTO_IMPORT_SLA_DAYS) {
      out.push({ class: "operator-action-sla", catalog: "cve-catalog", id,
        reason: `_auto_imported entry is ${age}d old (SLA ${AUTO_IMPORT_SLA_DAYS}d); operator-curation pending` });
    }
    if (isDraft && age > DRAFT_SLA_DAYS) {
      out.push({ class: "operator-action-sla", catalog: "cve-catalog", id,
        reason: `_draft entry is ${age}d old (SLA ${DRAFT_SLA_DAYS}d); promote-or-quarantine SLA breached` });
    }
  }
  return out;
}
398
+
399
+ // ---------- 7. unused-orphan ----------
400
+ //
401
+ // Entries that no skill / playbook / CVE references — dead-weight
402
+ // content the operator can either repurpose or remove.
403
+
404
+ // Build reference sets from skills/*.md frontmatter + body and from
405
+ // data/playbooks/*.json content. Pre-v0.13.21 follow-up (codex P1 PR
406
+ // #61): unusedOrphanFindings defaulted these to empty sets, which
407
+ // flagged D3FEND / CWE / ATT&CK IDs referenced in skill bodies as
408
+ // "unused orphans" — false positive. v0.13.21+ builds the reference
409
+ // sets internally when the caller doesn't supply them.
410
+ //
411
+ // The regex is permissive — any CWE-NNN / T1234[.456] / AML.TNNNN /
412
+ // D3-XX / RFC-NNN token in a skill body or playbook JSON counts as a
413
+ // reference. We deliberately scan the FULL text, not just structured
414
+ // fields, because skill bodies cite IDs in prose ("see CWE-79") as
415
+ // often as in frontmatter.
416
const REFERENCE_TOKEN_RE = /\b(?:CWE-\d+|T\d{4}(?:\.\d{3})?|AML\.T\d{4}(?:\.\d{3})?|D3-[A-Z]+(?:-[A-Z]+)*|RFC-\d+)\b/g;

/**
 * Scan skill bodies and playbook JSON for catalog-id tokens.
 *
 * Reads `<root>/skills/<name>/skill.md` for every directory entry under
 * `skills/`, and every `*.json` file under `<root>/data/playbooks/`, and
 * collects each REFERENCE_TOKEN_RE match from the raw file text. A missing
 * `skills/` or `data/playbooks/` directory (or a skill directory without a
 * skill.md) is tolerated and simply contributes nothing.
 *
 * @param {string} [rootPath] - repository root; defaults to the parent of
 *   this module's directory.
 * @returns {{skillRefs: Set<string>, playbookRefs: Set<string>}}
 */
function buildExternalRefs(rootPath) {
  // Required here, once, rather than at module level: this is the only
  // function in the module that touches the filesystem.
  const path = require("path");
  const fs = require("fs");
  const root = rootPath || path.join(__dirname, "..");

  // Add every reference token found in `file` to the `into` set.
  const harvest = (file, into) => {
    // String.prototype.match resets lastIndex for a /g regex, so sharing
    // the module-level pattern across calls is safe.
    const tokens = fs.readFileSync(file, "utf8").match(REFERENCE_TOKEN_RE);
    if (tokens) for (const token of tokens) into.add(token);
  };

  const skillRefs = new Set();
  const skillsDir = path.join(root, "skills");
  if (fs.existsSync(skillsDir)) {
    for (const skillName of fs.readdirSync(skillsDir)) {
      const skillPath = path.join(skillsDir, skillName, "skill.md");
      if (fs.existsSync(skillPath)) harvest(skillPath, skillRefs);
    }
  }

  const playbookRefs = new Set();
  const playbooksDir = path.join(root, "data", "playbooks");
  if (fs.existsSync(playbooksDir)) {
    for (const pbName of fs.readdirSync(playbooksDir)) {
      if (pbName.endsWith(".json")) harvest(path.join(playbooksDir, pbName), playbookRefs);
    }
  }

  return { skillRefs, playbookRefs };
}
452
+
453
/**
 * Detector 7 (unused-orphan): auto-imported catalog entries that nothing
 * references.
 *
 * An id counts as referenced when it appears in the skill reference set,
 * the playbook reference set, or in any CVE entry's cwe_refs / attack_refs /
 * atlas_refs / framework_control_gaps keys. Only entries with
 * `_auto_imported:true` and without `forward_looking:true` are candidates.
 *
 * @param {object} loaded - catalogs keyed by catalog name.
 * @param {object} [opts]
 * @param {Set<string>} [opts.skillRefs] - ids cited by skills.
 * @param {Set<string>} [opts.playbookRefs] - ids cited by playbooks.
 * @param {boolean} [opts._autoLoadRefs] - pass false to forbid scanning the
 *   repository when neither set is supplied.
 * @param {string} [opts._rootPath] - root handed to buildExternalRefs.
 * @returns {Array<object>} findings shaped { class, catalog, id, reason }.
 */
function unusedOrphanFindings(loaded, opts = {}) {
  let { skillRefs, playbookRefs } = opts;

  // Scan the repository only when the caller supplied neither set and did
  // not opt out; a caller that supplies one set keeps exactly what it gave.
  const nothingSupplied = !skillRefs && !playbookRefs;
  if (nothingSupplied && opts._autoLoadRefs !== false) {
    ({ skillRefs, playbookRefs } = buildExternalRefs(opts._rootPath));
  }
  if (!skillRefs) skillRefs = new Set();
  if (!playbookRefs) playbookRefs = new Set();

  // Every id some CVE entry points at.
  const citedByCve = new Set();
  const cveCatalog = loaded["cve-catalog"] || {};
  for (const [cveId, entry] of Object.entries(cveCatalog)) {
    if (cveId === "_meta" || !entry) continue;
    for (const listName of ["cwe_refs", "attack_refs", "atlas_refs"]) {
      for (const ref of (entry[listName] || [])) citedByCve.add(ref);
    }
    for (const gapId of Object.keys(entry.framework_control_gaps || {})) citedByCve.add(gapId);
  }

  const referenceSets = [skillRefs, playbookRefs, citedByCve];
  const TARGET_CATALOGS = ["cwe-catalog", "attack-techniques", "atlas-ttps", "d3fend-catalog", "framework-control-gaps"];

  const orphans = [];
  for (const catKey of TARGET_CATALOGS) {
    const catalog = loaded[catKey];
    if (!catalog) continue;
    for (const [id, entry] of Object.entries(catalog)) {
      if (id === "_meta" || !entry) continue;
      // Operator-curated and explicitly forward-looking entries are never
      // reported; only auto-imported ones are candidates.
      const isCandidate = entry._auto_imported === true && entry.forward_looking !== true;
      if (!isCandidate) continue;
      if (referenceSets.some((refs) => refs.has(id))) continue;
      orphans.push({ class: "unused-orphan", catalog: catKey, id,
        reason: "auto-imported entry with zero references from skills / playbooks / CVE entries — consider quarantine or curation" });
    }
  }
  return orphans;
}
500
+
501
+ // ---------- Composite ----------
502
+
503
/**
 * Run the seven detectors in their canonical order and return one flat
 * list of findings.
 *
 * The external reference sets are resolved here, on a copy of `opts`, and
 * handed only to the orphan detector. The two time-based detectors receive
 * the caller's `opts` untouched and the remaining detectors receive no
 * options.
 *
 * @param {object} loaded - catalogs keyed by catalog name.
 * @param {object} [opts] - options forwarded to the detectors; see the
 *   individual detectors for the keys each one reads.
 * @returns {Array<object>} concatenated findings.
 */
function runAllDetectors(loaded, opts = {}) {
  const orphanOpts = { ...opts };
  const callerGaveRefs = Boolean(orphanOpts.skillRefs) || Boolean(orphanOpts.playbookRefs);
  if (!callerGaveRefs && opts._autoLoadRefs !== false) {
    const { skillRefs, playbookRefs } = buildExternalRefs(opts._rootPath);
    orphanOpts.skillRefs = skillRefs;
    orphanOpts.playbookRefs = playbookRefs;
  }

  // Thunks keep the invocation order explicit and sequential.
  const stages = [
    () => contentQualityFindings(loaded),
    () => temporalStalenessFindings(loaded, opts),
    () => logicalConsistencyFindings(loaded),
    () => crossRefCompletenessFindings(loaded),
    () => schemaEvolutionFindings(loaded),
    () => operatorActionSlaFindings(loaded, opts),
    () => unusedOrphanFindings(loaded, orphanOpts)
  ];

  const findings = [];
  for (const stage of stages) {
    for (const finding of stage()) findings.push(finding);
  }
  return findings;
}
524
+
525
+ // Canonical list of detection classes runAllDetectors can emit. The
526
+ // budget gate asserts class-set equality against this list so a future
527
+ // 8th detector added without a budget entry fails-closed (codex P2
528
+ // PR #61).
529
+ const DETECTOR_CLASSES = [
530
+ "content-quality",
531
+ "temporal-staleness",
532
+ "logical-consistency",
533
+ "cross-ref-completeness",
534
+ "schema-evolution",
535
+ "operator-action-sla",
536
+ "unused-orphan"
537
+ ];
538
+
539
+ module.exports = {
540
+ hasPlaceholderLanguage,
541
+ daysSince,
542
+ contentQualityFindings,
543
+ temporalStalenessFindings,
544
+ logicalConsistencyFindings,
545
+ crossRefCompletenessFindings,
546
+ schemaEvolutionFindings,
547
+ operatorActionSlaFindings,
548
+ unusedOrphanFindings,
549
+ runAllDetectors,
550
+ buildExternalRefs,
551
+ DETECTOR_CLASSES,
552
+ REQUIRED_SINCE,
553
+ PLACEHOLDER_SENTINELS,
554
+ REFERENCE_TOKEN_RE
555
+ };
@@ -193,44 +193,19 @@ function extractCveIds(text) {
193
193
  }
194
194
 
195
195
  /**
196
- * Lightweight RSS / Atom parser. Avoids pulling in a dependency for what
197
- * is effectively `<item>` / `<entry>` extraction + `<title>` / `<link>` /
198
- * `<pubDate>` / `<published>` / `<description>` / `<content>` text grabs.
196
+ * RSS / Atom parser. v0.13.20 replaces the original regex-based parser
197
+ * (which silently failed on XML namespaces, nested CDATA, self-closing
198
+ * tags, HTML entities, and multi-line content) with a proper streaming
199
+ * XML tokenizer defined in lib/xml-tokenizer.js. Parser errors surface
200
+ * via the second `errors` argument so consumers can observe parse
201
+ * failures instead of receiving a silent empty array.
199
202
  *
200
203
  * Returns [{ title, link, published, body }, ...].
201
204
  */
202
- function parseRssAtom(xml) {
203
- if (typeof xml !== 'string') return [];
204
- const items = [];
205
- // Try Atom <entry>...</entry> first.
206
- const atomEntryRe = /<entry\b[\s\S]*?<\/entry>/g;
207
- const rssItemRe = /<item\b[\s\S]*?<\/item>/g;
208
- const blocks = (xml.match(atomEntryRe) || xml.match(rssItemRe) || []);
209
- for (const block of blocks) {
210
- const title = matchInner(block, 'title') || '';
211
- const link = matchInner(block, 'link') || matchAttr(block, 'link', 'href') || '';
212
- const published = matchInner(block, 'pubDate') || matchInner(block, 'published') || matchInner(block, 'updated') || '';
213
- const description = matchInner(block, 'description') || matchInner(block, 'content') || matchInner(block, 'summary') || '';
214
- items.push({ title: stripCdata(title), link: stripCdata(link), published: stripCdata(published), body: stripCdata(description) });
215
- }
216
- return items;
217
- }
218
-
219
- function matchInner(block, tag) {
220
- const re = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`, 'i');
221
- const m = block.match(re);
222
- return m ? m[1].trim() : null;
223
- }
224
-
225
- function matchAttr(block, tag, attr) {
226
- const re = new RegExp(`<${tag}[^>]*\\b${attr}=["']([^"']+)["']`, 'i');
227
- const m = block.match(re);
228
- return m ? m[1] : null;
229
- }
205
+ const { parseFeed: tokenizerParseFeed } = require('./xml-tokenizer');
230
206
 
231
- function stripCdata(s) {
232
- if (typeof s !== 'string') return '';
233
- return s.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1').replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
207
+ function parseRssAtom(xml, errors = null) {
208
+ return tokenizerParseFeed(xml, errors);
234
209
  }
235
210
 
236
211
  /**
@@ -0,0 +1,73 @@
1
+ "use strict";
2
+ /**
3
+ * lib/version-pins.js
4
+ *
5
+ * Single source of truth for the canonical MITRE / ATT&CK / ATLAS /
6
+ * D3FEND version pins that operator-facing docs reference.
7
+ *
8
+ * Pre-v0.13.20 history: ATLAS version was pinned to v5.4.0 in 33+
9
+ * locations (READMEs, AGENTS.md, ARCHITECTURE.md, agent personas,
10
+ * skill bodies, schema descriptions, manifest.json). Bumping required
11
+ * a lockstep regex-replace across all 33 files. v0.13.18 bumped to
12
+ * v5.6.0; the regex sweep accidentally touched dates in unrelated
13
+ * paragraphs and only failed-loudly because the tests asserted
14
+ * version drift. v0.13.20 makes the pin schema-driven:
15
+ *
16
+ * - `data/atlas-ttps.json._meta.atlas_version` is the source of truth.
17
+ * - `data/attack-techniques.json._meta.attack_version` is too.
18
+ * - This module reads both, exposes them via getAtlasVersion() and
19
+ * getAttackVersion() helpers, and is the canonical resolver every
20
+ * consumer (test runner, doc-currency check, lint, skill-body
21
+ * scanner) reaches through.
22
+ *
23
+ * The drift-detection tests in tests/atlas-version-canonical.test.js
24
+ * and tests/attack-version-canonical.test.js now compare every
25
+ * operator-facing mention against the value this module returns.
26
+ * A future bump is `node $(exceptd path)/lib/sign.js sign-all` + this
27
+ * module reads the new value; no lockstep doc edit needed except where
28
+ * the mention is
29
+ * a literal-string semantic ("upgrade from v5.4.0 to v5.6.0") that an
30
+ * operator must read.
31
+ *
32
+ * API:
33
+ * getAtlasVersion() → "5.6.0"
34
+ * getAttackVersion() → "19.0"
35
+ * getAtlasReleaseDate() → "2026-05-08"
36
+ * getAllPins() → { atlas_version, atlas_release_date, attack_version, ... }
37
+ */
38
+
39
+ const fs = require("fs");
40
+ const path = require("path");
41
+
42
+ const ROOT = path.join(__dirname, "..");
43
+
44
+ let _cached = null;
45
+
46
/**
 * Read the version pins from the data files, memoising the result in the
 * module-level `_cached` variable.
 *
 * Sources, all resolved relative to ROOT and parsed as JSON:
 *   data/atlas-ttps.json        -> _meta.atlas_version, _meta.atlas_release_date
 *   data/attack-techniques.json -> _meta.attack_version, _meta.attack_version_date
 *   manifest.json               -> atlas_version, attack_version
 *
 * Any value that is absent or falsy is reported as null. File-read and
 * JSON-parse errors are not caught.
 *
 * @returns {object} the cached pins object (the same reference on every
 *   call until the cache is cleared).
 */
function loadPins() {
  if (_cached) return _cached;

  const readJson = (...segments) =>
    JSON.parse(fs.readFileSync(path.join(ROOT, ...segments), "utf8"));

  // All three files are read before any field is inspected.
  const atlasDoc = readJson("data", "atlas-ttps.json");
  const attackDoc = readJson("data", "attack-techniques.json");
  const manifest = readJson("manifest.json");

  const atlasMeta = atlasDoc._meta || {};
  const attackMeta = attackDoc._meta || {};

  _cached = {
    atlas_version: atlasMeta.atlas_version || null,
    atlas_release_date: atlasMeta.atlas_release_date || null,
    attack_version: attackMeta.attack_version || null,
    attack_version_date: attackMeta.attack_version_date || null,
    manifest_atlas_version: manifest.atlas_version || null,
    manifest_attack_version: manifest.attack_version || null
  };
  return _cached;
}
61
+
62
/** Drop the memoised pins so the next loadPins() call re-reads the files. */
function clearCache() {
  _cached = null;
}

/** @returns {string|null} the atlas_version pin. */
function getAtlasVersion() {
  const { atlas_version } = loadPins();
  return atlas_version;
}

/** @returns {string|null} the atlas_release_date pin. */
function getAtlasReleaseDate() {
  const { atlas_release_date } = loadPins();
  return atlas_release_date;
}

/** @returns {string|null} the attack_version pin. */
function getAttackVersion() {
  const { attack_version } = loadPins();
  return attack_version;
}

/** @returns {string|null} the attack_version_date pin. */
function getAttackVersionDate() {
  const { attack_version_date } = loadPins();
  return attack_version_date;
}

/** @returns {object} a shallow copy of every pin, safe for the caller to mutate. */
function getAllPins() {
  return Object.assign({}, loadPins());
}
68
+
69
+ module.exports = {
70
+ getAtlasVersion, getAtlasReleaseDate,
71
+ getAttackVersion, getAttackVersionDate,
72
+ getAllPins, clearCache
73
+ };