npm - @blamejs/exceptd-skills - Versions diffs - 0.13.19 → 0.13.20 - Mend

@blamejs/exceptd-skills 0.13.19 → 0.13.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/CHANGELOG.md +41 -0
package/data/_indexes/_meta.json +5 -5
package/data/cve-catalog.json +301 -3792
package/data/framework-control-gaps.json +168 -504
package/data/zeroday-lessons.json +5 -3029
package/lib/canonical-eq.js +88 -0
package/lib/cve-regression-watcher.js +130 -9
package/lib/source-advisories.js +9 -34
package/lib/version-pins.js +73 -0
package/lib/xml-tokenizer.js +344 -0
package/manifest.json +44 -44
package/package.json +2 -2
package/sbom.cdx.json +74 -29
package/scripts/audit-catalog-gaps.js +11 -2
package/scripts/check-test-coverage.js +16 -18
package/scripts/refresh-upstream-catalogs.js +13 -0

package/lib/canonical-eq.js ADDED Viewed

@@ -0,0 +1,88 @@
+"use strict";
+/**
+ * lib/canonical-eq.js
+ *
+ * Canonical-form deep equality for catalog diff detection. The diff-
+ * coverage gate previously compared `JSON.stringify(before.iocs)` vs
+ * `JSON.stringify(after.iocs)` which is non-canonical: key order,
+ * trailing whitespace, and numeric format differences all register as
+ * "different" when the operator made no semantic change.
+ *
+ * Pre-v0.13.20 history: the symptom was patched twice with skip rules
+ * (v0.13.17 _auto_imported skip; v0.13.19 _iocs_stub skip). v0.13.20
+ * fixes the root cause — canonical recursive equality with sorted-key
+ * object comparison and array-position-sensitive element comparison.
+ *
+ * Contract:
+ *   - Primitives (string / number / boolean / null / undefined) compare
+ *     by strict equality (===).
+ *   - Arrays compare element-by-element in order. [1,2] !== [2,1].
+ *     This matches operator intent — array order in IoCs / attack_refs
+ *     / cwe_refs is meaningful (most-relevant-first convention).
+ *   - Objects compare by key-set equality + per-key recursive equality.
+ *     Key order does NOT matter; { a:1, b:2 } === { b:2, a:1 }.
+ *   - Cycle protection: a WeakMap of WeakSets records the visited
+ *     (a, b) pairs and prevents infinite recursion on self-referential
+ *     structures. Cycles compare unequal across mismatched topologies;
+ *     equal across identical topologies.
+ *   - NaN: NaN equals NaN under this comparator (this matches
+ *     Object.is and deviates from `===`, under which NaN !== NaN) so
+ *     that the comparator is reflexive — useful for catalog data which
+ *     never legitimately contains NaN but might pick one up from a
+ *     buggy upstream.
+ *
+ * Helpers:
+ *   - canonicalEqual(a, b): full recursive equality.
+ *   - canonicalStringify(v): sorted-key JSON for hashing / display.
+ *     Produces stable output suitable for SHA-256 etc.
+ */
+function canonicalEqual(a, b, seen = new WeakMap()) { // Recursive equality of a and b; returns a boolean. `seen` is internal pair bookkeeping that the recursive calls pass along.
+  if (a === b) return true; // Same reference, or identical primitives (covers undefined === undefined).
+  // NaN counts as equal to NaN here, even though NaN === NaN is false.
+  if (typeof a === "number" && typeof b === "number" && Number.isNaN(a) && Number.isNaN(b)) return true;
+  if (a === null || b === null) return a === b; // typeof null is "object", so null is ruled out before the object path; a !== b at this point, so this yields false.
+  if (typeof a !== "object" || typeof b !== "object") return false; // At least one side is not an object and the two are not identical.
+  // Cycle detection: `seen` maps each visited `a` object to the WeakSet
+  // of `b` objects it has been paired with. A pair that shows up again
+  // returns true, which stops the recursion on self-referential input;
+  // the call that registered the pair still compares its contents.
+  const aSeen = seen.get(a);
+  if (aSeen && aSeen.has(b)) return true;
+  if (!aSeen) seen.set(a, new WeakSet([b])); // First visit of `a`: start its partner set.
+  else aSeen.add(b);
+  const aIsArr = Array.isArray(a);
+  const bIsArr = Array.isArray(b);
+  if (aIsArr !== bIsArr) return false; // An array never equals a non-array object.
+  if (aIsArr) {
+    if (a.length !== b.length) return false;
+    for (let i = 0; i < a.length; i++) { // Position-sensitive: element i is compared with element i.
+      if (!canonicalEqual(a[i], b[i], seen)) return false;
+    }
+    return true;
+  }
+  // Non-array objects: same key set, then recursively equal values per key.
+  const aKeys = Object.keys(a).sort(); // Object.keys lists own enumerable string keys; sorting removes insertion-order effects.
+  const bKeys = Object.keys(b).sort();
+  if (aKeys.length !== bKeys.length) return false;
+  for (let i = 0; i < aKeys.length; i++) {
+    if (aKeys[i] !== bKeys[i]) return false; // Both lists are sorted, so any positional mismatch means the key sets differ.
+  }
+  for (const k of aKeys) {
+    if (!canonicalEqual(a[k], b[k], seen)) return false;
+  }
+  return true;
+}
+// Sorted-key recursive JSON serializer.
+/* Object keys are sorted at every depth and arrays keep their order, so
+ * the text is stable for hash digests, diff comparison, and
+ * human-readable display.
+ * NOTE(review): an `undefined` value is not handled the way
+ * JSON.stringify handles it: { a: undefined } yields `{"a":undefined}`
+ * (not valid JSON) and [undefined] yields `[]`. There is also no cycle
+ * guard, so self-referential input recurses without bound. Confirm that
+ * callers only pass JSON-safe, acyclic data.
+ */
+function canonicalStringify(v) {
+  if (v === null || typeof v !== "object") return JSON.stringify(v); // null and non-objects are delegated to JSON.stringify.
+  if (Array.isArray(v)) return "[" + v.map(canonicalStringify).join(",") + "]"; // Elements in their original order, comma-separated.
+  const keys = Object.keys(v).sort(); // Own enumerable string keys in sorted order.
+  return "{" + keys.map((k) => JSON.stringify(k) + ":" + canonicalStringify(v[k])).join(",") + "}"; // "key":value pairs, keys JSON-quoted.
+}
+module.exports = { canonicalEqual, canonicalStringify };

package/lib/cve-regression-watcher.js CHANGED Viewed

@@ -104,6 +104,81 @@ function findRegressionEntry(catalog, historicalId) {
  * @param {Object} opts — { now?: Date, threshold_years_ago?: number }
  * @returns {Object} report — { candidates, historical_id_threshold_year, evaluated_diffs }
  */
+// v0.13.20 — content-pattern signals layered on top of the CVE-ID match.
+// The audit-class-2.4 problem: pre-v0.13.20, the watcher detected only
+// when a poller diff carried an extracted CVE-YYYY-NNN identifier. If a
+// researcher's writeup announces "the 2020 Forshaw fix is silently
+// reverted" without typing the CVE ID, the watcher missed the class
+// entirely. v0.13.20 adds content-pattern signals so the watcher can
+// flag candidates from prose alone.
+// Historical-regression language. Phrases that indicate a researcher is
+// claiming a fix was silently reverted, downgrade-rolled-back, or
+// otherwise re-broken.
+const HISTORICAL_REGRESSION_PHRASES = [ // scanContentSignals reports only the first pattern in this list that matches.
+  /silently (re-?broken|reverted|regressed|rolled back)/i,
+  /(fix|patch|mitigation) (was|is)? ?(silently )?(reverted|undone|removed|missing)/i,
+  /re-?regression of/i,
+  /never (actually|truly) (fixed|patched)/i,
+  /\bre[- ]exploit(ed|able)\b/i,
+  /(same|identical|exact) (primitive|bug|vulnerability) as/i,
+  /unpatched (since|despite) (an? )?(earlier|previous|prior|original) (fix|patch|disclosure)/i,
+  /vendor (declined|refused|never issued) (a )?new CVE/i,
+];
+// Named-researcher patterns. Operator-curated names that have a prior
+// catalog-grade drop are tracked elsewhere (NEW-CTRL-073 handle tracker),
+// but the regression-watcher also looks for the names in poller-diff
+// content as an additional signal — a familiar handle re-disclosing an
+// old CVE is a higher-confidence regression candidate.
+const RESEARCHER_NAME_PATTERNS = [ // scanContentSignals reports only the first pattern in this list that matches.
+  /Nightmare-Eclipse/i,
+  /Chaotic Eclipse/i,
+  /James Forshaw/i,
+  /Project Zero/i,
+  /Big Sleep/i,
+  /Tavis Ormandy/i,
+  /Jann Horn/i,
+];
+// Component-string detection. In the visible findRegressionCandidates code a component hit counts
+// only together with a researcher-name hit; a regression phrase is sufficient on its own.
+const TRACKED_COMPONENT_TOKENS = [ // scanContentSignals collects the matched text of every pattern here that matches.
+  /cldflt\.sys/i,
+  /\bldfltrl\.sys/i, // NOTE(review): possibly a typo or variant of cldflt.sys above — confirm the intended driver name.
+  /HsmOsBlockPlaceholderAccess/i,
+  /ssh-?keysign/i,
+  /rxgk_decrypt_skb/i,
+  /CRI-?O/i,
+  /\bptrace\b/i,
+  /Cloud Files Mini Filter/i,
+  /Windows Recovery Environment|WinRE/i,
+  /\bCTFMON(\.exe)?\b/i,
+];
+function scanContentSignals(text) { // Scans text for regression-phrase, researcher-name and component-token matches; returns an object holding only the signal keys that fired.
+  if (typeof text !== "string" || !text) return {}; // Non-string or empty input: no signals.
+  const signals = {};
+  // Historical-regression language: keep the matched text of the first pattern that hits.
+  for (const re of HISTORICAL_REGRESSION_PHRASES) {
+    const m = text.match(re);
+    if (m) { signals.regression_language = m[0]; break; }
+  }
+  // Researcher name: likewise, first matching pattern only.
+  for (const re of RESEARCHER_NAME_PATTERNS) {
+    const m = text.match(re);
+    if (m) { signals.researcher = m[0]; break; }
+  }
+  // Component tokens: collect the matched text from every pattern that hits.
+  const components = [];
+  for (const re of TRACKED_COMPONENT_TOKENS) {
+    const m = text.match(re);
+    if (m && !components.includes(m[0])) components.push(m[0]); // Skip text that is already listed (exact string comparison).
+  }
+  if (components.length) signals.components = components; // The key is omitted when no component matched.
+  return signals;
+}
 function findRegressionCandidates(diffs, catalog, opts) {
   const now = (opts && opts.now) || new Date();
   const yearsAgo = (opts && typeof opts.threshold_years_ago === 'number') ? opts.threshold_years_ago : 2;
@@ -112,19 +187,62 @@ function findRegressionCandidates(diffs, catalog, opts) {
   // Group historical-CVE refs by id so multi-feed surfacing collapses.
   const byHistoricalId = new Map();
+  // Content-only candidates — surfaced by language/component pattern
+  // matching even when no CVE ID was extracted from the diff text.
+  const contentCandidates = [];
   for (const d of (diffs || [])) {
     if (!d || typeof d.id !== 'string') continue;
+    // Title field name depends on input shape:
+    //   - ADVISORIES_SOURCE diffs[] carry `title` (post-dedupe string).
+    //   - ADVISORIES_SOURCE observations[] carry `first_title` (also a
+    //     string — the first occurrence across feeds). Pre-v0.13.20
+    //     fix (codex P1 PR #60): the watcher only read `title`, which
+    //     is undefined on observations[], so the content-pattern layer
+    //     never fired in the primary production path.
+    // Advisory URL is `advisory_url` (string) on raw per-feed diffs and
+    // `advisory_urls` (array) after dedupe in both shapes.
+    const titleField = d.title || d.first_title || '';
+    const urls = Array.isArray(d.advisory_urls)
+      ? d.advisory_urls.join(' ')
+      : (d.advisory_url || '');
+    const text = `${titleField} ${d.body || ''} ${urls}`;
+    const signals = scanContentSignals(text);
+    const hasRegressionSignal = !!(signals.regression_language ||
+      (signals.researcher && signals.components));
     const year = cveYear(d.id);
-    if (year === null) continue;
-    if (year > thresholdYear) continue;
-    if (!byHistoricalId.has(d.id)) byHistoricalId.set(d.id, { sources: new Set(), titles: [] });
-    const slot = byHistoricalId.get(d.id);
-    if (Array.isArray(d.sources)) {
-      for (const s of d.sources) slot.sources.add(s);
-    } else if (typeof d.source === 'string') {
-      slot.sources.add(d.source);
+    if (year !== null && year <= thresholdYear) {
+      // CVE-ID-bearing historical reference (the original v0.13.17 path).
+      if (!byHistoricalId.has(d.id)) byHistoricalId.set(d.id, { sources: new Set(), titles: [], signals: {} });
+      const slot = byHistoricalId.get(d.id);
+      if (Array.isArray(d.sources)) {
+        for (const s of d.sources) slot.sources.add(s);
+      } else if (typeof d.source === 'string') {
+        slot.sources.add(d.source);
+      }
+      // Title may be carried as `title` (diffs[]) or `first_title`
+      // (observations[]) — accept either to keep the historical-
+      // candidate title list populated under both input shapes.
+      const titleStr = (typeof d.title === 'string' && d.title) ? d.title
+        : (typeof d.first_title === 'string' && d.first_title) ? d.first_title
+        : '';
+      if (titleStr) slot.titles.push(titleStr);
+      // Merge content signals — per signal key, the value from the most
+      // recently processed diff overwrites earlier ones (Object.assign).
+      Object.assign(slot.signals, signals);
+      continue;
+    }
+    // No historical CVE-ID in this diff. If content signals fire, still
+    // surface as a content-only candidate so an operator can triage.
+    if (hasRegressionSignal) {
+      const titleStr = d.title || d.first_title || '';
+      contentCandidates.push({
+        historical_cve: null,
+        surfaced_by: Array.isArray(d.sources) ? d.sources.slice().sort() : (d.source ? [d.source] : []),
+        first_seen_titles: titleStr ? [titleStr] : [],
+        existing_regression_key: null,
+        action: 'content-only-investigate',
+        signals,
+      });
     }
-    if (typeof d.title === 'string' && d.title) slot.titles.push(d.title);
   }
   const candidates = [];
@@ -145,10 +263,12 @@ function findRegressionCandidates(diffs, catalog, opts) {
       first_seen_titles: slot.titles.slice(0, 5),
       existing_regression_key: existing,
       action,
+      signals: slot.signals,
     });
   }
   candidates.sort((a, b) => a.historical_cve.localeCompare(b.historical_cve));
+  candidates.push(...contentCandidates);
   return {
     candidates,
@@ -215,4 +335,5 @@ module.exports = {
   findRegressionCandidates,
   findRegressionEntry,
   cveYear,
+  scanContentSignals,
 };

package/lib/source-advisories.js CHANGED Viewed

@@ -193,44 +193,19 @@ function extractCveIds(text) {
 }
 /**
- * Lightweight RSS / Atom parser. Avoids pulling in a dependency for what
- * is effectively `<item>` / `<entry>` extraction + `<title>` / `<link>` /
- * `<pubDate>` / `<published>` / `<description>` / `<content>` text grabs.
+ * RSS / Atom parser. v0.13.20 replaces the original regex-based parser
+ * (which silently failed on XML namespaces, nested CDATA, self-closing
+ * tags, HTML entities, and multi-line content) with a proper streaming
+ * XML tokenizer defined in lib/xml-tokenizer.js. Parser errors surface
+ * via the second `errors` argument so consumers can observe parse
+ * failures instead of receiving a silent empty array.
  *
  * Returns [{ title, link, published, body }, ...].
  */
-function parseRssAtom(xml) {
-  if (typeof xml !== 'string') return [];
-  const items = [];
-  // Try Atom <entry>...</entry> first.
-  const atomEntryRe = /<entry\b[\s\S]*?<\/entry>/g;
-  const rssItemRe = /<item\b[\s\S]*?<\/item>/g;
-  const blocks = (xml.match(atomEntryRe) || xml.match(rssItemRe) || []);
-  for (const block of blocks) {
-    const title = matchInner(block, 'title') || '';
-    const link = matchInner(block, 'link') || matchAttr(block, 'link', 'href') || '';
-    const published = matchInner(block, 'pubDate') || matchInner(block, 'published') || matchInner(block, 'updated') || '';
-    const description = matchInner(block, 'description') || matchInner(block, 'content') || matchInner(block, 'summary') || '';
-    items.push({ title: stripCdata(title), link: stripCdata(link), published: stripCdata(published), body: stripCdata(description) });
-  }
-  return items;
-}
-function matchInner(block, tag) {
-  const re = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`, 'i');
-  const m = block.match(re);
-  return m ? m[1].trim() : null;
-}
-function matchAttr(block, tag, attr) {
-  const re = new RegExp(`<${tag}[^>]*\\b${attr}=["']([^"']+)["']`, 'i');
-  const m = block.match(re);
-  return m ? m[1] : null;
-}
+const { parseFeed: tokenizerParseFeed } = require('./xml-tokenizer');
-function stripCdata(s) {
-  if (typeof s !== 'string') return '';
-  return s.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1').replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
+function parseRssAtom(xml, errors = null) {
+  return tokenizerParseFeed(xml, errors);
 }
 /**

package/lib/version-pins.js ADDED Viewed

@@ -0,0 +1,73 @@
+"use strict";
+/**
+ * lib/version-pins.js
+ *
+ * Single source of truth for the canonical MITRE / ATT&CK / ATLAS /
+ * D3FEND version pins that operator-facing docs reference.
+ *
+ * Pre-v0.13.20 history: ATLAS version was pinned to v5.4.0 in 33+
+ * locations (READMEs, AGENTS.md, ARCHITECTURE.md, agent personas,
+ * skill bodies, schema descriptions, manifest.json). Bumping required
+ * a lockstep regex-replace across all 33 files. v0.13.18 bumped to
+ * v5.6.0; the regex sweep accidentally touched dates in unrelated
+ * paragraphs and only failed-loudly because the tests asserted
+ * version drift. v0.13.20 makes the pin schema-driven:
+ *
+ *   - `data/atlas-ttps.json._meta.atlas_version` is the source of truth.
+ *   - `data/attack-techniques.json._meta.attack_version` is too.
+ *   - This module reads both, exposes them via getAtlasVersion() and
+ *     getAttackVersion() helpers, and is the canonical resolver every
+ *     consumer (test runner, doc-currency check, lint, skill-body
+ *     scanner) reaches through.
+ *
+ * The drift-detection tests in tests/atlas-version-canonical.test.js
+ * and tests/attack-version-canonical.test.js now compare every
+ * operator-facing mention against the value this module returns.
+ * A future bump is `node $(exceptd path)/lib/sign.js sign-all`, after
+ * which this module reads the new value; no lockstep doc edit is
+ * needed except where the mention is a literal-string semantic
+ * ("upgrade from v5.4.0 to v5.6.0") that an operator must read.
+ *
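+ * API:
+ *   getAtlasVersion() → "5.6.0"
+ *   getAttackVersion() → "19.0"
+ *   getAtlasReleaseDate() → "2026-05-08"
+ *   getAttackVersionDate() → attack `_meta.attack_version_date`, or null
+ *   getAllPins() → { atlas_version, atlas_release_date, attack_version, ... }
+ *   clearCache() → drops the cached pins so the next call re-reads the files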
+ */
+const fs = require("fs");
+const path = require("path");
+const ROOT = path.join(__dirname, "..");
+let _cached = null; // Memoized pins object; null until loadPins() first succeeds or after clearCache().
+function loadPins() { // Reads the three JSON files under ROOT on the first call and returns the same cached object on later calls.
+  if (_cached) return _cached;
+  const atlas = JSON.parse(fs.readFileSync(path.join(ROOT, "data", "atlas-ttps.json"), "utf8")); // No try/catch: a read or parse failure propagates to the caller.
+  const attack = JSON.parse(fs.readFileSync(path.join(ROOT, "data", "attack-techniques.json"), "utf8"));
+  const meta = JSON.parse(fs.readFileSync(path.join(ROOT, "manifest.json"), "utf8"));
+  _cached = {
+    atlas_version: (atlas._meta && atlas._meta.atlas_version) || null, // A missing _meta block or a falsy field is normalized to null.
+    atlas_release_date: (atlas._meta && atlas._meta.atlas_release_date) || null,
+    attack_version: (attack._meta && attack._meta.attack_version) || null,
+    attack_version_date: (attack._meta && attack._meta.attack_version_date) || null,
+    manifest_atlas_version: meta.atlas_version || null, // The manifest fields are read from the top level of manifest.json, not from a _meta block.
+    manifest_attack_version: meta.attack_version || null
+  };
+  return _cached;
+}
+function clearCache() { _cached = null; } // Forget the memoized pins so the next loadPins() call re-reads the files.
+function getAtlasVersion() { return loadPins().atlas_version; } // atlas-ttps.json _meta.atlas_version, or null.
+function getAtlasReleaseDate() { return loadPins().atlas_release_date; } // atlas-ttps.json _meta.atlas_release_date, or null.
+function getAttackVersion() { return loadPins().attack_version; } // attack-techniques.json _meta.attack_version, or null.
+function getAttackVersionDate() { return loadPins().attack_version_date; } // attack-techniques.json _meta.attack_version_date, or null.
+function getAllPins() { return { ...loadPins() }; } // Shallow copy, so callers cannot mutate the cached object.
+module.exports = {
+  getAtlasVersion, getAtlasReleaseDate,
+  getAttackVersion, getAttackVersionDate,
+  getAllPins, clearCache
+};