npm - @blamejs/exceptd-skills - Versions diffs - 0.13.19 → 0.13.21 - Mend

@blamejs/exceptd-skills 0.13.19 → 0.13.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/CHANGELOG.md +72 -0
package/data/_indexes/_meta.json +6 -6
package/data/attack-techniques.json +2 -3
package/data/cve-catalog.json +301 -3792
package/data/framework-control-gaps.json +168 -504
package/data/zeroday-lessons.json +5 -3029
package/lib/canonical-eq.js +88 -0
package/lib/cve-regression-watcher.js +130 -9
package/lib/gap-detectors.js +555 -0
package/lib/source-advisories.js +9 -34
package/lib/version-pins.js +73 -0
package/lib/xml-tokenizer.js +344 -0
package/manifest.json +44 -44
package/package.json +4 -3
package/sbom.cdx.json +108 -33
package/scripts/audit-catalog-gaps.js +74 -13
package/scripts/check-catalog-gap-budget.js +133 -0
package/scripts/check-test-coverage.js +16 -18
package/scripts/predeploy.js +14 -0
package/scripts/refresh-upstream-catalogs.js +13 -0

package/lib/canonical-eq.js ADDED Viewed

@@ -0,0 +1,88 @@
+"use strict";
+/**
+ * lib/canonical-eq.js
+ *
+ * Canonical-form deep equality for catalog diff detection. The diff-
+ * coverage gate previously compared `JSON.stringify(before.iocs)` vs
+ * `JSON.stringify(after.iocs)` which is non-canonical: key order,
+ * trailing whitespace, and numeric format differences all register as
+ * "different" when the operator made no semantic change.
+ *
+ * Pre-v0.13.20 history: the symptom was patched twice with skip rules
+ * (v0.13.17 _auto_imported skip; v0.13.19 _iocs_stub skip). v0.13.20
+ * fixes the root cause — canonical recursive equality with sorted-key
+ * object comparison and array-position-sensitive element comparison.
+ *
+ * Contract:
+ *   - Primitives (string / number / boolean / null / undefined) compare
+ *     by strict equality (===).
+ *   - Arrays compare element-by-element in order. [1,2] !== [2,1].
+ *     This matches operator intent — array order in IoCs / attack_refs
+ *     / cwe_refs is meaningful (most-relevant-first convention).
+ *   - Objects compare by key-set equality + per-key recursive equality
+ *     over their own enumerable string keys (Object.keys).
+ *     Key order does NOT matter; { a:1, b:2 } === { b:2, a:1 }.
+ *     A key holding `undefined` still counts as a key, so
+ *     { a: undefined } !== {} (unlike a JSON.stringify comparison).
+ *   - Cycle protection: a WeakMap from each visited `a` to a WeakSet of
+ *     the `b` values it has been paired with. A pair that was already
+ *     visited is treated as equal at that point, so the non-cyclic
+ *     parts of the structure decide the overall result.
+ *   - NaN: NaN equals NaN under this comparator (deviates from `===`,
+ *     agrees with Object.is) to make the comparator total — useful for
+ *     catalog data which never legitimately contains NaN but might
+ *     pick one up from a buggy upstream.
+ *   - NOTE(review): only arrays are special-cased. Any other object
+ *     (Date, Map, Set, ...) is compared by its own enumerable keys, so
+ *     two such values with no own keys compare equal. Harmless for
+ *     parsed JSON; confirm no caller passes richer values.
+ *
+ * Helpers:
+ *   - canonicalEqual(a, b): full recursive equality.
+ *   - canonicalStringify(v): sorted-key JSON for hashing / display.
+ *     Produces stable output suitable for SHA-256 etc.
+ *
+ * @param {*} a - left-hand value.
+ * @param {*} b - right-hand value.
+ * @param {WeakMap<object, WeakSet<object>>} [seen] - visited (a, b)
+ *     pairs, threaded through the recursion; a fresh map is created
+ *     when the argument is omitted.
+ * @returns {boolean} true when the two values are canonically equal.
+ */
+function canonicalEqual(a, b, seen = new WeakMap()) {
+  if (a === b) return true;
+  // NaN === NaN under this comparator.
+  if (typeof a === "number" && typeof b === "number" && Number.isNaN(a) && Number.isNaN(b)) return true;
+  if (a === null || b === null) return a === b;
+  if (typeof a !== "object" || typeof b !== "object") return false;
+  // Cycle detection — if we've already compared this exact pair, treat
+  // as equal (assumes the rest of the structure decides). For sibling-
+  // cycle differences this means the comparator says "equal at the
+  // cycle point" and lets non-cyclic differences elsewhere decide.
+  const aSeen = seen.get(a);
+  if (aSeen && aSeen.has(b)) return true;
+  if (!aSeen) seen.set(a, new WeakSet([b]));
+  else aSeen.add(b);
+  const aIsArr = Array.isArray(a);
+  const bIsArr = Array.isArray(b);
+  if (aIsArr !== bIsArr) return false;
+  if (aIsArr) {
+    if (a.length !== b.length) return false;
+    for (let i = 0; i < a.length; i++) {
+      if (!canonicalEqual(a[i], b[i], seen)) return false;
+    }
+    return true;
+  }
+  // Non-array objects — compare sorted own-key lists, then recurse per key.
+  const aKeys = Object.keys(a).sort();
+  const bKeys = Object.keys(b).sort();
+  if (aKeys.length !== bKeys.length) return false;
+  for (let i = 0; i < aKeys.length; i++) {
+    if (aKeys[i] !== bKeys[i]) return false;
+  }
+  for (const k of aKeys) {
+    if (!canonicalEqual(a[k], b[k], seen)) return false;
+  }
+  return true;
+}
+// Sorted-key recursive JSON: object keys are emitted in Object.keys().sort() order, arrays keep their order, primitives go through JSON.stringify.
+// NOTE(review): `undefined` is not handled like JSON.stringify (arrays give "[1,,2]", objects give `"k":undefined`) and there is no cycle guard.
+function canonicalStringify(v) {
+  if (v === null || typeof v !== "object") return JSON.stringify(v);
+  if (Array.isArray(v)) return "[" + v.map(canonicalStringify).join(",") + "]";
+  const keys = Object.keys(v).sort();
+  return "{" + keys.map((k) => JSON.stringify(k) + ":" + canonicalStringify(v[k])).join(",") + "}";
+}
+module.exports = { canonicalEqual, canonicalStringify };

package/lib/cve-regression-watcher.js CHANGED Viewed

@@ -104,6 +104,81 @@ function findRegressionEntry(catalog, historicalId) {
  * @param {Object} opts — { now?: Date, threshold_years_ago?: number }
  * @returns {Object} report — { candidates, historical_id_threshold_year, evaluated_diffs }
  */
+// v0.13.20 — content-pattern signals layered on top of the CVE-ID match.
+// The audit-class-2.4 problem: pre-v0.13.20, the watcher detected only
+// when a poller diff carried an extracted CVE-YYYY-NNN identifier. If a
+// researcher's writeup announces "the 2020 Forshaw fix is silently
+// reverted" without typing the CVE ID, the watcher missed the class
+// entirely. v0.13.20 adds content-pattern signals so the watcher can
+// flag candidates from prose alone.
+// Historical-regression language. Phrases that indicate a researcher is
+// claiming a fix was silently reverted, rolled back, or otherwise
+// re-broken. All patterns are case-insensitive and unanchored.
+const HISTORICAL_REGRESSION_PHRASES = [
+  /silently (re-?broken|reverted|regressed|rolled back)/i,
+  /(fix|patch|mitigation) (was|is)? ?(silently )?(reverted|undone|removed|missing)/i, // NOTE(review): no leading \b, so "prefix is missing" also matches
+  /re-?regression of/i,
+  /never (actually|truly) (fixed|patched)/i,
+  /\bre[- ]exploit(ed|able)\b/i,
+  /(same|identical|exact) (primitive|bug|vulnerability) as/i,
+  /unpatched (since|despite) (an? )?(earlier|previous|prior|original) (fix|patch|disclosure)/i,
+  /vendor (declined|refused|never issued) (a )?new CVE/i,
+];
+// Named-researcher patterns. Operator-curated names that have a prior
+// catalog-grade drop are tracked elsewhere (NEW-CTRL-073 handle tracker),
+// but the regression-watcher also looks for the names in poller-diff
+// content as an additional signal. scanContentSignals records only the
+// first pattern that matches (case-insensitive, unanchored substring).
+const RESEARCHER_NAME_PATTERNS = [
+  /Nightmare-Eclipse/i,
+  /Chaotic Eclipse/i,
+  /James Forshaw/i,
+  /Project Zero/i,
+  /Big Sleep/i,
+  /Tavis Ormandy/i,
+  /Jann Horn/i,
+];
+// Component-string detection. scanContentSignals collects every distinct match; findRegressionCandidates treats
+// components as a trigger only together with a researcher-name hit (a regression phrase triggers on its own).
+const TRACKED_COMPONENT_TOKENS = [
+  /cldflt\.sys/i,
+  /\bldfltrl\.sys/i, // NOTE(review): confirm this driver name is intended; cldflt.sys is already listed above
+  /HsmOsBlockPlaceholderAccess/i,
+  /ssh-?keysign/i,
+  /rxgk_decrypt_skb/i,
+  /CRI-?O/i, // NOTE(review): no \b anchors and case-insensitive, so "crio" inside a longer word also matches
+  /\bptrace\b/i,
+  /Cloud Files Mini Filter/i,
+  /Windows Recovery Environment|WinRE/i,
+  /\bCTFMON(\.exe)?\b/i,
+];
+function scanContentSignals(text) { // returns { regression_language?, researcher?, components? }; a key is present only on a hit
+  if (typeof text !== "string" || !text) return {}; // non-string or empty input yields no signals
+  const signals = {};
+  // Historical-regression language hit: the first pattern that matches wins; the matched substring is stored.
+  for (const re of HISTORICAL_REGRESSION_PHRASES) {
+    const m = text.match(re);
+    if (m) { signals.regression_language = m[0]; break; }
+  }
+  // Researcher-name hit: same first-match-wins rule, so at most one name is recorded.
+  for (const re of RESEARCHER_NAME_PATTERNS) {
+    const m = text.match(re);
+    if (m) { signals.researcher = m[0]; break; }
+  }
+  // Component-token hit: every pattern is tried; distinct matched substrings are collected in list order.
+  const components = [];
+  for (const re of TRACKED_COMPONENT_TOKENS) {
+    const m = text.match(re);
+    if (m && !components.includes(m[0])) components.push(m[0]);
+  }
+  if (components.length) signals.components = components;
+  return signals;
+}
 function findRegressionCandidates(diffs, catalog, opts) {
   const now = (opts && opts.now) || new Date();
   const yearsAgo = (opts && typeof opts.threshold_years_ago === 'number') ? opts.threshold_years_ago : 2;
@@ -112,19 +187,62 @@ function findRegressionCandidates(diffs, catalog, opts) {
   // Group historical-CVE refs by id so multi-feed surfacing collapses.
   const byHistoricalId = new Map();
+  // Content-only candidates — surfaced by language/component pattern
+  // matching even when no CVE ID was extracted from the diff text.
+  const contentCandidates = [];
   for (const d of (diffs || [])) {
     if (!d || typeof d.id !== 'string') continue;
+    // Title field name depends on input shape:
+    //   - ADVISORIES_SOURCE diffs[] carry `title` (post-dedupe string).
+    //   - ADVISORIES_SOURCE observations[] carry `first_title` (also a
+    //     string — the first occurrence across feeds). Pre-v0.13.20
+    //     fix (codex P1 PR #60): the watcher only read `title`, which
+    //     is undefined on observations[], so the content-pattern layer
+    //     never fired in the primary production path.
+    // Advisory URL is `advisory_url` (string) on raw per-feed diffs and
+    // `advisory_urls` (array) after dedupe in both shapes.
+    const titleField = d.title || d.first_title || '';
+    const urls = Array.isArray(d.advisory_urls)
+      ? d.advisory_urls.join(' ')
+      : (d.advisory_url || '');
+    const text = `${titleField} ${d.body || ''} ${urls}`;
+    const signals = scanContentSignals(text);
+    const hasRegressionSignal = !!(signals.regression_language ||
+      (signals.researcher && signals.components));
     const year = cveYear(d.id);
-    if (year === null) continue;
-    if (year > thresholdYear) continue;
-    if (!byHistoricalId.has(d.id)) byHistoricalId.set(d.id, { sources: new Set(), titles: [] });
-    const slot = byHistoricalId.get(d.id);
-    if (Array.isArray(d.sources)) {
-      for (const s of d.sources) slot.sources.add(s);
-    } else if (typeof d.source === 'string') {
-      slot.sources.add(d.source);
+    if (year !== null && year <= thresholdYear) {
+      // CVE-ID-bearing historical reference (the original v0.13.17 path).
+      if (!byHistoricalId.has(d.id)) byHistoricalId.set(d.id, { sources: new Set(), titles: [], signals: {} });
+      const slot = byHistoricalId.get(d.id);
+      if (Array.isArray(d.sources)) {
+        for (const s of d.sources) slot.sources.add(s);
+      } else if (typeof d.source === 'string') {
+        slot.sources.add(d.source);
+      }
+      // Title may be carried as `title` (diffs[]) or `first_title`
+      // (observations[]) — accept either to keep the historical-
+      // candidate title list populated under both input shapes.
+      const titleStr = (typeof d.title === 'string' && d.title) ? d.title
+        : (typeof d.first_title === 'string' && d.first_title) ? d.first_title
+        : '';
+      if (titleStr) slot.titles.push(titleStr);
+      // Merge content signals — Object.assign overwrites key by key, so the latest diff's hit wins.
+      Object.assign(slot.signals, signals);
+      continue;
+    }
+    // No historical CVE-ID in this diff. If content signals fire, still
+    // surface as a content-only candidate so an operator can triage.
+    if (hasRegressionSignal) {
+      const titleStr = d.title || d.first_title || '';
+      contentCandidates.push({
+        historical_cve: null,
+        surfaced_by: Array.isArray(d.sources) ? d.sources.slice().sort() : (d.source ? [d.source] : []),
+        first_seen_titles: titleStr ? [titleStr] : [],
+        existing_regression_key: null,
+        action: 'content-only-investigate',
+        signals,
+      });
     }
-    if (typeof d.title === 'string' && d.title) slot.titles.push(d.title);
   }
   const candidates = [];
@@ -145,10 +263,12 @@ function findRegressionCandidates(diffs, catalog, opts) {
       first_seen_titles: slot.titles.slice(0, 5),
       existing_regression_key: existing,
       action,
+      signals: slot.signals,
     });
   }
   candidates.sort((a, b) => a.historical_cve.localeCompare(b.historical_cve));
+  candidates.push(...contentCandidates);
   return {
     candidates,
@@ -215,4 +335,5 @@ module.exports = {
   findRegressionCandidates,
   findRegressionEntry,
   cveYear,
+  scanContentSignals,
 };