@blamejs/exceptd-skills 0.13.19 → 0.13.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,88 @@
1
+ "use strict";
2
+ /**
3
+ * lib/canonical-eq.js
4
+ *
5
+ * Canonical-form deep equality for catalog diff detection. The diff-
6
+ * coverage gate previously compared `JSON.stringify(before.iocs)` vs
7
+ * `JSON.stringify(after.iocs)` which is non-canonical: key order,
8
+ * trailing whitespace, and numeric format differences all register as
9
+ * "different" when the operator made no semantic change.
10
+ *
11
+ * Pre-v0.13.20 history: the symptom was patched twice with skip rules
12
+ * (v0.13.17 _auto_imported skip; v0.13.19 _iocs_stub skip). v0.13.20
13
+ * fixes the root cause — canonical recursive equality with sorted-key
14
+ * object comparison and array-position-sensitive element comparison.
15
+ *
16
+ * Contract:
17
+ * - Primitives (string / number / boolean / null / undefined) compare
18
+ * by strict equality (===).
19
+ * - Arrays compare element-by-element in order. [1,2] !== [2,1].
20
+ * This matches operator intent — array order in IoCs / attack_refs
21
+ * / cwe_refs is meaningful (most-relevant-first convention).
22
+ * - Objects compare by key-set equality + per-key recursive equality.
23
+ * Key order does NOT matter; { a:1, b:2 } === { b:2, a:1 }.
24
+ * - Cycle protection: a WeakMap of WeakSets (visited pairs) prevents infinite
25
+ * recursion on self-referential structures. Cycles compare unequal
26
+ * across mismatched topologies; equal across identical topologies.
27
+ * - NaN: NaN equals NaN under this comparator (deviates from strict ===
28
+ * to make the comparator total — useful for catalog data which
29
+ * never legitimately contains NaN but might pick one up from a
30
+ * buggy upstream).
31
+ *
32
+ * Helpers:
33
+ * - canonicalEqual(a, b): full recursive equality.
34
+ * - canonicalStringify(v): sorted-key JSON for hashing / display.
35
+ * Produces stable output suitable for SHA-256 etc.
36
+ */
37
+
38
/**
 * Canonical deep equality for JSON-like values.
 *
 * - Primitives compare with ===, except that NaN equals NaN.
 * - Arrays compare element-by-element, in order.
 * - Dates compare by timestamp (two invalid Dates are equal); a Date never
 *   equals a non-Date. Without this, every Date would look like `{}` because
 *   Dates have no enumerable own keys.
 * - Other objects compare by enumerable own key set plus per-key recursive
 *   equality; key order is irrelevant.
 * - Cyclic structures are supported through the `seen` pair cache.
 *
 * @param {*} a - left value.
 * @param {*} b - right value.
 * @param {WeakMap<object, WeakSet<object>>} [seen] - internal cache of the
 *   (a, b) object pairs already visited; callers normally omit it.
 * @returns {boolean} true when the two values are canonically equal.
 */
function canonicalEqual(a, b, seen = new WeakMap()) {
  if (a === b) return true;
  // NaN equals NaN under this comparator (unlike ===).
  if (typeof a === "number" && typeof b === "number" && Number.isNaN(a) && Number.isNaN(b)) return true;
  // a !== b at this point, so a null or a non-object on either side is a mismatch.
  if (a === null || b === null) return false;
  if (typeof a !== "object" || typeof b !== "object") return false;

  // Dates keep their value in an internal slot, not in enumerable keys, so
  // the generic key comparison below would treat any two Dates as equal.
  const aIsDate = a instanceof Date;
  const bIsDate = b instanceof Date;
  if (aIsDate || bIsDate) {
    if (!aIsDate || !bIsDate) return false;
    const aTime = a.getTime();
    const bTime = b.getTime();
    return aTime === bTime || (Number.isNaN(aTime) && Number.isNaN(bTime));
  }

  const aIsArr = Array.isArray(a);
  if (aIsArr !== Array.isArray(b)) return false;

  // Cycle detection. A pair that is already recorded is either still being
  // compared further up the stack (a cycle) or was compared before and found
  // equal: any `false` result short-circuits all the way to the top-level
  // caller, so a pair proven unequal is never looked up again.
  let partners = seen.get(a);
  if (partners === undefined) {
    partners = new WeakSet();
    seen.set(a, partners);
  } else if (partners.has(b)) {
    return true;
  }
  partners.add(b);

  if (aIsArr) {
    if (a.length !== b.length) return false;
    for (let i = 0; i < a.length; i++) {
      if (!canonicalEqual(a[i], b[i], seen)) return false;
    }
    return true;
  }

  // Plain objects: identical enumerable own key sets, then per-key equality.
  const aKeys = Object.keys(a);
  const bKeys = Object.keys(b);
  if (aKeys.length !== bKeys.length) return false;
  // Equal counts plus "every key of a is a key of b" implies equal key sets.
  const bKeySet = new Set(bKeys);
  for (const k of aKeys) {
    if (!bKeySet.has(k)) return false;
  }
  for (const k of aKeys) {
    if (!canonicalEqual(a[k], b[k], seen)) return false;
  }
  return true;
}
78
+
79
/**
 * Sorted-key recursive JSON. Produces stable output for hash digests, diff
 * comparison and human-readable display.
 *
 * Apart from key ordering the output follows JSON.stringify semantics, so the
 * result is always valid JSON:
 * - an object's `toJSON()` is honoured (e.g. a Date becomes its ISO string);
 * - array slots holding undefined / functions / symbols (or holes) become
 *   `null`;
 * - object properties holding undefined / functions / symbols are omitted.
 *
 * @param {*} v - value to serialise.
 * @returns {string|undefined} canonical JSON text; `undefined` when `v`
 *   itself is not serialisable (undefined, function, symbol), exactly as
 *   JSON.stringify returns.
 */
function canonicalStringify(v) {
  let value = v;
  if (value !== null && typeof value === "object" && typeof value.toJSON === "function") {
    value = value.toJSON();
  }
  if (value === null || typeof value !== "object") return JSON.stringify(value);

  if (Array.isArray(value)) {
    const items = [];
    // Index loop (not map) so holes in sparse arrays are visited too.
    for (let i = 0; i < value.length; i++) {
      const text = canonicalStringify(value[i]);
      // Joining a raw `undefined` would emit `[1,,2]`, which is not JSON.
      items.push(text === undefined ? "null" : text);
    }
    return "[" + items.join(",") + "]";
  }

  const members = [];
  for (const k of Object.keys(value).sort()) {
    const text = canonicalStringify(value[k]);
    // Skip unserialisable members instead of emitting `"k":undefined`.
    if (text !== undefined) members.push(JSON.stringify(k) + ":" + text);
  }
  return "{" + members.join(",") + "}";
}
87
+
88
+ module.exports = { canonicalEqual, canonicalStringify };
@@ -104,6 +104,81 @@ function findRegressionEntry(catalog, historicalId) {
104
104
  * @param {Object} opts — { now?: Date, threshold_years_ago?: number }
105
105
  * @returns {Object} report — { candidates, historical_id_threshold_year, evaluated_diffs }
106
106
  */
107
+ // v0.13.20 — content-pattern signals layered on top of the CVE-ID match.
108
+ // The audit-class-2.4 problem: pre-v0.13.20, the watcher detected only
109
+ // when a poller diff carried an extracted CVE-YYYY-NNN identifier. If a
110
+ // researcher's writeup announces "the 2020 Forshaw fix is silently
111
+ // reverted" without typing the CVE ID, the watcher missed the class
112
+ // entirely. v0.13.20 adds content-pattern signals so the watcher can
113
+ // flag candidates from prose alone.
114
+
115
+ // Historical-regression language: case-insensitive phrases suggesting a researcher
116
+ // claims an earlier fix was silently reverted, rolled back, or otherwise re-broken.
117
+ // scanContentSignals() reports only the first pattern that matches, so list order matters.
118
+ const HISTORICAL_REGRESSION_PHRASES = [
119
+ /silently (re-?broken|reverted|regressed|rolled back)/i,
120
+ /(fix|patch|mitigation) (was|is)? ?(silently )?(reverted|undone|removed|missing)/i,
121
+ /re-?regression of/i,
122
+ /never (actually|truly) (fixed|patched)/i,
123
+ /\bre[- ]exploit(ed|able)\b/i,
124
+ /(same|identical|exact) (primitive|bug|vulnerability) as/i,
125
+ /unpatched (since|despite) (an? )?(earlier|previous|prior|original) (fix|patch|disclosure)/i,
126
+ /vendor (declined|refused|never issued) (a )?new CVE/i,
127
+ ];
128
+
129
+ // Named-researcher patterns (case-insensitive substring matches). Operator-curated
130
+ // names with a prior catalog-grade drop are tracked elsewhere (NEW-CTRL-073 handle
131
+ // tracker); the regression watcher additionally looks for them in poller-diff content,
132
+ // since a familiar handle re-disclosing an old CVE is a higher-confidence candidate.
133
+ // scanContentSignals() records only the first pattern that matches as signals.researcher.
134
+ const RESEARCHER_NAME_PATTERNS = [
135
+ /Nightmare-Eclipse/i,
136
+ /Chaotic Eclipse/i,
137
+ /James Forshaw/i,
138
+ /Project Zero/i,
139
+ /Big Sleep/i,
140
+ /Tavis Ormandy/i,
141
+ /Jann Horn/i,
142
+ ];
143
+
144
+ // Component-string detection (case-insensitive). scanContentSignals() collects every matching token into
145
+ // signals.components; findRegressionCandidates() counts them only alongside a researcher-name hit, not a regression phrase.
146
+ const TRACKED_COMPONENT_TOKENS = [
147
+ /cldflt\.sys/i,
148
+ /\bldfltrl\.sys/i,
149
+ /HsmOsBlockPlaceholderAccess/i,
150
+ /ssh-?keysign/i,
151
+ /rxgk_decrypt_skb/i,
152
+ /CRI-?O/i,
153
+ /\bptrace\b/i,
154
+ /Cloud Files Mini Filter/i,
155
+ /Windows Recovery Environment|WinRE/i,
156
+ /\bCTFMON(\.exe)?\b/i,
157
+ ];
158
+
159
/**
 * Scan free text for regression-related content signals.
 *
 * @param {string} text - text to scan.
 * @returns {Object} signals - may carry `regression_language` (the first
 *   matching regression phrase), `researcher` (the first matching researcher
 *   name) and `components` (distinct matched component tokens, in pattern
 *   order). An empty object is returned when `text` is not a non-empty
 *   string; keys with no match are simply absent.
 */
function scanContentSignals(text) {
  if (typeof text !== "string" || text.length === 0) return {};

  // Matched substring of the first pattern in `patterns` that hits, or
  // undefined when none does.
  const firstHit = (patterns) => {
    for (const pattern of patterns) {
      const found = text.match(pattern);
      if (found) return found[0];
    }
    return undefined;
  };

  const result = {};

  const phrase = firstHit(HISTORICAL_REGRESSION_PHRASES);
  if (phrase !== undefined) result.regression_language = phrase;

  const name = firstHit(RESEARCHER_NAME_PATTERNS);
  if (name !== undefined) result.researcher = name;

  // Every component pattern is tried; identical matched strings are kept once.
  const hits = TRACKED_COMPONENT_TOKENS
    .map((pattern) => text.match(pattern))
    .filter((found) => found !== null)
    .map((found) => found[0]);
  const distinct = [...new Set(hits)];
  if (distinct.length > 0) result.components = distinct;

  return result;
}
181
+
107
182
  function findRegressionCandidates(diffs, catalog, opts) {
108
183
  const now = (opts && opts.now) || new Date();
109
184
  const yearsAgo = (opts && typeof opts.threshold_years_ago === 'number') ? opts.threshold_years_ago : 2;
@@ -112,19 +187,62 @@ function findRegressionCandidates(diffs, catalog, opts) {
112
187
 
113
188
  // Group historical-CVE refs by id so multi-feed surfacing collapses.
114
189
  const byHistoricalId = new Map();
190
+ // Content-only candidates — surfaced by language/component pattern
191
+ // matching even when no CVE ID was extracted from the diff text.
192
+ const contentCandidates = [];
115
193
  for (const d of (diffs || [])) {
116
194
  if (!d || typeof d.id !== 'string') continue;
195
+ // Title field name depends on input shape:
196
+ // - ADVISORIES_SOURCE diffs[] carry `title` (post-dedupe string).
197
+ // - ADVISORIES_SOURCE observations[] carry `first_title` (also a
198
+ // string — the first occurrence across feeds). Pre-v0.13.20
199
+ // fix (codex P1 PR #60): the watcher only read `title`, which
200
+ // is undefined on observations[], so the content-pattern layer
201
+ // never fired in the primary production path.
202
+ // Advisory URL is `advisory_url` (string) on raw per-feed diffs and
203
+ // `advisory_urls` (array) after dedupe in both shapes.
204
+ const titleField = d.title || d.first_title || '';
205
+ const urls = Array.isArray(d.advisory_urls)
206
+ ? d.advisory_urls.join(' ')
207
+ : (d.advisory_url || '');
208
+ const text = `${titleField} ${d.body || ''} ${urls}`;
209
+ const signals = scanContentSignals(text);
210
+ const hasRegressionSignal = !!(signals.regression_language ||
211
+ (signals.researcher && signals.components));
117
212
  const year = cveYear(d.id);
118
- if (year === null) continue;
119
- if (year > thresholdYear) continue;
120
- if (!byHistoricalId.has(d.id)) byHistoricalId.set(d.id, { sources: new Set(), titles: [] });
121
- const slot = byHistoricalId.get(d.id);
122
- if (Array.isArray(d.sources)) {
123
- for (const s of d.sources) slot.sources.add(s);
124
- } else if (typeof d.source === 'string') {
125
- slot.sources.add(d.source);
213
+ if (year !== null && year <= thresholdYear) {
214
+ // CVE-ID-bearing historical reference (the original v0.13.17 path).
215
+ if (!byHistoricalId.has(d.id)) byHistoricalId.set(d.id, { sources: new Set(), titles: [], signals: {} });
216
+ const slot = byHistoricalId.get(d.id);
217
+ if (Array.isArray(d.sources)) {
218
+ for (const s of d.sources) slot.sources.add(s);
219
+ } else if (typeof d.source === 'string') {
220
+ slot.sources.add(d.source);
221
+ }
222
+ // Title may be carried as `title` (diffs[]) or `first_title`
223
+ // (observations[]) — accept either to keep the historical-
224
+ // candidate title list populated under both input shapes.
225
+ const titleStr = (typeof d.title === 'string' && d.title) ? d.title
226
+ : (typeof d.first_title === 'string' && d.first_title) ? d.first_title
227
+ : '';
228
+ if (titleStr) slot.titles.push(titleStr);
229
+ // Merge content signals — Object.assign overwrites, so for each key the latest diff's value wins.
230
+ Object.assign(slot.signals, signals);
231
+ continue;
232
+ }
233
+ // No historical CVE-ID in this diff. If content signals fire, still
234
+ // surface as a content-only candidate so an operator can triage.
235
+ if (hasRegressionSignal) {
236
+ const titleStr = d.title || d.first_title || '';
237
+ contentCandidates.push({
238
+ historical_cve: null,
239
+ surfaced_by: Array.isArray(d.sources) ? d.sources.slice().sort() : (d.source ? [d.source] : []),
240
+ first_seen_titles: titleStr ? [titleStr] : [],
241
+ existing_regression_key: null,
242
+ action: 'content-only-investigate',
243
+ signals,
244
+ });
126
245
  }
127
- if (typeof d.title === 'string' && d.title) slot.titles.push(d.title);
128
246
  }
129
247
 
130
248
  const candidates = [];
@@ -145,10 +263,12 @@ function findRegressionCandidates(diffs, catalog, opts) {
145
263
  first_seen_titles: slot.titles.slice(0, 5),
146
264
  existing_regression_key: existing,
147
265
  action,
266
+ signals: slot.signals,
148
267
  });
149
268
  }
150
269
 
151
270
  candidates.sort((a, b) => a.historical_cve.localeCompare(b.historical_cve));
271
+ candidates.push(...contentCandidates);
152
272
 
153
273
  return {
154
274
  candidates,
@@ -215,4 +335,5 @@ module.exports = {
215
335
  findRegressionCandidates,
216
336
  findRegressionEntry,
217
337
  cveYear,
338
+ scanContentSignals,
218
339
  };
@@ -193,44 +193,19 @@ function extractCveIds(text) {
193
193
  }
194
194
 
195
195
  /**
196
- * Lightweight RSS / Atom parser. Avoids pulling in a dependency for what
197
- * is effectively `<item>` / `<entry>` extraction + `<title>` / `<link>` /
198
- * `<pubDate>` / `<published>` / `<description>` / `<content>` text grabs.
196
+ * RSS / Atom parser. v0.13.20 replaces the original regex-based parser
197
+ * (which silently failed on XML namespaces, nested CDATA, self-closing
198
+ * tags, HTML entities, and multi-line content) with a proper streaming
199
+ * XML tokenizer defined in lib/xml-tokenizer.js. Parser errors surface
200
+ * via the second `errors` argument so consumers can observe parse
201
+ * failures instead of receiving a silent empty array.
199
202
  *
200
203
  * Returns [{ title, link, published, body }, ...].
201
204
  */
202
- function parseRssAtom(xml) {
203
- if (typeof xml !== 'string') return [];
204
- const items = [];
205
- // Try Atom <entry>...</entry> first.
206
- const atomEntryRe = /<entry\b[\s\S]*?<\/entry>/g;
207
- const rssItemRe = /<item\b[\s\S]*?<\/item>/g;
208
- const blocks = (xml.match(atomEntryRe) || xml.match(rssItemRe) || []);
209
- for (const block of blocks) {
210
- const title = matchInner(block, 'title') || '';
211
- const link = matchInner(block, 'link') || matchAttr(block, 'link', 'href') || '';
212
- const published = matchInner(block, 'pubDate') || matchInner(block, 'published') || matchInner(block, 'updated') || '';
213
- const description = matchInner(block, 'description') || matchInner(block, 'content') || matchInner(block, 'summary') || '';
214
- items.push({ title: stripCdata(title), link: stripCdata(link), published: stripCdata(published), body: stripCdata(description) });
215
- }
216
- return items;
217
- }
218
-
219
- function matchInner(block, tag) {
220
- const re = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`, 'i');
221
- const m = block.match(re);
222
- return m ? m[1].trim() : null;
223
- }
224
-
225
- function matchAttr(block, tag, attr) {
226
- const re = new RegExp(`<${tag}[^>]*\\b${attr}=["']([^"']+)["']`, 'i');
227
- const m = block.match(re);
228
- return m ? m[1] : null;
229
- }
205
+ const { parseFeed: tokenizerParseFeed } = require('./xml-tokenizer');
230
206
 
231
- function stripCdata(s) {
232
- if (typeof s !== 'string') return '';
233
- return s.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1').replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
207
+ function parseRssAtom(xml, errors = null) {
208
+ return tokenizerParseFeed(xml, errors);
234
209
  }
235
210
 
236
211
  /**
@@ -0,0 +1,73 @@
1
+ "use strict";
2
+ /**
3
+ * lib/version-pins.js
4
+ *
5
+ * Single source of truth for the canonical MITRE / ATT&CK / ATLAS /
6
+ * D3FEND version pins that operator-facing docs reference.
7
+ *
8
+ * Pre-v0.13.20 history: ATLAS version was pinned to v5.4.0 in 33+
9
+ * locations (READMEs, AGENTS.md, ARCHITECTURE.md, agent personas,
10
+ * skill bodies, schema descriptions, manifest.json). Bumping required
11
+ * a lockstep regex-replace across all 33 files. v0.13.18 bumped to
12
+ * v5.6.0; the regex sweep accidentally touched dates in unrelated
13
+ * paragraphs and only failed-loudly because the tests asserted
14
+ * version drift. v0.13.20 makes the pin schema-driven:
15
+ *
16
+ * - `data/atlas-ttps.json._meta.atlas_version` is the source of truth.
17
+ * - `data/attack-techniques.json._meta.attack_version` is too.
18
+ * - This module reads both, exposes them via getAtlasVersion() and
19
+ * getAttackVersion() helpers, and is the canonical resolver every
20
+ * consumer (test runner, doc-currency check, lint, skill-body
21
+ * scanner) reaches through.
22
+ *
23
+ * The drift-detection tests in tests/atlas-version-canonical.test.js
24
+ * and tests/attack-version-canonical.test.js now compare every
25
+ * operator-facing mention against the value this module returns.
26
+ * A future bump is `node $(exceptd path)/lib/sign.js sign-all`, after which
27
+ * this module reads the new value. No lockstep doc edit is needed except
28
+ * where the mention is a literal string with its own meaning
29
+ * ("upgrade from v5.4.0 to v5.6.0") that an
30
+ * operator must read.
31
+ *
32
+ * API:
33
+ * getAtlasVersion() → "5.6.0"
34
+ * getAttackVersion() → "19.0"
35
+ * getAtlasReleaseDate() → "2026-05-08"
36
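+ * getAttackVersionDate() → attack-techniques.json `_meta.attack_version_date`
+ * getAllPins() → { atlas_version, atlas_release_date, attack_version, ... }
+ * clearCache() → drops the memoized pins so the next call re-reads the files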
37
+ */
38
+
39
+ const fs = require("fs");
40
+ const path = require("path");
41
+
42
+ const ROOT = path.join(__dirname, ".."); // parent of this module's directory; data/ and manifest.json are resolved against it
43
+
44
+ let _cached = null; // memoized pins object built by loadPins(); clearCache() resets it to null
45
+
46
/**
 * Read the version pins from disk once and memoize them.
 *
 * Reads `data/atlas-ttps.json`, `data/attack-techniques.json` and
 * `manifest.json` under ROOT. Any missing or falsy field is reported as
 * `null`. File-system and JSON parse errors propagate to the caller.
 *
 * @returns {Object} the memoized pins object (shared, not a copy).
 */
function loadPins() {
  if (_cached) return _cached;

  const readJson = (...segments) =>
    JSON.parse(fs.readFileSync(path.join(ROOT, ...segments), "utf8"));

  // All three files are read before any field is inspected.
  const atlasDoc = readJson("data", "atlas-ttps.json");
  const attackDoc = readJson("data", "attack-techniques.json");
  const manifest = readJson("manifest.json");

  const atlasMeta = atlasDoc._meta || {};
  const attackMeta = attackDoc._meta || {};
  const orNull = (value) => value || null;

  _cached = {
    atlas_version: orNull(atlasMeta.atlas_version),
    atlas_release_date: orNull(atlasMeta.atlas_release_date),
    attack_version: orNull(attackMeta.attack_version),
    attack_version_date: orNull(attackMeta.attack_version_date),
    manifest_atlas_version: orNull(manifest.atlas_version),
    manifest_attack_version: orNull(manifest.attack_version)
  };
  return _cached;
}
61
+
62
/** Drop the memoized pins so the next lookup re-reads the files. */
function clearCache() {
  _cached = null;
}

/** @returns the `atlas_version` pin, or null when absent. */
function getAtlasVersion() {
  const { atlas_version } = loadPins();
  return atlas_version;
}

/** @returns the `atlas_release_date` pin, or null when absent. */
function getAtlasReleaseDate() {
  const { atlas_release_date } = loadPins();
  return atlas_release_date;
}

/** @returns the `attack_version` pin, or null when absent. */
function getAttackVersion() {
  const { attack_version } = loadPins();
  return attack_version;
}

/** @returns the `attack_version_date` pin, or null when absent. */
function getAttackVersionDate() {
  const { attack_version_date } = loadPins();
  return attack_version_date;
}

/** @returns {Object} a shallow copy of every pin, safe for the caller to mutate. */
function getAllPins() {
  return Object.assign({}, loadPins());
}
68
+
69
+ module.exports = {
70
+ getAtlasVersion, getAtlasReleaseDate,
71
+ getAttackVersion, getAttackVersionDate,
72
+ getAllPins, clearCache
73
+ };