@blamejs/exceptd-skills 0.13.19 → 0.13.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,88 @@
1
+ "use strict";
2
+ /**
3
+ * lib/canonical-eq.js
4
+ *
5
+ * Canonical-form deep equality for catalog diff detection. The diff-
6
+ * coverage gate previously compared `JSON.stringify(before.iocs)` vs
7
+ * `JSON.stringify(after.iocs)` which is non-canonical: key order,
8
+ * trailing whitespace, and numeric format differences all register as
9
+ * "different" when the operator made no semantic change.
10
+ *
11
+ * Pre-v0.13.20 history: the symptom was patched twice with skip rules
12
+ * (v0.13.17 _auto_imported skip; v0.13.19 _iocs_stub skip). v0.13.20
13
+ * fixes the root cause — canonical recursive equality with sorted-key
14
+ * object comparison and array-position-sensitive element comparison.
15
+ *
16
+ * Contract:
17
+ * - Primitives (string / number / boolean / null / undefined) compare
18
+ * by strict equality (===).
19
+ * - Arrays compare element-by-element in order. [1,2] !== [2,1].
20
+ * This matches operator intent — array order in IoCs / attack_refs
21
+ * / cwe_refs is meaningful (most-relevant-first convention).
22
+ * - Objects compare by key-set equality + per-key recursive equality.
23
+ * Key order does NOT matter; { a:1, b:2 } === { b:2, a:1 }.
24
+ * - Cycle protection: a WeakMap (left operand → WeakSet of right operands) of visited pairs prevents infinite
25
+ * recursion on self-referential structures. Cycles compare unequal
26
+ * across mismatched topologies; equal across identical topologies.
27
+ * - NaN: NaN equals NaN under this comparator (like Object.is, unlike ===,
28
+ * to make the comparator total — useful for catalog data which
29
+ * never legitimately contains NaN but might pick one up from a
30
+ * buggy upstream).
31
+ *
32
+ * Helpers:
33
+ * - canonicalEqual(a, b): full recursive equality.
34
+ * - canonicalStringify(v): sorted-key JSON for hashing / display.
35
+ * Produces stable output suitable for SHA-256 etc.
36
+ */
37
+
38
/**
 * Order-insensitive (for object keys) deep equality.
 *
 * - Values that are `===` are equal; two NaN numbers are also equal.
 * - Any other pairing that is not object-vs-object (null included) is unequal.
 * - Arrays must have the same length and equal elements at every index.
 * - Other objects must expose the same set of own enumerable string keys
 *   and hold equal values under each key.
 *
 * @param {*} a - left operand
 * @param {*} b - right operand
 * @param {WeakMap<object, WeakSet<object>>} [seen] - pairs already entered;
 *   callers normally omit it, recursive calls thread it through.
 * @returns {boolean} true when the two values are canonically equal
 */
function canonicalEqual(a, b, seen = new WeakMap()) {
  if (a === b) return true;
  // Number.isNaN is true only for the number NaN, so no typeof guard is needed.
  if (Number.isNaN(a) && Number.isNaN(b)) return true;

  // Past this point a !== b, so a null on either side can never be equal,
  // and neither can any primitive / function operand.
  const bothObjects =
    a !== null && b !== null && typeof a === "object" && typeof b === "object";
  if (!bothObjects) return false;

  // Pair registry. A pair that was already entered is reported equal here:
  // either it is still being compared further up the stack (a cycle) or it
  // finished equal earlier — an unequal result aborts the whole comparison,
  // so it can never be revisited.
  let partners = seen.get(a);
  if (partners && partners.has(b)) return true;
  if (!partners) {
    partners = new WeakSet();
    seen.set(a, partners);
  }
  partners.add(b);

  const leftIsArray = Array.isArray(a);
  if (leftIsArray !== Array.isArray(b)) return false;

  if (leftIsArray) {
    if (a.length !== b.length) return false;
    // entries() walks every index (holes read as undefined), matching an
    // index-based loop.
    for (const [index, item] of a.entries()) {
      if (!canonicalEqual(item, b[index], seen)) return false;
    }
    return true;
  }

  // Non-array objects: identical sorted key lists first, then values.
  const leftKeys = Object.keys(a).sort();
  const rightKeys = Object.keys(b).sort();
  if (leftKeys.length !== rightKeys.length) return false;
  if (leftKeys.some((key, index) => key !== rightKeys[index])) return false;
  return leftKeys.every((key) => canonicalEqual(a[key], b[key], seen));
}
78
+
79
+ // Sorted-key recursive JSON. Stable output for hash digests, diff
80
+ // comparison, and human-readable display.
81
/**
 * Serialize a value as JSON with object keys emitted in sorted order, so
 * two values that differ only in key order produce the same string.
 *
 * Values JSON cannot represent (undefined, functions, symbols) are handled
 * the way JSON.stringify handles them, so the output is always valid JSON:
 *   - inside an array (including holes) they are written as `null`;
 *   - as an object member the whole member is omitted;
 *   - at the top level the function returns `undefined`.
 *
 * No cycle protection: a self-referential structure recurses without bound.
 *
 * @param {*} v - value to serialize
 * @returns {string|undefined} canonical JSON text, or undefined when `v`
 *   itself has no JSON representation
 */
function canonicalStringify(v) {
  // Primitives, null, functions and symbols: defer to JSON.stringify.
  if (v === null || typeof v !== "object") return JSON.stringify(v);

  if (Array.isArray(v)) {
    // Array.from visits holes (as undefined); Array.prototype.map skips them,
    // which previously leaked empty slots into the output ("[1,,2]").
    const items = Array.from(v, (item) => {
      const encoded = canonicalStringify(item);
      return encoded === undefined ? "null" : encoded;
    });
    return "[" + items.join(",") + "]";
  }

  const members = [];
  for (const key of Object.keys(v).sort()) {
    const encoded = canonicalStringify(v[key]);
    // Skip unrepresentable members instead of emitting `"key":undefined`.
    if (encoded !== undefined) members.push(JSON.stringify(key) + ":" + encoded);
  }
  return "{" + members.join(",") + "}";
}
87
+
88
+ module.exports = { canonicalEqual, canonicalStringify };
@@ -104,6 +104,81 @@ function findRegressionEntry(catalog, historicalId) {
104
104
  * @param {Object} opts — { now?: Date, threshold_years_ago?: number }
105
105
  * @returns {Object} report — { candidates, historical_id_threshold_year, evaluated_diffs }
106
106
  */
107
+ // v0.13.20 — content-pattern signals layered on top of the CVE-ID match.
108
+ // The audit-class-2.4 problem: pre-v0.13.20, the watcher detected only
109
+ // when a poller diff carried an extracted CVE-YYYY-NNN identifier. If a
110
+ // researcher's writeup announces "the 2020 Forshaw fix is silently
111
+ // reverted" without typing the CVE ID, the watcher missed the class
112
+ // entirely. v0.13.20 adds content-pattern signals so the watcher can
113
+ // flag candidates from prose alone.
114
+
115
+ // Historical-regression language. Phrases that indicate a researcher is
116
+ // claiming a fix was silently reverted, downgrade-rolled-back, or
117
+ // otherwise re-broken.
118
// Regression wording: phrases claiming an earlier fix was reverted, never
// really landed, or that an old bug is exploitable again.
const HISTORICAL_REGRESSION_PHRASES = [
  /silently (re-?broken|reverted|regressed|rolled back)/i,
  /(fix|patch|mitigation) (was|is)? ?(silently )?(reverted|undone|removed|missing)/i,
  /re-?regression of/i,
  /never (actually|truly) (fixed|patched)/i,
  /\bre[- ]exploit(ed|able)\b/i,
  /(same|identical|exact) (primitive|bug|vulnerability) as/i,
  /unpatched (since|despite) (an? )?(earlier|previous|prior|original) (fix|patch|disclosure)/i,
  /vendor (declined|refused|never issued) (a )?new CVE/i,
];

// Researcher / team names looked for in diff text as an extra signal.
const RESEARCHER_NAME_PATTERNS = [
  /Nightmare-Eclipse/i,
  /Chaotic Eclipse/i,
  /James Forshaw/i,
  /Project Zero/i,
  /Big Sleep/i,
  /Tavis Ormandy/i,
  /Jann Horn/i,
];

// Component names (drivers, binaries, functions, subsystems) looked for
// in diff text.
const TRACKED_COMPONENT_TOKENS = [
  /cldflt\.sys/i,
  /\bldfltrl\.sys/i,
  /HsmOsBlockPlaceholderAccess/i,
  /ssh-?keysign/i,
  /rxgk_decrypt_skb/i,
  /CRI-?O/i,
  /\bptrace\b/i,
  /Cloud Files Mini Filter/i,
  /Windows Recovery Environment|WinRE/i,
  /\bCTFMON(\.exe)?\b/i,
];

/**
 * Scan free text for the three content signals.
 *
 * @param {*} text - text to scan; anything other than a non-empty string
 *   yields an empty result.
 * @returns {{regression_language?: string, researcher?: string, components?: string[]}}
 *   - regression_language: matched text of the first phrase pattern (in
 *     table order) that hits;
 *   - researcher: matched text of the first name pattern that hits;
 *   - components: distinct matched texts, one per component pattern that
 *     hits, in table order. Each key is present only when something matched.
 */
function scanContentSignals(text) {
  const signals = {};
  if (typeof text !== "string" || !text) return signals;

  // Matched text of the first pattern in `patterns` that hits, else null.
  const firstHit = (patterns) => {
    for (const pattern of patterns) {
      const found = pattern.exec(text);
      if (found) return found;
    }
    return null;
  };

  const phraseHit = firstHit(HISTORICAL_REGRESSION_PHRASES);
  if (phraseHit) signals.regression_language = phraseHit[0];

  const nameHit = firstHit(RESEARCHER_NAME_PATTERNS);
  if (nameHit) signals.researcher = nameHit[0];

  // A Set keeps first-seen order and drops repeated matched strings.
  const componentHits = new Set();
  for (const pattern of TRACKED_COMPONENT_TOKENS) {
    const found = pattern.exec(text);
    if (found) componentHits.add(found[0]);
  }
  if (componentHits.size > 0) signals.components = [...componentHits];

  return signals;
}
181
+
107
182
  function findRegressionCandidates(diffs, catalog, opts) {
108
183
  const now = (opts && opts.now) || new Date();
109
184
  const yearsAgo = (opts && typeof opts.threshold_years_ago === 'number') ? opts.threshold_years_ago : 2;
@@ -112,19 +187,62 @@ function findRegressionCandidates(diffs, catalog, opts) {
112
187
 
113
188
  // Group historical-CVE refs by id so multi-feed surfacing collapses.
114
189
  const byHistoricalId = new Map();
190
+ // Content-only candidates — surfaced by language/component pattern
191
+ // matching even when no CVE ID was extracted from the diff text.
192
+ const contentCandidates = [];
115
193
  for (const d of (diffs || [])) {
116
194
  if (!d || typeof d.id !== 'string') continue;
195
+ // Title field name depends on input shape:
196
+ // - ADVISORIES_SOURCE diffs[] carry `title` (post-dedupe string).
197
+ // - ADVISORIES_SOURCE observations[] carry `first_title` (also a
198
+ // string — the first occurrence across feeds). Pre-v0.13.20
199
+ // fix (codex P1 PR #60): the watcher only read `title`, which
200
+ // is undefined on observations[], so the content-pattern layer
201
+ // never fired in the primary production path.
202
+ // Advisory URL is `advisory_url` (string) on raw per-feed diffs and
203
+ // `advisory_urls` (array) after dedupe in both shapes.
204
+ const titleField = d.title || d.first_title || '';
205
+ const urls = Array.isArray(d.advisory_urls)
206
+ ? d.advisory_urls.join(' ')
207
+ : (d.advisory_url || '');
208
+ const text = `${titleField} ${d.body || ''} ${urls}`;
209
+ const signals = scanContentSignals(text);
210
+ const hasRegressionSignal = !!(signals.regression_language ||
211
+ (signals.researcher && signals.components));
117
212
  const year = cveYear(d.id);
118
- if (year === null) continue;
119
- if (year > thresholdYear) continue;
120
- if (!byHistoricalId.has(d.id)) byHistoricalId.set(d.id, { sources: new Set(), titles: [] });
121
- const slot = byHistoricalId.get(d.id);
122
- if (Array.isArray(d.sources)) {
123
- for (const s of d.sources) slot.sources.add(s);
124
- } else if (typeof d.source === 'string') {
125
- slot.sources.add(d.source);
213
+ if (year !== null && year <= thresholdYear) {
214
+ // CVE-ID-bearing historical reference (the original v0.13.17 path).
215
+ if (!byHistoricalId.has(d.id)) byHistoricalId.set(d.id, { sources: new Set(), titles: [], signals: {} });
216
+ const slot = byHistoricalId.get(d.id);
217
+ if (Array.isArray(d.sources)) {
218
+ for (const s of d.sources) slot.sources.add(s);
219
+ } else if (typeof d.source === 'string') {
220
+ slot.sources.add(d.source);
221
+ }
222
+ // Title may be carried as `title` (diffs[]) or `first_title`
223
+ // (observations[]) — accept either to keep the historical-
224
+ // candidate title list populated under both input shapes.
225
+ const titleStr = (typeof d.title === 'string' && d.title) ? d.title
226
+ : (typeof d.first_title === 'string' && d.first_title) ? d.first_title
227
+ : '';
228
+ if (titleStr) slot.titles.push(titleStr);
229
+ // Merge content signals — per key, the most recently processed diff's value overwrites earlier ones.
230
+ Object.assign(slot.signals, signals);
231
+ continue;
232
+ }
233
+ // No historical CVE-ID in this diff. If content signals fire, still
234
+ // surface as a content-only candidate so an operator can triage.
235
+ if (hasRegressionSignal) {
236
+ const titleStr = d.title || d.first_title || '';
237
+ contentCandidates.push({
238
+ historical_cve: null,
239
+ surfaced_by: Array.isArray(d.sources) ? d.sources.slice().sort() : (d.source ? [d.source] : []),
240
+ first_seen_titles: titleStr ? [titleStr] : [],
241
+ existing_regression_key: null,
242
+ action: 'content-only-investigate',
243
+ signals,
244
+ });
126
245
  }
127
- if (typeof d.title === 'string' && d.title) slot.titles.push(d.title);
128
246
  }
129
247
 
130
248
  const candidates = [];
@@ -145,10 +263,12 @@ function findRegressionCandidates(diffs, catalog, opts) {
145
263
  first_seen_titles: slot.titles.slice(0, 5),
146
264
  existing_regression_key: existing,
147
265
  action,
266
+ signals: slot.signals,
148
267
  });
149
268
  }
150
269
 
151
270
  candidates.sort((a, b) => a.historical_cve.localeCompare(b.historical_cve));
271
+ candidates.push(...contentCandidates);
152
272
 
153
273
  return {
154
274
  candidates,
@@ -215,4 +335,5 @@ module.exports = {
215
335
  findRegressionCandidates,
216
336
  findRegressionEntry,
217
337
  cveYear,
338
+ scanContentSignals,
218
339
  };