muaddib-scanner 2.11.71 → 2.11.72

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.71",
3
+ "version": "2.11.72",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-07T16:09:51.503Z",
3
+ "timestamp": "2026-06-07T16:43:07.004Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -0,0 +1,343 @@
1
+ /**
2
+ * GHSA active poller (Phase 2c, part 2).
3
+ *
4
+ * Polls the public GitHub Advisory Database for type=malware advisories on a short
5
+ * cadence (~15 min) and:
6
+ * - persists each advisory's malicious package(s) to a denominator JSONL
7
+ * (data/ghsa-malware.jsonl) — the authoritative "what SHOULD we have caught" list
8
+ * that the Phase 5 coverage-audit joins against the scan-ledger (closes 105/429);
9
+ * - pre-alerts genuinely fresh names (updated since our cursor) as an early warning;
10
+ * - records withdrawn advisories (withdrawn_at set) to the scan-ledger as
11
+ * outcome:'dropped', source:'ghsa_gone' so a removed package keeps an identity;
12
+ * - feeds the GHSA fetch count into the feed-health alarm (a GHSA feed that returns 0
13
+ * after a healthy baseline = API down / auth broken).
14
+ *
15
+ * HONEST SCOPE (v1): this does NOT inject scans into the live queue. GHSA lags the
16
+ * downstream vendors, so by the time a name lands here it is usually already removed from
17
+ * the registry (a scan would just 404) or already in the IOC store via the OSV scraper —
18
+ * injection would mostly burn registry calls for no coverage gain. The value here is the
19
+ * denominator (Phase 5) + the early-warning pre-alert + withdrawn tracking. The poller
20
+ * runs as the muaddib daemon, which has no `gh` CLI auth, so it hits the REST API directly
21
+ * (public endpoint; an optional GITHUB_TOKEN raises the rate limit).
22
+ *
23
+ * First run (no cursor) SEEDS silently: it records the recent page to the denominator and
24
+ * sets the cursor, but does not pre-alert (those advisories are historical relative to our
25
+ * start, not "fresh"). Subsequent runs pre-alert only advisories newer than the cursor.
26
+ */
27
+ 'use strict';
28
+
29
+ const fs = require('fs');
30
+ const path = require('path');
31
+ const https = require('https');
32
+
33
// GitHub REST API host and the ecosystems (internal names) that get polled.
const GHSA_API_HOST = 'api.github.com';
const GHSA_ECOSYSTEMS = ['npm', 'pypi'];

// State files: environment override first, otherwise <two levels up>/data/<file>.
const GHSA_CURSOR_FILE = process.env.MUADDIB_GHSA_CURSOR_FILE
  ? process.env.MUADDIB_GHSA_CURSOR_FILE
  : path.join(__dirname, '..', '..', 'data', 'ghsa-cursor.json');
const GHSA_MALWARE_FILE = process.env.MUADDIB_GHSA_MALWARE_FILE
  ? process.env.MUADDIB_GHSA_MALWARE_FILE
  : path.join(__dirname, '..', '..', 'data', 'ghsa-malware.jsonl');

// Denominator cap (GHSA malware is ~thousands; this is a safety bound).
const GHSA_MALWARE_MAX = 200_000;

/**
 * Read a base-10 integer from the environment, falling back when the variable is
 * unset, not a finite integer, or rejected by `accept`.
 */
function _ghsaEnvInt(variable, accept, fallback) {
  const parsed = parseInt(process.env[variable], 10);
  if (Number.isFinite(parsed) && accept(parsed)) return parsed;
  return fallback;
}

// Poll cadence: values below one minute are ignored; default is 15 minutes.
const GHSA_POLL_INTERVAL_MS = _ghsaEnvInt(
  'MUADDIB_GHSA_POLL_INTERVAL_MS',
  (ms) => ms >= 60_000,
  15 * 60 * 1000
);

// Cap pre-alerts per poll so a cursor gap (downtime catch-up) can't blast Discord.
const GHSA_PREALERT_CAP = _ghsaEnvInt('MUADDIB_GHSA_PREALERT_CAP', (cap) => cap > 0, 25);

// Interval handle of the running poller; null while stopped.
let _pollHandle = null;
51
+
52
+ // ── low-level fetch (dep-injectable) ──
53
+
54
/**
 * GET a GitHub REST API path and parse the response body as JSON.
 *
 * Sends the User-Agent and Accept headers, plus an `Authorization: Bearer` header when
 * a token is supplied.
 *
 * @param {string} pathName - request path including the query string.
 * @param {object} [options]
 * @param {string} [options.token] - optional bearer token.
 * @param {object} [options.httpImpl] - object exposing `get(options, cb)`; defaults to `https`.
 * @param {number} [options.timeoutMs] - value passed as the request `timeout` option.
 * @returns {Promise<{status:number, json:any}>} `json` is null when the body is not valid
 *   JSON. A non-200 status still resolves so the caller can decide what to do with it.
 *   Rejects on a request error, a request timeout, or a response stream error.
 */
function _httpGetJson(pathName, { token, httpImpl = https, timeoutMs = 20_000 } = {}) {
  return new Promise((resolve, reject) => {
    const headers = {
      'User-Agent': 'MUADDIB-Scanner/3.0',
      'Accept': 'application/vnd.github+json'
    };
    if (token) headers['Authorization'] = 'Bearer ' + token;
    const req = httpImpl.get({ hostname: GHSA_API_HOST, path: pathName, headers, timeout: timeoutMs }, (res) => {
      // Collect raw chunks and decode once at the end: decoding chunk-by-chunk corrupts a
      // multi-byte UTF-8 character that happens to straddle a chunk boundary.
      const chunks = [];
      res.on('data', (c) => { chunks.push(typeof c === 'string' ? Buffer.from(c, 'utf8') : c); });
      // Without this listener a response that dies mid-body never emits 'end', and the
      // promise would stay pending forever.
      res.on('error', reject);
      res.on('end', () => {
        const body = Buffer.concat(chunks).toString('utf8');
        let json = null;
        try { json = JSON.parse(body); } catch { /* leave null */ }
        resolve({ status: res.statusCode, json });
      });
    });
    // The 'timeout' event does not abort by itself; destroying with an error routes the
    // failure through the 'error' handler below.
    req.on('timeout', () => { req.destroy(new Error('GHSA request timeout')); });
    req.on('error', reject);
  });
}
79
+
80
/**
 * Default fetcher: requests one page (per_page=100, sort=updated, direction=desc) of
 * type=malware advisories for a single ecosystem.
 *
 * @param {string} ecosystem - internal ecosystem name ('npm' or 'pypi').
 * @param {object} [opts] - `token` overrides GITHUB_TOKEN / GH_TOKEN; `httpImpl` is
 *   forwarded to the HTTP helper.
 * @returns {Promise<Array>} the advisory array from the API (possibly empty).
 * @throws {Error} when the status is not 200 or the body is not an array, so the caller
 *   can tell a failed fetch apart from a feed that genuinely returned zero entries.
 */
async function _defaultFetch(ecosystem, opts = {}) {
  const authToken = opts.token || process.env.GITHUB_TOKEN || process.env.GH_TOKEN || null;

  // GHSA names the Python ecosystem "pip" (not "pypi") in BOTH the query and the response;
  // querying ecosystem=pypi returns HTTP 422. Translate our internal name for the query.
  const ghsaEcosystem = ecosystem === 'pypi' ? 'pip' : ecosystem;

  const query = [
    'type=malware',
    `ecosystem=${encodeURIComponent(ghsaEcosystem)}`,
    'per_page=100',
    'sort=updated',
    'direction=desc'
  ].join('&');

  const response = await _httpGetJson(`/advisories?${query}`, {
    token: authToken,
    httpImpl: opts.httpImpl
  });

  if (response.status === 200 && Array.isArray(response.json)) {
    return response.json;
  }
  throw new Error(`GHSA fetch ${ecosystem} failed: HTTP ${response.status}`);
}
97
+
98
+ // ── parsing (pure) ──
99
+
100
/**
 * Expand one advisory into one denominator row per affected package. PURE.
 *
 * Entries without a package name or ecosystem are skipped. GHSA's "pip" is mapped to the
 * internal "pypi". When `ecosystems` is truthy, rows outside that list are dropped.
 *
 * @param {object} adv - advisory object; needs `ghsa_id` and a `vulnerabilities` array.
 * @param {string[]|null} [ecosystems] - allowed internal ecosystem names; a falsy value
 *   disables the filter.
 * @returns {Array<{ghsa_id,ecosystem,name,versionRange,published_at,updated_at,withdrawn:boolean}>}
 */
function parseAdvisory(adv, ecosystems = GHSA_ECOSYSTEMS) {
  const usable = Boolean(adv) && Boolean(adv.ghsa_id) && Array.isArray(adv.vulnerabilities);
  if (!usable) return [];

  return adv.vulnerabilities.flatMap((entry) => {
    const target = entry && entry.package;
    if (!target || !target.name || !target.ecosystem) return [];

    const lowered = String(target.ecosystem).toLowerCase();
    // normalize GHSA's "pip" to our internal "pypi"
    const ecosystem = lowered === 'pip' ? 'pypi' : lowered;
    if (ecosystems && !ecosystems.includes(ecosystem)) return [];

    return [{
      ghsa_id: adv.ghsa_id,
      ecosystem,
      name: target.name,
      versionRange: entry.vulnerable_version_range || '*',
      published_at: adv.published_at || null,
      updated_at: adv.updated_at || null,
      withdrawn: !!adv.withdrawn_at
    }];
  });
}
125
+
126
+ // ── cursor + denominator persistence (self-contained, atomic) ──
127
+
128
/**
 * Read the persisted cursor.
 *
 * @param {string} [file] - cursor file path.
 * @returns {string|null} the stored `cursor` string, or null when the file is missing,
 *   unreadable, not valid JSON, or holds no string `cursor` field.
 */
function loadGhsaCursor(file = GHSA_CURSOR_FILE) {
  let stored;
  try {
    stored = JSON.parse(fs.readFileSync(file, 'utf8'));
  } catch {
    return null;
  }
  if (!stored || typeof stored.cursor !== 'string') return null;
  return stored.cursor;
}
134
+
135
/**
 * Persist the cursor as `{ cursor, updatedAt }`, writing to `<file>.tmp` and renaming it
 * over the target. Best-effort: failures never propagate to the caller.
 *
 * @param {string} cursor - value to store.
 * @param {string} [file] - cursor file path; the parent directory is created if absent.
 */
function saveGhsaCursor(cursor, file = GHSA_CURSOR_FILE) {
  try {
    const parentDir = path.dirname(file);
    if (!fs.existsSync(parentDir)) {
      fs.mkdirSync(parentDir, { recursive: true });
    }
    const staging = file + '.tmp';
    const payload = JSON.stringify({ cursor, updatedAt: new Date().toISOString() }, null, 2);
    fs.writeFileSync(staging, payload);
    fs.renameSync(staging, file);
  } catch (err) {
    // Read-only / permission / disk-full conditions are dropped without a log line;
    // anything else is reported.
    const quiet = Boolean(err) && ['EROFS', 'EACCES', 'EPERM', 'ENOSPC'].includes(err.code);
    if (!quiet) {
      console.warn('[GHSA] Failed to persist cursor: ' + err.message);
    }
  }
}
147
+
148
/**
 * Append denominator rows to the JSONL file, one JSON object per line, each stamped with
 * a shared `ts`, then hand the file to the compaction step. Best-effort: failures never
 * propagate to the caller.
 *
 * @param {Array<object>} rows - rows to append; nothing happens for an empty/falsy value.
 * @param {string} [file] - JSONL file path; the parent directory is created if absent.
 */
function appendGhsaMalware(rows, file = GHSA_MALWARE_FILE) {
  const nothingToWrite = !rows || rows.length === 0;
  if (nothingToWrite) return;

  try {
    const parentDir = path.dirname(file);
    if (!fs.existsSync(parentDir)) {
      fs.mkdirSync(parentDir, { recursive: true });
    }
    const ts = new Date().toISOString();
    const serialized = rows.map((row) => JSON.stringify({ ts, ...row }));
    const block = serialized.join('\n') + '\n';
    fs.appendFileSync(file, block, 'utf8');
    _maybeCompactMalware(file);
  } catch (err) {
    // Read-only / permission / disk-full conditions are dropped without a log line.
    const quiet = Boolean(err) && ['EROFS', 'EACCES', 'EPERM', 'ENOSPC'].includes(err.code);
    if (!quiet) {
      console.warn('[GHSA] Failed to persist denominator: ' + err.message);
    }
  }
}
163
+
164
/**
 * Trim the JSONL file to its newest GHSA_MALWARE_MAX non-empty lines when it has grown
 * past that bound. The trimmed content is written to `<file>.tmp` and renamed over the
 * target. Any failure is ignored.
 *
 * @param {string} file - JSONL file path.
 */
function _maybeCompactMalware(file) {
  try {
    const records = fs.readFileSync(file, 'utf8').split('\n').filter(Boolean);
    const surplus = records.length - GHSA_MALWARE_MAX;
    if (surplus <= 0) return;

    const staging = file + '.tmp';
    const trimmed = records.slice(surplus).join('\n') + '\n';
    fs.writeFileSync(staging, trimmed, 'utf8');
    fs.renameSync(staging, file);
  } catch {
    /* best-effort */
  }
}
175
+
176
+ // ── pre-alert dispatch ──
177
+
178
/**
 * Build the webhook payload (a single embed) announcing one fresh malware advisory row.
 *
 * @param {{ecosystem:string, name:string, versionRange?:string, ghsa_id:string}} row
 * @returns {{embeds: Array<object>}} payload with Package / Range / Advisory / Source
 *   fields, a footer carrying the current UTC time, and an ISO timestamp.
 */
function buildGhsaPreAlertEmbed(row) {
  const slug = encodeURIComponent(row.name);
  let registryUrl;
  if (row.ecosystem === 'pypi') {
    registryUrl = `https://pypi.org/project/${slug}/`;
  } else {
    registryUrl = `https://www.npmjs.com/package/${slug}`;
  }

  const fields = [
    { name: 'Package', value: `[${row.ecosystem}/${row.name}](${registryUrl})`, inline: true },
    { name: 'Range', value: String(row.versionRange || '*'), inline: true },
    { name: 'Advisory', value: `[${row.ghsa_id}](https://github.com/advisories/${row.ghsa_id})`, inline: true },
    { name: 'Source', value: 'GitHub Advisory DB (type=malware) — active poller', inline: false }
  ];

  // Footer shows "YYYY-MM-DD HH:MM:SS UTC": the ISO string with the "T" separator and the
  // fractional-seconds/Z suffix replaced.
  const footerClock = new Date().toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC');

  const embed = {
    title: '⚠️ GHSA PRE-ALERT — Fresh Malware Advisory',
    color: 0xe74c3c,
    fields,
    footer: { text: `MUAD'DIB GHSA Pre-Alert | ${footerClock}` },
    timestamp: new Date().toISOString()
  };
  return { embeds: [embed] };
}
197
+
198
/**
 * Default pre-alert dispatcher: posts the payload to the URL in MUADDIB_WEBHOOK_URL via
 * the project's `sendWebhook` (raw-payload mode). Does nothing when the variable is
 * unset; a failed send is logged and swallowed.
 *
 * @param {object} payload - webhook body.
 * @returns {Promise<void>}
 */
async function _defaultDispatch(payload) {
  const webhookUrl = process.env.MUADDIB_WEBHOOK_URL;
  if (webhookUrl) {
    try {
      // Loaded lazily, inside the try, so a load failure is handled like a send failure.
      const sendWebhook = require('../webhook.js').sendWebhook;
      await sendWebhook(webhookUrl, payload, { rawPayload: true });
    } catch (err) {
      console.warn('[GHSA] pre-alert dispatch failed: ' + err.message);
    }
  }
}
208
+
209
/**
 * Default ledger writer: forwards the entry to `appendScanLedger` from the monitor state
 * module. Any failure (including failing to load the module) is ignored.
 *
 * @param {object} entry - ledger entry to append.
 */
function _defaultLedger(entry) {
  try {
    const state = require('../monitor/state.js');
    state.appendScanLedger(entry);
  } catch {
    /* best-effort */
  }
}
212
+
213
+ // ── orchestration ──
214
+
215
/**
 * One poll pass. Best-effort: never throws. Dep-injectable for tests via opts.
 *
 * Steps: load the cursor, fetch each ecosystem, flatten advisories into rows, split rows
 * newer than the cursor into fresh / withdrawn, append to the denominator file, write
 * withdrawn rows to the ledger, pre-alert fresh rows (capped, skipped on the seeding run),
 * advance the cursor, and report per-ecosystem fetch counts to feed-health.
 *
 * @param {object} [opts]
 * @param {string[]} [opts.ecosystems] - ecosystems to poll.
 * @param {Function} [opts.fetchImpl] - `(ecosystem, opts) => Promise<Array>` of advisories.
 * @param {Function} [opts.dispatch] - `(payload) => Promise` sending one pre-alert.
 * @param {Function} [opts.appendLedger] - `(entry) => void` ledger writer.
 * @param {string} [opts.cursorFile] - cursor file path.
 * @param {string} [opts.malwareFile] - denominator JSONL path.
 * @param {number} [opts.prealertCap] - maximum pre-alerts sent in this pass.
 * @param {string} [opts.feedHealthFile] - alternate feed-health state file (tests/smoke).
 * @returns {Promise<{fresh:number, withdrawn:number, prealerted:number, seeded:boolean, errors:string[]}>}
 */
async function pollGhsaOnce(opts = {}) {
  const ecosystems = opts.ecosystems || GHSA_ECOSYSTEMS;
  const fetchImpl = opts.fetchImpl || _defaultFetch;
  const dispatch = opts.dispatch || _defaultDispatch;
  const appendLedger = opts.appendLedger || _defaultLedger;
  const cursorFile = opts.cursorFile || GHSA_CURSOR_FILE;
  const malwareFile = opts.malwareFile || GHSA_MALWARE_FILE;
  const prealertCap = opts.prealertCap != null ? opts.prealertCap : GHSA_PREALERT_CAP;

  const summary = { fresh: 0, withdrawn: 0, prealerted: 0, seeded: false, errors: [] };
  const prevCursor = loadGhsaCursor(cursorFile);
  const seeding = !prevCursor; // first run: seed silently, no pre-alert blast
  summary.seeded = seeding;

  const healthCounts = {};
  let maxUpdated = prevCursor || '';
  const freshRows = [];
  const seenRows = [];
  const withdrawnRows = [];
  // Each advisory is parsed against the FULL ecosystem list, so an advisory that the API
  // returns for more than one ecosystem query would otherwise contribute the same rows
  // several times (duplicate pre-alerts, ledger entries and denominator lines).
  const seenKeys = new Set();

  for (const eco of ecosystems) {
    let advisories;
    try {
      advisories = await fetchImpl(eco, opts);
      // Treat a malformed result like a failed fetch instead of crashing on `.length`.
      if (!Array.isArray(advisories)) throw new Error('fetch returned a non-array result');
    } catch (err) {
      // A transport/HTTP error is NOT a "feed returned 0" — skip this ecosystem entirely
      // (no feed-health entry → carry-forward).
      // NOTE(review): the cursor is one value shared by every ecosystem and is advanced
      // below whenever at least one ecosystem succeeded, so rows of a failed ecosystem
      // updated before the new cursor will not count as new on the next pass. Confirm
      // whether per-ecosystem cursors are wanted.
      summary.errors.push(`${eco}: ${(err && err.message) || String(err)}`);
      continue;
    }
    healthCounts[`GHSA-${eco}`] = advisories.length;

    for (const adv of advisories) {
      for (const row of parseAdvisory(adv, ecosystems)) {
        const key = JSON.stringify([row.ghsa_id, row.ecosystem, row.name, row.versionRange]);
        if (seenKeys.has(key)) continue;
        seenKeys.add(key);

        seenRows.push(row);
        if (row.updated_at && row.updated_at > maxUpdated) maxUpdated = row.updated_at;
        const isNew = !prevCursor || (row.updated_at && row.updated_at > prevCursor);
        if (!isNew) continue;
        if (row.withdrawn) withdrawnRows.push(row);
        else freshRows.push(row);
      }
    }
  }

  // Persist the denominator: on first run, the whole recent page; afterwards, only the
  // new/changed rows. (Phase 5 dedups by ghsa_id+ecosystem+name, latest-wins.)
  appendGhsaMalware(seeding ? seenRows : freshRows.concat(withdrawnRows), malwareFile);

  // Withdrawn advisories → ledger (a removed package keeps an identity for coverage-audit).
  for (const w of withdrawnRows) {
    try {
      appendLedger({ name: w.name, version: null, ecosystem: w.ecosystem, outcome: 'dropped', source: 'ghsa_gone' });
    } catch (err) {
      // An injected or failing ledger writer must not break the "never throws" contract.
      console.warn('[GHSA] ledger append failed: ' + ((err && err.message) || String(err)));
    }
    summary.withdrawn++;
  }

  // Pre-alert genuinely fresh names (not on the seeding run), capped.
  summary.fresh = freshRows.length;
  if (!seeding) {
    let sent = 0;
    for (const row of freshRows) {
      if (sent >= prealertCap) {
        console.warn(`[GHSA] pre-alert cap (${prealertCap}) reached — ${freshRows.length - sent} fresh advisory row(s) not pinged this cycle (still persisted to denominator)`);
        break;
      }
      try { await dispatch(buildGhsaPreAlertEmbed(row)); sent++; } catch { /* dispatch is best-effort */ }
    }
    summary.prealerted = sent;
  }

  // Advance the cursor only if we successfully fetched at least one ecosystem.
  if (Object.keys(healthCounts).length > 0 && maxUpdated && maxUpdated !== prevCursor) {
    saveGhsaCursor(maxUpdated, cursorFile);
  }

  // Feed-health on the GHSA feed(s) (only ecosystems that actually fetched successfully).
  // opts.feedHealthFile lets tests/smoke keep the shared data/feed-health.json untouched.
  if (Object.keys(healthCounts).length > 0) {
    try {
      await require('./feed-health.js').checkFeedHealth(healthCounts,
        opts.feedHealthFile ? { file: opts.feedHealthFile } : {});
    } catch { /* best-effort */ }
  }

  if (seeding) {
    console.log(`[GHSA] seeded denominator with ${seenRows.length} advisory row(s); cursor=${maxUpdated || '(none)'} (no pre-alerts on first run)`);
  } else {
    console.log(`[GHSA] poll: ${summary.fresh} fresh, ${summary.withdrawn} withdrawn, ${summary.prealerted} pre-alerted${summary.errors.length ? `, errors: ${summary.errors.join('; ')}` : ''}`);
  }
  return summary;
}
309
+
310
+ // ── daemon lifecycle ──
311
+
312
/**
 * Start the background poller: one immediate poll plus a repeating poll every
 * GHSA_POLL_INTERVAL_MS. Calling it while a poller is already running returns the
 * existing handle without starting a second one.
 *
 * @param {object} stats - forwarded to each poll as `{ stats }`.
 * @returns {object} the interval handle.
 */
function startGhsaPoller(stats) {
  if (_pollHandle) return _pollHandle;

  console.log(`[GHSA] Active poller started (interval=${GHSA_POLL_INTERVAL_MS / 60000}min)`);

  // Fire-and-forget: a rejected poll is only logged, with the given message prefix.
  const pollAndReport = (prefix) => {
    pollGhsaOnce({ stats }).catch((err) => console.warn(prefix + err.message));
  };

  // Initial poll (best-effort — never blocks daemon startup).
  pollAndReport('[GHSA] initial poll error: ');

  _pollHandle = setInterval(() => {
    pollAndReport('[GHSA] poll error: ');
  }, GHSA_POLL_INTERVAL_MS);
  // unref (when available) so the timer alone does not keep the process alive.
  if (_pollHandle.unref) _pollHandle.unref();
  return _pollHandle;
}
323
+
324
/**
 * Stop the background poller if one is running: clear the interval, reset the handle,
 * and log. Does nothing when no poller is active.
 */
function stopGhsaPoller() {
  if (!_pollHandle) return;
  clearInterval(_pollHandle);
  _pollHandle = null;
  console.log('[GHSA] Poller stopped');
}
327
+
328
+ module.exports = {
329
+ parseAdvisory,
330
+ pollGhsaOnce,
331
+ loadGhsaCursor,
332
+ saveGhsaCursor,
333
+ appendGhsaMalware,
334
+ buildGhsaPreAlertEmbed,
335
+ startGhsaPoller,
336
+ stopGhsaPoller,
337
+ _httpGetJson,
338
+ _defaultFetch,
339
+ GHSA_CURSOR_FILE,
340
+ GHSA_MALWARE_FILE,
341
+ GHSA_POLL_INTERVAL_MS,
342
+ GHSA_PREALERT_CAP
343
+ };
@@ -13,6 +13,7 @@ const { ensureWorkers, drainWorkers, getTargetConcurrency, setTargetConcurrency,
13
13
  const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY } = require('./adaptive-concurrency.js');
14
14
  const { startHealthcheck } = require('./healthcheck.js');
15
15
  const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
16
+ const { startGhsaPoller, stopGhsaPoller } = require('../ioc/ghsa-poller.js');
16
17
  const { cleanupOldArchives, getRetentionDays, startPeriodicCleanup } = require('./tarball-archive.js');
17
18
  const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
18
19
  // Caches not previously cleared by handleMemoryPressure (OOM fix). These live
@@ -920,6 +921,7 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
920
921
  // Stop deferred sandbox worker and persist its queue
921
922
  stopDeferredWorker();
922
923
  persistDeferredQueue();
924
+ stopGhsaPoller();
923
925
  healthcheck.stop();
924
926
  // Flush all pending scope groups before exit
925
927
  for (const [scope, group] of pendingGrouped) {
@@ -945,6 +947,12 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
945
947
  console.log('[MONITOR] Deferred sandbox worker started (30s interval, dedicated slot)');
946
948
  }
947
949
 
950
+ // Phase 2c part 2: active GHSA malware-advisory poller (~15 min). Independent of the
951
+ // sandbox — it surfaces fresh advisories (pre-alert), records withdrawn ones to the
952
+ // ledger, and accumulates the denominator the Phase 5 coverage-audit joins against.
953
+ // Best-effort and fire-and-forget; never blocks the daemon.
954
+ startGhsaPoller(stats);
955
+
948
956
  // ─── Initial poll ───
949
957
  // Fills the queue with pending packages. Processing starts in the main loop
950
958
  // via ensureWorkers (non-blocking) — NOT await processQueue (blocking).