muaddib-scanner 2.11.111 → 2.11.113

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.111",
3
+ "version": "2.11.113",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-13T09:27:21.416Z",
3
+ "timestamp": "2026-06-14T08:06:18.378Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -9,6 +9,7 @@
9
9
 
10
10
  const https = require('https');
11
11
  const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
12
+ const { registryAuthHeaders } = require('../shared/registry-auth.js');
12
13
  const { loadCachedIOCs } = require('../ioc/updater.js');
13
14
  const { enqueueScan } = require('./scan-queue.js');
14
15
  const {
@@ -99,7 +100,7 @@ function httpsGet(url, timeoutMs = 30_000, deadlineMs = Math.max(timeoutMs * 2,
99
100
  clearTimeout(deadline);
100
101
  if (err) reject(err); else resolve(value);
101
102
  };
102
- req = _deps.https.get(url, { timeout: timeoutMs }, (res) => {
103
+ req = _deps.https.get(url, { timeout: timeoutMs, headers: registryAuthHeaders(url) }, (res) => {
103
104
  if (res.statusCode === 301 || res.statusCode === 302) {
104
105
  res.resume();
105
106
  const location = res.headers.location;
@@ -1252,6 +1252,13 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1252
1252
  const pct = (ledger.distinctCoverage * 100).toFixed(0);
1253
1253
  const approx = ledger.exactVanished === false ? '~' : '';
1254
1254
  coverageText = `${ledger.distinctScanned}/${ledger.distinctPackages} pkgs (${approx}${pct}%)`;
1255
+ // Honest 24h coverage loss surfaced next to coverage: `vanished` = distinct names
1256
+ // dropped and never re-scanned in the window — the real miss count. The raw
1257
+ // `dropped` aggregate (which also folds in recoverable spill + retries, so it
1258
+ // OVERSTATES loss) is relegated to the Ops embed's Ledger field, not the headline.
1259
+ if (ledger.vanished > 0) {
1260
+ coverageText += ` · ${ledger.exactVanished ? '' : '≥'}${ledger.vanished} vanished`;
1261
+ }
1255
1262
  if (published > 0) coverageText += `\nRaw events: ${attempted}/${published}`;
1256
1263
  coverageText += opsSuffix;
1257
1264
  } else if (published > 0) {
@@ -1343,14 +1350,7 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1343
1350
  { name: 'vs Yesterday', value: trendsText, inline: false },
1344
1351
  { name: 'ML', value: mlText, inline: true },
1345
1352
  { name: 'LLM Detective', value: llmText, inline: true },
1346
- { name: 'Top Suspects', value: top3Text, inline: false },
1347
- ...((stats.sandboxDeferred || stats.deferredProcessed || stats.deferredExpired)
1348
- ? [{ name: 'Deferred Sandbox', value: `Enqueued: ${stats.sandboxDeferred || 0} | Processed: ${stats.deferredProcessed || 0} | Expired: ${stats.deferredExpired || 0}`, inline: false }]
1349
- : []),
1350
- { name: 'Stability', value: _stabilityFieldValue(stats), inline: false },
1351
- { name: 'Degradations', value: _degradationsFieldValue(), inline: false },
1352
- ...(ledgerField ? [ledgerField] : []),
1353
- { name: 'System', value: healthText, inline: false }
1353
+ { name: 'Top Suspects', value: top3Text, inline: false }
1354
1354
  ],
1355
1355
  footer: {
1356
1356
  // Headline-source annotation: 'ledger' = window-exact [last report → now]
@@ -1359,6 +1359,30 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1359
1359
  text: `MUAD'DIB - Daily summary | headline: ${headline ? 'ledger — completed/deduped, exact 24h window' : 'counters (in-memory fallback)'} | ${readableTime}`
1360
1360
  },
1361
1361
  timestamp: now.toISOString()
1362
+ }, {
1363
+ // --- Embed 2: Ops / system state (kept OUT of the daily headline) ---
1364
+ // Operator feedback: a daily that mixes 24h outcome with multi-day system state
1365
+ // reads as failure when it isn't. Each line here carries its own clock:
1366
+ // • Ledger → 24h window. Its `dropped` folds in recoverable spill + retries,
1367
+ // so it OVERSTATES loss — `vanished` (in the Coverage field) is the
1368
+ // honest miss count, which is why dropped sits here, not the headline.
1369
+ // • Stability → cumulative since the 08:00 reset (backlog = point-in-time depth
1370
+ // of the persistent spill file, the one snapshot in this field).
1371
+ // • Degradations / System → instantaneous snapshot (degradations have no TTL: if
1372
+ // shown, the condition is active right now, not earlier in the window).
1373
+ title: '⚙️ Ops / état système',
1374
+ color: 0x95a5a6,
1375
+ description: 'Ledger = fenêtre 24h (dropped inclut le spill récupérable — voir « vanished » pour la perte réelle) · Stability = cumulé depuis 08:00 (backlog = instantané) · Degradations/System = instantané',
1376
+ fields: [
1377
+ ...((stats.sandboxDeferred || stats.deferredProcessed || stats.deferredExpired)
1378
+ ? [{ name: 'Deferred Sandbox', value: `Enqueued: ${stats.sandboxDeferred || 0} | Processed: ${stats.deferredProcessed || 0} | Expired: ${stats.deferredExpired || 0}`, inline: false }]
1379
+ : []),
1380
+ { name: 'Stability (cumulé depuis 08:00)', value: _stabilityFieldValue(stats), inline: false },
1381
+ { name: 'Degradations (actif maintenant)', value: _degradationsFieldValue(), inline: false },
1382
+ ...(ledgerField ? [ledgerField] : []),
1383
+ { name: 'System', value: healthText, inline: false }
1384
+ ],
1385
+ timestamp: now.toISOString()
1362
1386
  }]
1363
1387
  };
1364
1388
  }
@@ -1,6 +1,7 @@
1
1
  const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
2
2
  const { debugLog } = require('../utils.js');
3
3
  const { acquireRegistrySlot, releaseRegistrySlot, awaitRateToken, signal429, hostForUrl } = require('../shared/http-limiter.js');
4
+ const { registryAuthHeaders } = require('../shared/registry-auth.js');
4
5
  const { computeAdvancedRegistrySignals } = require('../integrations/registry-signals.js');
5
6
 
6
7
  const REGISTRY_URL = 'https://registry.npmjs.org';
@@ -12,6 +13,16 @@ const SEARCH_URL = 'https://registry.npmjs.org/-/v1/search';
12
13
  const REQUEST_TIMEOUT = Math.max(1000, parseInt(process.env.MUADDIB_REGISTRY_TIMEOUT_MS, 10) || 10000); // 10s default
13
14
  const MAX_RETRIES = Math.max(1, parseInt(process.env.MUADDIB_REGISTRY_RETRIES, 10) || 5);
14
15
 
16
// Per-maintainer cache for the /-/v1/search author-count lookup — the only
// DYNAMIC (non-CDN), rate-limited registry endpoint we hit per scan. Without it,
// firing the search on every scan generated the bulk of the monitor's 429s and
// the shared brain then throttled the (healthy, CDN-served) packument reads too.
// See getPackageMetadata. Env-tunable; defaults preserve the signal.
const _authorCountCache = new Map(); // maintainer → { count, at }

/**
 * Read an integer environment variable for the author cache settings.
 * Unlike `parseInt(...) || fallback`, an explicit `0` is honoured instead of
 * being silently replaced by the fallback.
 *
 * @param {string} name - environment variable name
 * @param {number} fallback - value used when the variable is unset or not numeric
 * @returns {number} the parsed integer, or `fallback`
 */
function _authorCacheEnvInt(name, fallback) {
  const parsed = Number.parseInt(process.env[name], 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

// 1h default. MUADDIB_AUTHOR_CACHE_TTL_MS=0 is a valid setting: no entry is
// ever considered fresh, so every lookup goes to the network.
const AUTHOR_CACHE_TTL_MS = Math.max(0, _authorCacheEnvInt('MUADDIB_AUTHOR_CACHE_TTL_MS', 3_600_000));
const AUTHOR_CACHE_MAX = Math.max(100, parseInt(process.env.MUADDIB_AUTHOR_CACHE_MAX, 10) || 5000);
const AUTHOR_SEARCH_ENABLED = process.env.MUADDIB_NPM_AUTHOR_SEARCH !== '0'; // kill-switch
25
+
15
26
  /**
16
27
  * Create a timeout signal, with fallback for older Node versions.
17
28
  * Returns { signal, cleanup } — call cleanup() after fetch to prevent timer leaks.
@@ -25,7 +36,7 @@ function createTimeoutSignal(ms) {
25
36
  return { signal: controller.signal, cleanup: () => clearTimeout(timer) };
26
37
  }
27
38
 
28
- async function fetchWithRetry(url) {
39
+ async function fetchWithRetry(url, opts = {}) {
29
40
  for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
30
41
  // The caller's acquireRegistrySlot paid the rate token for the FIRST
31
42
  // attempt only. Every retry is a new network request and must pay its own
@@ -43,7 +54,7 @@ async function fetchWithRetry(url) {
43
54
  let response;
44
55
  const { signal, cleanup } = createTimeoutSignal(REQUEST_TIMEOUT);
45
56
  try {
46
- response = await fetch(url, { signal });
57
+ response = await fetch(url, { signal, headers: registryAuthHeaders(url) });
47
58
  } catch {
48
59
  cleanup();
49
60
  // REG-001: Retry on timeout/abort instead of returning null immediately.
@@ -71,7 +82,16 @@ async function fetchWithRetry(url) {
71
82
  // Retry-After (capped at 30s) with jitter so retries don't re-synchronize.
72
83
  if (response.status === 429) {
73
84
  try { await response.text(); } catch (e) { debugLog('response drain failed:', e.message); }
74
- try { signal429(); } catch { /* limiter is best-effort */ }
85
+ // Back off the CORRECT host's bucket. This previously defaulted to
86
+ // registry.npmjs.org, so a 429 from api.npmjs.org/downloads (a SEPARATE,
87
+ // aggressively rate-limited host that 429s ~every request) poisoned the
88
+ // registry brain and stalled the tarball/packument fetches that were
89
+ // themselves healthy — the measured ~20s/scan throughput wall.
90
+ try { signal429(hostForUrl(url)); } catch { /* limiter is best-effort */ }
91
+ // Best-effort callers (the weekly-downloads reputation signal, whose
92
+ // endpoint 429s on essentially every request) opt out of the retry storm:
93
+ // retrying 5× with ~2s sleeps just burns ~10s/scan to still return null.
94
+ if (opts.noRetryOn429) return null;
75
95
  const retryAfter = parseInt(response.headers.get('retry-after'), 10);
76
96
  const base = Math.min(retryAfter && retryAfter > 0 ? retryAfter * 1000 : 2000, 30000);
77
97
  await new Promise(r => setTimeout(r, Math.round(base * (0.5 + Math.random() * 0.5))));
@@ -170,26 +190,45 @@ async function getPackageMetadata(packageName) {
170
190
  }
171
191
  const provenanceRegressed = !latestHasProvenance && anyPriorHadProvenance;
172
192
 
173
- // 2. Weekly downloads + author search (parallel)
193
+ // 2. Weekly downloads + author package count (parallel).
194
+ // The author count comes from /-/v1/search?text=maintainer: which — unlike the
195
+ // CDN-served packument — is DYNAMIC, slow (~300-950ms) and the one per-scan call
196
+ // npm aggressively rate-limits. A TTL cache keyed on the maintainer collapses
197
+ // the search volume (maintainers repeat heavily: scopes / bots / monorepos)
198
+ // while keeping author_package_count byte-identical. MUADDIB_NPM_AUTHOR_SEARCH=0
199
+ // drops the call entirely — the count then stays absent, exactly as the
200
+ // pre-resolve fast path already leaves it.
174
201
  const downloadsUrl = DOWNLOADS_URL + '/' + encodeURIComponent(packageName);
175
- const authorUrl = maintainer
202
+ const authorUrl = (AUTHOR_SEARCH_ENABLED && maintainer)
176
203
  ? SEARCH_URL + '?text=maintainer:' + encodeURIComponent(maintainer) + '&size=1'
177
204
  : null;
178
205
 
179
- async function fetchAuthorWithSlot() {
180
- if (!authorUrl) return null;
206
+ async function getAuthorPackageCount() {
207
+ if (!authorUrl) return 0;
208
+ const hit = _authorCountCache.get(maintainer);
209
+ if (hit && (Date.now() - hit.at) < AUTHOR_CACHE_TTL_MS) return hit.count;
181
210
  await acquireRegistrySlot();
182
- try { return await fetchWithRetry(authorUrl); }
211
+ let data;
212
+ try { data = await fetchWithRetry(authorUrl); }
183
213
  finally { releaseRegistrySlot(); }
214
+ // 429-exhausted / error → fetchWithRetry returns null: reuse a stale entry if
215
+ // present and do NOT cache the miss (a transient 0 would poison the typosquat
216
+ // "author has ≤1 package" signal).
217
+ if (!data) return hit ? hit.count : 0;
218
+ const count = data.total ?? 0;
219
+ if (_authorCountCache.size >= AUTHOR_CACHE_MAX) {
220
+ _authorCountCache.delete(_authorCountCache.keys().next().value); // FIFO evict (bounded)
221
+ }
222
+ _authorCountCache.set(maintainer, { count, at: Date.now() });
223
+ return count;
184
224
  }
185
225
 
186
- const [downloadsData, authorData] = await Promise.all([
187
- fetchWithRetry(downloadsUrl), // api.npmjs.org — no semaphore needed
188
- fetchAuthorWithSlot() // registry.npmjs.org — semaphore protected
226
+ const [downloadsData, authorPackageCount] = await Promise.all([
227
+ fetchWithRetry(downloadsUrl, { noRetryOn429: true }), // api.npmjs.org — rate-limited; best-effort single shot (no retry storm, correct-host backoff)
228
+ getAuthorPackageCount() // registry.npmjs.org search cached + kill-switchable
189
229
  ]);
190
230
 
191
231
  const weeklyDownloads = downloadsData?.downloads ?? 0;
192
- const authorPackageCount = authorData?.total ?? 0;
193
232
  const versionCount = meta.versions ? Object.keys(meta.versions).length : 0;
194
233
  const description = (typeof latestMeta?.description === 'string' ? latestMeta.description
195
234
  : (typeof meta.description === 'string' ? meta.description : ''));
@@ -4,6 +4,7 @@ const path = require('path');
4
4
  const { execFileSync } = require('child_process');
5
5
  const AdmZip = require('adm-zip');
6
6
  const { MAX_TARBALL_SIZE, DOWNLOAD_TIMEOUT } = require('./constants.js');
7
+ const { registryAuthHeaders } = require('./registry-auth.js');
7
8
 
8
9
  // Allowed redirect domains for tarball downloads (SSRF protection)
9
10
  const ALLOWED_DOWNLOAD_DOMAINS = [
@@ -159,7 +160,7 @@ function downloadToFile(url, destPath, timeoutMs = DOWNLOAD_TIMEOUT) {
159
160
  if (redirectCount >= MAX_REDIRECTS) {
160
161
  return reject(new Error(`Too many redirects (${MAX_REDIRECTS}) for ${url}`));
161
162
  }
162
- const req = https.get(requestUrl, { timeout: timeoutMs }, (res) => {
163
+ const req = https.get(requestUrl, { timeout: timeoutMs, headers: registryAuthHeaders(requestUrl) }, (res) => {
163
164
  if (res.statusCode === 301 || res.statusCode === 302) {
164
165
  res.resume();
165
166
  const location = res.headers.location;
@@ -0,0 +1,98 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * npm registry authentication (2026-06-13).
5
+ *
6
+ * A supply-chain scanner fetches thousands of brand-new, never-CDN-cached
7
+ * packages; anonymous registry.npmjs.org traffic gets aggressively 429-throttled
8
+ * per-IP (observed: ~500/h of 429s at <1 req/s, scans stalling 20-46s waiting on
9
+ * metadata tokens). An authenticated token raises the per-account limit and
10
+ * de-anonymizes us.
11
+ *
12
+ * Token resolution (first hit wins), memoized for the process lifetime:
13
+ * 1. env MUADDIB_NPM_TOKEN (canonical — set via systemd EnvironmentFile / drop-in)
14
+ * 2. env NPM_TOKEN (common fallback)
15
+ * 3. .npmrc //registry.npmjs.org/:_authToken=... (npm-standard; $MUADDIB_NPMRC first, then cwd, $HOME, /home/muaddib)
16
+ *
17
+ * Auth is applied ONLY to registry.npmjs.org requests — other hosts (pypi.org,
18
+ * api.npmjs.org, replicate.npmjs.com) get NO header, so the token can never leak
19
+ * to a third-party host. With no token configured the header set is empty and
20
+ * behaviour is identical to the previous anonymous path.
21
+ */
22
+
23
+ const fs = require('fs');
24
+ const path = require('path');
25
+
26
// Hostnames eligible to receive the npm bearer token. registryAuthHeaders()
// returns an empty header set for any hostname that is not in this set.
+ const AUTH_HOSTS = new Set(['registry.npmjs.org']);
27
+
28
// Memoized token resolution, written by getNpmToken() and cleared by _resetForTests():
//   _resolved — set to true on the first getNpmToken() call, whether or not a token was found
//   _token    — the resolved token, or null when none is configured
//   _source   — label for where the token came from ('env:…' or 'npmrc:<file>'), or null
+ let _resolved = false;
29
+ let _token = null;
30
+ let _source = null;
31
+
32
/**
 * Look for a registry.npmjs.org auth token in the candidate .npmrc files.
 *
 * Candidates are tried in order — $MUADDIB_NPMRC, <cwd>/.npmrc, $HOME/.npmrc,
 * /home/muaddib/.npmrc — and the first file yielding a usable token wins.
 * Missing or unreadable files are skipped.
 *
 * `${VAR}` references in the value are expanded from the environment. A file
 * whose token references an unset variable, or whose value is empty once the
 * quotes are stripped, is skipped: returning the literal placeholder would
 * attach a bogus Bearer token to every registry request.
 *
 * @returns {{token: string, source: string} | null} the token and an
 *   `npmrc:<file>` source label, or null when no candidate provides one
 */
function _fromNpmrc() {
  const files = [
    process.env.MUADDIB_NPMRC,
    path.join(process.cwd(), '.npmrc'),
    process.env.HOME ? path.join(process.env.HOME, '.npmrc') : null,
    '/home/muaddib/.npmrc',
  ].filter(Boolean);
  for (const f of files) {
    let txt;
    try { txt = fs.readFileSync(f, 'utf8'); } catch { continue; }
    // npm-standard line: //registry.npmjs.org/:_authToken=<token>
    const m = txt.match(/^\s*\/\/registry\.npmjs\.org\/:_authToken\s*=\s*(.+?)\s*$/m);
    if (!m) continue;
    const raw = m[1].replace(/^["']|["']$/g, '');
    // Expand ${VAR} (and ${VAR?}) placeholders from the environment.
    let unresolved = false;
    const token = raw.replace(/\$\{([^${}?]+)\??\}/g, (_match, name) => {
      const value = process.env[name];
      if (!value) { unresolved = true; return ''; }
      return value;
    }).trim();
    if (unresolved || !token) continue; // unusable here — try the next candidate
    return { token, source: `npmrc:${f}` };
  }
  return null;
}
48
+
49
/**
 * Resolve the npm auth token once and memoize the outcome for the process
 * lifetime (until _resetForTests() is called).
 *
 * Resolution order, first non-blank value wins:
 *   1. env MUADDIB_NPM_TOKEN
 *   2. env NPM_TOKEN
 *   3. the .npmrc lookup performed by _fromNpmrc()
 *
 * Each environment variable is trimmed on its own, so a blank or
 * whitespace-only MUADDIB_NPM_TOKEN does not mask a valid NPM_TOKEN.
 *
 * @returns {string|null} the token, or null when none is configured
 */
function getNpmToken() {
  if (_resolved) return _token;
  _resolved = true;
  const envCandidates = [
    ['env:MUADDIB_NPM_TOKEN', process.env.MUADDIB_NPM_TOKEN],
    ['env:NPM_TOKEN', process.env.NPM_TOKEN],
  ];
  for (const [source, raw] of envCandidates) {
    const value = (raw || '').trim();
    if (value) {
      _token = value;
      _source = source;
      return _token;
    }
  }
  const rc = _fromNpmrc();
  if (rc) { _token = rc.token; _source = rc.source; }
  return _token;
}
62
+
63
/**
 * Non-secret summary of the auth configuration, for the one-time boot log.
 * NEVER returns the token: at most its last four characters are exposed, and
 * only when the token is longer than 8 characters. For shorter values the
 * suffix would be all or most of the secret, so it is masked as '****'.
 *
 * @returns {{enabled: boolean, source: string|null, last4: string|null}}
 */
function npmAuthStatus() {
  const t = getNpmToken();
  if (!t) return { enabled: false, source: null, last4: null };
  const text = String(t);
  const last4 = text.length > 8 ? text.slice(-4) : '****';
  return { enabled: true, source: _source, last4 };
}
68
+
69
// Latch: set once the status line has been emitted (cleared by _resetForTests()).
let _logged = false;

/**
 * Emit the "[REGISTRY-AUTH] …" status line at most once per process.
 * Uses `logger.log` when auth is enabled and `logger.warn` when it is not.
 * If the supplied logger is null or lacks the needed method, the matching
 * `console` method is used instead, so a partial logger cannot throw here
 * after the latch is already set and lose the line for good.
 *
 * @param {{log?: Function, warn?: Function}} [logger=console] - log sink
 * @returns {void}
 */
function logAuthStatusOnce(logger = console) {
  if (_logged) return;
  _logged = true;
  const s = npmAuthStatus();
  const emit = (level, message) => {
    if (typeof logger?.[level] === 'function') {
      logger[level](message);
    } else {
      console[level](message);
    }
  };
  if (s.enabled) {
    emit('log', `[REGISTRY-AUTH] npm registry auth ENABLED (source=${s.source}, token …${s.last4})`);
  } else {
    emit('warn', '[REGISTRY-AUTH] npm registry auth DISABLED — anonymous registry.npmjs.org (set MUADDIB_NPM_TOKEN); expect heavier 429 throttling.');
  }
}
80
+
81
/**
 * Headers to merge into a registry request.
 *
 * Returns `{ Authorization: 'Bearer <token>' }` only when all of these hold:
 *   - `url` parses as a URL,
 *   - its scheme is `https:` (the token is never sent over cleartext HTTP),
 *   - its hostname is in AUTH_HOSTS,
 *   - a token is configured.
 * Otherwise returns an empty object, so the request stays anonymous.
 *
 * @param {string|URL} url - the request URL about to be fetched
 * @returns {{Authorization?: string}} headers to merge (possibly empty)
 */
function registryAuthHeaders(url) {
  // First call doubles as the boot confirmation in the journal.
  logAuthStatusOnce();
  let target;
  try { target = new URL(url); } catch { return {}; }
  if (target.protocol !== 'https:') return {};
  if (!AUTH_HOSTS.has(target.hostname)) return {};
  const t = getNpmToken();
  return t ? { Authorization: `Bearer ${t}` } : {};
}
94
+
95
// Test seam: forgets the memoized token, its source label and the
// "status already logged" latch, so a test can flip MUADDIB_NPM_TOKEN and
// have the next call resolve (and log) from scratch.
function _resetForTests() {
  _resolved = false;
  _token = null;
  _source = null;
  _logged = false;
}
97
+
98
+ module.exports = { registryAuthHeaders, getNpmToken, npmAuthStatus, logAuthStatusOnce, _resetForTests };