muaddib-scanner 2.11.111 → 2.11.113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
package/src/monitor/ingestion.js
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
const https = require('https');
|
|
11
11
|
const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
|
|
12
|
+
const { registryAuthHeaders } = require('../shared/registry-auth.js');
|
|
12
13
|
const { loadCachedIOCs } = require('../ioc/updater.js');
|
|
13
14
|
const { enqueueScan } = require('./scan-queue.js');
|
|
14
15
|
const {
|
|
@@ -99,7 +100,7 @@ function httpsGet(url, timeoutMs = 30_000, deadlineMs = Math.max(timeoutMs * 2,
|
|
|
99
100
|
clearTimeout(deadline);
|
|
100
101
|
if (err) reject(err); else resolve(value);
|
|
101
102
|
};
|
|
102
|
-
req = _deps.https.get(url, { timeout: timeoutMs }, (res) => {
|
|
103
|
+
req = _deps.https.get(url, { timeout: timeoutMs, headers: registryAuthHeaders(url) }, (res) => {
|
|
103
104
|
if (res.statusCode === 301 || res.statusCode === 302) {
|
|
104
105
|
res.resume();
|
|
105
106
|
const location = res.headers.location;
|
package/src/monitor/webhook.js
CHANGED
|
@@ -1252,6 +1252,13 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
|
1252
1252
|
const pct = (ledger.distinctCoverage * 100).toFixed(0);
|
|
1253
1253
|
const approx = ledger.exactVanished === false ? '~' : '';
|
|
1254
1254
|
coverageText = `${ledger.distinctScanned}/${ledger.distinctPackages} pkgs (${approx}${pct}%)`;
|
|
1255
|
+
// Honest 24h coverage loss surfaced next to coverage: `vanished` = distinct names
|
|
1256
|
+
// dropped and never re-scanned in the window — the real miss count. The raw
|
|
1257
|
+
// `dropped` aggregate (which also folds in recoverable spill + retries, so it
|
|
1258
|
+
// OVERSTATES loss) is relegated to the Ops embed's Ledger field, not the headline.
|
|
1259
|
+
if (ledger.vanished > 0) {
|
|
1260
|
+
coverageText += ` · ${ledger.exactVanished ? '' : '≥'}${ledger.vanished} vanished`;
|
|
1261
|
+
}
|
|
1255
1262
|
if (published > 0) coverageText += `\nRaw events: ${attempted}/${published}`;
|
|
1256
1263
|
coverageText += opsSuffix;
|
|
1257
1264
|
} else if (published > 0) {
|
|
@@ -1343,14 +1350,7 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
|
1343
1350
|
{ name: 'vs Yesterday', value: trendsText, inline: false },
|
|
1344
1351
|
{ name: 'ML', value: mlText, inline: true },
|
|
1345
1352
|
{ name: 'LLM Detective', value: llmText, inline: true },
|
|
1346
|
-
{ name: 'Top Suspects', value: top3Text, inline: false }
|
|
1347
|
-
...((stats.sandboxDeferred || stats.deferredProcessed || stats.deferredExpired)
|
|
1348
|
-
? [{ name: 'Deferred Sandbox', value: `Enqueued: ${stats.sandboxDeferred || 0} | Processed: ${stats.deferredProcessed || 0} | Expired: ${stats.deferredExpired || 0}`, inline: false }]
|
|
1349
|
-
: []),
|
|
1350
|
-
{ name: 'Stability', value: _stabilityFieldValue(stats), inline: false },
|
|
1351
|
-
{ name: 'Degradations', value: _degradationsFieldValue(), inline: false },
|
|
1352
|
-
...(ledgerField ? [ledgerField] : []),
|
|
1353
|
-
{ name: 'System', value: healthText, inline: false }
|
|
1353
|
+
{ name: 'Top Suspects', value: top3Text, inline: false }
|
|
1354
1354
|
],
|
|
1355
1355
|
footer: {
|
|
1356
1356
|
// Headline-source annotation: 'ledger' = window-exact [last report → now]
|
|
@@ -1359,6 +1359,30 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
|
1359
1359
|
text: `MUAD'DIB - Daily summary | headline: ${headline ? 'ledger — completed/deduped, exact 24h window' : 'counters (in-memory fallback)'} | ${readableTime}`
|
|
1360
1360
|
},
|
|
1361
1361
|
timestamp: now.toISOString()
|
|
1362
|
+
}, {
|
|
1363
|
+
// --- Embed 2: Ops / system state (kept OUT of the daily headline) ---
|
|
1364
|
+
// Operator feedback: a daily that mixes 24h outcome with multi-day system state
|
|
1365
|
+
// reads as failure when it isn't. Each line here carries its own clock:
|
|
1366
|
+
// • Ledger → 24h window. Its `dropped` folds in recoverable spill + retries,
|
|
1367
|
+
// so it OVERSTATES loss — `vanished` (in the Coverage field) is the
|
|
1368
|
+
// honest miss count, which is why dropped sits here, not the headline.
|
|
1369
|
+
// • Stability → cumulative since the 08:00 reset (backlog = point-in-time depth
|
|
1370
|
+
// of the persistent spill file, the one snapshot in this field).
|
|
1371
|
+
// • Degradations / System → instantaneous snapshot (degradations have no TTL: if
|
|
1372
|
+
// shown, the condition is active right now, not earlier in the window).
|
|
1373
|
+
title: '⚙️ Ops / état système',
|
|
1374
|
+
color: 0x95a5a6,
|
|
1375
|
+
description: 'Ledger = fenêtre 24h (dropped inclut le spill récupérable — voir « vanished » pour la perte réelle) · Stability = cumulé depuis 08:00 (backlog = instantané) · Degradations/System = instantané',
|
|
1376
|
+
fields: [
|
|
1377
|
+
...((stats.sandboxDeferred || stats.deferredProcessed || stats.deferredExpired)
|
|
1378
|
+
? [{ name: 'Deferred Sandbox', value: `Enqueued: ${stats.sandboxDeferred || 0} | Processed: ${stats.deferredProcessed || 0} | Expired: ${stats.deferredExpired || 0}`, inline: false }]
|
|
1379
|
+
: []),
|
|
1380
|
+
{ name: 'Stability (cumulé depuis 08:00)', value: _stabilityFieldValue(stats), inline: false },
|
|
1381
|
+
{ name: 'Degradations (actif maintenant)', value: _degradationsFieldValue(), inline: false },
|
|
1382
|
+
...(ledgerField ? [ledgerField] : []),
|
|
1383
|
+
{ name: 'System', value: healthText, inline: false }
|
|
1384
|
+
],
|
|
1385
|
+
timestamp: now.toISOString()
|
|
1362
1386
|
}]
|
|
1363
1387
|
};
|
|
1364
1388
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
|
|
2
2
|
const { debugLog } = require('../utils.js');
|
|
3
3
|
const { acquireRegistrySlot, releaseRegistrySlot, awaitRateToken, signal429, hostForUrl } = require('../shared/http-limiter.js');
|
|
4
|
+
const { registryAuthHeaders } = require('../shared/registry-auth.js');
|
|
4
5
|
const { computeAdvancedRegistrySignals } = require('../integrations/registry-signals.js');
|
|
5
6
|
|
|
6
7
|
const REGISTRY_URL = 'https://registry.npmjs.org';
|
|
@@ -12,6 +13,16 @@ const SEARCH_URL = 'https://registry.npmjs.org/-/v1/search';
|
|
|
12
13
|
const REQUEST_TIMEOUT = Math.max(1000, parseInt(process.env.MUADDIB_REGISTRY_TIMEOUT_MS, 10) || 10000); // 10s default
|
|
13
14
|
const MAX_RETRIES = Math.max(1, parseInt(process.env.MUADDIB_REGISTRY_RETRIES, 10) || 5);
|
|
14
15
|
|
|
16
|
+
// Per-maintainer cache for the /-/v1/search author-count lookup — the only
|
|
17
|
+
// DYNAMIC (non-CDN), rate-limited registry endpoint we hit per scan. Without it,
|
|
18
|
+
// firing the search on every scan generated the bulk of the monitor's 429s and
|
|
19
|
+
// the shared brain then throttled the (healthy, CDN-served) packument reads too.
|
|
20
|
+
// See getPackageMetadata. Env-tunable; defaults preserve the signal.
|
|
21
|
+
/**
 * Parse an integer setting, clamped to be non-negative.
 *
 * Unlike `parseInt(raw, 10) || fallback`, an explicit "0" is honoured instead
 * of being treated as "unset" and silently replaced by the fallback.
 *
 * @param {string|undefined} raw - raw environment value
 * @param {number} fallback - value used when `raw` is missing or not a number
 * @returns {number} the parsed value (negative values become 0) or `fallback`
 */
function _nonNegativeIntOr(raw, fallback) {
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) ? fallback : Math.max(0, parsed);
}

const _authorCountCache = new Map(); // maintainer → { count, at }
// TTL of a cached author count in ms (default 1h). 0 is a valid setting: no
// entry is ever considered fresh, so the cache only serves as a stale fallback.
const AUTHOR_CACHE_TTL_MS = _nonNegativeIntOr(process.env.MUADDIB_AUTHOR_CACHE_TTL_MS, 3_600_000);
// Upper bound on cached maintainers (default 5000, never below 100).
const AUTHOR_CACHE_MAX = Math.max(100, parseInt(process.env.MUADDIB_AUTHOR_CACHE_MAX, 10) || 5000);
// Kill-switch: only the exact value '0' disables the author search.
const AUTHOR_SEARCH_ENABLED = process.env.MUADDIB_NPM_AUTHOR_SEARCH !== '0';
|
|
25
|
+
|
|
15
26
|
/**
|
|
16
27
|
* Create a timeout signal, with fallback for older Node versions.
|
|
17
28
|
* Returns { signal, cleanup } — call cleanup() after fetch to prevent timer leaks.
|
|
@@ -25,7 +36,7 @@ function createTimeoutSignal(ms) {
|
|
|
25
36
|
return { signal: controller.signal, cleanup: () => clearTimeout(timer) };
|
|
26
37
|
}
|
|
27
38
|
|
|
28
|
-
async function fetchWithRetry(url) {
|
|
39
|
+
async function fetchWithRetry(url, opts = {}) {
|
|
29
40
|
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
|
|
30
41
|
// The caller's acquireRegistrySlot paid the rate token for the FIRST
|
|
31
42
|
// attempt only. Every retry is a new network request and must pay its own
|
|
@@ -43,7 +54,7 @@ async function fetchWithRetry(url) {
|
|
|
43
54
|
let response;
|
|
44
55
|
const { signal, cleanup } = createTimeoutSignal(REQUEST_TIMEOUT);
|
|
45
56
|
try {
|
|
46
|
-
response = await fetch(url, { signal });
|
|
57
|
+
response = await fetch(url, { signal, headers: registryAuthHeaders(url) });
|
|
47
58
|
} catch {
|
|
48
59
|
cleanup();
|
|
49
60
|
// REG-001: Retry on timeout/abort instead of returning null immediately.
|
|
@@ -71,7 +82,16 @@ async function fetchWithRetry(url) {
|
|
|
71
82
|
// Retry-After (capped at 30s) with jitter so retries don't re-synchronize.
|
|
72
83
|
if (response.status === 429) {
|
|
73
84
|
try { await response.text(); } catch (e) { debugLog('response drain failed:', e.message); }
|
|
74
|
-
|
|
85
|
+
// Back off the CORRECT host's bucket. This previously defaulted to
|
|
86
|
+
// registry.npmjs.org, so a 429 from api.npmjs.org/downloads (a SEPARATE,
|
|
87
|
+
// aggressively rate-limited host that 429s ~every request) poisoned the
|
|
88
|
+
// registry brain and stalled the tarball/packument fetches that were
|
|
89
|
+
// themselves healthy — the measured ~20s/scan throughput wall.
|
|
90
|
+
try { signal429(hostForUrl(url)); } catch { /* limiter is best-effort */ }
|
|
91
|
+
// Best-effort callers (the weekly-downloads reputation signal, whose
|
|
92
|
+
// endpoint 429s on essentially every request) opt out of the retry storm:
|
|
93
|
+
// retrying 5× with ~2s sleeps just burns ~10s/scan to still return null.
|
|
94
|
+
if (opts.noRetryOn429) return null;
|
|
75
95
|
const retryAfter = parseInt(response.headers.get('retry-after'), 10);
|
|
76
96
|
const base = Math.min(retryAfter && retryAfter > 0 ? retryAfter * 1000 : 2000, 30000);
|
|
77
97
|
await new Promise(r => setTimeout(r, Math.round(base * (0.5 + Math.random() * 0.5))));
|
|
@@ -170,26 +190,45 @@ async function getPackageMetadata(packageName) {
|
|
|
170
190
|
}
|
|
171
191
|
const provenanceRegressed = !latestHasProvenance && anyPriorHadProvenance;
|
|
172
192
|
|
|
173
|
-
// 2. Weekly downloads + author
|
|
193
|
+
// 2. Weekly downloads + author package count (parallel).
|
|
194
|
+
// The author count comes from /-/v1/search?text=maintainer: which — unlike the
|
|
195
|
+
// CDN-served packument — is DYNAMIC, slow (~300-950ms) and the one per-scan call
|
|
196
|
+
// npm aggressively rate-limits. A TTL cache keyed on the maintainer collapses
|
|
197
|
+
// the search volume (maintainers repeat heavily: scopes / bots / monorepos)
|
|
198
|
+
// while keeping author_package_count byte-identical. MUADDIB_NPM_AUTHOR_SEARCH=0
|
|
199
|
+
// drops the call entirely — the count then stays absent, exactly as the
|
|
200
|
+
// pre-resolve fast path already leaves it.
|
|
174
201
|
const downloadsUrl = DOWNLOADS_URL + '/' + encodeURIComponent(packageName);
|
|
175
|
-
const authorUrl = maintainer
|
|
202
|
+
const authorUrl = (AUTHOR_SEARCH_ENABLED && maintainer)
|
|
176
203
|
? SEARCH_URL + '?text=maintainer:' + encodeURIComponent(maintainer) + '&size=1'
|
|
177
204
|
: null;
|
|
178
205
|
|
|
179
|
-
async function
|
|
180
|
-
if (!authorUrl) return
|
|
206
|
+
/**
 * Resolve how many packages the current `maintainer` has, via the registry
 * search URL held in `authorUrl`, with a bounded TTL cache in front of it.
 *
 * - No `authorUrl` → 0 without any network call.
 * - A cache entry younger than AUTHOR_CACHE_TTL_MS is returned as-is.
 * - Otherwise one search request is made while holding a registry slot.
 * - A falsy response is never cached; the expired entry's count is reused
 *   when there is one, else 0 is returned.
 *
 * @returns {Promise<number>} the author's package count (0 when unknown)
 */
async function getAuthorPackageCount() {
if (!authorUrl) return 0;
const hit = _authorCountCache.get(maintainer);
if (hit && (Date.now() - hit.at) < AUTHOR_CACHE_TTL_MS) return hit.count;
await acquireRegistrySlot();
let data;
try { data = await fetchWithRetry(authorUrl); }
finally { releaseRegistrySlot(); }
// Falsy response (the caller-side note says: 429-exhausted / error): reuse a
// stale entry if present and do NOT cache the miss, so a transient 0 cannot
// poison the "author has ≤1 package" signal.
if (!data) return hit ? hit.count : 0;
const count = data.total ?? 0;
// Remove this maintainer's expired entry first. Map.set() on an existing key
// keeps its old insertion position, so without this a refresh in a full cache
// would evict an unrelated entry and leave the refreshed one next in line.
_authorCountCache.delete(maintainer);
if (_authorCountCache.size >= AUTHOR_CACHE_MAX) {
_authorCountCache.delete(_authorCountCache.keys().next().value); // FIFO evict (bounded)
}
_authorCountCache.set(maintainer, { count, at: Date.now() });
return count;
}
|
|
185
225
|
|
|
186
|
-
const [downloadsData,
|
|
187
|
-
fetchWithRetry(downloadsUrl), // api.npmjs.org — no
|
|
188
|
-
|
|
226
|
+
const [downloadsData, authorPackageCount] = await Promise.all([
|
|
227
|
+
fetchWithRetry(downloadsUrl, { noRetryOn429: true }), // api.npmjs.org — rate-limited; best-effort single shot (no retry storm, correct-host backoff)
|
|
228
|
+
getAuthorPackageCount() // registry.npmjs.org search — cached + kill-switchable
|
|
189
229
|
]);
|
|
190
230
|
|
|
191
231
|
const weeklyDownloads = downloadsData?.downloads ?? 0;
|
|
192
|
-
const authorPackageCount = authorData?.total ?? 0;
|
|
193
232
|
const versionCount = meta.versions ? Object.keys(meta.versions).length : 0;
|
|
194
233
|
const description = (typeof latestMeta?.description === 'string' ? latestMeta.description
|
|
195
234
|
: (typeof meta.description === 'string' ? meta.description : ''));
|
package/src/shared/download.js
CHANGED
|
@@ -4,6 +4,7 @@ const path = require('path');
|
|
|
4
4
|
const { execFileSync } = require('child_process');
|
|
5
5
|
const AdmZip = require('adm-zip');
|
|
6
6
|
const { MAX_TARBALL_SIZE, DOWNLOAD_TIMEOUT } = require('./constants.js');
|
|
7
|
+
const { registryAuthHeaders } = require('./registry-auth.js');
|
|
7
8
|
|
|
8
9
|
// Allowed redirect domains for tarball downloads (SSRF protection)
|
|
9
10
|
const ALLOWED_DOWNLOAD_DOMAINS = [
|
|
@@ -159,7 +160,7 @@ function downloadToFile(url, destPath, timeoutMs = DOWNLOAD_TIMEOUT) {
|
|
|
159
160
|
if (redirectCount >= MAX_REDIRECTS) {
|
|
160
161
|
return reject(new Error(`Too many redirects (${MAX_REDIRECTS}) for ${url}`));
|
|
161
162
|
}
|
|
162
|
-
const req = https.get(requestUrl, { timeout: timeoutMs }, (res) => {
|
|
163
|
+
const req = https.get(requestUrl, { timeout: timeoutMs, headers: registryAuthHeaders(requestUrl) }, (res) => {
|
|
163
164
|
if (res.statusCode === 301 || res.statusCode === 302) {
|
|
164
165
|
res.resume();
|
|
165
166
|
const location = res.headers.location;
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* npm registry authentication (2026-06-13).
|
|
5
|
+
*
|
|
6
|
+
* A supply-chain scanner fetches thousands of brand-new, never-CDN-cached
|
|
7
|
+
* packages; anonymous registry.npmjs.org traffic gets aggressively 429-throttled
|
|
8
|
+
* per-IP (observed: ~500/h of 429s at <1 req/s, scans stalling 20-46s waiting on
|
|
9
|
+
* metadata tokens). An authenticated token raises the per-account limit and
|
|
10
|
+
* de-anonymizes us.
|
|
11
|
+
*
|
|
12
|
+
* Token resolution (first hit wins), memoized for the process lifetime:
|
|
13
|
+
* 1. env MUADDIB_NPM_TOKEN (canonical — set via systemd EnvironmentFile / drop-in)
|
|
14
|
+
* 2. env NPM_TOKEN (common fallback)
|
|
15
|
+
*   3. .npmrc  //registry.npmjs.org/:_authToken=...  (npm-standard; $MUADDIB_NPMRC if set, then cwd, $HOME, /home/muaddib)
|
|
16
|
+
*
|
|
17
|
+
* Auth is applied ONLY to registry.npmjs.org requests — other hosts (pypi.org,
|
|
18
|
+
* api.npmjs.org, replicate.npmjs.com) get NO header, so the token can never leak
|
|
19
|
+
* to a third-party host. With no token configured the header set is empty and
|
|
20
|
+
* behaviour is identical to the previous anonymous path.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
const fs = require('fs');
|
|
24
|
+
const path = require('path');
|
|
25
|
+
|
|
26
|
+
const AUTH_HOSTS = new Set(['registry.npmjs.org']);
|
|
27
|
+
|
|
28
|
+
let _resolved = false;
|
|
29
|
+
let _token = null;
|
|
30
|
+
let _source = null;
|
|
31
|
+
|
|
32
|
+
/**
 * Look for an npm registry auth token in the candidate .npmrc files.
 *
 * Files are tried in this order: $MUADDIB_NPMRC (when set), <cwd>/.npmrc,
 * $HOME/.npmrc (when HOME is set) and /home/muaddib/.npmrc. Unreadable files
 * are skipped. In each file the first
 * `//registry.npmjs.org/:_authToken=<value>` line is used, with one leading
 * and one trailing quote character stripped.
 *
 * `${VAR}` references inside the value are expanded from process.env (the
 * npmrc convention for keeping secrets out of the file). If a referenced
 * variable is unset or empty, or the value ends up empty, the entry is skipped
 * and the next file is tried — the literal placeholder must never be sent to
 * the registry as a bearer token.
 *
 * @returns {{token: string, source: string}|null} the token and where it was
 *   found (`npmrc:<path>`), or null when no usable entry exists
 */
function _fromNpmrc() {
  const candidates = [
    process.env.MUADDIB_NPMRC,
    path.join(process.cwd(), '.npmrc'),
    process.env.HOME ? path.join(process.env.HOME, '.npmrc') : null,
    '/home/muaddib/.npmrc',
  ].filter(Boolean);
  // npm-standard line: //registry.npmjs.org/:_authToken=<token>
  const authLine = /^\s*\/\/registry\.npmjs\.org\/:_authToken\s*=\s*(.+?)\s*$/m;
  for (const file of candidates) {
    let text;
    try { text = fs.readFileSync(file, 'utf8'); } catch { continue; }
    const match = text.match(authLine);
    if (!match) continue;
    const raw = match[1].replace(/^["']|["']$/g, '');
    let unresolved = false;
    const token = raw.replace(/\$\{([^}]*)\}/g, (_whole, name) => {
      const value = process.env[name];
      if (!value) { unresolved = true; return ''; }
      return value;
    }).trim();
    if (unresolved || token === '') continue;
    return { token, source: `npmrc:${file}` };
  }
  return null;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Resolve the npm registry token once and memoize the outcome (including
 * "no token") in the module-level `_resolved` / `_token` / `_source` state.
 *
 * Resolution order, first non-blank value wins:
 *   1. env MUADDIB_NPM_TOKEN
 *   2. env NPM_TOKEN
 *   3. the .npmrc lookup performed by `_fromNpmrc()`
 *
 * Each environment variable is trimmed on its own, so a blank or
 * whitespace-only MUADDIB_NPM_TOKEN no longer hides a valid NPM_TOKEN, and
 * `_source` always names the variable the token actually came from.
 *
 * @returns {string|null} the token, or null when none is configured
 */
function getNpmToken() {
  if (_resolved) return _token;
  _resolved = true;
  for (const name of ['MUADDIB_NPM_TOKEN', 'NPM_TOKEN']) {
    const value = (process.env[name] || '').trim();
    if (value) {
      _token = value;
      _source = `env:${name}`;
      return _token;
    }
  }
  const rc = _fromNpmrc();
  if (rc) { _token = rc.token; _source = rc.source; }
  return _token;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Summarize registry-auth configuration for the one-time boot log.
 * NEVER returns the token itself.
 *
 * `last4` carries the token's last four characters only when the token is
 * longer than 8 characters; for shorter tokens a fixed '****' mask is returned,
 * because `slice(-4)` on such a token would expose all or half of the secret.
 *
 * @returns {{enabled: boolean, source: string|null, last4: string|null}}
 *   `source` and `last4` are null when no token is configured
 */
function npmAuthStatus() {
  const token = getNpmToken();
  if (!token) return { enabled: false, source: null, last4: null };
  const text = String(token);
  const last4 = text.length > 8 ? text.slice(-4) : '****';
  return { enabled: true, source: _source, last4 };
}
|
|
68
|
+
|
|
69
|
+
let _logged = false;
|
|
70
|
+
/**
 * Write the registry-auth status to the log at most once per process.
 *
 * The `_logged` flag is raised before anything is written, so later calls are
 * no-ops. An enabled token is reported through `logger.log` with its source and
 * the `last4` value from npmAuthStatus(); a missing token is reported through
 * `logger.warn`.
 *
 * @param {{log: Function, warn: Function}} [logger=console] - log sink
 * @returns {void}
 */
function logAuthStatusOnce(logger = console) {
  if (!_logged) {
    _logged = true;
    const { enabled, source, last4 } = npmAuthStatus();
    if (!enabled) {
      logger.warn('[REGISTRY-AUTH] npm registry auth DISABLED — anonymous registry.npmjs.org (set MUADDIB_NPM_TOKEN); expect heavier 429 throttling.');
      return;
    }
    logger.log(`[REGISTRY-AUTH] npm registry auth ENABLED (source=${source}, token …${last4})`);
  }
}
|
|
80
|
+
|
|
81
|
+
/**
 * Headers to merge into a registry request.
 *
 * Returns `{ Authorization: 'Bearer <token>' }` only when all of these hold:
 *   - `url` parses as a URL,
 *   - its scheme is `https:` (the token is never attached to a cleartext
 *     request, where it would be readable on the wire),
 *   - its hostname is in AUTH_HOSTS,
 *   - a token is configured.
 * In every other case an empty object is returned, i.e. the request stays
 * anonymous.
 *
 * @param {string|URL} url - the request target
 * @returns {{Authorization?: string}} headers to spread into the request options
 */
function registryAuthHeaders(url) {
  // First call doubles as the boot confirmation in the journal.
  logAuthStatusOnce();
  let target;
  try { target = new URL(url); } catch { return {}; }
  if (target.protocol !== 'https:') return {};
  if (!AUTH_HOSTS.has(target.hostname)) return {};
  const token = getNpmToken();
  return token ? { Authorization: `Bearer ${token}` } : {};
}
|
|
94
|
+
|
|
95
|
+
/**
 * Test seam: clear the memoized token resolution and the "already logged" flag
 * so a test can change MUADDIB_NPM_TOKEN and have it re-read on the next call.
 *
 * @returns {void}
 */
function _resetForTests() {
  _logged = false;
  _source = null;
  _token = null;
  _resolved = false;
}
|
|
97
|
+
|
|
98
|
+
module.exports = { registryAuthHeaders, getNpmToken, npmAuthStatus, logAuthStatusOnce, _resetForTests };
|