muaddib-scanner 2.11.110 → 2.11.112
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
package/src/monitor/ingestion.js
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
const https = require('https');
|
|
11
11
|
const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
|
|
12
|
+
const { registryAuthHeaders } = require('../shared/registry-auth.js');
|
|
12
13
|
const { loadCachedIOCs } = require('../ioc/updater.js');
|
|
13
14
|
const { enqueueScan } = require('./scan-queue.js');
|
|
14
15
|
const {
|
|
@@ -99,7 +100,7 @@ function httpsGet(url, timeoutMs = 30_000, deadlineMs = Math.max(timeoutMs * 2,
|
|
|
99
100
|
clearTimeout(deadline);
|
|
100
101
|
if (err) reject(err); else resolve(value);
|
|
101
102
|
};
|
|
102
|
-
req = _deps.https.get(url, { timeout: timeoutMs }, (res) => {
|
|
103
|
+
req = _deps.https.get(url, { timeout: timeoutMs, headers: registryAuthHeaders(url) }, (res) => {
|
|
103
104
|
if (res.statusCode === 301 || res.statusCode === 302) {
|
|
104
105
|
res.resume();
|
|
105
106
|
const location = res.headers.location;
|
|
@@ -564,6 +565,34 @@ async function getNpmLatestTarball(packageName) {
|
|
|
564
565
|
// holds ~10KB of state; 1000 of them is a needless heap spike).
|
|
565
566
|
const PRE_RESOLVE_CHUNK_SIZE = 50;
|
|
566
567
|
|
|
568
|
+
// --- Load-aware pre-resolve shedding (2026-06-13) ---
|
|
569
|
+
// Under catch-up (deep scan queue) or active npm throttle (elevated brain
|
|
570
|
+
// level), prefetching up to CHANGES_LIMIT (1000) packuments per poll cycle
|
|
571
|
+
// through the SHARED registry rate budget starves the per-scan metadata fetches
|
|
572
|
+
// the workers actually need — and most prefetched items get spilled/shed before
|
|
573
|
+
// any worker scans them, so the fetch is wasted budget that also keeps npm
|
|
574
|
+
// 429-ing. When shedding, the batch skips the prefetch and enqueues items with
|
|
575
|
+
// tarballUrl=null; resolveTarballAndScan() lazily resolves ONLY the items a
|
|
576
|
+
// worker actually scans (the existing zero-scan-loss fallback path).
|
|
577
|
+
// Parse a non-negative integer knob from an env string. Unset or non-numeric
// input yields `fallback`; negative values clamp to 0. Unlike
// `parseInt(raw, 10) || fallback`, an explicit "0" is honoured instead of being
// silently replaced by the fallback.
function _nonNegativeIntFromEnv(raw, fallback) {
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) ? fallback : Math.max(0, parsed);
}

// Queue depth above which the pre-resolve prefetch is shed (default 2000).
// 0 is a valid setting: shed whenever the scan queue is non-empty.
const PRE_RESOLVE_SHED_QUEUE = _nonNegativeIntFromEnv(process.env.MUADDIB_PRERESOLVE_SHED_QUEUE, 2000);
// Brain level at or above which the prefetch is shed (default 3, floor 1).
const PRE_RESOLVE_SHED_LEVEL = Math.max(1, parseInt(process.env.MUADDIB_PRERESOLVE_SHED_LEVEL, 10) || 3);
|
|
579
|
+
|
|
580
|
+
/**
 * Decide whether the current pre-resolve chunk should skip its prefetch.
 *
 * @param {Array|null|undefined} scanQueue - Scan queue whose `length` is compared
 *   against PRE_RESOLVE_SHED_QUEUE; may be absent.
 * @returns {boolean} true when the queue is deeper than PRE_RESOLVE_SHED_QUEUE or
 *   the limiter's brain level is at least PRE_RESOLVE_SHED_LEVEL; false otherwise,
 *   and always false when MUADDIB_PRERESOLVE_NO_SHED is "1".
 */
function preResolveShouldShed(scanQueue) {
  // The kill-switch is read from the environment on every call.
  const killSwitchOn = process.env.MUADDIB_PRERESOLVE_NO_SHED === '1';
  if (killSwitchOn) {
    return false;
  }

  const queueTooDeep = Boolean(scanQueue) && scanQueue.length > PRE_RESOLVE_SHED_QUEUE;
  if (queueTooDeep) {
    return true;
  }

  let throttled = false;
  try {
    // Required at call time rather than captured at module load, so the accessor
    // can be stubbed in tests and load-order cycles are avoided.
    const { getBrainState: readBrain, DEFAULT_HOST: host } = require('../shared/http-limiter.js');
    const state = readBrain(host);
    throttled = Boolean(state) && (state.level || 0) >= PRE_RESOLVE_SHED_LEVEL;
  } catch {
    // Best-effort check: any failure here means "do not shed", never a thrown error.
    throttled = false;
  }
  return throttled;
}
|
|
595
|
+
|
|
567
596
|
// If a scanQueue is provided, items are pushed onto it as soon as their chunk
|
|
568
597
|
// finishes resolution — so a crash mid-batch only loses the current chunk's
|
|
569
598
|
// in-flight work, not all the chunks that already completed. When scanQueue
|
|
@@ -575,8 +604,18 @@ async function preResolveNpmBatch(items, stats, scanQueue) {
|
|
|
575
604
|
let resolved = 0;
|
|
576
605
|
let alreadyResolved = 0;
|
|
577
606
|
let failed = 0;
|
|
607
|
+
let shed = 0;
|
|
578
608
|
for (let i = 0; i < items.length; i += PRE_RESOLVE_CHUNK_SIZE) {
|
|
579
609
|
const chunk = items.slice(i, i + PRE_RESOLVE_CHUNK_SIZE);
|
|
610
|
+
if (preResolveShouldShed(scanQueue)) {
|
|
611
|
+
// Load-aware shed: skip the packument prefetch; enqueue as-is so workers
|
|
612
|
+
// lazy-resolve ONLY what they actually scan (resolveTarballAndScan handles
|
|
613
|
+
// tarballUrl=null — zero scan loss). Re-checked per chunk so prefetch
|
|
614
|
+
// resumes mid-batch the moment the queue drains below the threshold.
|
|
615
|
+
shed += chunk.length;
|
|
616
|
+
if (scanQueue) { for (const item of chunk) enqueueScan(scanQueue, item, stats); }
|
|
617
|
+
continue;
|
|
618
|
+
}
|
|
580
619
|
await Promise.all(chunk.map(async (item) => {
|
|
581
620
|
if (item.tarballUrl) { alreadyResolved++; return; }
|
|
582
621
|
try {
|
|
@@ -626,10 +665,12 @@ async function preResolveNpmBatch(items, stats, scanQueue) {
|
|
|
626
665
|
if (stats) {
|
|
627
666
|
stats.npmPreResolved = (stats.npmPreResolved || 0) + resolved;
|
|
628
667
|
stats.npmPreResolveFailed = (stats.npmPreResolveFailed || 0) + failed;
|
|
668
|
+
if (shed) stats.npmPreResolveShed = (stats.npmPreResolveShed || 0) + shed;
|
|
629
669
|
}
|
|
630
670
|
if (items.length >= 5) {
|
|
631
671
|
const elapsed = Date.now() - start;
|
|
632
|
-
|
|
672
|
+
const shedNote = shed ? `, ${shed} shed (load-aware)` : '';
|
|
673
|
+
console.log(`[MONITOR] PRE-RESOLVE npm: ${resolved}/${items.length} in ${elapsed}ms (${failed} → lazy fallback${alreadyResolved ? `, ${alreadyResolved} already resolved` : ''}${shedNote})`);
|
|
633
674
|
}
|
|
634
675
|
}
|
|
635
676
|
|
|
@@ -639,8 +680,17 @@ async function preResolvePyPIBatch(items, stats, scanQueue) {
|
|
|
639
680
|
let resolved = 0;
|
|
640
681
|
let alreadyResolved = 0;
|
|
641
682
|
let failed = 0;
|
|
683
|
+
let shed = 0;
|
|
642
684
|
for (let i = 0; i < items.length; i += PRE_RESOLVE_CHUNK_SIZE) {
|
|
643
685
|
const chunk = items.slice(i, i + PRE_RESOLVE_CHUNK_SIZE);
|
|
686
|
+
if (preResolveShouldShed(scanQueue)) {
|
|
687
|
+
// Load-aware shed (shared gate): queue-depth dominates here; the prefetched
|
|
688
|
+
// PyPI metadata would mostly be for items shed before any worker scans them.
|
|
689
|
+
// Enqueue as-is — resolveTarballAndScan lazily resolves PyPI URLs too.
|
|
690
|
+
shed += chunk.length;
|
|
691
|
+
if (scanQueue) { for (const item of chunk) enqueueScan(scanQueue, item, stats); }
|
|
692
|
+
continue;
|
|
693
|
+
}
|
|
644
694
|
await Promise.all(chunk.map(async (item) => {
|
|
645
695
|
if (item.tarballUrl) { alreadyResolved++; return; }
|
|
646
696
|
try {
|
|
@@ -679,10 +729,12 @@ async function preResolvePyPIBatch(items, stats, scanQueue) {
|
|
|
679
729
|
if (stats) {
|
|
680
730
|
stats.pypiPreResolved = (stats.pypiPreResolved || 0) + resolved;
|
|
681
731
|
stats.pypiPreResolveFailed = (stats.pypiPreResolveFailed || 0) + failed;
|
|
732
|
+
if (shed) stats.pypiPreResolveShed = (stats.pypiPreResolveShed || 0) + shed;
|
|
682
733
|
}
|
|
683
734
|
if (items.length >= 5) {
|
|
684
735
|
const elapsed = Date.now() - start;
|
|
685
|
-
|
|
736
|
+
const shedNote = shed ? `, ${shed} shed (load-aware)` : '';
|
|
737
|
+
console.log(`[MONITOR] PRE-RESOLVE pypi: ${resolved}/${items.length} in ${elapsed}ms (${failed} → lazy fallback${alreadyResolved ? `, ${alreadyResolved} already resolved` : ''}${shedNote})`);
|
|
686
738
|
}
|
|
687
739
|
}
|
|
688
740
|
|
|
@@ -1493,6 +1545,7 @@ module.exports = {
|
|
|
1493
1545
|
getNpmLatestTarball,
|
|
1494
1546
|
preResolveNpmBatch,
|
|
1495
1547
|
preResolvePyPIBatch,
|
|
1548
|
+
preResolveShouldShed,
|
|
1496
1549
|
|
|
1497
1550
|
// RSS parsing
|
|
1498
1551
|
parseNpmRss,
|
package/src/monitor/webhook.js
CHANGED
|
@@ -1235,9 +1235,18 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
|
1235
1235
|
const pypiPub = stats.pypiChangelogPackages || 0;
|
|
1236
1236
|
const published = npmPub + pypiPub;
|
|
1237
1237
|
const catchupSkipped = (stats.npmCatchupSkippedSeqs || 0) + (stats.pypiCatchupSkippedEvents || 0);
|
|
1238
|
+
// Clarify the Ops headline so it isn't read as an overnight drop: it counts
|
|
1239
|
+
// COMPLETED scans in the exact ledger window [last report → now], version/
|
|
1240
|
+
// dedup-collapsed — intentionally lower than the in-memory counter (stats.scanned),
|
|
1241
|
+
// which also tallies retries, burst extras and size-cap rejections
|
|
1242
|
+
// (cf. queue.js uniqueScanAttempts). Surface the raw counter when it diverges.
|
|
1243
|
+
const opsQualifier = headline ? ' (completed, deduped, 24h)' : '';
|
|
1244
|
+
const rawCounter = (headline && typeof stats.scanned === 'number' && stats.scanned > hScanned)
|
|
1245
|
+
? ` · counter ${stats.scanned} (incl. retries/burst)`
|
|
1246
|
+
: '';
|
|
1238
1247
|
const opsSuffix = catchupSkipped > 0
|
|
1239
|
-
? `\nOps: ${hScanned} | Catch-up skip: ${catchupSkipped}`
|
|
1240
|
-
: `\nOps: ${hScanned}`;
|
|
1248
|
+
? `\nOps: ${hScanned}${opsQualifier}${rawCounter} | Catch-up skip: ${catchupSkipped}`
|
|
1249
|
+
: `\nOps: ${hScanned}${opsQualifier}${rawCounter}`;
|
|
1241
1250
|
let coverageText;
|
|
1242
1251
|
if (ledger && ledger.distinctPackages > 0 && ledger.distinctCoverage != null) {
|
|
1243
1252
|
const pct = (ledger.distinctCoverage * 100).toFixed(0);
|
|
@@ -1344,9 +1353,10 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
|
1344
1353
|
{ name: 'System', value: healthText, inline: false }
|
|
1345
1354
|
],
|
|
1346
1355
|
footer: {
|
|
1347
|
-
// Headline-source annotation: 'ledger' = window-exact [last report → now]
|
|
1348
|
-
// 'counters' = in-memory fallback (ledger
|
|
1349
|
-
|
|
1356
|
+
// Headline-source annotation: 'ledger' = window-exact [last report → now]
|
|
1357
|
+
// (completed/deduped scans), 'counters' = in-memory fallback (ledger
|
|
1358
|
+
// unavailable — pre-upgrade behavior).
|
|
1359
|
+
text: `MUAD'DIB - Daily summary | headline: ${headline ? 'ledger — completed/deduped, exact 24h window' : 'counters (in-memory fallback)'} | ${readableTime}`
|
|
1350
1360
|
},
|
|
1351
1361
|
timestamp: now.toISOString()
|
|
1352
1362
|
}]
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
|
|
2
2
|
const { debugLog } = require('../utils.js');
|
|
3
3
|
const { acquireRegistrySlot, releaseRegistrySlot, awaitRateToken, signal429, hostForUrl } = require('../shared/http-limiter.js');
|
|
4
|
+
const { registryAuthHeaders } = require('../shared/registry-auth.js');
|
|
4
5
|
const { computeAdvancedRegistrySignals } = require('../integrations/registry-signals.js');
|
|
5
6
|
|
|
6
7
|
const REGISTRY_URL = 'https://registry.npmjs.org';
|
|
@@ -12,6 +13,16 @@ const SEARCH_URL = 'https://registry.npmjs.org/-/v1/search';
|
|
|
12
13
|
const REQUEST_TIMEOUT = Math.max(1000, parseInt(process.env.MUADDIB_REGISTRY_TIMEOUT_MS, 10) || 10000); // 10s default
|
|
13
14
|
const MAX_RETRIES = Math.max(1, parseInt(process.env.MUADDIB_REGISTRY_RETRIES, 10) || 5);
|
|
14
15
|
|
|
16
|
+
// Per-maintainer cache for the /-/v1/search author-count lookup — the only
|
|
17
|
+
// DYNAMIC (non-CDN), rate-limited registry endpoint we hit per scan. Without it,
|
|
18
|
+
// firing the search on every scan generated the bulk of the monitor's 429s and
|
|
19
|
+
// the shared brain then throttled the (healthy, CDN-served) packument reads too.
|
|
20
|
+
// See getPackageMetadata. Env-tunable; defaults preserve the signal.
|
|
21
|
+
// Parse a non-negative integer knob from an env string. Unset or non-numeric
// input yields `fallback`; negative values clamp to 0. Unlike
// `parseInt(raw, 10) || fallback`, an explicit "0" is honoured instead of being
// silently replaced by the fallback.
function _nonNegativeIntFromEnv(raw, fallback) {
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) ? fallback : Math.max(0, parsed);
}

const _authorCountCache = new Map(); // maintainer → { count, at }
// Freshness window for cached counts (default 1h). 0 is valid: no entry is ever
// considered fresh, so every lookup goes to the network.
const AUTHOR_CACHE_TTL_MS = _nonNegativeIntFromEnv(process.env.MUADDIB_AUTHOR_CACHE_TTL_MS, 3_600_000);
// Upper bound on cached maintainers (default 5000, floor 100).
const AUTHOR_CACHE_MAX = Math.max(100, parseInt(process.env.MUADDIB_AUTHOR_CACHE_MAX, 10) || 5000);
const AUTHOR_SEARCH_ENABLED = process.env.MUADDIB_NPM_AUTHOR_SEARCH !== '0'; // kill-switch
|
|
25
|
+
|
|
15
26
|
/**
|
|
16
27
|
* Create a timeout signal, with fallback for older Node versions.
|
|
17
28
|
* Returns { signal, cleanup } — call cleanup() after fetch to prevent timer leaks.
|
|
@@ -25,7 +36,7 @@ function createTimeoutSignal(ms) {
|
|
|
25
36
|
return { signal: controller.signal, cleanup: () => clearTimeout(timer) };
|
|
26
37
|
}
|
|
27
38
|
|
|
28
|
-
async function fetchWithRetry(url) {
|
|
39
|
+
async function fetchWithRetry(url, opts = {}) {
|
|
29
40
|
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
|
|
30
41
|
// The caller's acquireRegistrySlot paid the rate token for the FIRST
|
|
31
42
|
// attempt only. Every retry is a new network request and must pay its own
|
|
@@ -43,7 +54,7 @@ async function fetchWithRetry(url) {
|
|
|
43
54
|
let response;
|
|
44
55
|
const { signal, cleanup } = createTimeoutSignal(REQUEST_TIMEOUT);
|
|
45
56
|
try {
|
|
46
|
-
response = await fetch(url, { signal });
|
|
57
|
+
response = await fetch(url, { signal, headers: registryAuthHeaders(url) });
|
|
47
58
|
} catch {
|
|
48
59
|
cleanup();
|
|
49
60
|
// REG-001: Retry on timeout/abort instead of returning null immediately.
|
|
@@ -71,7 +82,16 @@ async function fetchWithRetry(url) {
|
|
|
71
82
|
// Retry-After (capped at 30s) with jitter so retries don't re-synchronize.
|
|
72
83
|
if (response.status === 429) {
|
|
73
84
|
try { await response.text(); } catch (e) { debugLog('response drain failed:', e.message); }
|
|
74
|
-
|
|
85
|
+
// Back off the CORRECT host's bucket. This previously defaulted to
|
|
86
|
+
// registry.npmjs.org, so a 429 from api.npmjs.org/downloads (a SEPARATE,
|
|
87
|
+
// aggressively rate-limited host that 429s ~every request) poisoned the
|
|
88
|
+
// registry brain and stalled the tarball/packument fetches that were
|
|
89
|
+
// themselves healthy — the measured ~20s/scan throughput wall.
|
|
90
|
+
try { signal429(hostForUrl(url)); } catch { /* limiter is best-effort */ }
|
|
91
|
+
// Best-effort callers (the weekly-downloads reputation signal, whose
|
|
92
|
+
// endpoint 429s on essentially every request) opt out of the retry storm:
|
|
93
|
+
// retrying 5× with ~2s sleeps just burns ~10s/scan to still return null.
|
|
94
|
+
if (opts.noRetryOn429) return null;
|
|
75
95
|
const retryAfter = parseInt(response.headers.get('retry-after'), 10);
|
|
76
96
|
const base = Math.min(retryAfter && retryAfter > 0 ? retryAfter * 1000 : 2000, 30000);
|
|
77
97
|
await new Promise(r => setTimeout(r, Math.round(base * (0.5 + Math.random() * 0.5))));
|
|
@@ -170,26 +190,45 @@ async function getPackageMetadata(packageName) {
|
|
|
170
190
|
}
|
|
171
191
|
const provenanceRegressed = !latestHasProvenance && anyPriorHadProvenance;
|
|
172
192
|
|
|
173
|
-
// 2. Weekly downloads + author
|
|
193
|
+
// 2. Weekly downloads + author package count (parallel).
|
|
194
|
+
// The author count comes from /-/v1/search?text=maintainer: which — unlike the
|
|
195
|
+
// CDN-served packument — is DYNAMIC, slow (~300-950ms) and the one per-scan call
|
|
196
|
+
// npm aggressively rate-limits. A TTL cache keyed on the maintainer collapses
|
|
197
|
+
// the search volume (maintainers repeat heavily: scopes / bots / monorepos)
|
|
198
|
+
// while keeping author_package_count byte-identical. MUADDIB_NPM_AUTHOR_SEARCH=0
|
|
199
|
+
// drops the call entirely — the count then stays absent, exactly as the
|
|
200
|
+
// pre-resolve fast path already leaves it.
|
|
174
201
|
const downloadsUrl = DOWNLOADS_URL + '/' + encodeURIComponent(packageName);
|
|
175
|
-
const authorUrl = maintainer
|
|
202
|
+
const authorUrl = (AUTHOR_SEARCH_ENABLED && maintainer)
|
|
176
203
|
? SEARCH_URL + '?text=maintainer:' + encodeURIComponent(maintainer) + '&size=1'
|
|
177
204
|
: null;
|
|
178
205
|
|
|
179
|
-
async function
|
|
180
|
-
if (!authorUrl) return
|
|
206
|
+
/**
 * Resolve the maintainer's package count, using the per-maintainer TTL cache.
 *
 * @returns {Promise<number>} 0 when no author URL is set; the cached count while
 *   the entry is younger than AUTHOR_CACHE_TTL_MS; otherwise the search
 *   response's `total` (0 if absent). When the fetch yields nothing, the stale
 *   cached count is reused if one exists, else 0.
 */
async function getAuthorPackageCount() {
  if (!authorUrl) return 0;
  const hit = _authorCountCache.get(maintainer);
  if (hit && (Date.now() - hit.at) < AUTHOR_CACHE_TTL_MS) return hit.count;
  await acquireRegistrySlot();
  let data;
  try { data = await fetchWithRetry(authorUrl); }
  finally { releaseRegistrySlot(); }
  // 429-exhausted / error → fetchWithRetry returns null: reuse a stale entry if
  // present and do NOT cache the miss (a transient 0 would poison the typosquat
  // "author has ≤1 package" signal).
  if (!data) return hit ? hit.count : 0;
  const count = data.total ?? 0;
  // Drop any existing entry first: a refresh then neither counts against the
  // capacity check (which would evict an unrelated maintainer) nor keeps its old
  // insertion position (Map iterates in insertion order, so the refreshed entry
  // would otherwise be the next eviction victim despite being the freshest).
  _authorCountCache.delete(maintainer);
  if (_authorCountCache.size >= AUTHOR_CACHE_MAX) {
    _authorCountCache.delete(_authorCountCache.keys().next().value); // FIFO evict (bounded)
  }
  _authorCountCache.set(maintainer, { count, at: Date.now() });
  return count;
}
|
|
185
225
|
|
|
186
|
-
const [downloadsData,
|
|
187
|
-
fetchWithRetry(downloadsUrl), // api.npmjs.org — no
|
|
188
|
-
|
|
226
|
+
const [downloadsData, authorPackageCount] = await Promise.all([
|
|
227
|
+
fetchWithRetry(downloadsUrl, { noRetryOn429: true }), // api.npmjs.org — rate-limited; best-effort single shot (no retry storm, correct-host backoff)
|
|
228
|
+
getAuthorPackageCount() // registry.npmjs.org search — cached + kill-switchable
|
|
189
229
|
]);
|
|
190
230
|
|
|
191
231
|
const weeklyDownloads = downloadsData?.downloads ?? 0;
|
|
192
|
-
const authorPackageCount = authorData?.total ?? 0;
|
|
193
232
|
const versionCount = meta.versions ? Object.keys(meta.versions).length : 0;
|
|
194
233
|
const description = (typeof latestMeta?.description === 'string' ? latestMeta.description
|
|
195
234
|
: (typeof meta.description === 'string' ? meta.description : ''));
|
package/src/shared/download.js
CHANGED
|
@@ -4,6 +4,7 @@ const path = require('path');
|
|
|
4
4
|
const { execFileSync } = require('child_process');
|
|
5
5
|
const AdmZip = require('adm-zip');
|
|
6
6
|
const { MAX_TARBALL_SIZE, DOWNLOAD_TIMEOUT } = require('./constants.js');
|
|
7
|
+
const { registryAuthHeaders } = require('./registry-auth.js');
|
|
7
8
|
|
|
8
9
|
// Allowed redirect domains for tarball downloads (SSRF protection)
|
|
9
10
|
const ALLOWED_DOWNLOAD_DOMAINS = [
|
|
@@ -159,7 +160,7 @@ function downloadToFile(url, destPath, timeoutMs = DOWNLOAD_TIMEOUT) {
|
|
|
159
160
|
if (redirectCount >= MAX_REDIRECTS) {
|
|
160
161
|
return reject(new Error(`Too many redirects (${MAX_REDIRECTS}) for ${url}`));
|
|
161
162
|
}
|
|
162
|
-
const req = https.get(requestUrl, { timeout: timeoutMs }, (res) => {
|
|
163
|
+
const req = https.get(requestUrl, { timeout: timeoutMs, headers: registryAuthHeaders(requestUrl) }, (res) => {
|
|
163
164
|
if (res.statusCode === 301 || res.statusCode === 302) {
|
|
164
165
|
res.resume();
|
|
165
166
|
const location = res.headers.location;
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* npm registry authentication (2026-06-13).
|
|
5
|
+
*
|
|
6
|
+
* A supply-chain scanner fetches thousands of brand-new, never-CDN-cached
|
|
7
|
+
* packages; anonymous registry.npmjs.org traffic gets aggressively 429-throttled
|
|
8
|
+
* per-IP (observed: ~500/h of 429s at <1 req/s, scans stalling 20-46s waiting on
|
|
9
|
+
* metadata tokens). An authenticated token raises the per-account limit and
|
|
10
|
+
* de-anonymizes us.
|
|
11
|
+
*
|
|
12
|
+
* Token resolution (first hit wins), memoized for the process lifetime:
|
|
13
|
+
* 1. env MUADDIB_NPM_TOKEN (canonical — set via systemd EnvironmentFile / drop-in)
|
|
14
|
+
* 2. env NPM_TOKEN (common fallback)
|
|
15
|
+
* 3. .npmrc //registry.npmjs.org/:_authToken=... (npm-standard; cwd, $HOME, /home/muaddib)
|
|
16
|
+
*
|
|
17
|
+
* Auth is applied ONLY to registry.npmjs.org requests — other hosts (pypi.org,
|
|
18
|
+
* api.npmjs.org, replicate.npmjs.com) get NO header, so the token can never leak
|
|
19
|
+
* to a third-party host. With no token configured the header set is empty and
|
|
20
|
+
* behaviour is identical to the previous anonymous path.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
const fs = require('fs');
|
|
24
|
+
const path = require('path');
|
|
25
|
+
|
|
26
|
+
// Hostnames allowed to receive the npm bearer token; registryAuthHeaders()
// returns no header for any host not listed here.
const AUTH_HOSTS = new Set();
AUTH_HOSTS.add('registry.npmjs.org');

// Memoized token resolution, filled in by the first getNpmToken() call.
let _resolved = false; // true once resolution has been attempted
let _token = null; // resolved token, or null when none was found
let _source = null; // label describing where the token came from
|
|
31
|
+
|
|
32
|
+
/**
 * Look for a registry.npmjs.org auth token in the candidate .npmrc files.
 *
 * Candidates, in order: $MUADDIB_NPMRC, <cwd>/.npmrc, $HOME/.npmrc,
 * /home/muaddib/.npmrc. Unreadable files are skipped.
 *
 * `${VAR}` placeholders in the value are expanded from the environment (the
 * usual CI form is `_authToken=${NPM_TOKEN}`). An entry whose placeholder is
 * unset, or whose value ends up empty, is skipped and the search continues —
 * otherwise the literal text `${NPM_TOKEN}` would be used as the token.
 *
 * @returns {{token: string, source: string}|null} first usable token and the
 *   file it came from, or null when none is found.
 */
function _fromNpmrc() {
  const files = [
    process.env.MUADDIB_NPMRC,
    path.join(process.cwd(), '.npmrc'),
    process.env.HOME ? path.join(process.env.HOME, '.npmrc') : null,
    '/home/muaddib/.npmrc',
  ].filter(Boolean);
  for (const f of files) {
    let txt;
    try { txt = fs.readFileSync(f, 'utf8'); } catch { continue; }
    // npm-standard line: //registry.npmjs.org/:_authToken=<token>
    const m = txt.match(/^\s*\/\/registry\.npmjs\.org\/:_authToken\s*=\s*(.+?)\s*$/m);
    if (!m) continue;
    const raw = m[1].replace(/^["']|["']$/g, '');
    let unresolved = false;
    const token = raw.replace(/\$\{([^${}]+)\}/g, (placeholder, name) => {
      const value = process.env[name];
      if (value === undefined || value === '') {
        unresolved = true;
        return '';
      }
      return value;
    }).trim();
    // Unusable entry: keep looking rather than returning a bogus credential.
    if (unresolved || !token) continue;
    return { token, source: `npmrc:${f}` };
  }
  return null;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Resolve the npm token once and memoize the outcome (including "none found").
 *
 * Order: env MUADDIB_NPM_TOKEN, env NPM_TOKEN, then _fromNpmrc(). Each env value
 * is trimmed before it is judged, so a blank or whitespace-only
 * MUADDIB_NPM_TOKEN no longer masks a usable NPM_TOKEN.
 *
 * @returns {string|null} the token, or null when none is configured.
 */
function getNpmToken() {
  if (_resolved) return _token;
  _resolved = true;
  const envCandidates = [
    ['env:MUADDIB_NPM_TOKEN', process.env.MUADDIB_NPM_TOKEN],
    ['env:NPM_TOKEN', process.env.NPM_TOKEN],
  ];
  for (const [source, raw] of envCandidates) {
    const value = (raw || '').trim();
    if (value) {
      _token = value;
      _source = source;
      return _token;
    }
  }
  const rc = _fromNpmrc();
  if (rc) { _token = rc.token; _source = rc.source; }
  return _token;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Summarize the auth configuration for logging.
 *
 * @returns {{enabled: boolean, source: (string|null), last4: (string|null)}}
 *   `last4` carries only the final four characters of the token; the token
 *   itself is not part of the result.
 */
function npmAuthStatus() {
  const token = getNpmToken();
  if (!token) {
    return { enabled: false, source: null, last4: null };
  }
  const tail = String(token).slice(-4);
  return { enabled: true, source: _source, last4: tail };
}
|
|
68
|
+
|
|
69
|
+
// Set once the status line has been emitted; later calls are no-ops.
let _logged = false;

/**
 * Emit a single line describing whether registry auth is active.
 *
 * @param {{log: Function, warn: Function}} [logger=console] - Sink for the line;
 *   `log` is used when auth is enabled, `warn` when it is not.
 */
function logAuthStatusOnce(logger = console) {
  if (!_logged) {
    _logged = true;
    const { enabled, source, last4 } = npmAuthStatus();
    if (!enabled) {
      logger.warn('[REGISTRY-AUTH] npm registry auth DISABLED — anonymous registry.npmjs.org (set MUADDIB_NPM_TOKEN); expect heavier 429 throttling.');
      return;
    }
    logger.log(`[REGISTRY-AUTH] npm registry auth ENABLED (source=${source}, token …${last4})`);
  }
}
|
|
80
|
+
|
|
81
|
+
/**
 * Headers to merge into a registry request.
 *
 * @param {string|URL} url - Request target.
 * @returns {Object} `{ Authorization: 'Bearer <token>' }` only when the URL
 *   parses, uses https:, its hostname is in AUTH_HOSTS, and a token is
 *   configured; otherwise an empty object (anonymous request).
 */
function registryAuthHeaders(url) {
  // First call doubles as the boot confirmation in the journal.
  logAuthStatusOnce();
  let target;
  try { target = new URL(url); } catch { return {}; }
  // Never attach the credential to a cleartext request, even for an allowed host.
  if (target.protocol !== 'https:') return {};
  if (!AUTH_HOSTS.has(target.hostname)) return {};
  const t = getNpmToken();
  return t ? { Authorization: `Bearer ${t}` } : {};
}
|
|
94
|
+
|
|
95
|
+
/**
 * Test seam: clear the memoized token resolution and the logged-once flag so the
 * next call resolves and logs again (e.g. after a test changes MUADDIB_NPM_TOKEN).
 */
function _resetForTests() {
  [_resolved, _logged] = [false, false];
  [_token, _source] = [null, null];
}
|
|
97
|
+
|
|
98
|
+
module.exports = { registryAuthHeaders, getNpmToken, npmAuthStatus, logAuthStatusOnce, _resetForTests };
|