muaddib-scanner 2.11.109 → 2.11.111
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
package/src/monitor/ingestion.js
CHANGED
|
@@ -564,6 +564,34 @@ async function getNpmLatestTarball(packageName) {
|
|
|
564
564
|
// holds ~10KB of state; 1000 of them is a needless heap spike).
|
|
565
565
|
const PRE_RESOLVE_CHUNK_SIZE = 50;
|
|
566
566
|
|
|
567
|
+
// --- Load-aware pre-resolve shedding (2026-06-13) ---
|
|
568
|
+
// Under catch-up (deep scan queue) or active npm throttle (elevated brain
|
|
569
|
+
// level), prefetching up to CHANGES_LIMIT (1000) packuments per poll cycle
|
|
570
|
+
// through the SHARED registry rate budget starves the per-scan metadata fetches
|
|
571
|
+
// the workers actually need — and most prefetched items get spilled/shed before
|
|
572
|
+
// any worker scans them, so the fetch is wasted budget that also keeps npm
|
|
573
|
+
// 429-ing. When shedding, the batch skips the prefetch and enqueues items with
|
|
574
|
+
// tarballUrl=null; resolveTarballAndScan() lazily resolves ONLY the items a
|
|
575
|
+
// worker actually scans (the existing zero-scan-loss fallback path).
|
|
576
|
+
// Scan-queue depth above which the pre-resolve prefetch is shed.
// Parsed explicitly instead of `parseInt(...) || 2000`: with the `||` form an
// operator-supplied "0" (shed whenever anything is queued) is falsy and was
// silently replaced by 2000, even though 0 is the documented floor of the
// clamp. Unset / non-numeric values fall back to 2000; negatives clamp to 0.
const parsePreResolveShedQueue = (raw) => {
  const parsed = parseInt(raw, 10);
  return Number.isNaN(parsed) ? 2000 : Math.max(0, parsed);
};
const PRE_RESOLVE_SHED_QUEUE = parsePreResolveShedQueue(process.env.MUADDIB_PRERESOLVE_SHED_QUEUE);
|
|
577
|
+
// Brain (backoff) level at or above which the pre-resolve prefetch is shed.
// Parsed explicitly instead of `parseInt(...) || 3`: with the `||` form a
// configured "0" fell through to the default 3 while "-1" clamped to 1, so the
// two out-of-range inputs disagreed. Now every numeric value below the floor
// clamps to 1, and only unset / non-numeric input uses the default of 3.
const parsePreResolveShedLevel = (raw) => {
  const parsed = parseInt(raw, 10);
  return Number.isNaN(parsed) ? 3 : Math.max(1, parsed);
};
const PRE_RESOLVE_SHED_LEVEL = parsePreResolveShedLevel(process.env.MUADDIB_PRERESOLVE_SHED_LEVEL);
|
|
578
|
+
|
|
579
|
+
function preResolveShouldShed(scanQueue) {
|
|
580
|
+
// Kill-switch read live so it can be flipped via the systemd EnvironmentFile
|
|
581
|
+
// + restart without a code change/rebuild.
|
|
582
|
+
if (process.env.MUADDIB_PRERESOLVE_NO_SHED === '1') return false;
|
|
583
|
+
if (scanQueue && scanQueue.length > PRE_RESOLVE_SHED_QUEUE) return true;
|
|
584
|
+
try {
|
|
585
|
+
// Lazy-require so the brain accessor is stubbable in tests (a top-level
|
|
586
|
+
// destructure captures a frozen reference) and to dodge load-order cycles.
|
|
587
|
+
// require() is cached — negligible on this per-chunk check.
|
|
588
|
+
const { getBrainState, DEFAULT_HOST } = require('../shared/http-limiter.js');
|
|
589
|
+
const brain = getBrainState(DEFAULT_HOST);
|
|
590
|
+
if (brain && (brain.level || 0) >= PRE_RESOLVE_SHED_LEVEL) return true;
|
|
591
|
+
} catch { /* observability seam — must never block ingestion */ }
|
|
592
|
+
return false;
|
|
593
|
+
}
|
|
594
|
+
|
|
567
595
|
// If a scanQueue is provided, items are pushed onto it as soon as their chunk
|
|
568
596
|
// finishes resolution — so a crash mid-batch only loses the current chunk's
|
|
569
597
|
// in-flight work, not all the chunks that already completed. When scanQueue
|
|
@@ -575,8 +603,18 @@ async function preResolveNpmBatch(items, stats, scanQueue) {
|
|
|
575
603
|
let resolved = 0;
|
|
576
604
|
let alreadyResolved = 0;
|
|
577
605
|
let failed = 0;
|
|
606
|
+
let shed = 0;
|
|
578
607
|
for (let i = 0; i < items.length; i += PRE_RESOLVE_CHUNK_SIZE) {
|
|
579
608
|
const chunk = items.slice(i, i + PRE_RESOLVE_CHUNK_SIZE);
|
|
609
|
+
if (preResolveShouldShed(scanQueue)) {
|
|
610
|
+
// Load-aware shed: skip the packument prefetch; enqueue as-is so workers
|
|
611
|
+
// lazy-resolve ONLY what they actually scan (resolveTarballAndScan handles
|
|
612
|
+
// tarballUrl=null — zero scan loss). Re-checked per chunk so prefetch
|
|
613
|
+
// resumes mid-batch the moment the queue drains below the threshold.
|
|
614
|
+
shed += chunk.length;
|
|
615
|
+
if (scanQueue) { for (const item of chunk) enqueueScan(scanQueue, item, stats); }
|
|
616
|
+
continue;
|
|
617
|
+
}
|
|
580
618
|
await Promise.all(chunk.map(async (item) => {
|
|
581
619
|
if (item.tarballUrl) { alreadyResolved++; return; }
|
|
582
620
|
try {
|
|
@@ -626,10 +664,12 @@ async function preResolveNpmBatch(items, stats, scanQueue) {
|
|
|
626
664
|
if (stats) {
|
|
627
665
|
stats.npmPreResolved = (stats.npmPreResolved || 0) + resolved;
|
|
628
666
|
stats.npmPreResolveFailed = (stats.npmPreResolveFailed || 0) + failed;
|
|
667
|
+
if (shed) stats.npmPreResolveShed = (stats.npmPreResolveShed || 0) + shed;
|
|
629
668
|
}
|
|
630
669
|
if (items.length >= 5) {
|
|
631
670
|
const elapsed = Date.now() - start;
|
|
632
|
-
|
|
671
|
+
const shedNote = shed ? `, ${shed} shed (load-aware)` : '';
|
|
672
|
+
console.log(`[MONITOR] PRE-RESOLVE npm: ${resolved}/${items.length} in ${elapsed}ms (${failed} → lazy fallback${alreadyResolved ? `, ${alreadyResolved} already resolved` : ''}${shedNote})`);
|
|
633
673
|
}
|
|
634
674
|
}
|
|
635
675
|
|
|
@@ -639,8 +679,17 @@ async function preResolvePyPIBatch(items, stats, scanQueue) {
|
|
|
639
679
|
let resolved = 0;
|
|
640
680
|
let alreadyResolved = 0;
|
|
641
681
|
let failed = 0;
|
|
682
|
+
let shed = 0;
|
|
642
683
|
for (let i = 0; i < items.length; i += PRE_RESOLVE_CHUNK_SIZE) {
|
|
643
684
|
const chunk = items.slice(i, i + PRE_RESOLVE_CHUNK_SIZE);
|
|
685
|
+
if (preResolveShouldShed(scanQueue)) {
|
|
686
|
+
// Load-aware shed (shared gate): queue-depth dominates here; the prefetched
|
|
687
|
+
// PyPI metadata would mostly be for items shed before any worker scans them.
|
|
688
|
+
// Enqueue as-is — resolveTarballAndScan lazily resolves PyPI URLs too.
|
|
689
|
+
shed += chunk.length;
|
|
690
|
+
if (scanQueue) { for (const item of chunk) enqueueScan(scanQueue, item, stats); }
|
|
691
|
+
continue;
|
|
692
|
+
}
|
|
644
693
|
await Promise.all(chunk.map(async (item) => {
|
|
645
694
|
if (item.tarballUrl) { alreadyResolved++; return; }
|
|
646
695
|
try {
|
|
@@ -679,10 +728,12 @@ async function preResolvePyPIBatch(items, stats, scanQueue) {
|
|
|
679
728
|
if (stats) {
|
|
680
729
|
stats.pypiPreResolved = (stats.pypiPreResolved || 0) + resolved;
|
|
681
730
|
stats.pypiPreResolveFailed = (stats.pypiPreResolveFailed || 0) + failed;
|
|
731
|
+
if (shed) stats.pypiPreResolveShed = (stats.pypiPreResolveShed || 0) + shed;
|
|
682
732
|
}
|
|
683
733
|
if (items.length >= 5) {
|
|
684
734
|
const elapsed = Date.now() - start;
|
|
685
|
-
|
|
735
|
+
const shedNote = shed ? `, ${shed} shed (load-aware)` : '';
|
|
736
|
+
console.log(`[MONITOR] PRE-RESOLVE pypi: ${resolved}/${items.length} in ${elapsed}ms (${failed} → lazy fallback${alreadyResolved ? `, ${alreadyResolved} already resolved` : ''}${shedNote})`);
|
|
686
737
|
}
|
|
687
738
|
}
|
|
688
739
|
|
|
@@ -1493,6 +1544,7 @@ module.exports = {
|
|
|
1493
1544
|
getNpmLatestTarball,
|
|
1494
1545
|
preResolveNpmBatch,
|
|
1495
1546
|
preResolvePyPIBatch,
|
|
1547
|
+
preResolveShouldShed,
|
|
1496
1548
|
|
|
1497
1549
|
// RSS parsing
|
|
1498
1550
|
parseNpmRss,
|
package/src/monitor/webhook.js
CHANGED
|
@@ -1235,9 +1235,18 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
|
1235
1235
|
const pypiPub = stats.pypiChangelogPackages || 0;
|
|
1236
1236
|
const published = npmPub + pypiPub;
|
|
1237
1237
|
const catchupSkipped = (stats.npmCatchupSkippedSeqs || 0) + (stats.pypiCatchupSkippedEvents || 0);
|
|
1238
|
+
// Clarify the Ops headline so it isn't read as an overnight drop: it counts
|
|
1239
|
+
// COMPLETED scans in the exact ledger window [last report → now], version/
|
|
1240
|
+
// dedup-collapsed — intentionally lower than the in-memory counter (stats.scanned),
|
|
1241
|
+
// which also tallies retries, burst extras and size-cap rejections
|
|
1242
|
+
// (cf. queue.js uniqueScanAttempts). Surface the raw counter when it diverges.
|
|
1243
|
+
const opsQualifier = headline ? ' (completed, deduped, 24h)' : '';
|
|
1244
|
+
const rawCounter = (headline && typeof stats.scanned === 'number' && stats.scanned > hScanned)
|
|
1245
|
+
? ` · counter ${stats.scanned} (incl. retries/burst)`
|
|
1246
|
+
: '';
|
|
1238
1247
|
const opsSuffix = catchupSkipped > 0
|
|
1239
|
-
? `\nOps: ${hScanned} | Catch-up skip: ${catchupSkipped}`
|
|
1240
|
-
: `\nOps: ${hScanned}`;
|
|
1248
|
+
? `\nOps: ${hScanned}${opsQualifier}${rawCounter} | Catch-up skip: ${catchupSkipped}`
|
|
1249
|
+
: `\nOps: ${hScanned}${opsQualifier}${rawCounter}`;
|
|
1241
1250
|
let coverageText;
|
|
1242
1251
|
if (ledger && ledger.distinctPackages > 0 && ledger.distinctCoverage != null) {
|
|
1243
1252
|
const pct = (ledger.distinctCoverage * 100).toFixed(0);
|
|
@@ -1344,9 +1353,10 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
|
1344
1353
|
{ name: 'System', value: healthText, inline: false }
|
|
1345
1354
|
],
|
|
1346
1355
|
footer: {
|
|
1347
|
-
// Headline-source annotation: 'ledger' = window-exact [last report → now]
|
|
1348
|
-
// 'counters' = in-memory fallback (ledger
|
|
1349
|
-
|
|
1356
|
+
// Headline-source annotation: 'ledger' = window-exact [last report → now]
|
|
1357
|
+
// (completed/deduped scans), 'counters' = in-memory fallback (ledger
|
|
1358
|
+
// unavailable — pre-upgrade behavior).
|
|
1359
|
+
text: `MUAD'DIB - Daily summary | headline: ${headline ? 'ledger — completed/deduped, exact 24h window' : 'counters (in-memory fallback)'} | ${readableTime}`
|
|
1350
1360
|
},
|
|
1351
1361
|
timestamp: now.toISOString()
|
|
1352
1362
|
}]
|
|
@@ -98,7 +98,10 @@ function computeBackoffTransition(state, event, consts = {}) {
|
|
|
98
98
|
// Full-quiet reset (the incident is over, restart at base).
|
|
99
99
|
const quietResetMs = s.lastPauseMs * 2 + base * 5;
|
|
100
100
|
if (s.last429At && now - s.last429At > quietResetMs) s.level = 0;
|
|
101
|
-
s
|
|
101
|
+
// Cap: beyond ~12 both the pause (60s) and the rate (1/s floor) are
|
|
102
|
+
// saturated — an unbounded counter only makes operators read "level 25"
|
|
103
|
+
// as an emergency when it carries no additional behavior.
|
|
104
|
+
s.level = Math.min(s.level + 1, 12);
|
|
102
105
|
const pause = Math.min(max, base * 2 ** (s.level - 1));
|
|
103
106
|
s.lastPauseMs = pause;
|
|
104
107
|
s.last429At = now;
|
|
@@ -159,20 +162,37 @@ function hostForUrl(url) {
|
|
|
159
162
|
try { return new URL(url).hostname || DEFAULT_HOST; } catch { return DEFAULT_HOST; }
|
|
160
163
|
}
|
|
161
164
|
|
|
162
|
-
function _effectiveRate() {
|
|
165
|
+
function _effectiveRate(level = 0) {
|
|
166
|
+
let rate = RATE_LIMIT_PER_SEC;
|
|
163
167
|
if (BOOT_SLOWSTART_MS > 0 && Date.now() - _bootAt < BOOT_SLOWSTART_MS) {
|
|
164
|
-
|
|
168
|
+
rate = Math.max(1, Math.floor(rate / 4));
|
|
165
169
|
}
|
|
166
|
-
|
|
170
|
+
// Rate-by-level (the partial-throttle fix, 2026-06-12 evening): the pause
|
|
171
|
+
// alone cannot converge against a registry that PERMANENTLY rejects a
|
|
172
|
+
// fraction of requests — every post-pause burst guarantees a 429, every
|
|
173
|
+
// window stays dirty, the level ratchets to the cap and throughput pins at
|
|
174
|
+
// ~10 req/min forever (observed: level 25, zero de-escalations, while
|
|
175
|
+
// tarball downloads flowed fine). Halving the SEND RATE per level (floor
|
|
176
|
+
// 1 req/s) makes a clean 30s window reachable — 30 spaced probes instead of
|
|
177
|
+
// one burst — so the AIMD de-escalation actually fires and the brain
|
|
178
|
+
// CONVERGES on the registry's real granted budget instead of oscillating
|
|
179
|
+
// burst→reject at the cap.
|
|
180
|
+
if (level > 0) rate = Math.max(1, Math.floor(rate / 2 ** Math.min(level, 5)));
|
|
181
|
+
return rate;
|
|
167
182
|
}
|
|
168
183
|
|
|
169
184
|
function _refillTokens(b) {
|
|
170
185
|
const now = Date.now();
|
|
171
186
|
if (now < b.bo.pauseUntil) return; // backoff pause: no refills, no grants
|
|
172
|
-
const rate = _effectiveRate();
|
|
187
|
+
const rate = _effectiveRate(b.bo.level);
|
|
173
188
|
if (b.bo.pauseUntil > b.lastRefill) {
|
|
174
|
-
// First refill after a backoff pause:
|
|
175
|
-
|
|
189
|
+
// First refill after a backoff pause: PROBE OF ONE. The previous half-
|
|
190
|
+
// budget restart fired a 10-request burst the instant the pause expired —
|
|
191
|
+
// against a partially-throttling registry that burst GUARANTEED a 429 and
|
|
192
|
+
// re-armed the next pause. One spaced probe at a time is how the level's
|
|
193
|
+
// reduced rate (see _effectiveRate) gets a chance to produce the clean
|
|
194
|
+
// window that de-escalates.
|
|
195
|
+
b.tokens = 1;
|
|
176
196
|
b.lastRefill = now;
|
|
177
197
|
return;
|
|
178
198
|
}
|