muaddib-scanner 2.11.110 → 2.11.111

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.110",
3
+ "version": "2.11.111",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-12T17:33:52.917Z",
3
+ "timestamp": "2026-06-13T09:27:21.416Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -564,6 +564,34 @@ async function getNpmLatestTarball(packageName) {
564
564
  // holds ~10KB of state; 1000 of them is a needless heap spike).
565
565
  const PRE_RESOLVE_CHUNK_SIZE = 50;
566
566
 
567
+ // --- Load-aware pre-resolve shedding (2026-06-13) ---
568
+ // Under catch-up (deep scan queue) or active npm throttle (elevated brain
569
+ // level), prefetching up to CHANGES_LIMIT (1000) packuments per poll cycle
570
+ // through the SHARED registry rate budget starves the per-scan metadata fetches
571
+ // the workers actually need — and most prefetched items get spilled/shed before
572
+ // any worker scans them, so the fetch is wasted budget that also keeps npm
573
+ // 429-ing. When shedding, the batch skips the prefetch and enqueues items with
574
+ // tarballUrl=null; resolveTarballAndScan() lazily resolves ONLY the items a
575
+ // worker actually scans (the existing zero-scan-loss fallback path).
576
+ const PRE_RESOLVE_SHED_QUEUE = Math.max(0, parseInt(process.env.MUADDIB_PRERESOLVE_SHED_QUEUE, 10) || 2000); // Scan-queue length above which prefetch is shed (strict `>` in preResolveShouldShed). Unset, unparsable, or 0 falls back to 2000 because of `||`; negative values clamp to 0.
577
+ const PRE_RESOLVE_SHED_LEVEL = Math.max(1, parseInt(process.env.MUADDIB_PRERESOLVE_SHED_LEVEL, 10) || 3); // Brain level at or above which prefetch is shed (`>=` in preResolveShouldShed). Unset, unparsable, or 0 falls back to 3 because of `||`; values below 1 clamp to 1.
578
+
579
+ function preResolveShouldShed(scanQueue) { // Returns true when the caller should skip the prefetch for the current chunk, false otherwise.
580
+ // Kill-switch: MUADDIB_PRERESOLVE_NO_SHED=1 disables shedding (always returns false).
581
+ // Read from process.env on every call rather than captured at module load, so it can be flipped via the systemd EnvironmentFile + restart without a code change/rebuild.
582
+ if (process.env.MUADDIB_PRERESOLVE_NO_SHED === '1') return false;
583
+ if (scanQueue && scanQueue.length > PRE_RESOLVE_SHED_QUEUE) return true; // Queue-depth gate; a missing scanQueue skips this check and falls through to the brain-level gate.
584
+ try {
585
+ // Lazy-require so the brain accessor is stubbable in tests (a top-level
586
+ // destructure captures a frozen reference) and to dodge load-order cycles.
587
+ // require() is cached — negligible on this per-chunk check.
588
+ const { getBrainState, DEFAULT_HOST } = require('../shared/http-limiter.js');
589
+ const brain = getBrainState(DEFAULT_HOST);
590
+ if (brain && (brain.level || 0) >= PRE_RESOLVE_SHED_LEVEL) return true; // A missing or falsy level is treated as 0.
591
+ } catch { /* observability seam — must never block ingestion; any error thrown inside the try falls through to `return false` */ }
592
+ return false; // Neither gate tripped: keep prefetching.
593
+ }
594
+
567
595
  // If a scanQueue is provided, items are pushed onto it as soon as their chunk
568
596
  // finishes resolution — so a crash mid-batch only loses the current chunk's
569
597
  // in-flight work, not all the chunks that already completed. When scanQueue
@@ -575,8 +603,18 @@ async function preResolveNpmBatch(items, stats, scanQueue) {
575
603
  let resolved = 0;
576
604
  let alreadyResolved = 0;
577
605
  let failed = 0;
606
+ let shed = 0;
578
607
  for (let i = 0; i < items.length; i += PRE_RESOLVE_CHUNK_SIZE) {
579
608
  const chunk = items.slice(i, i + PRE_RESOLVE_CHUNK_SIZE);
609
+ if (preResolveShouldShed(scanQueue)) {
610
+ // Load-aware shed: skip the packument prefetch; enqueue as-is so workers
611
+ // lazy-resolve ONLY what they actually scan (resolveTarballAndScan handles
612
+ // tarballUrl=null — zero scan loss). Re-checked per chunk so prefetch
613
+ // resumes mid-batch the moment the queue drains below the threshold.
614
+ shed += chunk.length;
615
+ if (scanQueue) { for (const item of chunk) enqueueScan(scanQueue, item, stats); }
616
+ continue;
617
+ }
580
618
  await Promise.all(chunk.map(async (item) => {
581
619
  if (item.tarballUrl) { alreadyResolved++; return; }
582
620
  try {
@@ -626,10 +664,12 @@ async function preResolveNpmBatch(items, stats, scanQueue) {
626
664
  if (stats) {
627
665
  stats.npmPreResolved = (stats.npmPreResolved || 0) + resolved;
628
666
  stats.npmPreResolveFailed = (stats.npmPreResolveFailed || 0) + failed;
667
+ if (shed) stats.npmPreResolveShed = (stats.npmPreResolveShed || 0) + shed;
629
668
  }
630
669
  if (items.length >= 5) {
631
670
  const elapsed = Date.now() - start;
632
- console.log(`[MONITOR] PRE-RESOLVE npm: ${resolved}/${items.length} in ${elapsed}ms (${failed} → lazy fallback${alreadyResolved ? `, ${alreadyResolved} already resolved` : ''})`);
671
+ const shedNote = shed ? `, ${shed} shed (load-aware)` : '';
672
+ console.log(`[MONITOR] PRE-RESOLVE npm: ${resolved}/${items.length} in ${elapsed}ms (${failed} → lazy fallback${alreadyResolved ? `, ${alreadyResolved} already resolved` : ''}${shedNote})`);
633
673
  }
634
674
  }
635
675
 
@@ -639,8 +679,17 @@ async function preResolvePyPIBatch(items, stats, scanQueue) {
639
679
  let resolved = 0;
640
680
  let alreadyResolved = 0;
641
681
  let failed = 0;
682
+ let shed = 0;
642
683
  for (let i = 0; i < items.length; i += PRE_RESOLVE_CHUNK_SIZE) {
643
684
  const chunk = items.slice(i, i + PRE_RESOLVE_CHUNK_SIZE);
685
+ if (preResolveShouldShed(scanQueue)) {
686
+ // Load-aware shed (shared gate): queue-depth dominates here; the prefetched
687
+ // PyPI metadata would mostly be for items shed before any worker scans them.
688
+ // Enqueue as-is — resolveTarballAndScan lazily resolves PyPI URLs too.
689
+ shed += chunk.length;
690
+ if (scanQueue) { for (const item of chunk) enqueueScan(scanQueue, item, stats); }
691
+ continue;
692
+ }
644
693
  await Promise.all(chunk.map(async (item) => {
645
694
  if (item.tarballUrl) { alreadyResolved++; return; }
646
695
  try {
@@ -679,10 +728,12 @@ async function preResolvePyPIBatch(items, stats, scanQueue) {
679
728
  if (stats) {
680
729
  stats.pypiPreResolved = (stats.pypiPreResolved || 0) + resolved;
681
730
  stats.pypiPreResolveFailed = (stats.pypiPreResolveFailed || 0) + failed;
731
+ if (shed) stats.pypiPreResolveShed = (stats.pypiPreResolveShed || 0) + shed;
682
732
  }
683
733
  if (items.length >= 5) {
684
734
  const elapsed = Date.now() - start;
685
- console.log(`[MONITOR] PRE-RESOLVE pypi: ${resolved}/${items.length} in ${elapsed}ms (${failed} → lazy fallback${alreadyResolved ? `, ${alreadyResolved} already resolved` : ''})`);
735
+ const shedNote = shed ? `, ${shed} shed (load-aware)` : '';
736
+ console.log(`[MONITOR] PRE-RESOLVE pypi: ${resolved}/${items.length} in ${elapsed}ms (${failed} → lazy fallback${alreadyResolved ? `, ${alreadyResolved} already resolved` : ''}${shedNote})`);
686
737
  }
687
738
  }
688
739
 
@@ -1493,6 +1544,7 @@ module.exports = {
1493
1544
  getNpmLatestTarball,
1494
1545
  preResolveNpmBatch,
1495
1546
  preResolvePyPIBatch,
1547
+ preResolveShouldShed,
1496
1548
 
1497
1549
  // RSS parsing
1498
1550
  parseNpmRss,
@@ -1235,9 +1235,18 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1235
1235
  const pypiPub = stats.pypiChangelogPackages || 0;
1236
1236
  const published = npmPub + pypiPub;
1237
1237
  const catchupSkipped = (stats.npmCatchupSkippedSeqs || 0) + (stats.pypiCatchupSkippedEvents || 0);
1238
+ // Clarify the Ops headline so it isn't read as an overnight drop: it counts
1239
+ // COMPLETED scans in the exact ledger window [last report → now], version/
1240
+ // dedup-collapsed — intentionally lower than the in-memory counter (stats.scanned),
1241
+ // which also tallies retries, burst extras and size-cap rejections
1242
+ // (cf. queue.js uniqueScanAttempts). Surface the raw counter when it diverges.
1243
+ const opsQualifier = headline ? ' (completed, deduped, 24h)' : '';
1244
+ const rawCounter = (headline && typeof stats.scanned === 'number' && stats.scanned > hScanned)
1245
+ ? ` · counter ${stats.scanned} (incl. retries/burst)`
1246
+ : '';
1238
1247
  const opsSuffix = catchupSkipped > 0
1239
- ? `\nOps: ${hScanned} | Catch-up skip: ${catchupSkipped}`
1240
- : `\nOps: ${hScanned}`;
1248
+ ? `\nOps: ${hScanned}${opsQualifier}${rawCounter} | Catch-up skip: ${catchupSkipped}`
1249
+ : `\nOps: ${hScanned}${opsQualifier}${rawCounter}`;
1241
1250
  let coverageText;
1242
1251
  if (ledger && ledger.distinctPackages > 0 && ledger.distinctCoverage != null) {
1243
1252
  const pct = (ledger.distinctCoverage * 100).toFixed(0);
@@ -1344,9 +1353,10 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1344
1353
  { name: 'System', value: healthText, inline: false }
1345
1354
  ],
1346
1355
  footer: {
1347
- // Headline-source annotation: 'ledger' = window-exact [last report → now],
1348
- // 'counters' = in-memory fallback (ledger unavailable — pre-upgrade behavior).
1349
- text: `MUAD'DIB - Daily summary | headline: ${headline ? 'ledger (since last report)' : 'counters'} | ${readableTime}`
1356
+ // Headline-source annotation: 'ledger' = window-exact [last report → now]
1357
+ // (completed/deduped scans), 'counters' = in-memory fallback (ledger
1358
+ // unavailable — pre-upgrade behavior).
1359
+ text: `MUAD'DIB - Daily summary | headline: ${headline ? 'ledger — completed/deduped, exact 24h window' : 'counters (in-memory fallback)'} | ${readableTime}`
1350
1360
  },
1351
1361
  timestamp: now.toISOString()
1352
1362
  }]