muaddib-scanner 2.11.51 → 2.11.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.51",
3
+ "version": "2.11.52",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-05-26T20:25:43.730Z",
3
+ "timestamp": "2026-05-26T21:21:36.874Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -141,7 +141,10 @@ async function getWeeklyDownloads(packageName) {
141
141
  }
142
142
  try {
143
143
  const url = `https://api.npmjs.org/downloads/point/last-week/${encodeURIComponent(packageName)}`;
144
- const body = await httpsGet(url, 3000);
144
+ // Routed via _deps so tests can stub the downloads endpoint independently
145
+ // of the registry endpoint (Stage 2.1 added parallel-fetch from
146
+ // preResolveNpmBatch).
147
+ const body = await _deps.httpsGet(url, 3000);
145
148
  const data = JSON.parse(body);
146
149
  const downloads = typeof data.downloads === 'number' ? data.downloads : -1;
147
150
  downloadsCache.set(packageName, { downloads, fetchedAt: Date.now() });
@@ -429,8 +432,23 @@ async function getNpmLatestTarball(packageName) {
429
432
  version: '', tarball: null, unpackedSize: 0, scripts: {},
430
433
  homepage: '', description: '',
431
434
  latestTagVersion: null, recentVersions: [],
435
+ age_days: null, version_count: 0,
432
436
  };
433
437
  }
438
+ // Stage 2.1 — extract reputation signals from the packument we already have,
439
+ // so triageRisk in queue.js doesn't have to refetch metadata via
440
+ // getPackageMetadata. Two fields are derivable from the packument alone:
441
+ // - age_days : time.created (package creation timestamp)
442
+ // - version_count : Object.keys(versions).length (excludes unpublished
443
+ // tombstones kept only in `time`)
444
+ // weekly_downloads requires a separate api.npmjs.org call and is fetched in
445
+ // parallel by preResolveNpmBatch (it has its own cache + no semaphore).
446
+ const createdAt = (packument && packument.time && packument.time.created) || null;
447
+ result.age_days = createdAt
448
+ ? Math.floor((Date.now() - new Date(createdAt).getTime()) / 86_400_000)
449
+ : null;
450
+ result.version_count = (packument && packument.versions)
451
+ ? Object.keys(packument.versions).length : 0;
434
452
  return result;
435
453
  }
436
454
 
@@ -465,15 +483,30 @@ async function preResolveNpmBatch(items, stats, scanQueue) {
465
483
  await Promise.all(chunk.map(async (item) => {
466
484
  if (item.tarballUrl) { alreadyResolved++; return; }
467
485
  try {
468
- const npmInfo = await getNpmLatestTarball(item.name);
486
+ // Stage 2.1 — fetch downloads in parallel with the packument. The
487
+ // downloads endpoint (api.npmjs.org) is not on the registry semaphore
488
+ // and has its own internal cache, so this is effectively free in the
489
+ // warm-cache case and adds at most one parallel HTTP request otherwise.
490
+ const [npmInfo, weeklyDownloads] = await Promise.all([
491
+ getNpmLatestTarball(item.name),
492
+ getWeeklyDownloads(item.name).catch(() => null)
493
+ ]);
469
494
  if (npmInfo && npmInfo.tarball) {
470
495
  item.tarballUrl = npmInfo.tarball;
471
496
  if (!item.version) item.version = npmInfo.version || '';
472
497
  if (!item.unpackedSize) item.unpackedSize = npmInfo.unpackedSize || 0;
473
498
  if (!item.registryScripts) item.registryScripts = npmInfo.scripts || null;
499
+ // weekly_downloads is best-effort. getWeeklyDownloads returns -1 on
500
+ // failure; normalize that to null so triageRisk treats it as missing
501
+ // (rather than silently biasing the reputation factor toward "suspect").
502
+ npmInfo.weekly_downloads = (typeof weeklyDownloads === 'number' && weeklyDownloads >= 0)
503
+ ? weeklyDownloads : null;
474
504
  // Stash full packument-derived metadata for resolveTarballAndScan so
475
505
  // the worker can run ATO-signature, burst-extras, and fast-track logic
476
- // without a second registry call.
506
+ // without a second registry call. Stage 2.1 enriches this with
507
+ // age_days / version_count (from getNpmLatestTarball) and
508
+ // weekly_downloads (from getWeeklyDownloads) so the triage block in
509
+ // queue.js can read meta directly without re-fetching.
477
510
  item._npmInfo = npmInfo;
478
511
  resolved++;
479
512
  } else {
@@ -128,6 +128,22 @@ const LARGE_PACKAGE_SIZE = 10 * 1024 * 1024; // 10MB
128
128
  const FIRST_PUBLISH_SANDBOX_MAX_QUEUE = parseInt(process.env.MUADDIB_FIRST_PUBLISH_SANDBOX_MAX_QUEUE, 10) || 10;
129
129
  const FIRST_PUBLISH_SANDBOX_ENABLED = process.env.MUADDIB_FIRST_PUBLISH_SANDBOX !== '0';
130
130
 
131
+ // Stage 3 — sandbox gate. Static-score threshold below which T1b/T2 packages
132
+ // are NOT sandboxed (static result alone is authoritative). Tightens the prior
133
+ // "T1b sandbox if score >= 25 or queue < 20" to remove low-signal sandbox runs
134
+ // that consume slots without producing actionable findings (the dominant cost
135
+ // in the queue-saturation diagnostic). Validated by axon-enterprise@1.0.0
136
+ // (static 52, sandbox confirmed 100) — gate >= 40 still catches it.
137
+ // T1a (high-confidence malice) bypasses this gate; it's mandatory.
138
+ // Override via env var to widen the gate (lower threshold) for a short
139
+ // rollback window without redeploying. Clamped to [0, 100].
140
+ function computeSandboxScoreThreshold(envValue) {
141
+ const parsed = parseInt(envValue, 10);
142
+ const value = Number.isFinite(parsed) ? parsed : 40;
143
+ return Math.max(0, Math.min(100, value));
144
+ }
145
+ const SANDBOX_SCORE_THRESHOLD = computeSandboxScoreThreshold(process.env.MUADDIB_SANDBOX_SCORE_THRESHOLD);
146
+
131
147
  // --- Bundled tooling false-positive filter ---
132
148
 
133
149
  const KNOWN_BUNDLED_FILES = ['yarn.js', 'webpack.js', 'terser.js', 'esbuild.js', 'polyfills.js'];
@@ -738,14 +754,16 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
738
754
  }
739
755
 
740
756
  // T1a: mandatory sandbox (HC malice types, TIER1_TYPES non-LOW, lifecycle + intent compound)
741
- // T1b: conditional sandbox (HIGH/CRITICAL without HC type — bundler FP zone)
742
- // sandbox only if score >= 25 (significant risk) or queue pressure is low
743
- // T2: sandbox if queue < 50 (as before)
757
+ // T1b: conditional sandbox gated by SANDBOX_SCORE_THRESHOLD (Stage 3).
758
+ // Previously gated at >= 25 OR queue < 20; tightened to >= 40 by
759
+ // default because the 25-39 band produced no decisive sandbox
760
+ // findings in 4 months of prod data (axon-enterprise was at 52).
761
+ // T2: conditional sandbox — same score gate AND queue < 50.
744
762
  let sandboxResult = null;
745
763
  const shouldSandbox = !skipSandboxLargePackage && isSandboxEnabled() && sandboxAvailable && (
746
764
  tier === '1a' ||
747
- (tier === '1b' && (riskScore >= 25 || scanQueue.length < 20)) ||
748
- (tier === 2 && scanQueue.length < 50)
765
+ (tier === '1b' && riskScore >= SANDBOX_SCORE_THRESHOLD) ||
766
+ (tier === 2 && riskScore >= SANDBOX_SCORE_THRESHOLD && scanQueue.length < 50)
749
767
  );
750
768
 
751
769
  if (shouldSandbox) {
@@ -813,8 +831,12 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
813
831
  } catch (err) {
814
832
  console.error(`[MONITOR] SANDBOX error for ${name}@${version}: ${err.message}`);
815
833
  }
816
- } else if (tier === '1b' && sandboxAvailable) {
817
- console.log(`[MONITOR] SANDBOX DEFERRED (T1b, score=${riskScore} < 25, queue ${scanQueue.length} >= 20): ${name}@${version}`);
834
+ } else if (tier === '1b' && sandboxAvailable && riskScore >= SANDBOX_SCORE_THRESHOLD) {
835
+ // Stage 3 — defer only when the score meets or exceeds the gate. Below the
836
+ // threshold, sandbox is skipped entirely (static result is final).
837
+ // This stops the deferred-queue from filling with low-score items
838
+ // that would never produce decisive sandbox findings.
839
+ console.log(`[MONITOR] SANDBOX DEFERRED (T1b, score=${riskScore}, queue ${scanQueue.length}): ${name}@${version}`);
818
840
  enqueueDeferred({
819
841
  name, version, ecosystem, tier, riskScore, tarballUrl,
820
842
  enqueuedAt: Date.now(),
@@ -823,10 +845,14 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
823
845
  retries: 0
824
846
  });
825
847
  stats.sandboxDeferred = (stats.sandboxDeferred || 0) + 1;
848
+ } else if (tier === '1b' && sandboxAvailable) {
849
+ // Below SANDBOX_SCORE_THRESHOLD — no sandbox, no defer.
850
+ console.log(`[MONITOR] SANDBOX GATED (T1b, score=${riskScore} < ${SANDBOX_SCORE_THRESHOLD}): ${name}@${version}`);
851
+ stats.sandboxGated = (stats.sandboxGated || 0) + 1;
826
852
  } else if (tier === '1b') {
827
853
  console.log(`[MONITOR] SANDBOX SKIPPED (T1b, no Docker): ${name}@${version}`);
828
- } else if (tier === 2 && sandboxAvailable) {
829
- console.log(`[MONITOR] SANDBOX DEFERRED (T2, queue ${scanQueue.length} >= 50): ${name}@${version}`);
854
+ } else if (tier === 2 && sandboxAvailable && riskScore >= SANDBOX_SCORE_THRESHOLD) {
855
+ console.log(`[MONITOR] SANDBOX DEFERRED (T2, score=${riskScore}, queue ${scanQueue.length}): ${name}@${version}`);
830
856
  enqueueDeferred({
831
857
  name, version, ecosystem, tier, riskScore, tarballUrl,
832
858
  enqueuedAt: Date.now(),
@@ -835,6 +861,11 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
835
861
  retries: 0
836
862
  });
837
863
  stats.sandboxDeferred = (stats.sandboxDeferred || 0) + 1;
864
+ } else if (tier === 2 && sandboxAvailable) {
865
+ // Below SANDBOX_SCORE_THRESHOLD — T2 was already passive; staying
866
+ // static-only matches the existing T3 behaviour.
867
+ console.log(`[MONITOR] SANDBOX GATED (T2, score=${riskScore} < ${SANDBOX_SCORE_THRESHOLD}): ${name}@${version}`);
868
+ stats.sandboxGated = (stats.sandboxGated || 0) + 1;
838
869
  } else if (tier === 2) {
839
870
  console.log(`[MONITOR] SANDBOX SKIPPED (T2, no Docker): ${name}@${version}`);
840
871
  }
@@ -1295,10 +1326,23 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
1295
1326
  if (triageMode !== 'off' && !item.fastTrack) {
1296
1327
  let triageMeta = null;
1297
1328
  if (item.ecosystem === 'npm') {
1298
- try {
1299
- const { getPackageMetadata } = require('../scanner/npm-registry.js');
1300
- triageMeta = await getPackageMetadata(item.name);
1301
- } catch { /* metadata unavailable → triageRisk will see null and pick 'full' */ }
1329
+ // Stage 2.1 — Stage 1 pre-resolve already fetched the packument and
1330
+ // (Stage 2.1) computed age_days + version_count, plus parallel-fetched
1331
+ // weekly_downloads. Read those directly to skip the second
1332
+ // registry round-trip via getPackageMetadata. Fallback to the lazy
1333
+ // metadata fetch only when _npmInfo is absent (lazy-resolve path).
1334
+ if (item._npmInfo) {
1335
+ triageMeta = {
1336
+ age_days: item._npmInfo.age_days,
1337
+ version_count: item._npmInfo.version_count,
1338
+ weekly_downloads: item._npmInfo.weekly_downloads,
1339
+ };
1340
+ } else {
1341
+ try {
1342
+ const { getPackageMetadata } = require('../scanner/npm-registry.js');
1343
+ triageMeta = await getPackageMetadata(item.name);
1344
+ } catch { /* metadata unavailable → triageRisk will see null and pick 'full' */ }
1345
+ }
1302
1346
  } else if (item.ecosystem === 'pypi') {
1303
1347
  triageMeta = item._pypiInfo || null;
1304
1348
  }
@@ -1413,6 +1457,8 @@ module.exports = {
1413
1457
  LARGE_PACKAGE_SIZE,
1414
1458
  FIRST_PUBLISH_SANDBOX_MAX_QUEUE,
1415
1459
  FIRST_PUBLISH_SANDBOX_ENABLED,
1460
+ SANDBOX_SCORE_THRESHOLD,
1461
+ computeSandboxScoreThreshold,
1416
1462
  KNOWN_BUNDLED_FILES,
1417
1463
  KNOWN_BUNDLED_PATHS,
1418
1464
  ML_EXCLUDED_DIRS,