muaddib-scanner 2.11.60 → 2.11.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.60",
3
+ "version": "2.11.63",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-05T10:13:00.374Z",
3
+ "timestamp": "2026-06-05T21:19:52.384Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -18,7 +18,7 @@ const { runSandbox } = require('../sandbox/index.js');
18
18
  const { isCanaryEnabled, TIER1_TYPES } = require('./classify.js');
19
19
  const { getWebhookUrl, alertedPackageRules, persistAlert, buildAlertData } = require('./webhook.js');
20
20
  const { sendWebhook } = require('../webhook.js');
21
- const { atomicWriteFileSync } = require('./state.js');
21
+ const { atomicWriteFileSync, markSandboxed } = require('./state.js');
22
22
 
23
23
  // ── Constants ──
24
24
  const DEFERRED_QUEUE_MAX = 500;
@@ -200,6 +200,7 @@ async function processDeferredItem(stats) {
200
200
  const canary = isCanaryEnabled();
201
201
  // maxRuns=1: deferred items are T1b/T2, time bomb detection (3 runs) is a luxury.
202
202
  // 90s instead of 270s per item → 3× faster deferred queue drain.
203
+ markSandboxed(item.name); // stamp for sandbox-revalidation cadence (matches the synchronous path)
203
204
  sandboxResult = await runSandbox(item.name, { canary, skipSemaphore: true, maxRuns: 1, signal: ac.signal });
204
205
  console.log(`[DEFERRED] SANDBOX COMPLETE: ${key} -> score=${sandboxResult.score}, severity=${sandboxResult.severity}`);
205
206
  } catch (err) {
@@ -33,7 +33,10 @@ const {
33
33
  appendAlert,
34
34
  getParisHour,
35
35
  hasReportBeenSentToday,
36
- MAX_DAILY_ALERTS
36
+ MAX_DAILY_ALERTS,
37
+ loadScanMemory,
38
+ shouldSuppressByMemory,
39
+ markSandboxed
37
40
  } = require('./state.js');
38
41
 
39
42
  // From ./classify.js
@@ -142,6 +145,29 @@ function computeSandboxScoreThreshold(envValue) {
142
145
  }
143
146
  const SANDBOX_SCORE_THRESHOLD = computeSandboxScoreThreshold(process.env.MUADDIB_SANDBOX_SCORE_THRESHOLD);
144
147
 
148
// --- Sandbox waste-cut (v2.11.6x): skip sandbox time that yields no new verdict ---
// Two skip paths, both detection-safe, applied BEFORE the tier sandbox decision:
//  (1) memory match — re-sandboxing a package whose static result is equivalent to a
//      remembered scan produces nothing the webhook wouldn't already memory-suppress.
//      The dominant waste source is restart-replay: recentlyScanned is in-memory (lost on
//      restart) but scan-memory persists 30d, so the changes-stream backlog gets
//      re-sandboxed then suppressed. We skip, but re-sandbox at most once per
//      SANDBOX_REVALIDATE_MS so runtime/canary coverage is retained on a slow cadence.
//  (2) native binary shard — platform-specific prebuilt packages (os/cpu constrained or
//      name like `*-linux-x64`) with trivial JS hang the sandbox install and always time
//      out INCONCLUSIVE. Same guard rails as the large-low-signal skip (queue.js ~768):
//      any lifecycle script, HIGH/CRITICAL finding, or temporal signal → sandbox runs.

// Fallback revalidation window when the env override is absent or malformed: 7 days.
const DEFAULT_SANDBOX_REVALIDATE_MS = 7 * 24 * 60 * 60 * 1000;

/**
 * Pure parser for the MUADDIB_SANDBOX_REVALIDATE_MS override (no env access, so it is
 * unit-testable). Only a plain non-negative integer number of milliseconds is accepted.
 *
 * parseInt() is deliberately NOT used: it stops at the first non-digit, so a typo such
 * as "7d" would silently become 7 ms and "1e3" would become 1 ms, which collapses the
 * revalidation window and re-sandboxes everything. Anything that is not purely digits
 * (optionally with a leading "+") falls back to the default instead.
 *
 * @param {string|undefined} envValue - Raw environment variable value
 * @returns {number} Revalidation window in ms (0 means "never skip on memory match")
 */
function computeSandboxRevalidateMs(envValue) {
  if (typeof envValue !== 'string') return DEFAULT_SANDBOX_REVALIDATE_MS;
  const raw = envValue.trim();
  if (!/^\+?\d+$/.test(raw)) return DEFAULT_SANDBOX_REVALIDATE_MS;
  const parsed = Number(raw);
  // An absurdly long digit string overflows to Infinity — treat as malformed.
  return Number.isFinite(parsed) ? parsed : DEFAULT_SANDBOX_REVALIDATE_MS;
}

const SANDBOX_REVALIDATE_MS = computeSandboxRevalidateMs(process.env.MUADDIB_SANDBOX_REVALIDATE_MS);

// npm platform-shard naming: <scope>/<pkg>-<os>-<arch>[-<libc/abi>] (esbuild/swc/turbo pattern).
const NATIVE_SHARD_NAME_RE = /-(linux|darwin|win32|freebsd|openbsd|android|sunos|aix)-(x64|arm64|arm|ia32|ppc64|s390x|riscv64|loong64|mips64el)(-(gnu|gnueabihf|musl|eabi|eabihf|msvc))?$/;

// package.json script names checked when deciding whether a shard runs install-time hooks.
const LIFECYCLE_SCRIPT_KEYS = ['preinstall', 'install', 'postinstall', 'prepare', 'prepublish', 'prepublishOnly', 'preuninstall', 'uninstall', 'postuninstall'];

// A genuine prebuilt shard is a thin wrapper around a binary (index.js + index.d.ts at most).
// More JS than this means real logic → not a pure shard → don't skip.
const NATIVE_SHARD_MAX_JS_FILES = 3;
170
+
145
171
  // --- Bundled tooling false-positive filter ---
146
172
 
147
173
  const KNOWN_BUNDLED_FILES = ['yarn.js', 'webpack.js', 'terser.js', 'esbuild.js', 'polyfills.js'];
@@ -232,6 +258,88 @@ function countPackageFiles(dir) {
232
258
  return { fileCountTotal, hasTests };
233
259
  }
234
260
 
261
/**
 * Pure classifier: is this a prebuilt native-binary platform shard (the kind that
 * hangs the sandbox install and always times out INCONCLUSIVE)? No I/O — the parsed
 * package.json manifest is passed in so this is unit-testable.
 *
 * A package is a shard when it declares a platform constraint (npm `os`/`cpu`) OR its
 * name matches the `*-<os>-<arch>` convention, AND it carries only a trivial amount of
 * JS (a real shard is a thin wrapper around a binary). hasLifecycleScripts is returned
 * separately so the caller can keep sandboxing shards that DO run install hooks — the
 * actual supply-chain vector.
 *
 * Fail-closed rules (when in doubt, report "not a shard" so the sandbox still runs):
 *  - A missing / unreadable / non-object manifest is never a shard. Without a manifest
 *    the lifecycle scripts are unknown, so a shard-like NAME alone must not be enough
 *    to make the package eligible for a sandbox skip.
 *  - A file count that is not a finite, non-negative number is not "low JS".
 *
 * @param {string} name - Package name
 * @param {number} fileCountTotal - JS/TS file count from countPackageFiles
 * @param {Object|null} manifest - Parsed package.json (or null if unreadable)
 * @returns {{ isShard: boolean, hasLifecycleScripts: boolean }}
 */
function classifyNativeShard(name, fileCountTotal, manifest) {
  const hasManifest =
    manifest !== null && typeof manifest === 'object' && !Array.isArray(manifest);
  const m = hasManifest ? manifest : {};
  const scripts = (m.scripts && typeof m.scripts === 'object') ? m.scripts : {};

  // Whitespace-only script bodies do not count as a hook.
  const declaresLifecycleScript = LIFECYCLE_SCRIPT_KEYS.some(
    k => typeof scripts[k] === 'string' && scripts[k].trim().length > 0
  );
  // `gypfile: true` declares a node-gyp build, which npm runs at install time even
  // when no install script is listed, so treat it like an install hook.
  // NOTE(review): a binding.gyp on disk without this flag is not visible to this pure
  // function; the caller would need to check for that file — confirm whether it should.
  const hasLifecycleScripts = declaresLifecycleScript || m.gypfile === true;

  if (!hasManifest) {
    return { isShard: false, hasLifecycleScripts };
  }

  const platformConstrained =
    (Array.isArray(m.os) && m.os.length > 0) ||
    (Array.isArray(m.cpu) && m.cpu.length > 0);
  const nameMatches = typeof name === 'string' && NATIVE_SHARD_NAME_RE.test(name);
  const lowJs =
    Number.isFinite(fileCountTotal) &&
    fileCountTotal >= 0 &&
    fileCountTotal <= NATIVE_SHARD_MAX_JS_FILES;

  return { isShard: (platformConstrained || nameMatches) && lowJs, hasLifecycleScripts };
}
291
+
292
/**
 * Pure decision: should the sandbox be skipped entirely for this package, BEFORE the
 * tier-level run/defer/gate logic? Returns the skip descriptor or null. No I/O — every
 * input is precomputed, so this is unit-testable without launching a real sandbox.
 *
 * Both skip paths are detection-safe:
 *  - skip-memory: only when memorySuppress already holds (the webhook would be
 *    suppressed anyway → the sandbox produces nothing actionable) AND this package was
 *    sandboxed within revalidateMs. A memory match that is stale (or never sandboxed)
 *    falls through, so canary coverage is revalidated on the revalidateMs cadence.
 *  - skip-native: only a native binary shard with NO lifecycle script, NO HIGH/CRITICAL
 *    finding and NO temporal signal.
 *
 * The "sandboxed recently" test requires the elapsed time to be a finite value in
 * [0, revalidateMs). A lastSandboxAt that lies in the future (clock moved backwards,
 * corrupted store) or is not a finite number is treated as "never sandboxed"; otherwise
 * a negative elapsed time would always compare below revalidateMs and the package
 * would be skipped indefinitely.
 *
 * @param {Object} ctx
 * @param {boolean} ctx.memorySuppress - shouldSuppressByMemory(name, result).suppress
 * @param {number} [ctx.lastSandboxAt] - last real sandbox timestamp from scan memory
 * @param {number} ctx.now - current time (ms)
 * @param {number} ctx.revalidateMs - SANDBOX_REVALIDATE_MS
 * @param {boolean} ctx.isNativeShard
 * @param {boolean} ctx.hasLifecycleScripts
 * @param {boolean} ctx.hasHighOrCritical
 * @param {boolean} ctx.hasTemporal
 * @returns {{ action: 'skip-memory'|'skip-native', reason: string } | null}
 */
function shouldSkipSandbox(ctx) {
  // A missing ctx means "no information" → nothing justifies a skip.
  const {
    memorySuppress, lastSandboxAt, now, revalidateMs,
    isNativeShard, hasLifecycleScripts, hasHighOrCritical, hasTemporal
  } = ctx || {};

  // (1) Memory match — skip only if we sandboxed it recently (else revalidate).
  if (memorySuppress) {
    // NaN when the timestamp is unusable; every comparison below is then false.
    const elapsedMs = Number.isFinite(lastSandboxAt) ? now - lastSandboxAt : NaN;
    const sandboxedRecently = elapsedMs >= 0 && elapsedMs < revalidateMs;
    if (sandboxedRecently) {
      const days = (elapsedMs / 86_400_000).toFixed(1);
      return { action: 'skip-memory', reason: `memory match, last sandbox ${days}d ago` };
    }
    // fall through — stale/never-sandboxed memory match revalidates via the normal path
  }

  // (2) Native binary shard — every guard rail must be clear.
  if (isNativeShard && !hasLifecycleScripts && !hasHighOrCritical && !hasTemporal) {
    return { action: 'skip-native', reason: 'native binary shard, no lifecycle' };
  }

  return null;
}
342
+
235
343
  /**
236
344
  * Run the static scan in a Worker thread with a hard timeout.
237
345
  * worker.terminate() calls V8::TerminateExecution which can interrupt
@@ -791,7 +899,35 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
791
899
  (tier === 2 && riskScore >= SANDBOX_SCORE_THRESHOLD && scanQueue.length < 50)
792
900
  );
793
901
 
794
- if (shouldSandbox) {
902
+ // Waste-cut: skip the sandbox (run AND defer) when re-running it yields no new
903
+ // verdict — a memory match the webhook would suppress anyway (dominant cost:
904
+ // restart-replay of the changes-stream backlog), or a native binary shard that
905
+ // just hangs the install. Both detection-safe (see shouldSkipSandbox). Cheap:
906
+ // one package.json read + a scan-memory lookup.
907
+ let shardManifest = null;
908
+ try {
909
+ shardManifest = JSON.parse(fs.readFileSync(path.join(extractedDir, 'package.json'), 'utf8'));
910
+ } catch { /* unreadable manifest → classifyNativeShard treats it as non-shard */ }
911
+ const { isShard: isNativeShard, hasLifecycleScripts: shardHasLifecycle } =
912
+ classifyNativeShard(name, fileCountTotal, shardManifest);
913
+ const memEntry = loadScanMemory()[name];
914
+ const sandboxSkip = (isSandboxEnabled() && sandboxAvailable) ? shouldSkipSandbox({
915
+ memorySuppress: shouldSuppressByMemory(name, result).suppress,
916
+ lastSandboxAt: memEntry && memEntry.lastSandboxAt,
917
+ now: Date.now(),
918
+ revalidateMs: SANDBOX_REVALIDATE_MS,
919
+ isNativeShard,
920
+ hasLifecycleScripts: shardHasLifecycle,
921
+ hasHighOrCritical: hasHighOrCriticalFinding,
922
+ hasTemporal: hasTemporalSignal
923
+ }) : null;
924
+
925
+ if (sandboxSkip) {
926
+ console.log(`[MONITOR] SANDBOX SKIP (${sandboxSkip.reason}): ${name}@${version}`);
927
+ stats.sandboxWasteSkipped = (stats.sandboxWasteSkipped || 0) + 1;
928
+ if (sandboxSkip.action === 'skip-memory') stats.sandboxSkipMemory = (stats.sandboxSkipMemory || 0) + 1;
929
+ else stats.sandboxSkipNative = (stats.sandboxSkipNative || 0) + 1;
930
+ } else if (shouldSandbox) {
795
931
  try {
796
932
  const canary = isCanaryEnabled();
797
933
  const maxRuns = tier === '1a' ? undefined : 1;
@@ -799,11 +935,13 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
799
935
  if (tier === '1a') {
800
936
  // T1a: mandatory sandbox — block-wait (high-confidence threats MUST get sandbox)
801
937
  console.log(`[MONITOR] SANDBOX: launching for ${name}@${version}${canary ? ' (canary: on)' : ''}...`);
938
+ markSandboxed(name); // stamp before the await: an aborted/inconclusive run still spent the time
802
939
  sandboxResult = await runSandbox(name, { canary, maxRuns, signal });
803
940
  } else if (tryAcquireSandboxSlot()) {
804
941
  // T1b/T2: non-blocking — slot acquired atomically, run with skipSemaphore
805
942
  const reason = tier === 2 ? ' (T2, queue low)' : ' (T1b, conditional)';
806
943
  console.log(`[MONITOR] SANDBOX${reason}: launching for ${name}@${version}${canary ? ' (canary: on)' : ''}...`);
944
+ markSandboxed(name); // stamp before the await: an aborted/inconclusive run still spent the time
807
945
  sandboxResult = await runSandbox(name, { canary, maxRuns, skipSemaphore: true, signal });
808
946
  } else {
809
947
  // T1b/T2: all sandbox slots busy — defer instead of blocking worker
@@ -1530,6 +1668,7 @@ module.exports = {
1530
1668
  FIRST_PUBLISH_SANDBOX_ENABLED,
1531
1669
  SANDBOX_SCORE_THRESHOLD,
1532
1670
  computeSandboxScoreThreshold,
1671
+ SANDBOX_REVALIDATE_MS,
1533
1672
  KNOWN_BUNDLED_FILES,
1534
1673
  KNOWN_BUNDLED_PATHS,
1535
1674
  ML_EXCLUDED_DIRS,
@@ -1550,6 +1689,8 @@ module.exports = {
1550
1689
  isBundledToolingOnly,
1551
1690
  recordTrainingSample,
1552
1691
  countPackageFiles,
1692
+ classifyNativeShard,
1693
+ shouldSkipSandbox,
1553
1694
  runScanInWorker,
1554
1695
  scanPackage,
1555
1696
  timeoutPromise,
@@ -308,7 +308,14 @@ function saveScanMemory() {
308
308
  */
309
309
  function recordScanMemory(name, score, types, hcTypes) {
310
310
  const store = loadScanMemory();
311
+ // Read-modify-write: preserve fields set out-of-band (notably lastSandboxAt,
312
+ // stamped by markSandboxed when a real sandbox runs) so a record at webhook time
313
+ // does NOT clobber the sandbox-revalidation timestamp the sandbox-skip decision
314
+ // reads. Without this, every webhook record would reset lastSandboxAt and the
315
+ // 7-day canary-revalidation cadence would never settle.
316
+ const prev = store[name] || {};
311
317
  store[name] = {
318
+ ...prev,
312
319
  score,
313
320
  types: types.sort(),
314
321
  hcTypes: hcTypes.sort(),
@@ -316,6 +323,24 @@ function recordScanMemory(name, score, types, hcTypes) {
316
323
  };
317
324
  }
318
325
 
326
/**
 * Stamp lastSandboxAt on a package's scan-memory entry — call when a real sandbox
 * run was just performed. The sandbox-skip decision (queue.js shouldSkipSandbox)
 * uses this to skip re-sandboxing a memory-matched package until SANDBOX_REVALIDATE_MS
 * has elapsed. Mutates the object returned by loadScanMemory(); it does not write to
 * disk itself. A timestamp is set too (only when the entry has none) so a
 * sandbox-before-first-scan entry still has a valid expiry/eviction key.
 *
 * Defensive rules:
 *  - A missing or empty name is ignored, so no "undefined" key is created.
 *  - `at` is used only when it is a finite, positive number. Anything else (omitted, 0,
 *    NaN, Infinity, a string, a negative value) falls back to Date.now(), so a bad
 *    argument cannot store a stamp the skip decision would misread.
 *  - The previous entry is read as an OWN property, so a package whose name collides
 *    with an Object.prototype member (e.g. "constructor") starts from a clean entry.
 *
 * @param {string} name - Package name
 * @param {number} [at] - Timestamp in ms (defaults to now)
 * @returns {void}
 */
function markSandboxed(name, at) {
  if (typeof name !== 'string' || name.length === 0) return;

  const store = loadScanMemory();
  const ts = (typeof at === 'number' && Number.isFinite(at) && at > 0) ? at : Date.now();
  const prev = Object.prototype.hasOwnProperty.call(store, name) && store[name]
    ? store[name]
    : {};

  // Spread first so every existing field survives; only lastSandboxAt is overwritten.
  store[name] = { ...prev, lastSandboxAt: ts, timestamp: prev.timestamp || ts };
}
343
+
319
344
  /**
320
345
  * Check if a webhook should be suppressed based on scan memory.
321
346
  * Returns { suppress: boolean, reason?: string }.
@@ -1416,6 +1441,7 @@ module.exports = {
1416
1441
  loadScanMemory,
1417
1442
  saveScanMemory,
1418
1443
  recordScanMemory,
1444
+ markSandboxed,
1419
1445
  shouldSuppressByMemory,
1420
1446
  loadTarballCacheIndex,
1421
1447
  saveTarballCacheIndex,