muaddib-scanner 2.11.60 → 2.11.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -18,7 +18,7 @@ const { runSandbox } = require('../sandbox/index.js');
|
|
|
18
18
|
const { isCanaryEnabled, TIER1_TYPES } = require('./classify.js');
|
|
19
19
|
const { getWebhookUrl, alertedPackageRules, persistAlert, buildAlertData } = require('./webhook.js');
|
|
20
20
|
const { sendWebhook } = require('../webhook.js');
|
|
21
|
-
const { atomicWriteFileSync } = require('./state.js');
|
|
21
|
+
const { atomicWriteFileSync, markSandboxed } = require('./state.js');
|
|
22
22
|
|
|
23
23
|
// ── Constants ──
|
|
24
24
|
const DEFERRED_QUEUE_MAX = 500;
|
|
@@ -200,6 +200,7 @@ async function processDeferredItem(stats) {
|
|
|
200
200
|
const canary = isCanaryEnabled();
|
|
201
201
|
// maxRuns=1: deferred items are T1b/T2, time bomb detection (3 runs) is a luxury.
|
|
202
202
|
// 90s instead of 270s per item → 3× faster deferred queue drain.
|
|
203
|
+
markSandboxed(item.name); // stamp for sandbox-revalidation cadence (matches the synchronous path)
|
|
203
204
|
sandboxResult = await runSandbox(item.name, { canary, skipSemaphore: true, maxRuns: 1, signal: ac.signal });
|
|
204
205
|
console.log(`[DEFERRED] SANDBOX COMPLETE: ${key} -> score=${sandboxResult.score}, severity=${sandboxResult.severity}`);
|
|
205
206
|
} catch (err) {
|
package/src/monitor/queue.js
CHANGED
|
@@ -33,7 +33,10 @@ const {
|
|
|
33
33
|
appendAlert,
|
|
34
34
|
getParisHour,
|
|
35
35
|
hasReportBeenSentToday,
|
|
36
|
-
MAX_DAILY_ALERTS
|
|
36
|
+
MAX_DAILY_ALERTS,
|
|
37
|
+
loadScanMemory,
|
|
38
|
+
shouldSuppressByMemory,
|
|
39
|
+
markSandboxed
|
|
37
40
|
} = require('./state.js');
|
|
38
41
|
|
|
39
42
|
// From ./classify.js
|
|
@@ -142,6 +145,29 @@ function computeSandboxScoreThreshold(envValue) {
|
|
|
142
145
|
}
|
|
143
146
|
const SANDBOX_SCORE_THRESHOLD = computeSandboxScoreThreshold(process.env.MUADDIB_SANDBOX_SCORE_THRESHOLD);
|
|
144
147
|
|
|
148
|
+
// --- Sandbox waste-cut (v2.11.6x): skip sandbox time that yields no new verdict ---
|
|
149
|
+
// Two skip paths, both detection-safe, applied BEFORE the tier sandbox decision:
|
|
150
|
+
// (1) memory match — re-sandboxing a package whose static result is equivalent to a
|
|
151
|
+
// remembered scan produces nothing the webhook wouldn't already memory-suppress.
|
|
152
|
+
// The dominant waste source is restart-replay: recentlyScanned is in-memory (lost on
|
|
153
|
+
// restart) but scan-memory persists 30d, so the changes-stream backlog gets
|
|
154
|
+
// re-sandboxed then suppressed. We skip, but re-sandbox at most once per
|
|
155
|
+
// SANDBOX_REVALIDATE_MS so runtime/canary coverage is retained on a slow cadence.
|
|
156
|
+
// (2) native binary shard — platform-specific prebuilt packages (os/cpu constrained or
|
|
157
|
+
// name like `*-linux-x64`) with trivial JS hang the sandbox install and always time
|
|
158
|
+
// out INCONCLUSIVE. Same guard rails as the large-low-signal skip (queue.js ~768):
|
|
159
|
+
// any lifecycle script, HIGH/CRITICAL finding, or temporal signal → sandbox runs.
|
|
160
|
+
const SANDBOX_REVALIDATE_MS = (() => {
|
|
161
|
+
const v = parseInt(process.env.MUADDIB_SANDBOX_REVALIDATE_MS, 10);
|
|
162
|
+
return Number.isFinite(v) && v >= 0 ? v : 7 * 24 * 60 * 60 * 1000; // default 7 days
|
|
163
|
+
})();
|
|
164
|
+
// npm platform-shard naming: <scope>/<pkg>-<os>-<arch>[-<libc/abi>] (esbuild/swc/turbo pattern).
|
|
165
|
+
const NATIVE_SHARD_NAME_RE = /-(linux|darwin|win32|freebsd|openbsd|android|sunos|aix)-(x64|arm64|arm|ia32|ppc64|s390x|riscv64|loong64|mips64el)(-(gnu|gnueabihf|musl|eabi|eabihf|msvc))?$/;
|
|
166
|
+
const LIFECYCLE_SCRIPT_KEYS = ['preinstall', 'install', 'postinstall', 'prepare', 'prepublish', 'prepublishOnly', 'preuninstall', 'uninstall', 'postuninstall'];
|
|
167
|
+
// A genuine prebuilt shard is a thin wrapper around a binary (index.js + index.d.ts at most).
|
|
168
|
+
// More JS than this means real logic → not a pure shard → don't skip.
|
|
169
|
+
const NATIVE_SHARD_MAX_JS_FILES = 3;
|
|
170
|
+
|
|
145
171
|
// --- Bundled tooling false-positive filter ---
|
|
146
172
|
|
|
147
173
|
const KNOWN_BUNDLED_FILES = ['yarn.js', 'webpack.js', 'terser.js', 'esbuild.js', 'polyfills.js'];
|
|
@@ -232,6 +258,88 @@ function countPackageFiles(dir) {
|
|
|
232
258
|
return { fileCountTotal, hasTests };
|
|
233
259
|
}
|
|
234
260
|
|
|
261
|
+
/**
 * Pure classifier: is this a prebuilt native-binary platform shard (the kind that
 * hangs the sandbox install and always times out INCONCLUSIVE)? No I/O — the parsed
 * package.json manifest is passed in so this is unit-testable. Mirrors the extracted
 * pure helpers computeWorkersToSpawn / computeTarget.
 *
 * A package is a shard when it declares a platform constraint (npm `os`/`cpu`) OR its
 * name matches the `*-<os>-<arch>` convention, AND it carries only a trivial amount of
 * JS (a real shard is a thin wrapper around a binary). hasLifecycleScripts is returned
 * separately so the caller can keep sandboxing shards that DO run install hooks — the
 * actual supply-chain vector.
 *
 * Fails closed on a missing manifest: when package.json could not be read or did not
 * parse to an object, the absence of lifecycle scripts cannot be verified, so the
 * package is never classified as a shard (a matching name alone must not be enough
 * to skip the sandbox).
 *
 * @param {string} name - Package name
 * @param {number} fileCountTotal - JS/TS file count from countPackageFiles
 * @param {Object|null} manifest - Parsed package.json (or null if unreadable)
 * @returns {{ isShard: boolean, hasLifecycleScripts: boolean }}
 */
function classifyNativeShard(name, fileCountTotal, manifest) {
  // No usable manifest → we cannot prove "no install hooks" → not a shard.
  if (!manifest || typeof manifest !== 'object' || Array.isArray(manifest)) {
    return { isShard: false, hasLifecycleScripts: false };
  }

  const scripts = (manifest.scripts && typeof manifest.scripts === 'object') ? manifest.scripts : {};
  // Only non-blank string entries count: an empty/whitespace hook runs nothing.
  const hasLifecycleScripts = LIFECYCLE_SCRIPT_KEYS.some(
    k => typeof scripts[k] === 'string' && scripts[k].trim().length > 0
  );

  const platformConstrained =
    (Array.isArray(manifest.os) && manifest.os.length > 0) ||
    (Array.isArray(manifest.cpu) && manifest.cpu.length > 0);
  const nameMatches = NATIVE_SHARD_NAME_RE.test(name || '');
  // More JS than the threshold means real logic → not a pure binary wrapper.
  const lowJs = (fileCountTotal || 0) <= NATIVE_SHARD_MAX_JS_FILES;

  return { isShard: (platformConstrained || nameMatches) && lowJs, hasLifecycleScripts };
}
|
|
291
|
+
|
|
292
|
+
/**
 * Pure decision: should the sandbox be skipped entirely for this package, BEFORE the
 * tier-level run/defer/gate logic? Returns the skip descriptor or null. No I/O — every
 * input is precomputed, so this is unit-testable without launching a real sandbox.
 *
 * Both skip paths are detection-safe:
 *  - skip-memory: only when shouldSuppressByMemory already holds (the webhook would be
 *    suppressed anyway → the sandbox produces nothing actionable) AND the package was
 *    sandboxed within revalidateMs. A memory match that is stale (or never sandboxed)
 *    falls through to run, so canary coverage is revalidated on the revalidateMs cadence.
 *    A lastSandboxAt that lies in the future (clock skew, corrupted scan memory) is not
 *    trusted either: it would otherwise read as "recent" for an unbounded time.
 *    New threat types / new HC types / score shift / IOC match all make memorySuppress
 *    false upstream → never skipped.
 *  - skip-native: only a native binary shard with NO lifecycle script, NO HIGH/CRITICAL
 *    finding and NO temporal signal — same guard rails as the large-low-signal skip.
 *
 * @param {Object} ctx
 * @param {boolean} ctx.memorySuppress - shouldSuppressByMemory(name, result).suppress
 * @param {number} [ctx.lastSandboxAt] - last real sandbox timestamp from scan memory
 * @param {number} ctx.now - current time (ms)
 * @param {number} ctx.revalidateMs - SANDBOX_REVALIDATE_MS
 * @param {boolean} ctx.isNativeShard
 * @param {boolean} ctx.hasLifecycleScripts
 * @param {boolean} ctx.hasHighOrCritical
 * @param {boolean} ctx.hasTemporal
 * @returns {{ action: 'skip-memory'|'skip-native', reason: string } | null}
 */
function shouldSkipSandbox(ctx) {
  const {
    memorySuppress, lastSandboxAt, now, revalidateMs,
    isNativeShard, hasLifecycleScripts, hasHighOrCritical, hasTemporal
  } = ctx;

  // (1) Memory match — skip only if we sandboxed it recently (else revalidate).
  if (memorySuppress && typeof lastSandboxAt === 'number') {
    const elapsedMs = now - lastSandboxAt;
    // elapsedMs < 0 → timestamp is ahead of the clock; NaN fails both comparisons.
    // Either way the stamp is unusable, so fall through and let the sandbox run.
    if (elapsedMs >= 0 && elapsedMs < revalidateMs) {
      const days = (elapsedMs / 86_400_000).toFixed(1);
      return { action: 'skip-memory', reason: `memory match, last sandbox ${days}d ago` };
    }
    // fall through — stale/never-sandboxed memory match revalidates via the normal path
  }

  // (2) Native binary shard — same guard rails as the large-low-signal skip.
  if (isNativeShard && !hasLifecycleScripts && !hasHighOrCritical && !hasTemporal) {
    return { action: 'skip-native', reason: 'native binary shard, no lifecycle' };
  }

  return null;
}
|
|
342
|
+
|
|
235
343
|
/**
|
|
236
344
|
* Run the static scan in a Worker thread with a hard timeout.
|
|
237
345
|
* worker.terminate() calls V8::TerminateExecution which can interrupt
|
|
@@ -791,7 +899,35 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
791
899
|
(tier === 2 && riskScore >= SANDBOX_SCORE_THRESHOLD && scanQueue.length < 50)
|
|
792
900
|
);
|
|
793
901
|
|
|
794
|
-
|
|
902
|
+
// Waste-cut: skip the sandbox (run AND defer) when re-running it yields no new
|
|
903
|
+
// verdict — a memory match the webhook would suppress anyway (dominant cost:
|
|
904
|
+
// restart-replay of the changes-stream backlog), or a native binary shard that
|
|
905
|
+
// just hangs the install. Both detection-safe (see shouldSkipSandbox). Cheap:
|
|
906
|
+
// one package.json read + a scan-memory lookup.
|
|
907
|
+
let shardManifest = null;
|
|
908
|
+
try {
|
|
909
|
+
shardManifest = JSON.parse(fs.readFileSync(path.join(extractedDir, 'package.json'), 'utf8'));
|
|
910
|
+
} catch { /* unreadable manifest → classifyNativeShard treats it as non-shard */ }
|
|
911
|
+
const { isShard: isNativeShard, hasLifecycleScripts: shardHasLifecycle } =
|
|
912
|
+
classifyNativeShard(name, fileCountTotal, shardManifest);
|
|
913
|
+
const memEntry = loadScanMemory()[name];
|
|
914
|
+
const sandboxSkip = (isSandboxEnabled() && sandboxAvailable) ? shouldSkipSandbox({
|
|
915
|
+
memorySuppress: shouldSuppressByMemory(name, result).suppress,
|
|
916
|
+
lastSandboxAt: memEntry && memEntry.lastSandboxAt,
|
|
917
|
+
now: Date.now(),
|
|
918
|
+
revalidateMs: SANDBOX_REVALIDATE_MS,
|
|
919
|
+
isNativeShard,
|
|
920
|
+
hasLifecycleScripts: shardHasLifecycle,
|
|
921
|
+
hasHighOrCritical: hasHighOrCriticalFinding,
|
|
922
|
+
hasTemporal: hasTemporalSignal
|
|
923
|
+
}) : null;
|
|
924
|
+
|
|
925
|
+
if (sandboxSkip) {
|
|
926
|
+
console.log(`[MONITOR] SANDBOX SKIP (${sandboxSkip.reason}): ${name}@${version}`);
|
|
927
|
+
stats.sandboxWasteSkipped = (stats.sandboxWasteSkipped || 0) + 1;
|
|
928
|
+
if (sandboxSkip.action === 'skip-memory') stats.sandboxSkipMemory = (stats.sandboxSkipMemory || 0) + 1;
|
|
929
|
+
else stats.sandboxSkipNative = (stats.sandboxSkipNative || 0) + 1;
|
|
930
|
+
} else if (shouldSandbox) {
|
|
795
931
|
try {
|
|
796
932
|
const canary = isCanaryEnabled();
|
|
797
933
|
const maxRuns = tier === '1a' ? undefined : 1;
|
|
@@ -799,11 +935,13 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
799
935
|
if (tier === '1a') {
|
|
800
936
|
// T1a: mandatory sandbox — block-wait (high-confidence threats MUST get sandbox)
|
|
801
937
|
console.log(`[MONITOR] SANDBOX: launching for ${name}@${version}${canary ? ' (canary: on)' : ''}...`);
|
|
938
|
+
markSandboxed(name); // stamp before the await: an aborted/inconclusive run still spent the time
|
|
802
939
|
sandboxResult = await runSandbox(name, { canary, maxRuns, signal });
|
|
803
940
|
} else if (tryAcquireSandboxSlot()) {
|
|
804
941
|
// T1b/T2: non-blocking — slot acquired atomically, run with skipSemaphore
|
|
805
942
|
const reason = tier === 2 ? ' (T2, queue low)' : ' (T1b, conditional)';
|
|
806
943
|
console.log(`[MONITOR] SANDBOX${reason}: launching for ${name}@${version}${canary ? ' (canary: on)' : ''}...`);
|
|
944
|
+
markSandboxed(name); // stamp before the await: an aborted/inconclusive run still spent the time
|
|
807
945
|
sandboxResult = await runSandbox(name, { canary, maxRuns, skipSemaphore: true, signal });
|
|
808
946
|
} else {
|
|
809
947
|
// T1b/T2: all sandbox slots busy — defer instead of blocking worker
|
|
@@ -1530,6 +1668,7 @@ module.exports = {
|
|
|
1530
1668
|
FIRST_PUBLISH_SANDBOX_ENABLED,
|
|
1531
1669
|
SANDBOX_SCORE_THRESHOLD,
|
|
1532
1670
|
computeSandboxScoreThreshold,
|
|
1671
|
+
SANDBOX_REVALIDATE_MS,
|
|
1533
1672
|
KNOWN_BUNDLED_FILES,
|
|
1534
1673
|
KNOWN_BUNDLED_PATHS,
|
|
1535
1674
|
ML_EXCLUDED_DIRS,
|
|
@@ -1550,6 +1689,8 @@ module.exports = {
|
|
|
1550
1689
|
isBundledToolingOnly,
|
|
1551
1690
|
recordTrainingSample,
|
|
1552
1691
|
countPackageFiles,
|
|
1692
|
+
classifyNativeShard,
|
|
1693
|
+
shouldSkipSandbox,
|
|
1553
1694
|
runScanInWorker,
|
|
1554
1695
|
scanPackage,
|
|
1555
1696
|
timeoutPromise,
|
package/src/monitor/state.js
CHANGED
|
@@ -308,7 +308,14 @@ function saveScanMemory() {
|
|
|
308
308
|
*/
|
|
309
309
|
function recordScanMemory(name, score, types, hcTypes) {
|
|
310
310
|
const store = loadScanMemory();
|
|
311
|
+
// Read-modify-write: preserve fields set out-of-band (notably lastSandboxAt,
|
|
312
|
+
// stamped by markSandboxed when a real sandbox runs) so a record at webhook time
|
|
313
|
+
// does NOT clobber the sandbox-revalidation timestamp the sandbox-skip decision
|
|
314
|
+
// reads. Without this, every webhook record would reset lastSandboxAt and the
|
|
315
|
+
// 7-day canary-revalidation cadence would never settle.
|
|
316
|
+
const prev = store[name] || {};
|
|
311
317
|
store[name] = {
|
|
318
|
+
...prev,
|
|
312
319
|
score,
|
|
313
320
|
types: types.sort(),
|
|
314
321
|
hcTypes: hcTypes.sort(),
|
|
@@ -316,6 +323,24 @@ function recordScanMemory(name, score, types, hcTypes) {
|
|
|
316
323
|
};
|
|
317
324
|
}
|
|
318
325
|
|
|
326
|
+
/**
 * Stamp lastSandboxAt on a package's scan-memory entry — call when a real sandbox
 * run was just performed. The sandbox-skip decision (queue.js shouldSkipSandbox)
 * uses this to skip re-sandboxing a memory-matched package until SANDBOX_REVALIDATE_MS
 * has elapsed: kills restart-replay / re-publish sandbox waste while retaining canary
 * coverage on a slow cadence. Mutates the in-memory cache; persisted by the next
 * saveScanMemory(). A timestamp is set too so a sandbox-before-first-scan entry still
 * has a valid expiry/eviction key.
 *
 * Only a finite, positive number is accepted for `at`; anything else (omitted, 0,
 * negative, NaN, Infinity, a string) falls back to the current time so the stored
 * stamp is always a number the skip decision can compare against.
 *
 * @param {string} name - Package name
 * @param {number} [at] - Timestamp in ms (defaults to now)
 */
function markSandboxed(name, at) {
  const store = loadScanMemory();
  const ts = (Number.isFinite(at) && at > 0) ? at : Date.now();
  const prev = store[name] || {};
  // Spread first so every existing field survives; keep the original timestamp if any.
  store[name] = { ...prev, lastSandboxAt: ts, timestamp: prev.timestamp || ts };
}
|
|
343
|
+
|
|
319
344
|
/**
|
|
320
345
|
* Check if a webhook should be suppressed based on scan memory.
|
|
321
346
|
* Returns { suppress: boolean, reason?: string }.
|
|
@@ -1416,6 +1441,7 @@ module.exports = {
|
|
|
1416
1441
|
loadScanMemory,
|
|
1417
1442
|
saveScanMemory,
|
|
1418
1443
|
recordScanMemory,
|
|
1444
|
+
markSandboxed,
|
|
1419
1445
|
shouldSuppressByMemory,
|
|
1420
1446
|
loadTarballCacheIndex,
|
|
1421
1447
|
saveTarballCacheIndex,
|