muaddib-scanner 2.11.53 → 2.11.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/bin/muaddib.js +1 -1
- package/package.json +4 -4
- package/{self-scan-v2.11.53.json → self-scan-v2.11.58.json} +2 -2
- package/src/commands/safe-install.js +0 -1
- package/src/index.js +1 -1
- package/src/integrations/maintainer-change.js +0 -1
- package/src/integrations/webhook.js +1 -3
- package/src/ioc/scraper.js +8 -69
- package/src/ml/classifier.js +3 -2
- package/src/ml/feature-extractor.js +1 -1
- package/src/ml/llm-detective.js +2 -2
- package/src/monitor/daemon.js +65 -15
- package/src/monitor/deferred-sandbox.js +8 -1
- package/src/monitor/ingestion.js +4 -4
- package/src/monitor/queue.js +92 -73
- package/src/monitor/state.js +2 -2
- package/src/monitor/webhook.js +9 -10
- package/src/output/cyclonedx.js +1 -1
- package/src/output/report.js +1 -1
- package/src/output/sarif.js +1 -1
- package/src/pipeline/executor.js +2 -2
- package/src/pipeline/processor.js +2 -2
- package/src/runtime/monitor-feed.js +0 -3
- package/src/sandbox/compound-triggers.js +2 -2
- package/src/sandbox/index.js +219 -104
- package/src/scanner/ai-config.js +1 -1
- package/src/scanner/ast-detectors/handle-assignment-expression.js +0 -1
- package/src/scanner/ast-detectors/handle-call-expression.js +2 -5
- package/src/scanner/ast-detectors/handle-variable-declarator.js +2 -2
- package/src/scanner/ast.js +2 -3
- package/src/scanner/dataflow.js +1 -2
- package/src/scanner/deobfuscate.js +1 -2
- package/src/scanner/entropy.js +0 -1
- package/src/scanner/github-actions.js +1 -1
- package/src/scanner/hash.js +1 -1
- package/src/scanner/module-graph/annotate-sinks.js +1 -2
- package/src/scanner/module-graph/annotate-tainted.js +1 -2
- package/src/scanner/module-graph/detect-callback-flows.js +0 -1
- package/src/scanner/module-graph/detect-cross-file.js +1 -1
- package/src/scanner/module-graph/detect-event-flows.js +1 -1
- package/src/scanner/module-graph/parse-utils.js +1 -2
- package/src/scanner/npm-registry.js +17 -12
- package/src/scanner/obfuscation.js +0 -1
- package/src/scanner/package.js +1 -1
- package/src/scanner/python-ast-detectors/handle-setup-call.js +0 -1
- package/src/scanner/reachability.js +1 -1
- package/src/scanner/shell.js +1 -1
- package/src/scanner/temporal-ast-diff.js +1 -2
- package/src/scanner/typosquat.js +3 -3
- package/src/scoring.js +1 -1
- package/src/shared/constants.js +1 -1
- package/src/shared/download.js +1 -0
- package/src/shared/http-limiter.js +7 -4
- package/src/utils.js +1 -1
package/README.md
CHANGED
|
@@ -303,7 +303,7 @@ These are the numbers a user gets when running `muaddib scan` against npm or PyP
|
|
|
303
303
|
| **FPR PyPI** (v2.11.48, first honest measurement) | **9.68%** (12/124 scanned, 132 total) | **Track D fixed the PyPI downloader** — removed `pip --no-binary :all:` flag (forced compile of wheel-only packages, timed out 38% of the time) + added `.whl` extraction via `extractArchive()`. Brought 42 previously-skipped giants (numpy/pandas/django/matplotlib/scikit-learn/...) into scope. All 12 FPs cluster at score 25-35: this is the cap-PyPI-35 artifact, not new rule misfires. Lifting the cap (Track E) would drop FPR PyPI to ≈0%. 8 residual fails are >500MB packages (torch, tensorflow, scipy, opencv-python, ansible…) hitting the 30s `PACK_TIMEOUT_MS`. |
|
|
304
304
|
| **ADR** (Adversarial + Holdout, v2.11.48) | **96.26%** (103/107) | 67 adversarial + 40 holdout, global threshold=20. Stable vs v2.10.95. |
|
|
305
305
|
|
|
306
|
-
**
|
|
306
|
+
**3969 tests** across 109 files. **262 rules** (257 RULES + 5 PARANOID — Track D added 3: AST-093, AST-094, COMPOUND-016).
|
|
307
307
|
|
|
308
308
|
**Known issues (v2.11.48):**
|
|
309
309
|
- *PyPI cap at 35/100*: Python samples are capped at `riskScore=35` even when `globalRiskScore=100`. Confirmed empirically — all 12 PyPI FPs at score 25-35 (flask 32, django 35, tornado 35, bottle 30, pandas 25, matplotlib 25, plotly 25, bokeh 25, pymongo 35, coverage 32, fabric 35, websockets 35). Lifting the cap will simultaneously drop FPR PyPI to ≈0% and unblock PyPI MALWARE detection at higher thresholds. Track E target.
|
package/bin/muaddib.js
CHANGED
|
@@ -383,7 +383,6 @@ if (command === 'version' || command === '--version' || command === '-v') {
|
|
|
383
383
|
});
|
|
384
384
|
} else if (isTemporal && isTest && testPkg.length > 0) {
|
|
385
385
|
const { detectSuddenLifecycleChange } = require('../src/temporal-analysis.js');
|
|
386
|
-
const pkgName = testPkg[testPkg.indexOf('--test') !== -1 ? testPkg.length - 1 : 0] || testPkg[0];
|
|
387
386
|
// Find the package name: it's the non-flag argument
|
|
388
387
|
const actualPkg = options.filter(o => !o.startsWith('-')).pop();
|
|
389
388
|
if (!actualPkg) {
|
|
@@ -756,6 +755,7 @@ if (command === 'version' || command === '--version' || command === '-v') {
|
|
|
756
755
|
const helpCmd = options.filter(o => !o.startsWith('-'))[0];
|
|
757
756
|
showHelp(helpCmd);
|
|
758
757
|
} else {
|
|
758
|
+
// eslint-disable-next-line no-control-regex -- strips control chars from untrusted command before display
|
|
759
759
|
console.log(`Unknown command: ${String(command).replace(/[\x00-\x1f\x7f-\x9f]/g, '')}`);
|
|
760
760
|
console.log('Type "muaddib help" to see available commands.');
|
|
761
761
|
process.exit(1);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "muaddib-scanner",
|
|
3
|
-
"version": "2.11.
|
|
3
|
+
"version": "2.11.58",
|
|
4
4
|
"description": "Supply-chain threat detection & response for npm & PyPI/Python",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -47,17 +47,17 @@
|
|
|
47
47
|
"node": ">=18.0.0"
|
|
48
48
|
},
|
|
49
49
|
"dependencies": {
|
|
50
|
-
"@inquirer/prompts": "8.
|
|
50
|
+
"@inquirer/prompts": "8.5.2",
|
|
51
51
|
"acorn": "8.16.0",
|
|
52
52
|
"acorn-walk": "8.3.5",
|
|
53
53
|
"adm-zip": "0.5.17",
|
|
54
|
-
"js-yaml": "4.
|
|
54
|
+
"js-yaml": "4.2.0",
|
|
55
55
|
"loadash": "^1.0.0",
|
|
56
56
|
"web-tree-sitter": "^0.26.9"
|
|
57
57
|
},
|
|
58
58
|
"devDependencies": {
|
|
59
59
|
"@eslint/js": "10.0.1",
|
|
60
|
-
"eslint": "10.4.
|
|
60
|
+
"eslint": "10.4.1",
|
|
61
61
|
"eslint-plugin-security": "^4.0.0",
|
|
62
62
|
"globals": "17.6.0"
|
|
63
63
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"target": "node_modules",
|
|
3
|
-
"timestamp": "2026-
|
|
3
|
+
"timestamp": "2026-06-04T21:33:40.755Z",
|
|
4
4
|
"threats": [
|
|
5
5
|
{
|
|
6
6
|
"type": "string_mutation_obfuscation",
|
|
@@ -1862,7 +1862,7 @@
|
|
|
1862
1862
|
"ajv/lib/ajv.js": 15837,
|
|
1863
1863
|
"ajv/scripts/bundle.js": 1795,
|
|
1864
1864
|
"debug/src/node.js": 4728,
|
|
1865
|
-
"eslint/bin/eslint.js":
|
|
1865
|
+
"eslint/bin/eslint.js": 6028,
|
|
1866
1866
|
"fast-json-stable-stringify/benchmark/index.js": 740,
|
|
1867
1867
|
"isexe/test/basic.js": 4996,
|
|
1868
1868
|
"keyv/src/index.js": 6603,
|
package/src/index.js
CHANGED
|
@@ -8,7 +8,7 @@ const { output } = require('./pipeline/outputter.js');
|
|
|
8
8
|
async function run(targetPath, options = {}) {
|
|
9
9
|
try {
|
|
10
10
|
// Phase 1: Initialization (validate, IOCs, config, Python detection)
|
|
11
|
-
const { pythonDeps,
|
|
11
|
+
const { pythonDeps, warnings } = await initialize(targetPath, options);
|
|
12
12
|
|
|
13
13
|
// Phase 2: Execute all scanners
|
|
14
14
|
const { threats, scannerErrors } = await execute(targetPath, options, pythonDeps, warnings);
|
|
@@ -126,7 +126,6 @@ async function detectMaintainerChange(packageName) {
|
|
|
126
126
|
|
|
127
127
|
// Build name sets for comparison
|
|
128
128
|
const previousNames = new Set(previousMaint.maintainers.map(m => m.name.toLowerCase()));
|
|
129
|
-
const currentNames = new Set(newestMaint.maintainers.map(m => m.name.toLowerCase()));
|
|
130
129
|
|
|
131
130
|
// Detect NEW_MAINTAINER: maintainers in newest that weren't in previous
|
|
132
131
|
for (const m of newestMaint.maintainers) {
|
|
@@ -86,7 +86,7 @@ async function sendWebhook(url, results, options = {}) {
|
|
|
86
86
|
resolvedAddress = ipv4Addresses[0] || null;
|
|
87
87
|
} catch (e) {
|
|
88
88
|
if (e.message.startsWith('Webhook blocked')) throw e;
|
|
89
|
-
throw new Error(`Webhook blocked: DNS resolution failed for ${urlObj.hostname}
|
|
89
|
+
throw new Error(`Webhook blocked: DNS resolution failed for ${urlObj.hostname}`, { cause: e });
|
|
90
90
|
}
|
|
91
91
|
|
|
92
92
|
// rawPayload: send the results object directly as the payload (for pre-built embeds)
|
|
@@ -403,7 +403,6 @@ function sendOnce(url, payload, resolvedAddress) {
|
|
|
403
403
|
};
|
|
404
404
|
|
|
405
405
|
const req = protocol.request(options, (res) => {
|
|
406
|
-
let data = '';
|
|
407
406
|
let size = 0;
|
|
408
407
|
res.on('data', chunk => {
|
|
409
408
|
size += chunk.length;
|
|
@@ -412,7 +411,6 @@ function sendOnce(url, payload, resolvedAddress) {
|
|
|
412
411
|
reject(new Error('Webhook response exceeded 1MB limit'));
|
|
413
412
|
return;
|
|
414
413
|
}
|
|
415
|
-
data += chunk;
|
|
416
414
|
});
|
|
417
415
|
res.on('end', () => {
|
|
418
416
|
if (res.statusCode >= 200 && res.statusCode < 300) {
|
package/src/ioc/scraper.js
CHANGED
|
@@ -22,7 +22,7 @@ const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
|
|
|
22
22
|
// whitespace, slashes, length > 100. The previous regex required first
|
|
23
23
|
// char in [1-9] after a '0' which broke ALL 0.x.y versions (false negative
|
|
24
24
|
// spam in scraper logs ; ~600 valid PyPI/npm versions wrongly skipped per scrape).
|
|
25
|
-
const VERSION_INVALID_CHARS = /[\s\\/'"`;|&$<>(){}
|
|
25
|
+
const VERSION_INVALID_CHARS = /[\s\\/'"`;|&$<>(){}[\]?]/;
|
|
26
26
|
function isValidVersion(version) {
|
|
27
27
|
if (!version || typeof version !== 'string') return false;
|
|
28
28
|
if (version === '*') return true;
|
|
@@ -31,7 +31,7 @@ function isValidVersion(version) {
|
|
|
31
31
|
if (VERSION_INVALID_CHARS.test(version)) return false;
|
|
32
32
|
// Must start with a digit (or 'v' prefix), and contain only word chars / . / + / -
|
|
33
33
|
if (!/^v?\d/.test(version)) return false;
|
|
34
|
-
return /^[\w
|
|
34
|
+
return /^[\w.+-]+$/.test(version);
|
|
35
35
|
}
|
|
36
36
|
// Backwards compat: keep VERSION_RE as a no-op test wrapper for any legacy
|
|
37
37
|
// caller that imports it. Prefer isValidVersion() in new code.
|
|
@@ -303,63 +303,6 @@ function fetchText(url, redirectCount = 0) {
|
|
|
303
303
|
});
|
|
304
304
|
}
|
|
305
305
|
|
|
306
|
-
function fetchBuffer(url, redirectCount = 0) {
|
|
307
|
-
return new Promise((resolve, reject) => {
|
|
308
|
-
const urlObj = new URL(url);
|
|
309
|
-
const reqOptions = {
|
|
310
|
-
hostname: urlObj.hostname,
|
|
311
|
-
path: urlObj.pathname + urlObj.search,
|
|
312
|
-
method: 'GET',
|
|
313
|
-
headers: {
|
|
314
|
-
'User-Agent': 'MUADDIB-Scanner/3.0'
|
|
315
|
-
}
|
|
316
|
-
};
|
|
317
|
-
|
|
318
|
-
const req = https.request(reqOptions, (res) => {
|
|
319
|
-
if ([301, 302, 307, 308].includes(res.statusCode)) {
|
|
320
|
-
res.resume(); // Drain response body before following redirect
|
|
321
|
-
if (redirectCount >= MAX_REDIRECTS) {
|
|
322
|
-
reject(new Error('Too many redirects'));
|
|
323
|
-
return;
|
|
324
|
-
}
|
|
325
|
-
const redirectUrl = res.headers.location;
|
|
326
|
-
if (!isAllowedRedirect(redirectUrl)) {
|
|
327
|
-
reject(new Error('Unauthorized redirect to: ' + redirectUrl));
|
|
328
|
-
return;
|
|
329
|
-
}
|
|
330
|
-
fetchBuffer(redirectUrl, redirectCount + 1).then(resolve).catch(reject);
|
|
331
|
-
return;
|
|
332
|
-
}
|
|
333
|
-
|
|
334
|
-
if (res.statusCode !== 200) {
|
|
335
|
-
res.resume(); // Drain response body on error
|
|
336
|
-
reject(new Error('HTTP ' + res.statusCode));
|
|
337
|
-
return;
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
const chunks = [];
|
|
341
|
-
let received = 0;
|
|
342
|
-
res.on('data', chunk => {
|
|
343
|
-
received += chunk.length;
|
|
344
|
-
if (received > MAX_RESPONSE_SIZE) {
|
|
345
|
-
req.destroy();
|
|
346
|
-
reject(new Error('Response exceeded maximum size'));
|
|
347
|
-
return;
|
|
348
|
-
}
|
|
349
|
-
chunks.push(chunk);
|
|
350
|
-
});
|
|
351
|
-
res.on('end', () => resolve(Buffer.concat(chunks)));
|
|
352
|
-
});
|
|
353
|
-
|
|
354
|
-
req.on('error', reject);
|
|
355
|
-
req.setTimeout(120000, () => {
|
|
356
|
-
req.destroy();
|
|
357
|
-
reject(new Error('Timeout'));
|
|
358
|
-
});
|
|
359
|
-
|
|
360
|
-
req.end();
|
|
361
|
-
});
|
|
362
|
-
}
|
|
363
306
|
|
|
364
307
|
/**
|
|
365
308
|
* Download a large file with spinner progress (npm/ora style).
|
|
@@ -367,6 +310,7 @@ function fetchBuffer(url, redirectCount = 0) {
|
|
|
367
310
|
*/
|
|
368
311
|
function fetchBufferWithProgress(url, label, redirectCount = 0) {
|
|
369
312
|
return new Promise((resolve, reject) => {
|
|
313
|
+
let spinner = null;
|
|
370
314
|
const urlObj = new URL(url);
|
|
371
315
|
const reqOptions = {
|
|
372
316
|
hostname: urlObj.hostname,
|
|
@@ -404,7 +348,7 @@ function fetchBufferWithProgress(url, label, redirectCount = 0) {
|
|
|
404
348
|
const chunks = [];
|
|
405
349
|
let received = 0;
|
|
406
350
|
|
|
407
|
-
|
|
351
|
+
spinner = new Spinner();
|
|
408
352
|
spinner.start('Downloading ' + label + '...');
|
|
409
353
|
|
|
410
354
|
res.on('data', (chunk) => {
|
|
@@ -432,12 +376,12 @@ function fetchBufferWithProgress(url, label, redirectCount = 0) {
|
|
|
432
376
|
});
|
|
433
377
|
|
|
434
378
|
req.on('error', (err) => {
|
|
435
|
-
spinner.fail('Download failed: ' + err.message);
|
|
379
|
+
if (spinner) spinner.fail('Download failed: ' + err.message);
|
|
436
380
|
reject(err);
|
|
437
381
|
});
|
|
438
382
|
req.setTimeout(300000, () => {
|
|
439
383
|
req.destroy();
|
|
440
|
-
spinner.fail('Download timed out');
|
|
384
|
+
if (spinner) spinner.fail('Download timed out');
|
|
441
385
|
reject(new Error('Timeout downloading ' + label));
|
|
442
386
|
});
|
|
443
387
|
|
|
@@ -850,7 +794,7 @@ async function scrapeOSVDataDump() {
|
|
|
850
794
|
// Track known IDs so OSSF can skip them
|
|
851
795
|
knownIds.add(vuln.id || path.basename(name, '.json'));
|
|
852
796
|
malCount++;
|
|
853
|
-
} catch
|
|
797
|
+
} catch {
|
|
854
798
|
console.warn(`[WARN] Skipping unparseable entry: ${name}`);
|
|
855
799
|
}
|
|
856
800
|
}
|
|
@@ -923,7 +867,7 @@ async function scrapeOSVPyPIDataDump() {
|
|
|
923
867
|
const parsed = parseOSVEntry(vuln, 'osv-malicious-pypi', 'PyPI');
|
|
924
868
|
for (const p of parsed) packages.push(p);
|
|
925
869
|
malCount++;
|
|
926
|
-
} catch
|
|
870
|
+
} catch {
|
|
927
871
|
console.warn(`[WARN] Skipping unparseable entry: ${name}`);
|
|
928
872
|
}
|
|
929
873
|
}
|
|
@@ -1143,8 +1087,6 @@ async function runScraper() {
|
|
|
1143
1087
|
// (used by `getSourceConfidence` for webhook gating).
|
|
1144
1088
|
let addedPackages = 0;
|
|
1145
1089
|
let upgradedPackages = 0;
|
|
1146
|
-
let skippedInvalid = 0;
|
|
1147
|
-
let skippedNeverWildcard = 0;
|
|
1148
1090
|
function appendSource(target, pkg) {
|
|
1149
1091
|
if (!Array.isArray(target.sources)) target.sources = [];
|
|
1150
1092
|
const newSrc = pkg.source || (pkg.freshness && pkg.freshness.source) || 'unknown';
|
|
@@ -1166,12 +1108,10 @@ async function runScraper() {
|
|
|
1166
1108
|
}
|
|
1167
1109
|
for (const pkg of allPackages) {
|
|
1168
1110
|
if (!validateIOCEntry(pkg.name, pkg.version, 'npm')) {
|
|
1169
|
-
skippedInvalid++;
|
|
1170
1111
|
continue;
|
|
1171
1112
|
}
|
|
1172
1113
|
// Skip wildcard entries for packages that must stay version-specific
|
|
1173
1114
|
if (pkg.version === '*' && NEVER_WILDCARD.has(pkg.name)) {
|
|
1174
|
-
skippedNeverWildcard++;
|
|
1175
1115
|
continue;
|
|
1176
1116
|
}
|
|
1177
1117
|
const key = pkg.name + '@' + pkg.version;
|
|
@@ -1218,7 +1158,6 @@ async function runScraper() {
|
|
|
1218
1158
|
const allPyPIPackages = pypiPackages.concat(aikidoResult.pypi_packages || [], osmResult.pypi_packages || []);
|
|
1219
1159
|
for (const pkg of allPyPIPackages) {
|
|
1220
1160
|
if (!validateIOCEntry(pkg.name, pkg.version, 'pypi')) {
|
|
1221
|
-
skippedInvalid++;
|
|
1222
1161
|
continue;
|
|
1223
1162
|
}
|
|
1224
1163
|
const key = pkg.name + '@' + pkg.version;
|
package/src/ml/classifier.js
CHANGED
|
@@ -341,8 +341,9 @@ function classifyPackage(result, meta) {
|
|
|
341
341
|
const bundlerResult = predictBundler(bundlerVec);
|
|
342
342
|
// Log-only: record prediction for retraining validation
|
|
343
343
|
const roundedP = Math.round(bundlerResult.probability * 1000) / 1000;
|
|
344
|
-
// When retrained and validated,
|
|
345
|
-
|
|
344
|
+
// When retrained and validated, set BUNDLER_FILTER_ENABLED to true.
|
|
345
|
+
const BUNDLER_FILTER_ENABLED = false;
|
|
346
|
+
if (BUNDLER_FILTER_ENABLED && bundlerResult.prediction === 'clean') {
|
|
346
347
|
return {
|
|
347
348
|
prediction: 'fp_bundler',
|
|
348
349
|
probability: roundedP,
|
|
@@ -602,7 +602,7 @@ const F9_INFRA_KEYS = new Set([
|
|
|
602
602
|
|
|
603
603
|
// Credential file paths that a malicious MCP dropper would harvest.
|
|
604
604
|
// Appearance in any threat message disqualifies F9.
|
|
605
|
-
const F9_CREDENTIAL_FILE_RE = /\.npmrc\b|\.aws[
|
|
605
|
+
const F9_CREDENTIAL_FILE_RE = /\.npmrc\b|\.aws[/\\](?:credentials|config)\b|\bid_rsa\b|\bid_ed25519\b|\.ssh[/\\]|\.kube[/\\]config\b|\.docker[/\\]config\b|\.netrc\b|\.git-credentials\b|wallet\.dat\b|\bsecret_token\b/i;
|
|
606
606
|
|
|
607
607
|
// v2.11.31 F14: split exfil types into HARD (real malware signals) vs
|
|
608
608
|
// SOFT (compound/intent threats that legitimately fire on AI proxies +
|
package/src/ml/llm-detective.js
CHANGED
|
@@ -459,7 +459,7 @@ async function callAnthropicAPI(system, messages) {
|
|
|
459
459
|
} catch (err) {
|
|
460
460
|
clearTimeout(timeout);
|
|
461
461
|
if (err.name === 'AbortError') {
|
|
462
|
-
throw new Error(`API timeout (${LLM_TIMEOUT_MS}ms)
|
|
462
|
+
throw new Error(`API timeout (${LLM_TIMEOUT_MS}ms)`, { cause: err });
|
|
463
463
|
}
|
|
464
464
|
if (attempt < maxAttempts - 1 && err.message && /ECONNRESET|ETIMEDOUT|ENOTFOUND/.test(err.message)) {
|
|
465
465
|
await new Promise(r => setTimeout(r, 2000));
|
|
@@ -553,7 +553,7 @@ function parseResponse(text) {
|
|
|
553
553
|
* @returns {Promise<Object|null>} verdict object or null on skip/error
|
|
554
554
|
*/
|
|
555
555
|
async function investigatePackage(extractedDir, scanResult, options = {}) {
|
|
556
|
-
const { name, version, ecosystem, npmRegistryMeta
|
|
556
|
+
const { name, version, ecosystem, npmRegistryMeta } = options;
|
|
557
557
|
|
|
558
558
|
// Guard rails
|
|
559
559
|
if (!isLlmEnabled()) {
|
package/src/monitor/daemon.js
CHANGED
|
@@ -3,14 +3,14 @@ const fs = require('fs');
|
|
|
3
3
|
const path = require('path');
|
|
4
4
|
const os = require('os');
|
|
5
5
|
const v8 = require('v8');
|
|
6
|
-
const { isDockerAvailable, SANDBOX_CONCURRENCY_MAX } = require('../sandbox/index.js');
|
|
6
|
+
const { isDockerAvailable, SANDBOX_CONCURRENCY_MAX, killAllSandboxContainers } = require('../sandbox/index.js');
|
|
7
7
|
const { setVerboseMode, isSandboxEnabled, isCanaryEnabled, isLlmDetectiveEnabled, getLlmDetectiveMode, DOWNLOADS_CACHE_TTL } = require('./classify.js');
|
|
8
8
|
const { loadState, saveState, loadDailyStats, saveDailyStats, purgeTarballCache, getParisHour, atomicWriteFileSync, saveNpmSeq, ALERTS_FILE, runStateMigrations } = require('./state.js');
|
|
9
9
|
const { isTemporalEnabled, isTemporalAstEnabled, isTemporalPublishEnabled, isTemporalMaintainerEnabled } = require('./temporal.js');
|
|
10
|
-
const { pendingGrouped, flushScopeGroup, sendDailyReport, DAILY_REPORT_HOUR, alertedPackageRules } = require('./webhook.js');
|
|
10
|
+
const { pendingGrouped, flushScopeGroup, sendDailyReport, DAILY_REPORT_HOUR, alertedPackageRules, ALERTED_PACKAGES_MAX: MAX_ALERTED_PACKAGES } = require('./webhook.js');
|
|
11
11
|
const { poll } = require('./ingestion.js');
|
|
12
|
-
const {
|
|
13
|
-
const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY
|
|
12
|
+
const { ensureWorkers, drainWorkers, getTargetConcurrency, setTargetConcurrency, getActiveWorkers, terminateAllWorkers } = require('./queue.js');
|
|
13
|
+
const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY } = require('./adaptive-concurrency.js');
|
|
14
14
|
const { startHealthcheck } = require('./healthcheck.js');
|
|
15
15
|
const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
|
|
16
16
|
const { cleanupOldArchives, getRetentionDays, startPeriodicCleanup } = require('./tarball-archive.js');
|
|
@@ -65,6 +65,16 @@ const MEMORY_THRESHOLD_ELEVATED = 0.75;
|
|
|
65
65
|
const MEMORY_THRESHOLD_HIGH = 0.85;
|
|
66
66
|
const MEMORY_THRESHOLD_CRITICAL = 0.90;
|
|
67
67
|
const MEMORY_THRESHOLD_EMERGENCY = 0.92;
|
|
68
|
+
// RSS budget (OOM fix). The heap thresholds above miss the real failure mode: the
|
|
69
|
+
// process dies from total RSS (off-heap — worker isolates, gVisor sandboxes, tarball
|
|
70
|
+
// buffers) while heapUsed/heap_size_limit sits at ~20%. Gate on
|
|
71
|
+
// process.memoryUsage().rss against an absolute budget so EMERGENCY fires before the
|
|
72
|
+
// kernel OOM-killer. Default 8500MB on the 11.7GB VPS (~3GB headroom for
|
|
73
|
+
// docker / gVisor / kernel). Override via MUADDIB_RSS_LIMIT_MB.
|
|
74
|
+
const RSS_LIMIT_MB = (() => {
|
|
75
|
+
const parsed = parseInt(process.env.MUADDIB_RSS_LIMIT_MB, 10);
|
|
76
|
+
return Number.isFinite(parsed) && parsed > 0 ? parsed : 8500;
|
|
77
|
+
})();
|
|
68
78
|
// When truncating queue under EMERGENCY, keep the N most recent items.
|
|
69
79
|
// These are the newest packages — most likely to still be on npm for re-scan.
|
|
70
80
|
const EMERGENCY_QUEUE_KEEP = 500;
|
|
@@ -293,7 +303,9 @@ function checkDiskSpace() {
|
|
|
293
303
|
// --- Memory management ---
|
|
294
304
|
|
|
295
305
|
const MAX_RECENTLY_SCANNED = 50_000;
|
|
296
|
-
|
|
306
|
+
// MAX_ALERTED_PACKAGES is imported from webhook.js (single source of truth — the
|
|
307
|
+
// alertedPackageRules Map lives there and FIFO-caps itself at insert with the same value).
|
|
308
|
+
const MAX_DOWNLOADS_CACHE = 20_000; // hard size cap on top of the 24h TTL (bounded resource)
|
|
297
309
|
|
|
298
310
|
/**
|
|
299
311
|
* Compute current memory pressure level from the worse of V8 heap usage and process RSS.
|
|
@@ -310,23 +322,30 @@ const MAX_ALERTED_PACKAGES = 5_000;
|
|
|
310
322
|
* - With --max-old-space-size=3072: ~3264MB (3072 + new space overhead)
|
|
311
323
|
* - Without the flag: ~4288MB (V8 default on 64-bit)
|
|
312
324
|
*/
|
|
313
|
-
function computeMemoryPressure() {
|
|
314
|
-
const mem = process.memoryUsage();
|
|
325
|
+
function computeMemoryPressure(memSample = null, rssLimitMb = RSS_LIMIT_MB) {
|
|
326
|
+
const mem = memSample || process.memoryUsage();
|
|
315
327
|
const heapLimit = v8.getHeapStatistics().heap_size_limit;
|
|
316
328
|
const ratio = heapLimit > 0 ? mem.heapUsed / heapLimit : 0;
|
|
329
|
+
const rssLimitBytes = rssLimitMb * 1024 * 1024;
|
|
330
|
+
const rssRatio = rssLimitBytes > 0 ? mem.rss / rssLimitBytes : 0;
|
|
317
331
|
|
|
318
|
-
|
|
332
|
+
// Pressure is the WORSE of heap and RSS. The RSS arm catches the off-heap leak
|
|
333
|
+
// that the heap ratio is structurally blind to (heap sat at ~20% during every OOM
|
|
334
|
+
// while RSS climbed to 10.3GB). `ratio` stays the heap ratio for backward compat.
|
|
335
|
+
const worst = Math.max(ratio, rssRatio);
|
|
336
|
+
|
|
337
|
+
if (worst >= MEMORY_THRESHOLD_EMERGENCY) {
|
|
319
338
|
_memoryPressureLevel = MEMORY_PRESSURE_LEVELS.EMERGENCY;
|
|
320
|
-
} else if (
|
|
339
|
+
} else if (worst >= MEMORY_THRESHOLD_CRITICAL) {
|
|
321
340
|
_memoryPressureLevel = MEMORY_PRESSURE_LEVELS.CRITICAL;
|
|
322
|
-
} else if (
|
|
341
|
+
} else if (worst >= MEMORY_THRESHOLD_HIGH) {
|
|
323
342
|
_memoryPressureLevel = MEMORY_PRESSURE_LEVELS.HIGH;
|
|
324
|
-
} else if (
|
|
343
|
+
} else if (worst >= MEMORY_THRESHOLD_ELEVATED) {
|
|
325
344
|
_memoryPressureLevel = MEMORY_PRESSURE_LEVELS.ELEVATED;
|
|
326
345
|
} else {
|
|
327
346
|
_memoryPressureLevel = MEMORY_PRESSURE_LEVELS.NONE;
|
|
328
347
|
}
|
|
329
|
-
return { level: _memoryPressureLevel, mem, ratio };
|
|
348
|
+
return { level: _memoryPressureLevel, mem, ratio, rssRatio };
|
|
330
349
|
}
|
|
331
350
|
|
|
332
351
|
/**
|
|
@@ -362,6 +381,12 @@ function pruneMemoryCaches(recentlyScanned, downloadsCache, alertedPackageRules)
|
|
|
362
381
|
pruned++;
|
|
363
382
|
}
|
|
364
383
|
}
|
|
384
|
+
// 2b. downloadsCache — hard size cap (FIFO) on top of TTL. A Map preserves
|
|
385
|
+
// insertion order, so the first key is the oldest (bounded resource).
|
|
386
|
+
while (downloadsCache.size > MAX_DOWNLOADS_CACHE) {
|
|
387
|
+
downloadsCache.delete(downloadsCache.keys().next().value);
|
|
388
|
+
pruned++;
|
|
389
|
+
}
|
|
365
390
|
|
|
366
391
|
// 3. alertedPackageRules — cap size
|
|
367
392
|
if (alertedPackageRules.size > MAX_ALERTED_PACKAGES) {
|
|
@@ -394,6 +419,12 @@ function pruneMemoryCaches(recentlyScanned, downloadsCache, alertedPackageRules)
|
|
|
394
419
|
*/
|
|
395
420
|
function handleMemoryPressure(level, ratio, recentlyScanned, downloadsCache, scanQueue) {
|
|
396
421
|
const pct = (ratio * 100).toFixed(0);
|
|
422
|
+
// Structured summary of what the breaker actually did this tick. Returned (the poll loop
|
|
423
|
+
// at the call site ignores it) so the reclaim is observable to callers and tests without
|
|
424
|
+
// scraping console output — CLAUDE.md §3 "Toujours logger un resume". The two kill fields
|
|
425
|
+
// stay `undefined` until the EMERGENCY branch sets them, so a reader can distinguish
|
|
426
|
+
// "reclaim never ran" (undefined) from "ran, nothing to free" (0) from "reclaim threw" (-1).
|
|
427
|
+
const summary = { level, cachesCleared: false, queueDropped: 0, deferredDropped: 0 };
|
|
397
428
|
|
|
398
429
|
// HIGH (85%+): clear auxiliary caches — same as old emergency prune
|
|
399
430
|
if (level >= MEMORY_PRESSURE_LEVELS.HIGH) {
|
|
@@ -401,6 +432,7 @@ function handleMemoryPressure(level, ratio, recentlyScanned, downloadsCache, sca
|
|
|
401
432
|
recentlyScanned.clear();
|
|
402
433
|
downloadsCache.clear();
|
|
403
434
|
alertedPackageRules.clear();
|
|
435
|
+
summary.cachesCleared = true;
|
|
404
436
|
}
|
|
405
437
|
|
|
406
438
|
// CRITICAL (90%+): clear scanner caches, force GC
|
|
@@ -416,7 +448,7 @@ function handleMemoryPressure(level, ratio, recentlyScanned, downloadsCache, sca
|
|
|
416
448
|
try { clearFileListCache(); } catch {}
|
|
417
449
|
try { clearASTCache(); } catch {}
|
|
418
450
|
// pendingGrouped webhook buffers
|
|
419
|
-
for (const [
|
|
451
|
+
for (const [, group] of pendingGrouped) {
|
|
420
452
|
clearTimeout(group.timer);
|
|
421
453
|
}
|
|
422
454
|
pendingGrouped.clear();
|
|
@@ -438,18 +470,34 @@ function handleMemoryPressure(level, ratio, recentlyScanned, downloadsCache, sca
|
|
|
438
470
|
const dropped = queueBefore - EMERGENCY_QUEUE_KEEP;
|
|
439
471
|
// splice from the front: older items were pushed first
|
|
440
472
|
scanQueue.splice(0, dropped);
|
|
473
|
+
summary.queueDropped = dropped;
|
|
441
474
|
console.error(`[MONITOR] MEMORY EMERGENCY: heap at ${pct}% — truncated queue ${queueBefore} → ${scanQueue.length} (dropped ${dropped} oldest items)`);
|
|
442
475
|
}
|
|
443
476
|
// Clear deferred sandbox queue (holds full staticResult objects)
|
|
444
477
|
const deferredDropped = clearDeferredQueue();
|
|
478
|
+
summary.deferredDropped = deferredDropped;
|
|
445
479
|
if (deferredDropped > 0) {
|
|
446
480
|
console.error(`[MONITOR] MEMORY EMERGENCY: cleared ${deferredDropped} deferred sandbox items`);
|
|
447
481
|
}
|
|
482
|
+
// Free the off-heap leak that queue truncation can't touch: orphaned sandbox
|
|
483
|
+
// containers (gVisor runsc survives `docker kill`) and wedged scan workers.
|
|
484
|
+
// Under a real RSS leak this — not the queue splice — is what reclaims memory.
|
|
485
|
+
try {
|
|
486
|
+
const killed = killAllSandboxContainers();
|
|
487
|
+
summary.containersKilled = killed;
|
|
488
|
+
if (killed > 0) console.error(`[MONITOR] MEMORY EMERGENCY: force-removed ${killed} sandbox container(s)`);
|
|
489
|
+
} catch (err) { summary.containersKilled = -1; console.error(`[MONITOR] EMERGENCY container kill failed: ${err.message}`); }
|
|
490
|
+
try {
|
|
491
|
+
const terminated = terminateAllWorkers();
|
|
492
|
+
summary.workersTerminated = terminated;
|
|
493
|
+
if (terminated > 0) console.error(`[MONITOR] MEMORY EMERGENCY: terminated ${terminated} scan worker(s)`);
|
|
494
|
+
} catch (err) { summary.workersTerminated = -1; console.error(`[MONITOR] EMERGENCY worker terminate failed: ${err.message}`); }
|
|
448
495
|
// Second GC pass after freeing queue + deferred references
|
|
449
496
|
if (global.gc) {
|
|
450
497
|
global.gc();
|
|
451
498
|
}
|
|
452
499
|
}
|
|
500
|
+
return summary;
|
|
453
501
|
}
|
|
454
502
|
|
|
455
503
|
function reportStats(stats) {
|
|
@@ -753,7 +801,7 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
753
801
|
// computeMemoryPressure() is cheap (~0.1ms). Running every 2s ensures fast
|
|
754
802
|
// reaction to memory spikes — the 2026-04-13 incident showed that checking
|
|
755
803
|
// every 5min is too slow (250 packages ingested between checks).
|
|
756
|
-
const { level: pressureLevel, mem: currentMem, ratio: heapRatio } = computeMemoryPressure();
|
|
804
|
+
const { level: pressureLevel, mem: currentMem, ratio: heapRatio, rssRatio } = computeMemoryPressure();
|
|
757
805
|
|
|
758
806
|
// Top up workers ONLY when memory pressure is below HIGH.
|
|
759
807
|
// At HIGH+, existing workers continue (they'll finish or timeout) but no new
|
|
@@ -775,7 +823,7 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
775
823
|
const rssMB = (currentMem.rss / 1024 / 1024).toFixed(0);
|
|
776
824
|
const pctUsed = (heapRatio * 100).toFixed(0);
|
|
777
825
|
const levelName = Object.keys(MEMORY_PRESSURE_LEVELS).find(k => MEMORY_PRESSURE_LEVELS[k] === pressureLevel) || 'UNKNOWN';
|
|
778
|
-
console.log(`[MONITOR] MEMORY: heap=${heapUsedMB}MB/${heapLimitMB}MB (${pctUsed}%), rss=${rssMB}MB, queue=${scanQueue.length}, dedup=${recentlyScanned.size}, downloads=${downloadsCache.size}, alerts=${alertedPackageRules.size}, dailyAlerts=${dailyAlerts.length}, pressure=${levelName}`);
|
|
826
|
+
console.log(`[MONITOR] MEMORY: heap=${heapUsedMB}MB/${heapLimitMB}MB (${pctUsed}%), rss=${rssMB}MB (${(rssRatio * 100).toFixed(0)}%/${RSS_LIMIT_MB}MB), queue=${scanQueue.length}, dedup=${recentlyScanned.size}, downloads=${downloadsCache.size}, alerts=${alertedPackageRules.size}, dailyAlerts=${dailyAlerts.length}, pressure=${levelName}`);
|
|
779
827
|
|
|
780
828
|
// Graduated response at HIGH+
|
|
781
829
|
if (pressureLevel >= MEMORY_PRESSURE_LEVELS.HIGH) {
|
|
@@ -844,6 +892,7 @@ module.exports = {
|
|
|
844
892
|
pruneMemoryCaches,
|
|
845
893
|
MAX_RECENTLY_SCANNED,
|
|
846
894
|
MAX_ALERTED_PACKAGES,
|
|
895
|
+
MAX_DOWNLOADS_CACHE,
|
|
847
896
|
// Memory circuit breaker
|
|
848
897
|
computeMemoryPressure,
|
|
849
898
|
getMemoryPressureLevel,
|
|
@@ -853,6 +902,7 @@ module.exports = {
|
|
|
853
902
|
MEMORY_THRESHOLD_HIGH,
|
|
854
903
|
MEMORY_THRESHOLD_CRITICAL,
|
|
855
904
|
MEMORY_THRESHOLD_EMERGENCY,
|
|
905
|
+
RSS_LIMIT_MB,
|
|
856
906
|
EMERGENCY_QUEUE_KEEP,
|
|
857
907
|
MEMORY_LOG_INTERVAL_NORMAL,
|
|
858
908
|
MEMORY_LOG_INTERVAL_PRESSURE
|
|
@@ -32,6 +32,10 @@ const DEFERRED_STATE_FILE = path.join(__dirname, '..', '..', 'data', 'deferred-q
|
|
|
32
32
|
// slot. HIGH=10 pts is the intended T1b floor — values below 5 are LOW-only
|
|
33
33
|
// aggregates which carry no actionable sandbox signal.
|
|
34
34
|
const DEFERRED_MIN_SCORE = 5;
|
|
35
|
+
// Hard ceiling on a single deferred sandbox run so the dedicated slot
|
|
36
|
+
// (_deferredSlotBusy) can never wedge. maxRuns=1 self-bounds at ~SINGLE_RUN_TIMEOUT
|
|
37
|
+
// (90s) + the sandbox watchdog grace; this AbortController is belt-and-suspenders.
|
|
38
|
+
const DEFERRED_SANDBOX_TIMEOUT_MS = 150_000;
|
|
35
39
|
|
|
36
40
|
// ── Mutable state ──
|
|
37
41
|
const _deferredQueue = [];
|
|
@@ -190,11 +194,13 @@ async function processDeferredItem(stats) {
|
|
|
190
194
|
// 4. Run sandbox on dedicated slot (bypasses shared semaphore)
|
|
191
195
|
_deferredSlotBusy = true;
|
|
192
196
|
let sandboxResult;
|
|
197
|
+
const ac = new AbortController();
|
|
198
|
+
const deadline = setTimeout(() => ac.abort(), DEFERRED_SANDBOX_TIMEOUT_MS);
|
|
193
199
|
try {
|
|
194
200
|
const canary = isCanaryEnabled();
|
|
195
201
|
// maxRuns=1: deferred items are T1b/T2, time bomb detection (3 runs) is a luxury.
|
|
196
202
|
// 90s instead of 270s per item → 3× faster deferred queue drain.
|
|
197
|
-
sandboxResult = await runSandbox(item.name, { canary, skipSemaphore: true, maxRuns: 1 });
|
|
203
|
+
sandboxResult = await runSandbox(item.name, { canary, skipSemaphore: true, maxRuns: 1, signal: ac.signal });
|
|
198
204
|
console.log(`[DEFERRED] SANDBOX COMPLETE: ${key} -> score=${sandboxResult.score}, severity=${sandboxResult.severity}`);
|
|
199
205
|
} catch (err) {
|
|
200
206
|
console.error(`[DEFERRED] SANDBOX ERROR: ${key} — ${err.message}`);
|
|
@@ -210,6 +216,7 @@ async function processDeferredItem(stats) {
|
|
|
210
216
|
}
|
|
211
217
|
return null;
|
|
212
218
|
} finally {
|
|
219
|
+
clearTimeout(deadline);
|
|
213
220
|
_deferredSlotBusy = false;
|
|
214
221
|
}
|
|
215
222
|
|
package/src/monitor/ingestion.js
CHANGED
|
@@ -11,7 +11,7 @@ const https = require('https');
|
|
|
11
11
|
const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
|
|
12
12
|
const { loadCachedIOCs } = require('../ioc/updater.js');
|
|
13
13
|
const {
|
|
14
|
-
|
|
14
|
+
saveNpmSeq, CHANGES_STREAM_URL, CHANGES_LIMIT, CHANGES_CATCHUP_MAX,
|
|
15
15
|
savePypiSerial, PYPI_XMLRPC_URL, PYPI_CATCHUP_MAX
|
|
16
16
|
} = require('./state.js');
|
|
17
17
|
const { sendIOCPreAlert, sendCampaignPreAlert } = require('./webhook.js');
|
|
@@ -31,7 +31,7 @@ function matchCampaignPattern(name) {
|
|
|
31
31
|
}
|
|
32
32
|
return null;
|
|
33
33
|
}
|
|
34
|
-
const { evaluateCacheTrigger,
|
|
34
|
+
const { evaluateCacheTrigger, downloadsCache, DOWNLOADS_CACHE_TTL } = require('./classify.js');
|
|
35
35
|
|
|
36
36
|
const SELF_PACKAGE_NAME = require('../../package.json').name;
|
|
37
37
|
|
|
@@ -175,7 +175,7 @@ async function getPyPITarballUrl(packageName, packageVersion = '') {
|
|
|
175
175
|
try {
|
|
176
176
|
data = JSON.parse(body);
|
|
177
177
|
} catch (e) {
|
|
178
|
-
throw new Error(`Invalid JSON from PyPI for ${packageName}: ${e.message}
|
|
178
|
+
throw new Error(`Invalid JSON from PyPI for ${packageName}: ${e.message}`, { cause: e });
|
|
179
179
|
}
|
|
180
180
|
|
|
181
181
|
const latestVersion = (data.info && data.info.version) || '';
|
|
@@ -424,7 +424,7 @@ async function getNpmLatestTarball(packageName) {
|
|
|
424
424
|
try {
|
|
425
425
|
packument = JSON.parse(body);
|
|
426
426
|
} catch (e) {
|
|
427
|
-
throw new Error(`Invalid JSON from npm registry for ${packageName}: ${e.message}
|
|
427
|
+
throw new Error(`Invalid JSON from npm registry for ${packageName}: ${e.message}`, { cause: e });
|
|
428
428
|
}
|
|
429
429
|
const result = selectMostRecentVersion(packument);
|
|
430
430
|
if (!result) {
|