muaddib-scanner 2.6.4 → 2.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -270,7 +270,7 @@ With pre-commit framework:
270
270
  ```yaml
271
271
  repos:
272
272
  - repo: https://github.com/DNSZLSK/muad-dib
273
- rev: v2.6.2
273
+ rev: v2.6.6
274
274
  hooks:
275
275
  - id: muaddib-scan
276
276
  ```
@@ -284,9 +284,14 @@ repos:
284
284
  | **Wild TPR** (Datadog 17K) | **88.2%** raw / **~100%** adjusted | 17,922 real malware. 2,077 out-of-scope (phishing, binaries, corrected) |
285
285
  | **TPR** (Ground Truth) | **93.9%** (46/49) | 51 real attacks. 3 out-of-scope: browser-only |
286
286
  | **FPR** (Benign) | **12.1%** (64/529) | 529 npm packages, real source via `npm pack` |
287
- | **ADR** (Adversarial + Holdout) | **94.8%** (73/77) | 53 adversarial + 40 holdout (77 available on disk) |
287
+ | **ADR** (Adversarial + Holdout) | **92.2%** (71/77) | 53 adversarial + 40 holdout (77 available on disk), global threshold=20 |
288
288
 
289
- **1940 tests** across 44 files, 86% code coverage. **129 rules** (124 RULES + 5 PARANOID).
289
+ **2009 tests** across 46 files, 86% code coverage. **130 rules** (125 RULES + 5 PARANOID).
290
+
291
+ > **Methodology caveats:**
292
+ > - TPR measured on 49 Node.js attack samples (3 browser-only excluded from 51 total)
293
+ > - FPR measured on 529 curated popular npm packages (not a random sample)
294
+ > - ADR measured with global threshold (score >= 20) as of v2.6.5
290
295
 
291
296
  See [Evaluation Methodology](docs/EVALUATION_METHODOLOGY.md) for the full experimental protocol, holdout history, and Datadog benchmark details.
292
297
 
@@ -322,7 +327,7 @@ npm test
322
327
 
323
328
  ### Testing
324
329
 
325
- - **1940 tests** across 44 modular test files - 86% code coverage
330
+ - **2009 tests** across 46 modular test files - 86% code coverage
326
331
  - **56 fuzz tests** - Malformed inputs, ReDoS, unicode, binary
327
332
  - **Datadog 17K benchmark** - 17,922 real malware samples
328
333
  - **Ground truth validation** - 51 real-world attacks (93.9% TPR)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.6.4",
3
+ "version": "2.6.7",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -48,8 +48,7 @@
48
48
  "acorn": "8.16.0",
49
49
  "acorn-walk": "8.3.5",
50
50
  "adm-zip": "0.5.16",
51
- "js-yaml": "4.1.1",
52
- "muaddib-scanner": "^2.6.3"
51
+ "js-yaml": "4.1.1"
53
52
  },
54
53
  "overrides": {
55
54
  "loadash": "0.0.0-security"
package/src/index.js CHANGED
@@ -18,7 +18,7 @@ const fs = require('fs');
18
18
  const path = require('path');
19
19
  const { scanGitHubActions } = require('./scanner/github-actions.js');
20
20
  const { detectPythonProject, normalizePythonName } = require('./scanner/python.js');
21
- const { loadCachedIOCs } = require('./ioc/updater.js');
21
+ const { loadCachedIOCs, checkIOCStaleness } = require('./ioc/updater.js');
22
22
  const { ensureIOCs } = require('./ioc/bootstrap.js');
23
23
  const { scanEntropy } = require('./scanner/entropy.js');
24
24
  const { scanAIConfig } = require('./scanner/ai-config.js');
@@ -34,10 +34,6 @@ const { buildIntentPairs } = require('./intent-graph.js');
34
34
  const { MAX_FILE_SIZE, safeParse } = require('./shared/constants.js');
35
35
  const walk = require('acorn-walk');
36
36
 
37
- // Timeout constants for scan safety
38
- const SCANNER_TIMEOUT = 15000; // 15s per individual scanner
39
- const SCAN_TIMEOUT = 60000; // 60s global scan timeout
40
-
41
37
  // Paranoid mode scanner
42
38
  function scanParanoid(targetPath) {
43
39
  const threats = [];
@@ -92,18 +88,32 @@ function scanParanoid(targetPath) {
92
88
 
93
89
  const found = new Set(); // deduplicate: one finding per rule per file
94
90
 
91
+ // v2.6.5: Track aliases of eval, Function, require for bypass detection
92
+ // e.g., const e = eval; e(code) — or — const F = Function; new F(code)
93
+ const ALIAS_TARGETS = new Set(['eval', 'Function', 'require']);
94
+ const aliases = new Map(); // aliasName → originalName
95
+
95
96
  walk.simple(ast, {
97
+ VariableDeclarator(node) {
98
+ // const e = eval / const F = Function / const r = require
99
+ if (node.id?.type === 'Identifier' && node.init?.type === 'Identifier' &&
100
+ ALIAS_TARGETS.has(node.init.name)) {
101
+ aliases.set(node.id.name, node.init.name);
102
+ }
103
+ },
96
104
  CallExpression(node) {
97
105
  // Direct calls: eval(), exec(), fetch(), etc.
98
106
  if (node.callee.type === 'Identifier') {
99
- const name = node.callee.name;
107
+ // Resolve alias to original name if applicable
108
+ const name = aliases.get(node.callee.name) || node.callee.name;
100
109
  for (const [ruleKey, detector] of Object.entries(PARANOID_AST_DETECTORS)) {
101
110
  if (detector.callNames && detector.callNames.has(name) && !found.has(ruleKey)) {
102
111
  found.add(ruleKey);
103
112
  const rule = PARANOID_RULES[ruleKey];
104
113
  threats.push({
105
114
  type: rule.id, severity: rule.severity.toUpperCase(),
106
- message: `${rule.message}: "${name}"`, file: relFile, mitre: rule.mitre
115
+ message: `${rule.message}: "${node.callee.name}"${aliases.has(node.callee.name) ? ` (alias of ${name})` : ''}`,
116
+ file: relFile, mitre: rule.mitre
107
117
  });
108
118
  }
109
119
  }
@@ -130,14 +140,16 @@ function scanParanoid(targetPath) {
130
140
  },
131
141
  NewExpression(node) {
132
142
  if (node.callee.type === 'Identifier') {
133
- const name = node.callee.name;
143
+ // Resolve alias: const F = Function; new F(code)
144
+ const name = aliases.get(node.callee.name) || node.callee.name;
134
145
  for (const [ruleKey, detector] of Object.entries(PARANOID_AST_DETECTORS)) {
135
146
  if (detector.newNames && detector.newNames.has(name) && !found.has(ruleKey)) {
136
147
  found.add(ruleKey);
137
148
  const rule = PARANOID_RULES[ruleKey];
138
149
  threats.push({
139
150
  type: rule.id, severity: rule.severity.toUpperCase(),
140
- message: `${rule.message}: "new ${name}"`, file: relFile, mitre: rule.mitre
151
+ message: `${rule.message}: "new ${node.callee.name}"${aliases.has(node.callee.name) ? ` (alias of ${name})` : ''}`,
152
+ file: relFile, mitre: rule.mitre
141
153
  });
142
154
  }
143
155
  }
@@ -333,6 +345,9 @@ async function run(targetPath, options = {}) {
333
345
  // Ensure IOCs are downloaded (first run only, graceful failure)
334
346
  await ensureIOCs();
335
347
 
348
+ // Check IOC freshness — warn if database is older than 30 days
349
+ const iocStalenessWarning = checkIOCStaleness(30);
350
+
336
351
  // Apply --exclude dirs for this scan
337
352
  if (options.exclude && options.exclude.length > 0) {
338
353
  setExtraExcludes(options.exclude, targetPath);
@@ -359,10 +374,16 @@ async function run(targetPath, options = {}) {
359
374
  // Wrapped in yieldThen to unblock spinner animation
360
375
  // Bounded: 5s timeout to prevent DoS on large/adversarial packages
361
376
  const MODULE_GRAPH_TIMEOUT_MS = 5000;
377
+ const warnings = [];
378
+ if (iocStalenessWarning) warnings.push(iocStalenessWarning);
362
379
  let crossFileFlows = [];
363
380
  if (!options.noModuleGraph) {
364
381
  const moduleGraphWork = async () => {
365
382
  const graph = await yieldThen(() => buildModuleGraph(targetPath));
383
+ if (Object.keys(graph).length === 0) {
384
+ // buildModuleGraph returns empty when MAX_GRAPH_NODES exceeded
385
+ warnings.push('Module graph skipped: package exceeds 100 files limit');
386
+ }
366
387
  const tainted = await yieldThen(() => annotateTaintedExports(graph, targetPath));
367
388
  const sinkAnnotations = await yieldThen(() => annotateSinkExports(graph, targetPath));
368
389
  crossFileFlows = await yieldThen(() => detectCrossFileFlows(graph, tainted, sinkAnnotations, targetPath));
@@ -373,14 +394,20 @@ async function run(targetPath, options = {}) {
373
394
  const emitterFlows = await yieldThen(() => detectEventEmitterFlows(graph, tainted, sinkAnnotations, targetPath));
374
395
  crossFileFlows = crossFileFlows.concat(emitterFlows);
375
396
  };
376
- const timeout = new Promise((_, reject) =>
377
- setTimeout(() => reject(new Error('Module graph timeout')), MODULE_GRAPH_TIMEOUT_MS)
378
- );
397
+ let graphTimerId;
398
+ const timeout = new Promise((_, reject) => {
399
+ graphTimerId = setTimeout(() => reject(new Error('Module graph timeout')), MODULE_GRAPH_TIMEOUT_MS);
400
+ });
379
401
  try {
380
402
  await Promise.race([moduleGraphWork(), timeout]);
381
403
  } catch (e) {
382
404
  // Graceful fallback — module graph is best-effort
383
405
  debugLog('[MODULE-GRAPH] Error:', e && e.message);
406
+ if (e && e.message === 'Module graph timeout') {
407
+ warnings.push(`Module graph analysis timed out (${MODULE_GRAPH_TIMEOUT_MS / 1000}s) — cross-file flows may be incomplete`);
408
+ }
409
+ } finally {
410
+ clearTimeout(graphTimerId);
384
411
  }
385
412
  }
386
413
 
@@ -593,6 +620,10 @@ async function run(targetPath, options = {}) {
593
620
  threats: pythonThreats.length + pypiTyposquatThreats.length
594
621
  } : null;
595
622
 
623
+ // Track deobfuscation failures
624
+ // (deobfuscate returns {deobfuscatedThreats, failures} but failures aren't surfaced)
625
+ // We detect this via scannerErrors for now
626
+
596
627
  const result = {
597
628
  target: targetPath,
598
629
  timestamp: new Date().toISOString(),
@@ -614,6 +645,7 @@ async function run(targetPath, options = {}) {
614
645
  breakdown
615
646
  },
616
647
  sandbox: sandboxData,
648
+ warnings: warnings.length > 0 ? warnings : undefined,
617
649
  scannerErrors: scannerErrors.length > 0 ? scannerErrors : undefined
618
650
  };
619
651
 
@@ -24,11 +24,14 @@ const SOURCE_TYPES = {
24
24
  credential_regex_harvest: 'credential_read', // regex patterns for tokens/passwords
25
25
  llm_api_key_harvest: 'credential_read', // OPENAI_API_KEY, ANTHROPIC_API_KEY
26
26
  credential_cli_steal: 'credential_read', // gh auth token, gcloud auth
27
- // env_access EXCLUDED — standard config (process.env.PORT, AWS_REGION, NODE_ENV)
27
+ // env_access: conditionally classified — see classifySource()
28
28
  // suspicious_dataflow EXCLUDED — already compound detection
29
29
  // cross_file_dataflow EXCLUDED — already scored CRITICAL by module-graph
30
30
  };
31
31
 
32
+ // Sensitive env var patterns — env_access referencing these is credential theft, not config
33
+ const SENSITIVE_ENV_PATTERNS = /TOKEN|KEY|SECRET|PASSWORD|CREDENTIAL|API_KEY|AUTH/i;
34
+
32
35
  // ============================================
33
36
  // SINK CLASSIFICATION (from existing threats only)
34
37
  // ============================================
@@ -94,10 +97,6 @@ const COHERENCE_MATRIX = {
94
97
  },
95
98
  };
96
99
 
97
- // Kept for backward compatibility but no longer used in pairing
98
- // Cross-file detection is handled by module-graph.js (cross_file_dataflow)
99
- const CROSS_FILE_MULTIPLIER = 0.5;
100
-
101
100
  /**
102
101
  * Classify a threat as a source type.
103
102
  * Only high-confidence credential access patterns.
@@ -105,9 +104,17 @@ const CROSS_FILE_MULTIPLIER = 0.5;
105
104
  function classifySource(threat) {
106
105
  if (SOURCE_TYPES[threat.type]) return SOURCE_TYPES[threat.type];
107
106
 
107
+ // env_access: only classify as credential_read if accessing sensitive vars
108
+ // Standard config (NODE_ENV, PORT, DEBUG) → null (no pairing)
109
+ if (threat.type === 'env_access') {
110
+ if (threat.message && SENSITIVE_ENV_PATTERNS.test(threat.message)) {
111
+ return 'credential_read';
112
+ }
113
+ return null;
114
+ }
115
+
108
116
  // Explicitly excluded types
109
117
  if (threat.type === 'suspicious_dataflow') return null;
110
- if (threat.type === 'env_access') return null;
111
118
  if (threat.type === 'cross_file_dataflow') return null;
112
119
 
113
120
  // Message-based: only for threats referencing sensitive file paths
@@ -228,6 +235,5 @@ module.exports = {
228
235
  classifySource,
229
236
  classifySink,
230
237
  buildIntentPairs,
231
- COHERENCE_MATRIX,
232
- CROSS_FILE_MULTIPLIER
238
+ COHERENCE_MATRIX
233
239
  };
@@ -10,6 +10,38 @@ const HOME_IOC_FILE = path.join(os.homedir(), '.muaddib', 'data', 'iocs.json');
10
10
  const STATIC_IOCS_FILE = path.join(__dirname, '../../data/static-iocs.json');
11
11
  const { generateCompactIOCs } = require('./updater.js');
12
12
  const { Spinner } = require('../utils.js');
13
+ const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
14
+
15
+ // Version format validation (semver-like + wildcard)
16
+ const VERSION_RE = /^(\*|0|[1-9]\d*(\.\d+){0,2}(-[\w.]+)?(\+[\w.]+)?)$/;
17
+
18
+ /**
19
+ * Validate an IOC package entry before insertion.
20
+ * Returns true if valid, false if should be skipped.
21
+ */
22
+ function validateIOCEntry(pkgName, version, ecosystem) {
23
+ if (!pkgName || typeof pkgName !== 'string') return false;
24
+ // npm: validate with NPM_PACKAGE_REGEX
25
+ if (ecosystem === 'npm' || !ecosystem) {
26
+ if (!NPM_PACKAGE_REGEX.test(pkgName)) {
27
+ console.warn(`[WARN] Invalid ${ecosystem || 'npm'} package name skipped: ${pkgName}`);
28
+ return false;
29
+ }
30
+ }
31
+ // PyPI: basic check — no path traversal, no slashes
32
+ if (ecosystem === 'pypi') {
33
+ if (/[/\\]|\.\./.test(pkgName)) {
34
+ console.warn(`[WARN] Invalid PyPI package name skipped: ${pkgName}`);
35
+ return false;
36
+ }
37
+ }
38
+ // Version validation
39
+ if (version && !VERSION_RE.test(version)) {
40
+ console.warn(`[WARN] Invalid version skipped: ${version} for ${pkgName}`);
41
+ return false;
42
+ }
43
+ return true;
44
+ }
13
45
 
14
46
  // Allowed domains for redirections (SSRF security)
15
47
  const ALLOWED_REDIRECT_DOMAINS = [
@@ -1110,10 +1142,15 @@ async function runScraper() {
1110
1142
  dedupMap.set(key, pkg);
1111
1143
  }
1112
1144
 
1113
- // Merge new IOCs with smart replacement
1145
+ // Merge new IOCs with smart replacement (with input validation)
1114
1146
  let addedPackages = 0;
1115
1147
  let upgradedPackages = 0;
1148
+ let skippedInvalid = 0;
1116
1149
  for (const pkg of allPackages) {
1150
+ if (!validateIOCEntry(pkg.name, pkg.version, 'npm')) {
1151
+ skippedInvalid++;
1152
+ continue;
1153
+ }
1117
1154
  const key = pkg.name + '@' + pkg.version;
1118
1155
  if (!dedupMap.has(key)) {
1119
1156
  dedupMap.set(key, pkg);
@@ -1148,6 +1185,10 @@ async function runScraper() {
1148
1185
  }
1149
1186
  let addedPyPIPackages = 0;
1150
1187
  for (const pkg of pypiPackages) {
1188
+ if (!validateIOCEntry(pkg.name, pkg.version, 'pypi')) {
1189
+ skippedInvalid++;
1190
+ continue;
1191
+ }
1151
1192
  const key = pkg.name + '@' + pkg.version;
1152
1193
  if (!pypiDedupMap.has(key)) {
1153
1194
  pypiDedupMap.set(key, pkg);
@@ -1308,6 +1349,7 @@ module.exports = {
1308
1349
  // Pure utility functions (exported for testing)
1309
1350
  parseCSVLine, parseCSV, extractVersions, parseOSVEntry,
1310
1351
  createFreshness, isAllowedRedirect, loadStaticIOCs,
1352
+ validateIOCEntry,
1311
1353
  CONFIDENCE_ORDER, ALLOWED_REDIRECT_DOMAINS
1312
1354
  };
1313
1355
 
@@ -463,6 +463,34 @@ function invalidateCache() {
463
463
  cachedIOCsTime = 0;
464
464
  }
465
465
 
466
+ /**
467
+ * Check IOC freshness based on cached file mtime.
468
+ * Returns a warning string if IOCs are older than maxAgeDays, null otherwise.
469
+ * @param {number} maxAgeDays - Maximum acceptable age in days (default: 30)
470
+ * @returns {string|null} Warning message or null
471
+ */
472
+ function checkIOCStaleness(maxAgeDays = 30) {
473
+ const filesToCheck = [CACHE_IOC_FILE, LOCAL_IOC_FILE, LOCAL_COMPACT_FILE];
474
+ let newestMtime = 0;
475
+
476
+ for (const f of filesToCheck) {
477
+ try {
478
+ const stat = fs.statSync(f);
479
+ if (stat.mtimeMs > newestMtime) newestMtime = stat.mtimeMs;
480
+ } catch {
481
+ // File doesn't exist — skip
482
+ }
483
+ }
484
+
485
+ if (newestMtime === 0) return null; // No IOC files found — bootstrap will handle
486
+
487
+ const ageDays = (Date.now() - newestMtime) / (1000 * 60 * 60 * 24);
488
+ if (ageDays > maxAgeDays) {
489
+ return `IOC database is ${Math.floor(ageDays)} days old (threshold: ${maxAgeDays}d). Run "muaddib update" for latest threat data.`;
490
+ }
491
+ return null;
492
+ }
493
+
466
494
  // ============================================
467
495
  // IOC INTEGRITY: HMAC-SHA256 signing/verification
468
496
  // ============================================
@@ -510,4 +538,4 @@ function verifyIOCHMAC(data, hmac) {
510
538
  }
511
539
  }
512
540
 
513
- module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, NEVER_WILDCARD };
541
+ module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, checkIOCStaleness, NEVER_WILDCARD };
@@ -180,6 +180,9 @@ const PLAYBOOKS = {
180
180
  workflow_injection:
181
181
  'Injection potentielle dans GitHub Actions via input non sanitise sur self-hosted runner. Supprimer ou corriger le workflow.',
182
182
 
183
+ workflow_pwn_request:
184
+ 'CRITIQUE: Pwn request detecte — pull_request_target avec checkout du head de la PR permet l\'execution de code arbitraire. Remplacer par pull_request ou utiliser une strategie de checkout securisee (base ref uniquement).',
185
+
183
186
  sandbox_sensitive_file_read:
184
187
  'CRITIQUE: Package lit des fichiers sensibles (credentials) lors de l\'installation. Ne pas installer. Supprimer immediatement.',
185
188
  sandbox_sensitive_file_write:
@@ -844,6 +844,18 @@ const RULES = {
844
844
  references: ['https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions'],
845
845
  mitre: 'T1195.002'
846
846
  },
847
+ workflow_pwn_request: {
848
+ id: 'MUADDIB-GHA-003',
849
+ name: 'GitHub Actions Pwn Request',
850
+ severity: 'CRITICAL',
851
+ confidence: 'high',
852
+ description: 'Workflow pull_request_target avec checkout du head ref/sha de la PR — permet execution de code arbitraire (pwn request)',
853
+ references: [
854
+ 'https://securitylab.github.com/research/github-actions-preventing-pwn-requests/',
855
+ 'https://attack.mitre.org/techniques/T1195/002/'
856
+ ],
857
+ mitre: 'T1195.002'
858
+ },
847
859
 
848
860
  // Sandbox detections
849
861
  sandbox_sensitive_file_read: {
@@ -1104,7 +1116,7 @@ const RULES = {
1104
1116
  description: 'Package inactif depuis 6+ mois avec une nouvelle version soudaine. Possible changement de mainteneur ou compromission.',
1105
1117
  references: [
1106
1118
  'https://blog.npmjs.org/post/180565383195/details-about-the-event-stream-incident',
1107
- 'https://snyk.io/blog/a]]malicious-npm-packages-targeting-developers/'
1119
+ 'https://snyk.io/blog/malicious-npm-packages-targeting-developers/'
1108
1120
  ],
1109
1121
  mitre: 'T1195.002'
1110
1122
  },
@@ -1387,6 +1399,7 @@ const RULES = {
1387
1399
  function getRule(type) {
1388
1400
  if (RULES[type]) return RULES[type];
1389
1401
  if (PARANOID_RULES[type]) return PARANOID_RULES[type];
1402
+ if (PARANOID_RULES_BY_ID[type]) return PARANOID_RULES_BY_ID[type];
1390
1403
  return {
1391
1404
  id: 'MUADDIB-UNK-001',
1392
1405
  name: 'Unknown Threat',
@@ -1437,4 +1450,10 @@ const PARANOID_RULES = {
1437
1450
  }
1438
1451
  };
1439
1452
 
1453
+ // Reverse-map: PARANOID rule ID → rule object (for scanParanoid threats)
1454
+ const PARANOID_RULES_BY_ID = {};
1455
+ for (const [, rule] of Object.entries(PARANOID_RULES)) {
1456
+ PARANOID_RULES_BY_ID[rule.id] = rule;
1457
+ }
1458
+
1440
1459
  module.exports = { RULES, getRule, PARANOID_RULES };
@@ -273,7 +273,7 @@ async function runSingleSandbox(packageName, options = {}) {
273
273
  let report;
274
274
  try {
275
275
  const REPORT_DELIMITER = '---MUADDIB-REPORT-START---';
276
- const delimIdx = stdout.indexOf(REPORT_DELIMITER);
276
+ const delimIdx = stdout.lastIndexOf(REPORT_DELIMITER);
277
277
  let jsonStr;
278
278
  if (delimIdx !== -1) {
279
279
  // Reliable: use delimiter to skip any package output before the report
@@ -110,6 +110,32 @@ function buildTaintMap(ast) {
110
110
  }
111
111
  }
112
112
 
113
+ // B8 fix: const fn = tools.read — resolve object property alias to tainted method
114
+ if (node.id.type === 'Identifier' && init.type === 'MemberExpression' &&
115
+ init.object?.type === 'Identifier' && init.property?.type === 'Identifier') {
116
+ const aliasKey = `${init.object.name}.${init.property.name}`;
117
+ const aliasTaint = taintMap.get(aliasKey);
118
+ if (aliasTaint && TRACKED_MODULES.has(aliasTaint.source)) {
119
+ taintMap.set(node.id.name, aliasTaint);
120
+ }
121
+ }
122
+
123
+ // B9 fix: const [x] = [fs.readFileSync(...)] — array destructuring taint
124
+ if (node.id.type === 'ArrayPattern' && init.type === 'ArrayExpression') {
125
+ for (let i = 0; i < node.id.elements.length && i < init.elements.length; i++) {
126
+ const elem = node.id.elements[i];
127
+ const val = init.elements[i];
128
+ if (!elem || elem.type !== 'Identifier' || !val) continue;
129
+ if (val.type === 'CallExpression' && val.callee?.type === 'MemberExpression' &&
130
+ val.callee.object?.type === 'Identifier' && val.callee.property?.type === 'Identifier') {
131
+ const parentTaint = taintMap.get(val.callee.object.name);
132
+ if (parentTaint && TRACKED_MODULES.has(parentTaint.source)) {
133
+ taintMap.set(elem.name, { source: parentTaint.source, detail: `${parentTaint.source}.${val.callee.property.name}` });
134
+ }
135
+ }
136
+ }
137
+ }
138
+
113
139
  // B5 fix: const tools = { read: fs.readFileSync, home: os.homedir }
114
140
  // Track object properties that reference tainted module methods as tainted aliases
115
141
  if (node.id.type === 'Identifier' && init.type === 'ObjectExpression') {
@@ -193,6 +219,18 @@ function analyzeFile(content, filePath, basePath) {
193
219
  },
194
220
 
195
221
  VariableDeclarator(node) {
222
+ // B9: Array destructuring taint propagation: const [data] = [fs.readFileSync('.npmrc')]
223
+ if (node.id?.type === 'ArrayPattern' && node.init?.type === 'ArrayExpression') {
224
+ for (let i = 0; i < node.id.elements.length && i < node.init.elements.length; i++) {
225
+ const elem = node.id.elements[i];
226
+ const val = node.init.elements[i];
227
+ if (!elem || elem.type !== 'Identifier' || !val) continue;
228
+ if (containsSensitiveLiteral(val)) {
229
+ sensitivePathVars.add(elem.name);
230
+ }
231
+ }
232
+ }
233
+
196
234
  if (node.id?.type === 'Identifier' && node.init) {
197
235
  let initNode = node.init;
198
236
  if (initNode.type === 'AwaitExpression') initNode = initNode.argument;
@@ -232,7 +232,7 @@ function scanEntropy(targetPath, options = {}) {
232
232
  // B12: Windowed analysis for strings > MAX_STRING_LENGTH
233
233
  if (str.length > MAX_STRING_LENGTH) {
234
234
  if (SOURCE_MAP_REGEX.test(str) || SHA256_HEX_REGEX.test(str)) continue;
235
- const WINDOW = 500, WIN_THRESHOLD = 6.0;
235
+ const WINDOW = 500, WIN_THRESHOLD = 5.5;
236
236
  for (let i = 0; i < str.length; i += WINDOW) {
237
237
  const w = str.slice(i, i + WINDOW);
238
238
  if (w.length < 20) continue;
@@ -76,7 +76,7 @@ function scanDirRecursive(dirPath, targetPath, threats, depth = 0) {
76
76
 
77
77
  // GHA-002: Detect attacker-controlled context injection on ALL runners (not just self-hosted)
78
78
  const injectionPatterns = [
79
- { regex: /\$\{\{\s*github\.event\.(comment\.body|issue\.body|issue\.title|pull_request\.body|pull_request\.title|discussion\.body|discussion\.title)/, msg: 'Attacker-controlled GitHub event context used in workflow' },
79
+ { regex: /\$\{\{\s*github\.event\.(comment\.body|issue\.body|issue\.title|pull_request\.body|pull_request\.title|discussion\.body|discussion\.title|pages\[\]\.html_url)/, msg: 'Attacker-controlled GitHub event context used in workflow' },
80
80
  { regex: /\$\{\{\s*github\.head_ref/, msg: 'github.head_ref is attacker-controlled in pull_request workflows' }
81
81
  ];
82
82
 
@@ -90,6 +90,18 @@ function scanDirRecursive(dirPath, targetPath, threats, depth = 0) {
90
90
  });
91
91
  }
92
92
  }
93
+
94
+ // GHA-003: Compound — pull_request_target + checkout of PR head (pwn request)
95
+ const hasPRTarget = /pull_request_target/m.test(activeContent);
96
+ const hasCheckoutPRHead = /actions\/checkout[\s\S]*?ref:\s*\$\{\{\s*github\.event\.pull_request\.head\.(ref|sha)\s*\}\}/m.test(activeContent);
97
+ if (hasPRTarget && hasCheckoutPRHead) {
98
+ threats.push({
99
+ type: 'workflow_pwn_request',
100
+ severity: 'CRITICAL',
101
+ message: 'Pwn request: pull_request_target with checkout of PR head ref/sha allows arbitrary code execution',
102
+ file: relFile
103
+ });
104
+ }
93
105
  }
94
106
  }
95
107
 
@@ -5,9 +5,10 @@ const { findFiles, EXCLUDED_DIRS, debugLog } = require('../utils');
5
5
  const { ACORN_OPTIONS: BASE_ACORN_OPTIONS, safeParse } = require('../shared/constants.js');
6
6
 
7
7
  // --- Bounded path limits ---
8
- const MAX_GRAPH_NODES = 50; // Max files in dependency graph
9
- const MAX_GRAPH_EDGES = 200; // Max total import edges
8
+ const MAX_GRAPH_NODES = 100; // Max files in dependency graph (covers ~86% of npm packages)
9
+ const MAX_GRAPH_EDGES = 400; // Max total import edges
10
10
  const MAX_FLOWS = 20; // Max cross-file flow findings per package
11
+ const MAX_TAINT_DEPTH = 50; // Max AST recursion depth (DoS guard)
11
12
 
12
13
  // --- Sensitive source patterns ---
13
14
  const SENSITIVE_MODULES = new Set(['fs', 'child_process', 'dns', 'os', 'dgram']);
@@ -103,7 +104,9 @@ function tryResolveConcatRequire(node, depth) {
103
104
  return null;
104
105
  }
105
106
 
106
- function walkForRequires(node, fileDir, packagePath, imports) {
107
+ function walkForRequires(node, fileDir, packagePath, imports, depth) {
108
+ if (depth === undefined) depth = 0;
109
+ if (depth > MAX_TAINT_DEPTH) return;
107
110
  if (!node || typeof node !== 'object') return;
108
111
  if (
109
112
  node.type === 'CallExpression' &&
@@ -130,11 +133,11 @@ function walkForRequires(node, fileDir, packagePath, imports) {
130
133
  if (Array.isArray(child)) {
131
134
  for (const item of child) {
132
135
  if (item && typeof item === 'object' && item.type) {
133
- walkForRequires(item, fileDir, packagePath, imports);
136
+ walkForRequires(item, fileDir, packagePath, imports, depth + 1);
134
137
  }
135
138
  }
136
139
  } else if (child && typeof child === 'object' && child.type) {
137
- walkForRequires(child, fileDir, packagePath, imports);
140
+ walkForRequires(child, fileDir, packagePath, imports, depth + 1);
138
141
  }
139
142
  }
140
143
  }
@@ -1462,7 +1465,9 @@ function parseFile(filePath) {
1462
1465
  return safeParse(content, { allowReturnOutsideFunction: true, allowImportExportEverywhere: true });
1463
1466
  }
1464
1467
 
1465
- function walkAST(node, visitor) {
1468
+ function walkAST(node, visitor, depth) {
1469
+ if (depth === undefined) depth = 0;
1470
+ if (depth > MAX_TAINT_DEPTH) return;
1466
1471
  if (!node || typeof node !== 'object') return;
1467
1472
  if (node.type) visitor(node);
1468
1473
  for (const key of Object.keys(node)) {
@@ -1470,10 +1475,10 @@ function walkAST(node, visitor) {
1470
1475
  const child = node[key];
1471
1476
  if (Array.isArray(child)) {
1472
1477
  for (const item of child) {
1473
- if (item && typeof item === 'object' && item.type) walkAST(item, visitor);
1478
+ if (item && typeof item === 'object' && item.type) walkAST(item, visitor, depth + 1);
1474
1479
  }
1475
1480
  } else if (child && typeof child === 'object' && child.type) {
1476
- walkAST(child, visitor);
1481
+ walkAST(child, visitor, depth + 1);
1477
1482
  }
1478
1483
  }
1479
1484
  }
@@ -1536,10 +1541,12 @@ function getFunctionBody(node) {
1536
1541
  return null;
1537
1542
  }
1538
1543
 
1539
- function getMemberChain(node) {
1544
+ function getMemberChain(node, depth) {
1545
+ if (depth === undefined) depth = 0;
1546
+ if (depth > MAX_TAINT_DEPTH) return '';
1540
1547
  if (node.type === 'Identifier') return node.name;
1541
1548
  if (node.type === 'MemberExpression') {
1542
- const obj = getMemberChain(node.object);
1549
+ const obj = getMemberChain(node.object, depth + 1);
1543
1550
  const prop = node.property.name || node.property.value || '';
1544
1551
  return `${obj}.${prop}`;
1545
1552
  }
@@ -2084,5 +2091,5 @@ module.exports = {
2084
2091
  annotateSinkExports, detectCallbackCrossFileFlows, detectEventEmitterFlows,
2085
2092
  resolveLocal, extractLocalImports, parseFile, isLocalImport, toRel, isFileExists,
2086
2093
  tryResolveConcatRequire,
2087
- MAX_GRAPH_NODES, MAX_GRAPH_EDGES, MAX_FLOWS
2094
+ MAX_GRAPH_NODES, MAX_GRAPH_EDGES, MAX_FLOWS, MAX_TAINT_DEPTH
2088
2095
  };
@@ -1,6 +1,7 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
3
  const { findFiles, forEachSafeFile } = require('../utils.js');
4
+ const { MAX_FILE_SIZE } = require('../shared/constants.js');
4
5
 
5
6
  const SHELL_EXCLUDED_DIRS = ['node_modules', '.git', '.muaddib-cache'];
6
7
 
@@ -22,31 +23,75 @@ const MALICIOUS_PATTERNS = [
22
23
  { pattern: /wget\s+\S+.*&&.*base64\s+-d/m, name: 'wget_base64_decode', severity: 'HIGH' }
23
24
  ];
24
25
 
26
+ const SHEBANG_RE = /^#!.*\b(?:ba)?sh\b/;
27
+
28
+ function scanFileContent(file, content, targetPath, threats) {
29
+ // Strip comment lines to avoid false positives on documentation
30
+ const activeContent = content.split(/\r?\n/)
31
+ .filter(line => !line.trimStart().startsWith('#'))
32
+ .join('\n');
33
+
34
+ for (const { pattern, name, severity } of MALICIOUS_PATTERNS) {
35
+ if (pattern.test(activeContent)) {
36
+ threats.push({
37
+ type: name,
38
+ severity: severity,
39
+ message: `Pattern malveillant "${name}" detecte.`,
40
+ file: path.relative(targetPath, file)
41
+ });
42
+ }
43
+ }
44
+ }
45
+
46
+ /**
47
+ * Find extensionless files in a directory (non-recursive into excluded dirs).
48
+ * Used for shebang-based shell script detection.
49
+ */
50
+ function findExtensionlessFiles(dir, excludedDirs, results = [], depth = 0) {
51
+ if (depth > 20) return results;
52
+ let items;
53
+ try { items = fs.readdirSync(dir); } catch { return results; }
54
+
55
+ for (const item of items) {
56
+ if (excludedDirs.includes(item)) continue;
57
+ const fullPath = path.join(dir, item);
58
+ try {
59
+ const lstat = fs.lstatSync(fullPath);
60
+ if (lstat.isSymbolicLink()) continue;
61
+ if (lstat.isDirectory()) {
62
+ findExtensionlessFiles(fullPath, excludedDirs, results, depth + 1);
63
+ } else if (lstat.isFile() && !path.extname(item) && lstat.size <= MAX_FILE_SIZE) {
64
+ results.push(fullPath);
65
+ }
66
+ } catch { /* permission error */ }
67
+ }
68
+ return results;
69
+ }
70
+
25
71
  async function scanShellScripts(targetPath) {
26
72
  const threats = [];
27
-
28
- // Cherche les fichiers shell
73
+
74
+ // Pass 1: files with shell extensions
29
75
  const files = findFiles(targetPath, { extensions: ['.sh', '.bash', '.zsh', '.command'], excludedDirs: SHELL_EXCLUDED_DIRS });
30
76
 
31
77
  forEachSafeFile(files, (file, content) => {
32
- // Strip comment lines to avoid false positives on documentation
33
- const activeContent = content.split(/\r?\n/)
34
- .filter(line => !line.trimStart().startsWith('#'))
35
- .join('\n');
36
-
37
- for (const { pattern, name, severity } of MALICIOUS_PATTERNS) {
38
- if (pattern.test(activeContent)) {
39
- threats.push({
40
- type: name,
41
- severity: severity,
42
- message: `Pattern malveillant "${name}" detecte.`,
43
- file: path.relative(targetPath, file)
44
- });
45
- }
46
- }
78
+ scanFileContent(file, content, targetPath, threats);
47
79
  });
48
80
 
81
+ // Pass 2: extensionless files with sh/bash shebang
82
+ const extensionless = findExtensionlessFiles(targetPath, SHELL_EXCLUDED_DIRS);
83
+
84
+ for (const file of extensionless) {
85
+ try {
86
+ const content = fs.readFileSync(file, 'utf8');
87
+ const firstLine = content.split(/\r?\n/, 1)[0];
88
+ if (SHEBANG_RE.test(firstLine)) {
89
+ scanFileContent(file, content, targetPath, threats);
90
+ }
91
+ } catch { /* ignore unreadable files */ }
92
+ }
93
+
49
94
  return threats;
50
95
  }
51
96
 
52
- module.exports = { scanShellScripts };
97
+ module.exports = { scanShellScripts };
package/src/scoring.js CHANGED
@@ -232,17 +232,13 @@ function applyFPReductions(threats, reachableFiles, packageName) {
232
232
  const rule = FP_COUNT_THRESHOLDS[t.type];
233
233
  if (rule && typeCounts[t.type] > rule.maxCount && (!rule.from || t.severity === rule.from)) {
234
234
  const typeRatio = typeCounts[t.type] / totalThreats;
235
- // suspicious_dataflow: full bypass of percentage guard. Packages with >3 suspicious_dataflow
236
- // findings are always legitimate SDKs (SMTP, monitoring, analytics). Real malware has 1-2
237
- // targeted source→sink pairs. The count >3 threshold is sufficient protection.
238
- // P7: removed 80% ratio cap it caused ~30k FP hits in production on SDK packages
239
- // where dataflow was the dominant finding type (e.g. @darajs/core, addio-admin-sdk).
240
- // vm_code_execution: full bypass — packages with only vm.Script calls (cassandra-driver,
241
- // webpack, jest) are legitimate. Real malware using vm always has other signals
242
- // (network, fs, obfuscation). The >3 count threshold is sufficient protection.
235
+ // suspicious_dataflow: bypass percentage guard when count exceeds threshold.
236
+ // Packages with >3 suspicious_dataflow findings are always legitimate SDKs.
237
+ // But a single suspicious_dataflow at 50% ratio should NOT be downgraded.
238
+ // vm_code_execution: same logicbypass only when count exceeds threshold.
243
239
  if (typeRatio < 0.4 ||
244
- t.type === 'suspicious_dataflow' ||
245
- t.type === 'vm_code_execution') {
240
+ (t.type === 'suspicious_dataflow' && typeCounts[t.type] > rule.maxCount) ||
241
+ (t.type === 'vm_code_execution' && typeCounts[t.type] > rule.maxCount)) {
246
242
  t.severity = rule.to;
247
243
  }
248
244
  }
@@ -41,13 +41,27 @@ function normalizeHostname(hostname) {
41
41
  return ipv4Part;
42
42
  }
43
43
  }
44
- // Convert decimal IP notation: 2130706433 → 127.0.0.1
45
- if (/^\d+$/.test(hostname)) {
46
- const num = parseInt(hostname, 10);
44
+ // Convert integer IP notation (decimal or hex): 2130706433 or 0x7f000001 → 127.0.0.1
45
+ if (/^(0x[\da-f]+|\d+)$/i.test(hostname)) {
46
+ const num = hostname.startsWith('0x') ? parseInt(hostname, 16) : parseInt(hostname, 10);
47
47
  if (num > 0 && num < 4294967296) {
48
48
  return [(num >>> 24) & 255, (num >>> 16) & 255, (num >>> 8) & 255, num & 255].join('.');
49
49
  }
50
50
  }
51
+ // Convert dotted IP with octal/hex octets: 0177.0.0.01 or 0x7f.0.0.1 → 127.0.0.1
52
+ if (/^[\da-fox.]+$/i.test(hostname)) {
53
+ const parts = hostname.split('.');
54
+ if (parts.length === 4) {
55
+ const octets = parts.map(p => {
56
+ if (/^0x[\da-f]+$/i.test(p)) return parseInt(p, 16);
57
+ if (/^0\d+$/.test(p)) return parseInt(p, 8);
58
+ return parseInt(p, 10);
59
+ });
60
+ if (octets.every(o => !isNaN(o) && o >= 0 && o <= 255)) {
61
+ return octets.join('.');
62
+ }
63
+ }
64
+ }
51
65
  return hostname;
52
66
  }
53
67
 
@@ -121,12 +135,18 @@ async function safeDnsResolve(hostname) {
121
135
  * @param {number} [timeoutMs] - Download timeout in ms (default: DOWNLOAD_TIMEOUT)
122
136
  * @returns {Promise<number>} Number of bytes downloaded
123
137
  */
138
+ const MAX_REDIRECTS = 5;
139
+
124
140
  function downloadToFile(url, destPath, timeoutMs = DOWNLOAD_TIMEOUT) {
125
141
  // DNS rebinding protection: validate hostname before connecting
126
142
  const parsedUrl = new URL(url);
127
143
  return safeDnsResolve(parsedUrl.hostname).then(() => {
128
144
  return new Promise((resolve, reject) => {
129
- const doRequest = (requestUrl) => {
145
+ const doRequest = (requestUrl, redirectCount) => {
146
+ if (redirectCount === undefined) redirectCount = 0;
147
+ if (redirectCount >= MAX_REDIRECTS) {
148
+ return reject(new Error(`Too many redirects (${MAX_REDIRECTS}) for ${url}`));
149
+ }
130
150
  const req = https.get(requestUrl, { timeout: timeoutMs }, (res) => {
131
151
  if (res.statusCode === 301 || res.statusCode === 302) {
132
152
  res.resume();
@@ -138,7 +158,7 @@ function downloadToFile(url, destPath, timeoutMs = DOWNLOAD_TIMEOUT) {
138
158
  if (!check.allowed) {
139
159
  return reject(new Error(check.error));
140
160
  }
141
- return doRequest(absoluteLocation);
161
+ return doRequest(absoluteLocation, redirectCount + 1);
142
162
  }
143
163
  if (res.statusCode < 200 || res.statusCode >= 300) {
144
164
  res.resume();
@@ -246,5 +266,6 @@ module.exports = {
246
266
  isPrivateIP,
247
267
  safeDnsResolve,
248
268
  ALLOWED_DOWNLOAD_DOMAINS,
249
- PRIVATE_IP_PATTERNS
269
+ PRIVATE_IP_PATTERNS,
270
+ MAX_REDIRECTS
250
271
  };