muaddib-scanner 2.9.2 → 2.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.9.2",
3
+ "version": "2.9.4",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/index.js CHANGED
@@ -570,6 +570,7 @@ async function run(targetPath, options = {}) {
570
570
  // Cross-scanner compound: detached_process + suspicious_dataflow in same file
571
571
  // Catches cases where credential flow is detected by dataflow scanner, not AST scanner
572
572
  {
573
+ const DIST_RE = /(?:^|[/\\])(?:dist|build|out|output)[/\\]|\.min\.js$|\.bundle\.js$/i;
573
574
  const fileMap = Object.create(null);
574
575
  for (const t of deduped) {
575
576
  if (t.file) {
@@ -578,6 +579,9 @@ async function run(targetPath, options = {}) {
578
579
  }
579
580
  }
580
581
  for (const file of Object.keys(fileMap)) {
582
+ // Skip dist/build files — bundler aggregation creates coincidental co-occurrence
583
+ // of detached_process + suspicious_dataflow. Real DPRK attacks target root files.
584
+ if (DIST_RE.test(file)) continue;
581
585
  const fileThreats = fileMap[file];
582
586
  const hasDetached = fileThreats.some(t => t.type === 'detached_process');
583
587
  const hasCredFlow = fileThreats.some(t => t.type === 'suspicious_dataflow');
@@ -349,6 +349,11 @@ const PLAYBOOKS = {
349
349
  'pour eviter la detection statique. Technique de vol de GITHUB_TOKEN, NPM_TOKEN, etc. ' +
350
350
  'Verifier quelles variables sont accedees et si elles sont exfiltrees.',
351
351
 
352
+ lifecycle_hidden_payload:
353
+ 'CRITIQUE: Le script lifecycle pointe vers un fichier cache dans node_modules/. ' +
354
+ 'Ce pattern est utilise par les attaques DPRK/Lazarus pour cacher le payload dans un repertoire ' +
355
+ 'que les scanners excluent par defaut. Examiner le fichier cible immediatement.',
356
+
352
357
  lifecycle_shell_pipe:
353
358
  'CRITIQUE: Le script lifecycle (preinstall/postinstall) pipe du code distant vers un shell (curl | sh). ' +
354
359
  'NE PAS installer. Ceci execute du code arbitraire a l\'installation. ' +
@@ -547,11 +552,6 @@ const PLAYBOOKS = {
547
552
  'Vecteur classique de dependency confusion: le code s\'execute a l\'installation. ' +
548
553
  'NE PAS installer. Verifier le nom exact du package. Signaler sur npm.',
549
554
 
550
- credential_env_exfil:
551
- 'CRITIQUE: Ecriture dans des chemins sensibles (cache npm/yarn, credentials) + acces aux variables d\'environnement. ' +
552
- 'Double vecteur d\'exfiltration de credentials. Supprimer le package. Regenerer tous les secrets. ' +
553
- 'Nettoyer le cache: npm cache clean --force.',
554
-
555
555
  lifecycle_inline_exec:
556
556
  'CRITIQUE: Script lifecycle avec node -e (execution inline). Le code s\'execute automatiquement a npm install. ' +
557
557
  'NE PAS installer. Si deja installe: considerer la machine compromise. ' +
@@ -562,10 +562,6 @@ const PLAYBOOKS = {
562
562
  'Le payload est telecharge et execute automatiquement a l\'installation. ' +
563
563
  'NE PAS installer. Bloquer les connexions sortantes. Supprimer le package.',
564
564
 
565
- obfuscated_credential_tampering:
566
- 'CRITIQUE: Code obfusque + ecriture dans des chemins sensibles. Dissimulation de vol de credentials. ' +
567
- 'Supprimer le package immediatement. Nettoyer le cache npm/yarn. Regenerer tous les secrets.',
568
-
569
565
  bin_field_hijack:
570
566
  'CRITIQUE: Le champ "bin" de package.json shadow une commande systeme (node, npm, git, bash, etc.). ' +
571
567
  'A l\'installation, npm cree un symlink dans node_modules/.bin/ qui intercepte la commande reelle. ' +
@@ -649,6 +649,19 @@ const RULES = {
649
649
  mitre: 'T1027'
650
650
  },
651
651
 
652
+ lifecycle_hidden_payload: {
653
+ id: 'MUADDIB-PKG-016',
654
+ name: 'Lifecycle Script Targets Hidden Payload',
655
+ severity: 'CRITICAL',
656
+ confidence: 'high',
657
+ description: 'Script lifecycle pointe vers un fichier dans node_modules/ — technique de dissimulation de payload. Les scanners excluent node_modules/ par defaut, rendant le payload invisible. Pattern DPRK/Lazarus interview attack.',
658
+ references: [
659
+ 'https://unit42.paloaltonetworks.com/operation-dream-job/',
660
+ 'https://blog.phylum.io/shai-hulud-npm-worm'
661
+ ],
662
+ mitre: 'T1027.009'
663
+ },
664
+
652
665
  lifecycle_shell_pipe: {
653
666
  id: 'MUADDIB-PKG-010',
654
667
  name: 'Lifecycle Script Pipes to Shell',
@@ -1621,18 +1634,6 @@ const RULES = {
1621
1634
  ],
1622
1635
  mitre: 'T1195.002'
1623
1636
  },
1624
- credential_env_exfil: {
1625
- id: 'MUADDIB-COMPOUND-003',
1626
- name: 'Credential Tampering + Env Access',
1627
- severity: 'CRITICAL',
1628
- confidence: 'high',
1629
- description: 'Ecriture dans un chemin sensible (cache npm/yarn, credentials) combinee avec acces aux variables d\'environnement. Chaine d\'exfiltration de credentials par double vecteur.',
1630
- references: [
1631
- 'https://attack.mitre.org/techniques/T1552/001/',
1632
- 'https://attack.mitre.org/techniques/T1565/001/'
1633
- ],
1634
- mitre: 'T1552.001'
1635
- },
1636
1637
  lifecycle_inline_exec: {
1637
1638
  id: 'MUADDIB-COMPOUND-004',
1638
1639
  name: 'Lifecycle Hook + Inline Node Execution',
@@ -1657,18 +1658,6 @@ const RULES = {
1657
1658
  ],
1658
1659
  mitre: 'T1105'
1659
1660
  },
1660
- obfuscated_credential_tampering: {
1661
- id: 'MUADDIB-COMPOUND-006',
1662
- name: 'Obfuscated Code + Credential Tampering',
1663
- severity: 'CRITICAL',
1664
- confidence: 'high',
1665
- description: 'Code obfusque combine avec ecriture dans des chemins sensibles (cache npm/yarn, credentials). Dissimulation de vol de credentials.',
1666
- references: [
1667
- 'https://attack.mitre.org/techniques/T1027/',
1668
- 'https://attack.mitre.org/techniques/T1565/001/'
1669
- ],
1670
- mitre: 'T1027'
1671
- },
1672
1661
  };
1673
1662
 
1674
1663
  function getRule(type) {
@@ -475,6 +475,45 @@ function handleVariableDeclarator(node, ctx) {
475
475
  ctx.stringVarValues.set(node.id.name, strVal);
476
476
  }
477
477
 
478
+ // Track variables assigned from require.cache[...] (module cache references)
479
+ // Used to detect writes to cached module exports (require.cache poisoning)
480
+ if (node.init?.type === 'MemberExpression' && node.init.computed) {
481
+ const obj = node.init.object;
482
+ if (obj?.type === 'MemberExpression' &&
483
+ obj.object?.type === 'Identifier' && obj.object.name === 'require' &&
484
+ obj.property?.type === 'Identifier' && obj.property.name === 'cache') {
485
+ ctx.requireCacheVars.add(node.id.name);
486
+ }
487
+ }
488
+
489
+ // Track variables assigned from BinaryExpression with '+' (string concatenation building)
490
+ // Used to detect setTimeout(concatVar, delay) — eval via timer with built string
491
+ // FP fix: only track when at least one operand is demonstrably a string (literal, template,
492
+ // or known string var). Filters out arithmetic `var e = a + 1` in minified code.
493
+ if (node.init?.type === 'BinaryExpression' && node.init.operator === '+') {
494
+ const left = node.init.left;
495
+ const right = node.init.right;
496
+ const isStringOperand = (n) =>
497
+ (n.type === 'Literal' && typeof n.value === 'string') ||
498
+ n.type === 'TemplateLiteral' ||
499
+ (n.type === 'Identifier' && ctx.stringVarValues?.has(n.name)) ||
500
+ (n.type === 'Identifier' && ctx.stringBuildVars?.has(n.name));
501
+ if (isStringOperand(left) || isStringOperand(right)) {
502
+ ctx.stringBuildVars.add(node.id.name);
503
+ }
504
+ }
505
+
506
+ // Track object variables with Proxy trap properties (set/get/apply/construct)
507
+ // Used to detect new Proxy(target, handlerVar) when handler is not inline
508
+ if (node.init?.type === 'ObjectExpression') {
509
+ const hasTrap = node.init.properties?.some(p =>
510
+ p.key?.type === 'Identifier' && ['set', 'get', 'apply', 'construct'].includes(p.key.name)
511
+ );
512
+ if (hasTrap) {
513
+ ctx.proxyHandlerVars.add(node.id.name);
514
+ }
515
+ }
516
+
478
517
  // Track variables assigned from path.join containing .github/workflows
479
518
  if (node.init?.type === 'CallExpression' && node.init.callee?.type === 'MemberExpression') {
480
519
  const obj = node.init.callee.object;
@@ -1294,6 +1333,29 @@ function handleCallExpression(node, ctx) {
1294
1333
  file: ctx.relFile
1295
1334
  });
1296
1335
  }
1336
+ // BinaryExpression with '+' as first arg = string concatenation for eval via timer
1337
+ else if (firstArg.type === 'BinaryExpression' && firstArg.operator === '+') {
1338
+ ctx.hasEvalInFile = true;
1339
+ ctx.hasDynamicExec = true;
1340
+ ctx.threats.push({
1341
+ type: 'dangerous_call_eval',
1342
+ severity: 'HIGH',
1343
+ message: `${callName}() with concatenated string argument — eval equivalent, dynamically built code string.`,
1344
+ file: ctx.relFile
1345
+ });
1346
+ }
1347
+ // Identifier arg that was tracked as string value or string concatenation result
1348
+ else if (firstArg.type === 'Identifier' &&
1349
+ (ctx.stringVarValues?.has(firstArg.name) || ctx.stringBuildVars?.has(firstArg.name))) {
1350
+ ctx.hasEvalInFile = true;
1351
+ ctx.hasDynamicExec = true;
1352
+ ctx.threats.push({
1353
+ type: 'dangerous_call_eval',
1354
+ severity: 'HIGH',
1355
+ message: `${callName}() with variable "${firstArg.name}" containing built string — eval equivalent, executes the string as code.`,
1356
+ file: ctx.relFile
1357
+ });
1358
+ }
1297
1359
 
1298
1360
  // Static timer bomb: setTimeout/setInterval with delay > 1 hour (PhantomRaven 48h delay)
1299
1361
  if (node.arguments.length >= 2) {
@@ -1757,8 +1819,17 @@ function handleNewExpression(node, ctx) {
1757
1819
  );
1758
1820
  if (hasTrap) {
1759
1821
  ctx.hasProxyTrap = true;
1822
+ const hasSetTrap = handler.properties?.some(p =>
1823
+ p.key?.type === 'Identifier' && p.key.name === 'set'
1824
+ );
1825
+ if (hasSetTrap) ctx.hasProxySetTrap = true;
1760
1826
  }
1761
1827
  }
1828
+ // Also detect when handler is a variable reference that was tracked as having trap properties
1829
+ if (handler?.type === 'Identifier' && ctx.proxyHandlerVars?.has(handler.name)) {
1830
+ ctx.hasProxyTrap = true;
1831
+ ctx.hasProxySetTrap = true; // NOTE: proxyHandlerVars does not record which trap matched, so a get/apply/construct-only handler is also treated as a set trap here
1832
+ }
1762
1833
  }
1763
1834
  }
1764
1835
 
@@ -1966,6 +2037,29 @@ function handleAssignmentExpression(node, ctx) {
1966
2037
  if (node.left?.type === 'MemberExpression') {
1967
2038
  const left = node.left;
1968
2039
 
2040
+ // require.cache[...].exports = ... — module cache poisoning WRITE (not just read)
2041
+ // This is always malicious: replacing a core module's exports to intercept all usage.
2042
+ // Also detects: mod.exports.X = ... where mod is from require.cache[...]
2043
+ if (left.property?.type === 'Identifier' && left.property.name === 'exports') {
2044
+ // Direct pattern: require.cache[...].exports = ...
2045
+ const obj = left.object;
2046
+ if (obj?.type === 'MemberExpression' && obj.computed) {
2047
+ const deep = obj.object;
2048
+ if (deep?.type === 'MemberExpression' &&
2049
+ deep.object?.type === 'Identifier' && deep.object.name === 'require' &&
2050
+ deep.property?.type === 'Identifier' && deep.property.name === 'cache') {
2051
+ ctx.hasRequireCacheWrite = true;
2052
+ }
2053
+ }
2054
+ }
2055
+ // Indirect pattern: mod.exports.X = ... where mod = require.cache[...]
2056
+ if (left.object?.type === 'MemberExpression' &&
2057
+ left.object.property?.type === 'Identifier' && left.object.property.name === 'exports' &&
2058
+ left.object.object?.type === 'Identifier' &&
2059
+ ctx.requireCacheVars?.has(left.object.object.name)) {
2060
+ ctx.hasRequireCacheWrite = true;
2061
+ }
2062
+
1969
2063
  // globalThis.fetch = ... or globalThis.XMLHttpRequest = ... (B2: include aliases)
1970
2064
  if (left.object?.type === 'Identifier' &&
1971
2065
  (left.object.name === 'globalThis' || left.object.name === 'global' ||
@@ -2045,15 +2139,11 @@ function handleAssignmentExpression(node, ctx) {
2045
2139
  }
2046
2140
 
2047
2141
  function handleMemberExpression(node, ctx) {
2048
- // Detect require.cache access
2142
+ // Detect require.cache access — set flag, defer threat emission to handlePostWalk
2143
+ // FP fix: distinguish READ (hot-reload, delete, introspection) from WRITE (.exports = ...)
2049
2144
  if (node.object?.type === 'Identifier' && node.object.name === 'require' &&
2050
2145
  node.property?.type === 'Identifier' && node.property.name === 'cache') {
2051
- ctx.threats.push({
2052
- type: 'require_cache_poison',
2053
- severity: 'CRITICAL',
2054
- message: 'require.cache accessed — module cache poisoning to hijack or replace core Node.js modules.',
2055
- file: ctx.relFile
2056
- });
2146
+ ctx.hasRequireCacheRead = true;
2057
2147
  }
2058
2148
 
2059
2149
  // GlassWorm: track .codePointAt() calls (variation selector decoder pattern)
@@ -2307,11 +2397,15 @@ function handlePostWalk(ctx) {
2307
2397
 
2308
2398
  // Built-in method override + network: console.X = function or Object.defineProperty = function
2309
2399
  // combined with network calls. Monkey-patching built-in APIs for data interception.
2400
+ // CRITICAL when Object.defineProperty itself is reassigned (global hook on all property defs).
2310
2401
  if (ctx.hasBuiltinOverride && ctx.hasNetworkCallInFile) {
2402
+ const isGlobalHook = ctx.hasBuiltinGlobalHook;
2311
2403
  ctx.threats.push({
2312
2404
  type: 'builtin_override_exfil',
2313
- severity: 'HIGH',
2314
- message: 'Built-in method override (console/Object.defineProperty) + network call — runtime API hijacking for data interception and exfiltration.',
2405
+ severity: isGlobalHook ? 'CRITICAL' : 'HIGH',
2406
+ message: isGlobalHook
2407
+ ? 'Object.defineProperty reassigned + network call — global hook intercepts all property definitions for credential exfiltration.'
2408
+ : 'Built-in method override (console/Object.defineProperty) + network call — runtime API hijacking for data interception and exfiltration.',
2315
2409
  file: ctx.relFile
2316
2410
  });
2317
2411
  }
@@ -2335,10 +2429,15 @@ function handlePostWalk(ctx) {
2335
2429
  const hasCredentialSignal = ctx.threats.some(t =>
2336
2430
  t.type === 'env_access' || t.type === 'suspicious_dataflow'
2337
2431
  );
2432
+ // CRITICAL when: credential signals co-occur, OR set trap (intercepts all property writes)
2433
+ // A set trap with network call = universal data capture + exfiltration
2434
+ const isCritical = hasCredentialSignal || ctx.hasProxySetTrap;
2338
2435
  ctx.threats.push({
2339
2436
  type: 'proxy_data_intercept',
2340
- severity: hasCredentialSignal ? 'CRITICAL' : 'HIGH',
2341
- message: 'Proxy trap (set/get/apply) with network call in same file — data interception and exfiltration via Proxy handler.',
2437
+ severity: isCritical ? 'CRITICAL' : 'HIGH',
2438
+ message: ctx.hasProxySetTrap
2439
+ ? 'Proxy set trap with network call — intercepts ALL property writes for exfiltration via Proxy handler.'
2440
+ : 'Proxy trap (set/get/apply) with network call in same file — data interception and exfiltration via Proxy handler.',
2342
2441
  file: ctx.relFile
2343
2442
  });
2344
2443
  }
@@ -2353,6 +2452,24 @@ function handlePostWalk(ctx) {
2353
2452
  });
2354
2453
  }
2355
2454
 
2455
+ // require.cache: distinguish WRITE (actual poisoning) from READ-only (hot-reload, introspection)
2456
+ // FP fix: READ-only emits LOW (informational), WRITE emits CRITICAL (malicious module replacement).
2457
+ if (ctx.hasRequireCacheWrite) {
2458
+ ctx.threats.push({
2459
+ type: 'require_cache_poison',
2460
+ severity: 'CRITICAL',
2461
+ message: 'require.cache[...].exports = ... — module cache write: replaces core module exports to intercept all callers.',
2462
+ file: ctx.relFile
2463
+ });
2464
+ } else if (ctx.hasRequireCacheRead) {
2465
+ ctx.threats.push({
2466
+ type: 'require_cache_poison',
2467
+ severity: 'LOW',
2468
+ message: 'require.cache accessed — module cache read (hot-reload/introspection pattern).',
2469
+ file: ctx.relFile
2470
+ });
2471
+ }
2472
+
2356
2473
  // DPRK/Lazarus compound: detached background process + credential env access + network
2357
2474
  // Pattern: spawn({detached:true}) reads secrets then exfils via network.
2358
2475
  // This combination is never legitimate — daemons don't read API keys and send them out.
@@ -2360,7 +2477,7 @@ function handlePostWalk(ctx) {
2360
2477
  t.file === ctx.relFile && t.type === 'detached_process'
2361
2478
  );
2362
2479
  const hasSensitiveEnvInFile = ctx.threats.some(t =>
2363
- t.file === ctx.relFile && t.type === 'env_access'
2480
+ t.file === ctx.relFile && t.type === 'env_access' && t.severity === 'HIGH'
2364
2481
  );
2365
2482
  if (hasDetachedInFile && hasSensitiveEnvInFile && ctx.hasNetworkCallInFile) {
2366
2483
  ctx.threats.push({
@@ -2372,11 +2489,15 @@ function handlePostWalk(ctx) {
2372
2489
  }
2373
2490
 
2374
2491
  // GlassWorm: Unicode variation selector decoder = .codePointAt + variation selector constants
2492
+ // CRITICAL if combined with eval/exec (GlassWorm always uses dynamic execution),
2493
+ // MEDIUM otherwise (.codePointAt + 0xFE00 is legitimate Unicode processing in fonts/text libs)
2375
2494
  if (ctx.hasCodePointAt && ctx.hasVariationSelectorConst) {
2376
2495
  ctx.threats.push({
2377
2496
  type: 'unicode_variation_decoder',
2378
- severity: 'CRITICAL',
2379
- message: 'Unicode variation selector decoder: .codePointAt() + 0xFE00/0xE0100 constants — GlassWorm payload reconstruction from invisible characters.',
2497
+ severity: ctx.hasDynamicExec ? 'CRITICAL' : 'MEDIUM',
2498
+ message: ctx.hasDynamicExec
2499
+ ? 'Unicode variation selector decoder: .codePointAt() + 0xFE00/0xE0100 constants + dynamic execution — GlassWorm payload reconstruction from invisible characters.'
2500
+ : 'Unicode variation selector decoder: .codePointAt() + 0xFE00/0xE0100 constants — likely legitimate Unicode processing (text formatting, font rendering).',
2380
2501
  file: ctx.relFile
2381
2502
  });
2382
2503
  }
@@ -120,6 +120,8 @@ function analyzeFile(content, filePath, basePath) {
120
120
  hasBuiltinOverride: /\bconsole\s*\.\s*\w+\s*=\s*function/.test(content) ||
121
121
  /\bconsole\s*\[\s*\w+\s*\]\s*=\s*function/.test(content) ||
122
122
  /\bObject\s*\.\s*defineProperty\s*=\s*function/.test(content),
123
+ // Critical builtin override: Object.defineProperty itself is reassigned (global hook)
124
+ hasBuiltinGlobalHook: /\bObject\s*\.\s*defineProperty\s*=\s*function/.test(content),
123
125
  // Stream interceptor: class extending Transform/Duplex/Writable (data wiretap pattern)
124
126
  hasStreamInterceptor: /\bextends\s+(Transform|Duplex|Writable)\b/.test(content),
125
127
  // SANDWORM_MODE P2: DNS exfiltration co-occurrence
@@ -157,6 +159,12 @@ function analyzeFile(content, filePath, basePath) {
157
159
  hasWasmLoad: /\bWebAssembly\s*\.\s*(compile|instantiate|compileStreaming|instantiateStreaming)\b/.test(content),
158
160
  hasWasmHostSink: false, // set in handleCallExpression when WASM import object contains network/fs sinks
159
161
  hasProxyTrap: false, // set in handleNewExpression when Proxy has set/get/apply trap
162
+ hasProxySetTrap: false, // set when Proxy specifically has a 'set' trap (data interception)
163
+ hasRequireCacheRead: false, // set when require.cache is accessed (read)
164
+ hasRequireCacheWrite: false, // set when require.cache exports are modified
165
+ requireCacheVars: new Set(), // variables assigned from require.cache[...]
166
+ proxyHandlerVars: new Set(), // variables assigned object literals with set/get/apply/construct traps
167
+ stringBuildVars: new Set(), // variables assigned from BinaryExpression with '+' (string concat)
160
168
  // C10: Hash verification — legitimate binary installers verify checksums
161
169
  // Requires BOTH createHash() call AND .digest() call — false positives from
162
170
  // standalone mentions of 'sha256' or 'integrity' in comments/descriptions
@@ -205,9 +205,17 @@ function analyzeFile(content, filePath, basePath) {
205
205
  // Fix #23: Function param tainting — track function declarations
206
206
  const functionDefs = new Map(); // functionName → { params: [paramNames] }
207
207
 
208
+ // Fix #24: Callback exposure — track function parameters (potential callbacks)
209
+ // When a callback parameter is invoked with tainted data, it's credential exposure.
210
+ const callbackParams = new Set(); // parameter names of enclosing functions
211
+ const callbackExposures = []; // { callbackName, argName, line }
212
+
213
+ // Pre-scan: collect function declarations and callback params BEFORE the main walk.
214
+ // acorn-walk.simple uses post-order traversal (children before parents), so
215
+ // FunctionDeclaration handlers fire AFTER CallExpressions inside the function body.
216
+ // This pre-scan ensures callbackParams and functionDefs are populated before analysis.
208
217
  walk.simple(ast, {
209
218
  FunctionDeclaration(node) {
210
- // Fix #23: Track function declarations for param tainting
211
219
  if (node.id && node.id.name && node.params) {
212
220
  const paramNames = node.params
213
221
  .filter(p => p.type === 'Identifier')
@@ -215,8 +223,16 @@ function analyzeFile(content, filePath, basePath) {
215
223
  if (paramNames.length > 0) {
216
224
  functionDefs.set(node.id.name, { params: paramNames });
217
225
  }
226
+ // FP fix: skip 1-char parameter names (minified code noise: e, t, n, r, a, b, etc.)
227
+ // Real callback exposure attacks use descriptive names (callback, handler, cb, fn, done).
228
+ for (const p of node.params) {
229
+ if (p.type === 'Identifier' && p.name.length > 1) callbackParams.add(p.name);
230
+ }
218
231
  }
219
- },
232
+ }
233
+ });
234
+
235
+ walk.simple(ast, {
220
236
 
221
237
  VariableDeclarator(node) {
222
238
  // B9: Array destructuring taint propagation: const [data] = [fs.readFileSync('.npmrc')]
@@ -268,6 +284,19 @@ function analyzeFile(content, filePath, basePath) {
268
284
  }
269
285
  }
270
286
  }
287
+ // Fix #24: Propagate taint through fs.readFileSync/readFile results
288
+ // const data = fs.readFileSync(npmrc) where npmrc is sensitive → data is tainted
289
+ if (initNode.type === 'CallExpression' && initNode.callee?.type === 'MemberExpression') {
290
+ const callProp = initNode.callee.property;
291
+ if (callProp?.type === 'Identifier' &&
292
+ (callProp.name === 'readFileSync' || callProp.name === 'readFile')) {
293
+ const readArg = initNode.arguments[0];
294
+ if (readArg && isCredentialPath(readArg, sensitivePathVars)) {
295
+ sensitivePathVars.add(node.id.name);
296
+ }
297
+ }
298
+ }
299
+
271
300
  // B7: Taint propagation through data-preserving wrappers
272
301
  if (initNode.type === 'CallExpression') {
273
302
  const callee = initNode.callee;
@@ -653,6 +682,22 @@ function analyzeFile(content, filePath, basePath) {
653
682
  }
654
683
  }
655
684
 
685
+ // Fix #24: Callback exposure — detect callback(taintedData)
686
+ // When a function parameter is called with tainted data, it exposes credentials
687
+ // to the caller (cross-module credential exposure pattern).
688
+ if (node.callee.type === 'Identifier' && callbackParams.has(node.callee.name) &&
689
+ node.arguments.length >= 1) {
690
+ for (const arg of node.arguments) {
691
+ if (arg.type === 'Identifier' && sensitivePathVars.has(arg.name)) {
692
+ callbackExposures.push({
693
+ callbackName: node.callee.name,
694
+ argName: arg.name,
695
+ line: node.loc?.start?.line || 0
696
+ });
697
+ }
698
+ }
699
+ }
700
+
656
701
  // Exec callback: exec('cmd', (err, stdout) => {...}) — output will be used
657
702
  if (!execResultNodes.has(node) && node.arguments.length >= 2) {
658
703
  const lastArg = node.arguments[node.arguments.length - 1];
@@ -755,6 +800,7 @@ function analyzeFile(content, filePath, basePath) {
755
800
  for (const eventName of emitTaintedEvents) {
756
801
  const handler = eventHandlers.get(eventName);
757
802
  if (handler && handler.hasNetworkSink) {
803
+ // Same-file emit→on with network sink: full suspicious_dataflow
758
804
  sources.push({
759
805
  type: 'credential_read',
760
806
  name: `EventEmitter.emit('${eventName}')`,
@@ -767,9 +813,31 @@ function analyzeFile(content, filePath, basePath) {
767
813
  line: 0,
768
814
  taint_tracked: true
769
815
  });
816
+ } else {
817
+ // Cross-file: tainted data emitted on EventEmitter without same-file listener.
818
+ // The data is broadcast to other modules — credential exposure pattern.
819
+ sinks.push({
820
+ type: 'network_send',
821
+ name: `EventEmitter.emit('${eventName}') [cross-module broadcast]`,
822
+ line: 0,
823
+ taint_tracked: true
824
+ });
770
825
  }
771
826
  }
772
827
 
828
+ // Fix #24: Callback exposure — add sinks for callback invocations with tainted data
829
+ // FP fix: cap at 5 exposures per file. Real attacks have 1-2 targeted callbacks,
830
+ // >5 is minified code noise (jspdf, etc.)
831
+ const cappedExposures = callbackExposures.slice(0, 5);
832
+ for (const exposure of cappedExposures) {
833
+ sinks.push({
834
+ type: 'network_send',
835
+ name: `${exposure.callbackName}(${exposure.argName}) [callback exposure]`,
836
+ line: exposure.line,
837
+ taint_tracked: true
838
+ });
839
+ }
840
+
773
841
  // Check if any source or sink was resolved via taint tracking
774
842
  const hasTaintTracked = sources.some(s => s.taint_tracked) || sinks.some(s => s.taint_tracked);
775
843
 
@@ -804,6 +872,17 @@ function analyzeFile(content, filePath, basePath) {
804
872
  }
805
873
  if (severity === 'CRITICAL') break;
806
874
  }
875
+ // Fix #24: EventEmitter broadcast and callback exposure sinks are always CRITICAL
876
+ // when combined with credential sources — the data is being sent to external consumers
877
+ if (severity !== 'CRITICAL') {
878
+ const hasExposureSink = exfilSinks.some(s =>
879
+ s.name.includes('[cross-module broadcast]') || s.name.includes('[callback exposure]')
880
+ );
881
+ const hasCredentialSource = sources.some(s => s.type === 'credential_read');
882
+ if (hasExposureSink && hasCredentialSource) {
883
+ severity = 'CRITICAL';
884
+ }
885
+ }
807
886
 
808
887
  // Downgrade: if ALL sources are pure telemetry (os.platform, os.arch), cap at HIGH
809
888
  const allTelemetryOnly = sources.every(s => s.type === 'telemetry_read');
@@ -23,7 +23,9 @@ function detectObfuscation(targetPath) {
23
23
  // P6: Any JS file > 100KB is overwhelmingly bundled output regardless of directory name.
24
24
  // Real obfuscated malware is typically small (<50KB). Catches prettier plugins/, svelte compiler/, etc.
25
25
  const isLargeJs = basename.endsWith('.js') && content.length > 100 * 1024;
26
- const isPackageOutput = isMinified || isBundled || isInDistOrBuild || isLargeCjsMjs || isLargeJs;
26
+ // Locale/i18n files legitimately contain invisible Unicode (e.g. Persian ZWNJ U+200C)
27
+ const isLocaleFile = /(?:^|[/\\])(?:locale|locales|i18n|intl|lang|languages|translations)[/\\]/i.test(relativePath);
28
+ const isPackageOutput = isMinified || isBundled || isInDistOrBuild || isLargeCjsMjs || isLargeJs || isLocaleFile;
27
29
 
28
30
  // 1. Single-line code ratio (skip .min.js — minification, not obfuscation)
29
31
  if (!isMinified) {
@@ -73,11 +75,11 @@ function detectObfuscation(targetPath) {
73
75
  // 7. Unicode invisible character injection (GlassWorm — March 2026)
74
76
  // Detects zero-width chars, variation selectors, tag characters embedded in source
75
77
  const invisibleCount = countInvisibleUnicode(content);
76
- if (invisibleCount >= 3) {
78
+ if (invisibleCount >= 10) {
77
79
  threats.push({
78
80
  type: 'unicode_invisible_injection',
79
81
  severity: isPackageOutput ? 'LOW' : 'CRITICAL',
80
- message: `${invisibleCount} invisible Unicode characters detected (zero-width, variation selectors, tag chars). GlassWorm technique: payload encoded via invisible codepoints.`,
82
+ message: `${invisibleCount} invisible Unicode characters detected (zero-width, variation selectors, tag chars). Possible hidden payload encoded via invisible codepoints.`,
81
83
  file: relativePath
82
84
  });
83
85
  }
@@ -151,7 +153,7 @@ function hasLargeStringArray(content) {
151
153
  * - U+200B, U+200C, U+200D (zero-width space/joiner/non-joiner)
152
154
  * - U+FEFF (BOM — only if position > 0; pos 0 is legitimate BOM)
153
155
  * - U+2060 (word joiner), U+180E (Mongolian vowel separator)
154
- * - U+FE00-U+FE0F (variation selectors — GlassWorm 256-value encoding)
156
+ * - U+FE00-U+FE0E (variation selectors — excludes U+FE0F emoji presentation selector)
155
157
  * - U+E0100-U+E01EF (variation selectors supplement)
156
158
  * - U+E0001-U+E007F (tag characters)
157
159
  */
@@ -168,8 +170,8 @@ function countInvisibleUnicode(content) {
168
170
  else if (cp === 0xFEFF && i > 0) {
169
171
  count++;
170
172
  }
171
- // BMP variation selectors (U+FE00-U+FE0F)
172
- else if (cp >= 0xFE00 && cp <= 0xFE0F) {
173
+ // BMP variation selectors (U+FE00-U+FE0E) — excludes U+FE0F (emoji presentation selector)
174
+ else if (cp >= 0xFE00 && cp <= 0xFE0E) {
173
175
  count++;
174
176
  }
175
177
  // Supplementary plane: variation selectors supplement (U+E0100-U+E01EF)
@@ -103,6 +103,19 @@ async function scanPackageJson(targetPath) {
103
103
  }
104
104
  }
105
105
 
106
+ // Escalate: lifecycle script targeting node_modules/ — payload hiding technique.
107
+ // Legitimate postinstall scripts run from the package's own directory, not from node_modules/.
108
+ // Lazarus/DPRK interview attacks hide payloads in node_modules/.cache/ or similar paths.
109
+ if (['preinstall', 'install', 'postinstall'].includes(scriptName) &&
110
+ /\bnode_modules[\/\\]/.test(scriptContent)) {
111
+ threats.push({
112
+ type: 'lifecycle_hidden_payload',
113
+ severity: 'CRITICAL',
114
+ message: `Critical: "${scriptName}" targets file inside node_modules/ — payload hiding technique to evade scanners.`,
115
+ file: 'package.json'
116
+ });
117
+ }
118
+
106
119
  // Detect Bun runtime evasion in lifecycle scripts (Shai-Hulud 2.0)
107
120
  if (/\bbun\s+(run|exec|install|x)\b/.test(scriptContent) || /\bbunx\s+/.test(scriptContent)) {
108
121
  threats.push({
@@ -137,6 +150,11 @@ async function scanPackageJson(targetPath) {
137
150
  : pkg.bin;
138
151
  for (const [cmdName, cmdPath] of Object.entries(binEntries || {})) {
139
152
  if (SHADOWED_COMMANDS.has(cmdName)) {
153
+ // Skip when the package IS the legitimate provider of the command:
154
+ // 1. Self-name: npm→bin.npm, yarn→bin.yarn
155
+ // 2. Sibling commands: npm also provides npx → pkg.name in SHADOWED_COMMANDS
156
+ // Typosquats still caught: 'nmp' declaring bin.npm → 'nmp' not in SHADOWED_COMMANDS → fires
157
+ if (cmdName === pkg.name || SHADOWED_COMMANDS.has(pkg.name)) continue;
140
158
  threats.push({
141
159
  type: 'bin_field_hijack',
142
160
  severity: 'CRITICAL',
package/src/scoring.js CHANGED
@@ -131,7 +131,8 @@ const FP_COUNT_THRESHOLDS = {
131
131
  // P4: bundled credential_tampering from minified alias resolution (jspdf, lerna)
132
132
  credential_tampering: { maxCount: 5, to: 'LOW' },
133
133
  // B1 FP reduction: bundled code aliases eval/Function (sinon, storybook, vitest)
134
- dangerous_call_eval: { maxCount: 3, from: 'MEDIUM', to: 'LOW' },
134
+ // FP fix: also cover HIGH severity (setTimeout+stringBuildVar in minified code)
135
+ dangerous_call_eval: { maxCount: 3, to: 'LOW' },
135
136
  // P6: HTTP client libraries (undici, aws-sdk, nodemailer, jsdom) parse Authorization/Bearer headers
136
137
  // with 3+ credential regexes. Real harvesters use 1-2 targeted regexes.
137
138
  credential_regex_harvest: { maxCount: 2, from: 'HIGH', to: 'LOW' },
@@ -156,14 +157,16 @@ const DIST_EXEMPT_TYPES = new Set([
156
157
  'cross_file_dataflow', // credential read → network exfil across files
157
158
  'staged_eval_decode', // eval(atob(...)) (explicit payload staging)
158
159
  'reverse_shell', // net.Socket + connect + pipe (always malicious)
159
- 'detached_credential_exfil', // detached process + credential exfil (DPRK/Lazarus)
160
+ // detached_credential_exfil removed from DIST_EXEMPT: in dist/ files, co-occurrence of
161
+ // detached_process + env_access + network is coincidental bundler aggregation.
162
+ // Kept in REACHABILITY_EXEMPT_TYPES (lifecycle invocation is valid).
160
163
  'node_modules_write', // writeFile to node_modules/ (worm propagation)
161
164
  'npm_publish_worm', // exec("npm publish") (worm propagation)
162
165
  // Dangerous shell commands in dist/ are real threats, never bundler output
163
166
  'dangerous_exec',
164
167
  // Compound scoring rules — co-occurrence signals, never FP
165
- 'crypto_staged_payload', 'lifecycle_typosquat', 'credential_env_exfil',
166
- 'lifecycle_inline_exec', 'lifecycle_remote_require', 'obfuscated_credential_tampering'
168
+ 'crypto_staged_payload', 'lifecycle_typosquat',
169
+ 'lifecycle_inline_exec', 'lifecycle_remote_require'
167
170
  // P6: remote_code_load and proxy_data_intercept removed — in bundled dist/ files,
168
171
  // fetch + eval co-occurrence is coincidental (bundler combines HTTP client + template compilation).
169
172
  // fetch_decrypt_exec (fetch+decrypt+eval triple) remains exempt — never coincidental.
@@ -181,7 +184,7 @@ const DIST_BUNDLER_ARTIFACT_TYPES = new Set([
181
184
  'dynamic_require', 'dynamic_import',
182
185
  'obfuscation_detected', 'high_entropy_string', 'possible_obfuscation',
183
186
  'js_obfuscation_pattern', 'vm_code_execution',
184
- 'module_compile', 'module_compile_dynamic',
187
+ 'module_compile', 'module_compile_dynamic', 'unicode_variation_decoder',
185
188
  // P7: env_access in dist/ is bundled SDK config reading, not credential theft
186
189
  'env_access',
187
190
  // P8: Proxy traps in dist/ are state management frameworks (MobX, Vue reactivity, Immer),
@@ -189,7 +192,12 @@ const DIST_BUNDLER_ARTIFACT_TYPES = new Set([
189
192
  'proxy_data_intercept',
190
193
  // P9: fetch+eval in dist/ is Vite/Webpack code splitting (lazy chunk loading),
191
194
  // not remote code execution. Two-notch downgrade (CRITICAL→MEDIUM, HIGH→LOW).
192
- 'remote_code_load'
195
+ 'remote_code_load',
196
+ // P10: In dist/ bundles, binary file refs + crypto are coincidental bundler aggregation
197
+ // (webpack bundles crypto utils alongside image processing). Real steganographic attacks
198
+ // (flatmap-stream) have these at package root, not dist/. Compound (crypto_staged_payload)
199
+ // is in DIST_EXEMPT_TYPES so the overall signal is preserved when truly malicious.
200
+ 'staged_binary_payload', 'crypto_decipher'
193
201
  ]);
194
202
 
195
203
  // Types exempt from reachability downgrade — IOC matches, lifecycle, and package-level types.
@@ -222,7 +230,8 @@ const SCORING_COMPOUNDS = [
222
230
  requires: ['staged_binary_payload', 'crypto_decipher'],
223
231
  severity: 'CRITICAL',
224
232
  message: 'Binary file reference + crypto decryption — steganographic payload chain (scoring compound).',
225
- fileFrom: 'staged_binary_payload'
233
+ fileFrom: 'staged_binary_payload',
234
+ sameFile: true // Real steganographic attacks (flatmap-stream) have crypto+binary in the SAME file
226
235
  },
227
236
  {
228
237
  type: 'lifecycle_typosquat',
@@ -231,13 +240,6 @@ const SCORING_COMPOUNDS = [
231
240
  message: 'Lifecycle hook on typosquat package — dependency confusion attack vector (scoring compound).',
232
241
  fileFrom: 'typosquat_detected'
233
242
  },
234
- {
235
- type: 'credential_env_exfil',
236
- requires: ['credential_tampering', 'env_access'],
237
- severity: 'CRITICAL',
238
- message: 'Credential path tampering + environment variable access — credential exfiltration chain (scoring compound).',
239
- fileFrom: 'credential_tampering'
240
- },
241
243
  {
242
244
  type: 'lifecycle_inline_exec',
243
245
  requires: ['lifecycle_script', 'node_inline_exec'],
@@ -252,13 +254,6 @@ const SCORING_COMPOUNDS = [
252
254
  message: 'Lifecycle hook loading remote code (require http/https) — supply chain payload delivery (scoring compound).',
253
255
  fileFrom: 'network_require'
254
256
  },
255
- {
256
- type: 'obfuscated_credential_tampering',
257
- requires: ['credential_tampering', 'obfuscation_detected'],
258
- severity: 'CRITICAL',
259
- message: 'Obfuscated code + credential path tampering — concealed credential theft (scoring compound).',
260
- fileFrom: 'credential_tampering'
261
- }
262
257
  ];
263
258
 
264
259
  /**
@@ -284,6 +279,28 @@ function applyCompoundBoosts(threats) {
284
279
 
285
280
  // Check all required types are present
286
281
  if (compound.requires.every(req => typeSet.has(req))) {
282
+ // Severity gate: at least one component must have severity >= MEDIUM
283
+ // after FP reductions. If all components were downgraded to LOW,
284
+ // the compound signal is not strong enough to justify a CRITICAL boost.
285
+ const hasSignificantComponent = compound.requires.some(req =>
286
+ threats.some(t => t.type === req && t.severity !== 'LOW')
287
+ );
288
+ if (!hasSignificantComponent) continue;
289
+
290
+ // Same-file constraint: all required types must appear in at least one common file.
291
+ // Prevents cross-file coincidental matches (e.g. next.js: staged_binary_payload in
292
+ // dist/compiled/@vercel/nft/index.js + crypto_decipher in a different file).
293
+ if (compound.sameFile) {
294
+ const filesByType = compound.requires.map(req =>
295
+ new Set(threats.filter(t => t.type === req).map(t => t.file))
296
+ );
297
+ // Find intersection of all file sets
298
+ const commonFiles = [...filesByType[0]].filter(f =>
299
+ filesByType.every(s => s.has(f))
300
+ );
301
+ if (commonFiles.length === 0) continue;
302
+ }
303
+
287
304
  threats.push({
288
305
  type: compound.type,
289
306
  severity: compound.severity,
@@ -363,13 +380,10 @@ function applyFPReductions(threats, reachableFiles, packageName, packageDeps) {
363
380
  }
364
381
  }
365
382
 
366
- // require_cache_poison: single hit HIGH (plugin dedup/hot-reload, not malware)
367
- // Malware poisons cache repeatedly; a single access is framework behavior
368
- if (t.type === 'require_cache_poison' && t.severity === 'CRITICAL' &&
369
- typeCounts.require_cache_poison === 1) {
370
- t.reductions.push({ rule: 'cache_poison_single', from: 'CRITICAL', to: 'HIGH' });
371
- t.severity = 'HIGH';
372
- }
383
+ // require_cache_poison: single-hit downgrade removed.
384
+ // The READ/WRITE distinction in ast-detectors already handles the FP case:
385
+ // READ-only access is rated LOW (hot-reload, introspection); a WRITE is rated CRITICAL (malicious replacement).
386
+ // A single cache WRITE is genuinely malicious — no downgrade needed.
373
387
 
374
388
  // Prototype hook: framework class prototypes → MEDIUM
375
389
  // Core Node.js prototypes (http.IncomingMessage, net.Socket) stay CRITICAL
@@ -416,9 +430,12 @@ function applyFPReductions(threats, reachableFiles, packageName, packageDeps) {
416
430
  }
417
431
 
418
432
  // Reachability: findings in files not reachable from entry points → LOW
433
+ // Exception: .d.ts files are never require()'d by JS but are executed by ts-node/tsx/bun.
434
+ // Executable code in .d.ts is always malicious — exempt from unreachable downgrade.
435
+ const isDtsFile = t.file && t.file.endsWith('.d.ts');
419
436
  if (reachableFiles && reachableFiles.size > 0 && t.file &&
420
437
  !REACHABILITY_EXEMPT_TYPES.has(t.type) &&
421
- !isPackageLevelThreat(t)) {
438
+ !isPackageLevelThreat(t) && !isDtsFile) {
422
439
  const normalizedFile = t.file.replace(/\\/g, '/');
423
440
  if (!reachableFiles.has(normalizedFile)) {
424
441
  t.reductions.push({ rule: 'unreachable', from: t.severity, to: 'LOW' });
@@ -22,14 +22,29 @@ function analyzeWithDeobfuscation(targetPath, analyzeFileFn, options = {}) {
22
22
  if (options.excludedFiles && options.excludedFiles.includes(relativePath)) return;
23
23
  if (options.skipDevFiles !== false && isDevFile(relativePath)) return;
24
24
 
25
+ // .d.ts files: strip TypeScript declaration syntax before JS parsing.
26
+ // Legitimate .d.ts files contain only type declarations (no executable code).
27
+ // Any require/exec/network calls in a .d.ts are high-confidence malicious payload hiding.
28
+ let effectiveContent = content;
29
+ if (file.endsWith('.d.ts')) {
30
+ effectiveContent = content.split('\n').map(line => {
31
+ const trimmed = line.trim();
32
+ // Strip lines that are pure TypeScript declarations (Acorn can't parse these)
33
+ if (/^export\s+declare\s+/.test(trimmed)) return '// [ts-stripped]';
34
+ if (/^declare\s+(function|class|const|let|var|type|interface|enum|namespace|module|global)\s/.test(trimmed)) return '// [ts-stripped]';
35
+ if (/^(export\s+)?(type|interface)\s/.test(trimmed)) return '// [ts-stripped]';
36
+ return line;
37
+ }).join('\n');
38
+ }
39
+
25
40
  // Analyze original code first (preserves obfuscation-detection rules)
26
- const fileThreats = analyzeFileFn(content, file, targetPath);
41
+ const fileThreats = analyzeFileFn(effectiveContent, file, targetPath);
27
42
  threats.push(...fileThreats);
28
43
 
29
44
  // Also analyze deobfuscated code for additional findings hidden by obfuscation
30
45
  if (typeof options.deobfuscate === 'function') {
31
46
  try {
32
- const result = options.deobfuscate(content);
47
+ const result = options.deobfuscate(effectiveContent);
33
48
  if (result.transforms.length > 0) {
34
49
  const deobThreats = analyzeFileFn(result.code, file, targetPath);
35
50
  const existingKeys = new Set(fileThreats.map(t => `${t.type}::${t.message}`));
package/src/utils.js CHANGED
@@ -183,7 +183,9 @@ function _findFilesImpl(dir, { extensions, excludedDirs, maxDepth, results, visi
183
183
  * @returns {string[]} List of .js, .mjs, .cjs and .d.ts file paths
184
184
  */
185
185
  function findJsFiles(dir, results = []) {
186
- return findFiles(dir, { extensions: ['.js', '.mjs', '.cjs'], results });
186
+ // .d.ts included: legitimate .d.ts files never contain require/exec/network calls,
187
+ // so any executable code in .d.ts is a high-confidence malicious payload hiding technique.
188
+ return findFiles(dir, { extensions: ['.js', '.mjs', '.cjs', '.d.ts'], results });
187
189
  }
188
190
 
189
191
  function clearFileListCache() {