npm - muaddib-scanner - Versions diffs - 2.10.2 → 2.10.5 - Mend

muaddib-scanner 2.10.2 → 2.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/README.md +3 -1
package/bin/muaddib.js +6 -1
package/package.json +1 -1
package/scripts/analyze-score0.js +190 -0
package/scripts/cleanup-fp-labels.js +81 -0
package/src/canary-tokens.js +52 -0
package/src/index.js +29 -0
package/src/ml/classifier.js +109 -7
package/src/ml/feature-extractor.js +2 -1
package/src/ml/jsonl-writer.js +19 -2
package/src/ml/model-bundler.js +11 -0
package/src/ml/model-trees.js +7 -9
package/src/ml/train-bundler-detector.py +704 -0
package/src/ml/train-xgboost.py +733 -0
package/src/response/playbooks.js +20 -0
package/src/rules/index.js +49 -0
package/src/sandbox/index.js +11 -0
package/src/scanner/ast-detectors.js +136 -8
package/src/scanner/ast.js +3 -1
package/src/scoring.js +64 -5
package/src/webhook.js +46 -14

package/README.md CHANGED Viewed

@@ -30,7 +30,7 @@
 npm and PyPI supply-chain attacks are exploding. Shai-Hulud compromised 25K+ repos in 2025. Existing tools detect threats but don't help you respond.
-MUAD'DIB combines **14 parallel scanners** (158 detection rules), a **deobfuscation engine**, **inter-module dataflow analysis**, **per-file max scoring**, **compound scoring rules**, **ML classifier** for T1 zone FP reduction, Docker sandbox with **monkey-patching preload** for time-bomb detection, **behavioral anomaly detection**, **GlassWorm campaign detection**, and **ground truth validation** to detect threats AND guide your response — even before they appear in any IOC database.
+MUAD'DIB combines **14 parallel scanners** (158 detection rules), a **deobfuscation engine**, **inter-module dataflow analysis**, **compound scoring**, and Docker sandbox to detect known threats and suspicious behavioral patterns in npm and PyPI packages.
 ---
@@ -345,6 +345,8 @@ npm test
 ## Documentation
+- [Blog](https://dnszlsk.github.io/muad-dib/blog/) - Technical articles on supply-chain threat detection
+- [Carnet de bord](docs/CARNET_DE_BORD_MUADDIB.md) - Development journal (in French)
 - [Documentation Index](docs/INDEX.md) - All documentation in one place
 - [Evaluation Methodology](docs/EVALUATION_METHODOLOGY.md) - Experimental protocol, holdout scores
 - [Threat Model](docs/threat-model.md) - What MUAD'DIB detects and doesn't detect

package/bin/muaddib.js CHANGED Viewed

@@ -34,6 +34,7 @@ let noDeobfuscate = false;
 let noModuleGraph = false;
 let noReachability = false;
 let configPath = null;
+let autoSandbox = false;
 let feedLimit = null;
 let feedSeverity = null;
 let feedSince = null;
@@ -137,6 +138,8 @@ for (let i = 0; i < options.length; i++) {
     }
     configPath = cfgPath;
     i++;
+  } else if (options[i] === '--auto-sandbox') {
+    autoSandbox = true;
   } else if (options[i] === '--temporal') {
     temporalMode = true;
   } else if (options[i] === '--limit') {
@@ -429,6 +432,7 @@ const helpText = `
     --temporal-publish  Detect publish frequency anomalies (bursts, dormant spikes)
     --temporal-maintainer  Detect maintainer changes (new maintainer, account takeover)
     --temporal-full     All temporal analyses (lifecycle + AST + publish + maintainer)
+    --auto-sandbox      Auto-trigger sandbox when static scan score >= 20 (requires Docker)
     --no-canary         Disable honey token injection in sandbox
     --no-deobfuscate    Disable deobfuscation pre-processing
     --no-module-graph   Disable cross-file dataflow analysis
@@ -482,7 +486,8 @@ if (command === 'version' || command === '--version' || command === '-v') {
     noDeobfuscate: noDeobfuscate,
     noModuleGraph: noModuleGraph,
     noReachability: noReachability,
-    configPath: configPath
+    configPath: configPath,
+    autoSandbox: autoSandbox
   }).then(exitCode => {
     process.exit(exitCode);
   }).catch(err => {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.10.2",
+  "version": "2.10.5",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/scripts/analyze-score0.js ADDED Viewed

@@ -0,0 +1,190 @@
+#!/usr/bin/env node
+'use strict';
+/**
+ * analyze-score0.js — Diagnostic script for score-0 malware investigation.
+ *
+ * Analyzes packages from the Datadog benchmark that scored 0 (zero threats detected).
+ * Categorizes each package to identify blind spots vs expected non-detections.
+ *
+ * Categories:
+ *   - empty_package: no code files at all
+ *   - ts_only: only .ts files (no .js)
+ *   - binary_only: only .wasm/.node/.dll/.so
+ *   - non_code_assets: CSS/images/fonts/markdown only
+ *   - minimum_viable: package.json + README only
+ *   - python_in_npm: .py files in an npm package
+ *   - unknown: has .js but 0 detections — TRUE BLIND SPOT
+ *
+ * Usage:
+ *   node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl
+ *   node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl --csv report.csv
+ *   node scripts/analyze-score0.js --dir .muaddib-cache/datadog-tarballs/
+ */
+const fs = require('fs');
+const path = require('path');
+// File-extension groups used by categorizePackage(). Extensions are compared in
+// lower case and include the leading dot (the form returned by path.extname).
+// JavaScript sources: a package containing any of these counts as "has code".
+const CODE_EXTENSIONS = new Set(['.js', '.cjs', '.mjs', '.jsx']);
+// TypeScript sources.
+const TS_EXTENSIONS = new Set(['.ts', '.tsx', '.cts', '.mts']);
+// Compiled / native artifacts.
+const BINARY_EXTENSIONS = new Set(['.wasm', '.node', '.dll', '.so', '.dylib', '.exe']);
+// Non-code assets: stylesheets, images, fonts, documents, source maps.
+const ASSET_EXTENSIONS = new Set(['.css', '.scss', '.less', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico',
+  '.woff', '.woff2', '.ttf', '.eot', '.otf', '.md', '.txt', '.html', '.htm', '.map']);
+// Python sources (unexpected inside an npm package).
+const PY_EXTENSIONS = new Set(['.py', '.pyx', '.pyi']);
+/**
+ * Classify a package directory by the kinds of files it contains.
+ *
+ * Collects the base names of all files under packageDir (skipping node_modules
+ * and .git, descending at most 6 directory levels) and returns the first
+ * category that matches, checked in this order: 'missing', 'empty_package',
+ * 'minimum_viable', 'unknown' (has JS), 'ts_only', 'binary_only',
+ * 'python_in_npm', 'non_code_assets', then 'unknown' as the fallback.
+ *
+ * @param {string} packageDir - path of the package directory to inspect
+ * @returns {string} category name
+ */
+function categorizePackage(packageDir) {
+  if (!fs.existsSync(packageDir)) return 'missing';
+  // Base names only (no directory part) of every file found by walk().
+  const files = [];
+  // Recursive directory walk; depth 0 is packageDir itself.
+  function walk(dir, depth) {
+    if (depth > 5) return; // Limit depth
+    try {
+      const entries = fs.readdirSync(dir, { withFileTypes: true });
+      for (const entry of entries) {
+        if (entry.name === 'node_modules' || entry.name === '.git') continue;
+        const full = path.join(dir, entry.name);
+        if (entry.isDirectory()) {
+          walk(full, depth + 1);
+        } else if (entry.isFile()) {
+          // Entries that are neither directories nor regular files are ignored.
+          files.push(entry.name);
+        }
+      }
+    } catch { /* best-effort: any directory that cannot be read is skipped */ }
+  }
+  walk(packageDir, 0);
+  if (files.length === 0) return 'empty_package';
+  const extensions = files.map(f => path.extname(f).toLowerCase());
+  const hasCode = extensions.some(e => CODE_EXTENSIONS.has(e));
+  const hasTs = extensions.some(e => TS_EXTENSIONS.has(e));
+  const hasBinary = extensions.some(e => BINARY_EXTENSIONS.has(e));
+  const hasPython = extensions.some(e => PY_EXTENSIONS.has(e));
+  const hasAssets = extensions.some(e => ASSET_EXTENSIONS.has(e));
+  // Only metadata files (package.json, README, LICENSE, CHANGELOG).
+  // Matching is on the base name, so a file with one of these names in a
+  // subdirectory is treated as metadata as well.
+  const nonMeta = files.filter(f => !['package.json', 'readme.md', 'readme', 'license', 'license.md', 'changelog.md'].includes(f.toLowerCase()));
+  if (nonMeta.length === 0) return 'minimum_viable';
+  if (hasCode) return 'unknown'; // TRUE BLIND SPOT: has JS but 0 detections
+  // From here on hasCode is always false, so the `!hasCode` guards below are
+  // redundant; only the order of the checks decides the result.
+  if (hasTs && !hasCode) return 'ts_only';
+  if (hasBinary && !hasCode && !hasTs) return 'binary_only';
+  if (hasPython && !hasCode) return 'python_in_npm';
+  if (hasAssets && !hasCode && !hasTs && !hasBinary) return 'non_code_assets';
+  return 'unknown'; // Fallback
+}
+/**
+ * Read a JSONL benchmark file and return the records that scored zero.
+ *
+ * Logs an error and exits the process with code 1 when the file does not exist.
+ * Blank lines and lines that are not valid JSON are skipped silently.
+ *
+ * @param {string} filepath - path to the JSONL file (one JSON object per line)
+ * @returns {Object[]} parsed records whose `score` and `threat_count` are both 0
+ */
+function loadBenchmarkResults(filepath) {
+  if (!fs.existsSync(filepath)) {
+    console.error(`[SCORE0] File not found: ${filepath}`);
+    process.exit(1);
+  }
+  // The whole file is read into memory, then split into lines.
+  const content = fs.readFileSync(filepath, 'utf8');
+  const records = [];
+  for (const line of content.split('\n')) {
+    if (!line.trim()) continue;
+    try {
+      const record = JSON.parse(line);
+      // Strict comparison: records missing either field are not selected.
+      if (record.score === 0 && record.threat_count === 0) {
+        records.push(record);
+      }
+    } catch { /* skip malformed */ }
+  }
+  return records;
+}
+/**
+ * CLI entry point: build the list of packages, categorize each one, print a
+ * summary report, and optionally write a CSV file.
+ *
+ * Flags read from process.argv: --benchmark <jsonl>, --dir <directory>,
+ * --csv <output file>. Prints usage and exits with code 0 when neither
+ * --benchmark nor --dir has a value.
+ */
+function main() {
+  const args = process.argv.slice(2);
+  const benchmarkIdx = args.indexOf('--benchmark');
+  const dirIdx = args.indexOf('--dir');
+  const csvIdx = args.indexOf('--csv');
+  // Each flag's value is the argument that follows it (undefined if the flag is last).
+  const benchmarkFile = benchmarkIdx >= 0 ? args[benchmarkIdx + 1] : null;
+  const tarballDir = dirIdx >= 0 ? args[dirIdx + 1] : null;
+  const csvFile = csvIdx >= 0 ? args[csvIdx + 1] : null;
+  if (!benchmarkFile && !tarballDir) {
+    console.log('Usage:');
+    console.log('  node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl');
+    console.log('  node scripts/analyze-score0.js --dir .muaddib-cache/datadog-tarballs/');
+    console.log('  node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl --csv report.csv');
+    process.exit(0);
+  }
+  let packages = [];
+  if (benchmarkFile) {
+    // Benchmark mode: one entry per score-0 record. The package directory is
+    // only known when --dir is also given; otherwise dir is null and the
+    // package is later counted as 'no_dir'.
+    const records = loadBenchmarkResults(benchmarkFile);
+    console.log(`[SCORE0] Loaded ${records.length} score-0 packages from benchmark`);
+    packages = records.map(r => ({
+      name: r.name || r.package || 'unknown',
+      version: r.version || '',
+      dir: tarballDir ? path.join(tarballDir, r.name || r.package || 'unknown') : null
+    }));
+  } else if (tarballDir) {
+    // Direct directory scan mode
+    if (!fs.existsSync(tarballDir)) {
+      console.error(`[SCORE0] Directory not found: ${tarballDir}`);
+      process.exit(1);
+    }
+    // Every immediate subdirectory is treated as one package; version is unknown.
+    const entries = fs.readdirSync(tarballDir, { withFileTypes: true });
+    packages = entries
+      .filter(e => e.isDirectory())
+      .map(e => ({ name: e.name, version: '', dir: path.join(tarballDir, e.name) }));
+    console.log(`[SCORE0] Found ${packages.length} package directories`);
+  }
+  // Categorize
+  const categories = {}; // category name -> number of packages
+  const results = [];
+  for (const pkg of packages) {
+    // 'no_dir' = no directory known for the package, or it does not exist on disk.
+    let category = 'no_dir';
+    if (pkg.dir && fs.existsSync(pkg.dir)) {
+      category = categorizePackage(pkg.dir);
+    }
+    categories[category] = (categories[category] || 0) + 1;
+    results.push({ name: pkg.name, version: pkg.version, category });
+  }
+  // Summary
+  console.log('\n=== SCORE 0 INVESTIGATION REPORT ===\n');
+  console.log(`Total score-0 packages: ${packages.length}\n`);
+  // Most frequent category first.
+  const sortedCategories = Object.entries(categories).sort((a, b) => b[1] - a[1]);
+  for (const [cat, count] of sortedCategories) {
+    // Safe division: this loop only runs when at least one package was counted.
+    const pct = ((count / packages.length) * 100).toFixed(1);
+    const label = cat === 'unknown' ? `${cat} *** BLIND SPOT ***` : cat;
+    console.log(`  ${label}: ${count} (${pct}%)`);
+  }
+  const unknownCount = categories.unknown || 0;
+  console.log(`\n  Actionable blind spots: ${unknownCount} packages with JS code but 0 detections`);
+  // CSV output
+  if (csvFile) {
+    // NOTE(review): fields are written without quoting or escaping, so a name or
+    // version containing a comma would produce a malformed row.
+    const csvLines = ['name,version,category'];
+    for (const r of results) {
+      csvLines.push(`${r.name},${r.version},${r.category}`);
+    }
+    fs.writeFileSync(csvFile, csvLines.join('\n'), 'utf8');
+    console.log(`\n  CSV report written to: ${csvFile}`);
+  }
+  // List unknown packages (first 20)
+  const unknowns = results.filter(r => r.category === 'unknown');
+  if (unknowns.length > 0) {
+    console.log('\n  First 20 "unknown" (blind spot) packages:');
+    for (const u of unknowns.slice(0, 20)) {
+      console.log(`    - ${u.name}@${u.version}`);
+    }
+    if (unknowns.length > 20) {
+      console.log(`    ... and ${unknowns.length - 20} more`);
+    }
+  }
+}
+main();

package/scripts/cleanup-fp-labels.js ADDED Viewed

@@ -0,0 +1,81 @@
+#!/usr/bin/env node
+'use strict';
+/**
+ * cleanup-fp-labels.js — One-shot script to convert contaminated 'fp' labels to 'unconfirmed'.
+ *
+ * Context: During 3 months of monitoring, sandbox score === 0 was automatically relabeled
+ * as 'fp' (false positive). Without honey tokens, sandbox clean ≠ false positive.
+ * This script converts all automated 'fp' labels to 'unconfirmed' so they are excluded
+ * from ML training (neither positive nor negative).
+ *
+ * Usage:
+ *   node scripts/cleanup-fp-labels.js                # Dry-run (default)
+ *   node scripts/cleanup-fp-labels.js --apply        # Write changes
+ *   node scripts/cleanup-fp-labels.js --file path    # Custom JSONL path
+ */
+const fs = require('fs');
+const path = require('path');
+const DEFAULT_FILE = path.join(__dirname, '..', 'data', 'ml-training.jsonl');
+/**
+ * CLI entry point: count the records labeled 'fp' in a JSONL training file and,
+ * with --apply, rewrite the file with those labels changed to 'unconfirmed'.
+ *
+ * Flags read from process.argv: --apply (write changes; default is dry-run) and
+ * --file <path> (defaults to DEFAULT_FILE). Exits with code 1 when the file
+ * does not exist.
+ */
+function main() {
+  const args = process.argv.slice(2);
+  const apply = args.includes('--apply');
+  const fileIdx = args.indexOf('--file');
+  // Fall back to the default path when --file is absent or has no value.
+  const filePath = fileIdx >= 0 && args[fileIdx + 1] ? args[fileIdx + 1] : DEFAULT_FILE;
+  if (!fs.existsSync(filePath)) {
+    console.log(`[CLEANUP] File not found: ${filePath}`);
+    process.exit(1);
+  }
+  const content = fs.readFileSync(filePath, 'utf8');
+  const lines = content.split('\n');
+  let totalRecords = 0; // lines that parsed as JSON
+  let fpCount = 0;      // parsed records whose label is exactly 'fp'
+  // Output lines, one per input line, so blank and malformed lines keep their position.
+  let convertedLines = [];
+  for (const line of lines) {
+    if (!line.trim()) {
+      convertedLines.push(line);
+      continue;
+    }
+    try {
+      const record = JSON.parse(line);
+      totalRecords++;
+      if (record.label === 'fp') {
+        fpCount++;
+        if (apply) {
+          // Only converted records are re-serialized; all other lines are kept verbatim.
+          record.label = 'unconfirmed';
+          convertedLines.push(JSON.stringify(record));
+        } else {
+          convertedLines.push(line);
+        }
+      } else {
+        convertedLines.push(line);
+      }
+    } catch {
+      convertedLines.push(line); // Keep malformed lines as-is
+    }
+  }
+  console.log(`[CLEANUP] File: ${filePath}`);
+  console.log(`[CLEANUP] Total records: ${totalRecords}`);
+  console.log(`[CLEANUP] Records with label 'fp': ${fpCount}`);
+  // The file is overwritten in place, and only when there is something to change.
+  if (apply && fpCount > 0) {
+    fs.writeFileSync(filePath, convertedLines.join('\n'), 'utf8');
+    console.log(`[CLEANUP] APPLIED: Converted ${fpCount} 'fp' labels to 'unconfirmed'`);
+  } else if (!apply && fpCount > 0) {
+    console.log(`[CLEANUP] DRY-RUN: Would convert ${fpCount} labels. Use --apply to write.`);
+  } else {
+    console.log(`[CLEANUP] No 'fp' labels found. Nothing to do.`);
+  }
+}
+main();

package/src/canary-tokens.js CHANGED Viewed

@@ -71,6 +71,55 @@ function createCanaryNpmrc(tokens) {
   return `//registry.npmjs.org/:_authToken=${tokens.NPM_AUTH_TOKEN}\n`;
 }
+/**
+ * Generate fake AWS credentials file content.
+ * Produces an INI-style `[default]` profile as used by ~/.aws/credentials, filled
+ * from tokens.AWS_ACCESS_KEY_ID and tokens.AWS_SECRET_ACCESS_KEY, with the region
+ * fixed to us-east-1 and a trailing newline.
+ * NOTE(review): whether the key values are format-valid depends on
+ * generateCanaryTokens(), which is not visible here — confirm.
+ * @param {Record<string, string>} tokens - The token map from generateCanaryTokens()
+ * @returns {string} AWS credentials file content
+ */
+function createCanaryAwsCredentials(tokens) {
+  return [
+    '[default]',
+    `aws_access_key_id = ${tokens.AWS_ACCESS_KEY_ID}`,
+    `aws_secret_access_key = ${tokens.AWS_SECRET_ACCESS_KEY}`,
+    'region = us-east-1',
+    '' // empty last element makes join() end the content with a newline
+  ].join('\n');
+}
+/**
+ * Generate a fake SSH private key file (OpenSSH-style armor).
+ * The content is 64 random bytes, base64-encoded and wrapped at 70 characters
+ * between OPENSSH PRIVATE KEY header and footer lines. It looks like a key
+ * file but is cryptographically meaningless and is regenerated on every call.
+ * Malware that reads ~/.ssh/id_rsa or id_ed25519 will exfiltrate this.
+ * @returns {string} Fake SSH private key content, ending with a newline
+ */
+function createCanarySshKey() {
+  // 64 random bytes encode to 88 base64 characters (including '==' padding).
+  const fakeKeyData = crypto.randomBytes(64).toString('base64');
+  return [
+    '-----BEGIN OPENSSH PRIVATE KEY-----',
+    fakeKeyData.substring(0, 70),
+    // Remainder of the encoded data. Previously the first 70 characters were
+    // emitted twice, which repeated the line and dropped the tail and padding.
+    fakeKeyData.substring(70),
+    '-----END OPENSSH PRIVATE KEY-----',
+    ''
+  ].join('\n');
+}
+/**
+ * Generate a fake .gitconfig with user identity.
+ * The content is static: a [user] section with a placeholder name and e-mail,
+ * and a [credential] section with `helper = store`.
+ * Malware fingerprinting the developer will exfiltrate this.
+ * @returns {string} Fake .gitconfig content, ending with a newline
+ */
+function createCanaryGitconfig() {
+  return [
+    '[user]',
+    '\tname = John Developer',
+    '\temail = john.dev@company-internal.example.com',
+    '[credential]',
+    '\thelper = store',
+    '' // empty last element makes join() end the content with a newline
+  ].join('\n');
+}
 /**
  * Search for canary tokens in network logs from sandbox.
  * Network log structure matches sandbox.js report.network:
@@ -199,6 +248,9 @@ module.exports = {
   generateCanaryTokens,
   createCanaryEnvFile,
   createCanaryNpmrc,
+  createCanaryAwsCredentials,
+  createCanarySshKey,
+  createCanaryGitconfig,
   detectCanaryExfiltration,
   detectCanaryInOutput
 };

package/src/index.js CHANGED Viewed

@@ -523,6 +523,35 @@ async function run(targetPath, options = {}) {
     threats.push(...temporalThreats);
   }
+  // Auto-sandbox: trigger sandbox analysis when static scan detects threats.
+  // Preliminary score estimate: count CRITICAL/HIGH threats as a quick heuristic.
+  // Only when --auto-sandbox flag is set, no explicit sandboxResult, and Docker available.
+  if (options.autoSandbox && !options.sandboxResult) {
+    const critCount = threats.filter(t => t.severity === 'CRITICAL').length;
+    const highCount = threats.filter(t => t.severity === 'HIGH').length;
+    const prelimScore = Math.min(100, critCount * 25 + highCount * 10);
+    if (prelimScore >= 20) {
+      try {
+        const { isDockerAvailable, buildSandboxImage, runSandbox } = require('./sandbox/index.js');
+        if (isDockerAvailable()) {
+          console.log(`\n[AUTO-SANDBOX] Preliminary score ~${prelimScore} >= 20 — triggering sandbox analysis...`);
+          const built = await buildSandboxImage();
+          if (built) {
+            const sbResult = await runSandbox(targetPath, { local: true, strict: false });
+            if (sbResult && Array.isArray(sbResult.findings)) {
+              options.sandboxResult = sbResult;
+            }
+          }
+        } else {
+          debugLog('[AUTO-SANDBOX] Docker not available — skipping sandbox');
+        }
+      } catch (e) {
+        debugLog('[AUTO-SANDBOX] Error:', e && e.message);
+        // Graceful fallback — sandbox is best-effort
+      }
+    }
+  }
   // Sandbox integration
   let sandboxData = null;
   if (options.sandboxResult && Array.isArray(options.sandboxResult.findings)) {

package/src/ml/classifier.js CHANGED Viewed

@@ -8,15 +8,19 @@
  *
  * Guard rails:
  * - score < 20 → clean (below T1 threshold)
- * - score >= 35 → bypass (above T1 zone, always suspicious)
- * - model absent → bypass
- * - high-confidence threat types → bypass (never suppress HC types)
+ * - score >= 35:
+ *   1. HC_TYPES present → bypass (never suppress)
+ *   2. Bundler model available → bundler model decides (fp_bundler or bypass)
+ *   3. Bundler model absent → bypass (unchanged)
+ * - model absent → bypass (T1 zone)
+ * - high-confidence threat types → bypass (never suppress HC types, T1 zone)
  */
 const { extractFeatures } = require('./feature-extractor.js');
-// Lazy-loaded model (allows resetModel for testing)
+// Lazy-loaded models (allows resetModel for testing)
 let _model = undefined; // undefined = not yet loaded, null = absent
+let _bundlerModel = undefined; // undefined = not yet loaded, null = absent
 // High-confidence malice types that must NEVER be suppressed by ML
 const HC_TYPES = new Set([
@@ -59,6 +63,37 @@ function resetModel() {
   _model = undefined;
 }
+// --- Bundler detector model (ML2) ---
+/**
+ * Load the bundler detector model from model-bundler.js. Returns the model object or null.
+ * The result is cached in _bundlerModel: undefined means "not loaded yet", null
+ * means "absent". A failing require() or a falsy export both yield null.
+ * @returns {Object|null} the exported model, or null when unavailable
+ */
+function loadBundlerModel() {
+  // Already resolved (either a model or null): reuse the cached value.
+  if (_bundlerModel !== undefined) return _bundlerModel;
+  try {
+    const trees = require('./model-bundler.js');
+    _bundlerModel = trees || null;
+  } catch {
+    // Any load error is treated as "no model"; callers fall back to bypass.
+    _bundlerModel = null;
+  }
+  return _bundlerModel;
+}
+/**
+ * Check if a trained bundler model is available.
+ * Triggers the lazy load via loadBundlerModel() on first use.
+ * @returns {boolean} true when loadBundlerModel() returns a non-null value
+ */
+function isBundlerModelAvailable() {
+  return loadBundlerModel() !== null;
+}
+/**
+ * Reset bundler model cache (for testing isolation).
+ * Sets the cache back to undefined so the next loadBundlerModel() call
+ * attempts the require() again.
+ */
+function resetBundlerModel() {
+  _bundlerModel = undefined;
+}
 /**
  * Sigmoid function: maps raw margin to probability [0, 1].
  * @param {number} x - raw margin (sum of tree outputs)
@@ -134,6 +169,43 @@ function buildFeatureVector(result, meta) {
   return values;
 }
+/**
+ * Build ordered feature vector for the bundler model from scan result and metadata.
+ * Values follow the order of model.features. A feature that is missing from
+ * the extracted features, or whose value is falsy, becomes 0.
+ * @param {Object} result - scan result from run()
+ * @param {Object} meta - enriched metadata
+ * @returns {Array<number>} ordered feature values; empty array when no bundler model is loaded
+ */
+function buildBundlerFeatureVector(result, meta) {
+  const model = loadBundlerModel();
+  if (!model) return [];
+  // meta is optional; an empty object is passed when it is missing.
+  const features = extractFeatures(result, meta || {});
+  const values = new Array(model.features.length);
+  for (let i = 0; i < model.features.length; i++) {
+    values[i] = features[model.features[i]] || 0;
+  }
+  return values;
+}
+/**
+ * Run bundler model prediction on ordered feature values.
+ * Sums the output of every tree in the model, maps the sum through sigmoid(),
+ * and compares the result with model.threshold.
+ * @param {Array<number>} featureValues - ordered feature values matching bundler model features
+ * @returns {{ probability: number, prediction: string }}
+ *   prediction is 'malicious' when probability >= model.threshold, otherwise 'clean';
+ *   when no bundler model is loaded it is 'bypass' with probability 0.5
+ */
+function predictBundler(featureValues) {
+  const model = loadBundlerModel();
+  if (!model) return { probability: 0.5, prediction: 'bypass' };
+  // Raw margin: sum of the per-tree outputs.
+  let margin = 0;
+  for (const tree of model.trees) {
+    margin += traverseTree(tree, featureValues);
+  }
+  const probability = sigmoid(margin);
+  const prediction = probability >= model.threshold ? 'malicious' : 'clean';
+  return { probability, prediction };
+}
 /**
  * Check if result contains any high-confidence threat types.
  * @param {Object} result - scan result
@@ -150,7 +222,7 @@ function hasHighConfidenceThreat(result) {
  * @param {Object} result - scan result from run() with { threats, summary }
  * @param {Object} meta - enriched metadata for feature extraction
  * @returns {{ prediction: string, probability: number, reason: string }}
- *   prediction: 'clean' | 'malicious' | 'bypass'
+ *   prediction: 'clean' | 'malicious' | 'bypass' | 'fp_bundler'
  *   reason: explains why this prediction was made
  */
 function classifyPackage(result, meta) {
@@ -161,8 +233,32 @@ function classifyPackage(result, meta) {
     return { prediction: 'clean', probability: 0, reason: 'below_t1' };
   }
-  // Guard rail 2: above T1 zone — always bypass (let rules decide)
+  // Guard rail 2: above T1 zone — bundler model or bypass
   if (score >= 35) {
+    // Guard rail 2a: HC types present → always bypass (never suppress)
+    if (hasHighConfidenceThreat(result)) {
+      return { prediction: 'bypass', probability: 1, reason: 'high_confidence_threat' };
+    }
+    // Guard rail 2b: bundler model available → let it decide
+    if (isBundlerModelAvailable()) {
+      const bundlerVec = buildBundlerFeatureVector(result, meta);
+      const bundlerResult = predictBundler(bundlerVec);
+      if (bundlerResult.prediction === 'clean') {
+        return {
+          prediction: 'fp_bundler',
+          probability: Math.round(bundlerResult.probability * 1000) / 1000,
+          reason: 'ml_bundler_clean'
+        };
+      }
+      return {
+        prediction: 'bypass',
+        probability: Math.round(bundlerResult.probability * 1000) / 1000,
+        reason: 'ml_bundler_malicious'
+      };
+    }
+    // Guard rail 2c: bundler model absent → bypass
     return { prediction: 'bypass', probability: 1, reason: 'score_above_threshold' };
   }
@@ -196,5 +292,11 @@ module.exports = {
   traverseTree,
   sigmoid,
   buildFeatureVector,
-  hasHighConfidenceThreat
+  hasHighConfidenceThreat,
+  // Bundler detector (ML2)
+  isBundlerModelAvailable,
+  resetBundlerModel,
+  loadBundlerModel,
+  predictBundler,
+  buildBundlerFeatureVector
 };

package/src/ml/feature-extractor.js CHANGED Viewed

@@ -205,8 +205,9 @@ function buildTrainingRecord(result, params) {
   // --- Label ---
   // 'clean' = no findings or T3 only
   // 'suspect' = T1/T2 (pending manual review)
+  // 'unconfirmed' = sandbox clean, not manually reviewed (default for automated relabeling)
   // 'confirmed' = manually confirmed malicious
-  // 'fp' = manually confirmed false positive
+  // 'fp' = manually confirmed false positive (requires manualReview=true)
   record.label = label || 'suspect';
   record.tier = tier || null;

package/src/ml/jsonl-writer.js CHANGED Viewed

@@ -128,16 +128,33 @@ function getStats() {
   }
 }
+// Valid labels for ML training records
+const VALID_LABELS = new Set(['fp', 'confirmed', 'unconfirmed']);
 /**
  * Update the label of records matching a given package name.
  * Used when manual confirmation (fp/confirmed) is applied retroactively.
  *
  * @param {string} packageName - package name to relabel
- * @param {string} newLabel - 'fp' or 'confirmed'
+ * @param {string} newLabel - 'fp', 'confirmed', or 'unconfirmed'
  * @param {number} [sandboxFindingCount] - number of sandbox findings (defense-in-depth for 'confirmed')
+ * @param {boolean} [manualReview] - required for 'fp' label (prevents automated contamination)
  * @returns {number} number of records updated
  */
-function relabelRecords(packageName, newLabel, sandboxFindingCount) {
+function relabelRecords(packageName, newLabel, sandboxFindingCount, manualReview) {
+  // Validate label
+  if (!VALID_LABELS.has(newLabel)) {
+    console.warn(`[ML] BLOCKED relabel to '${newLabel}' for ${packageName}: invalid label (valid: ${[...VALID_LABELS].join(', ')})`);
+    return 0;
+  }
+  // Defense-in-depth: 'fp' requires explicit manual review flag to prevent
+  // automated sandbox-clean → fp contamination (8176 records in 3 months)
+  if (newLabel === 'fp' && manualReview !== true) {
+    console.warn(`[ML] BLOCKED relabel to 'fp' for ${packageName}: manualReview required (use 'unconfirmed' for automated relabeling)`);
+    return 0;
+  }
   // Defense-in-depth: never write 'confirmed' without real sandbox findings
   if (newLabel === 'confirmed' && (!sandboxFindingCount || sandboxFindingCount === 0)) {
     console.warn(`[ML] BLOCKED relabel to 'confirmed' for ${packageName}: sandbox_finding_count=${sandboxFindingCount || 0}`);