npm - muaddib-scanner - Versions diffs - 2.10.64 → 2.10.65 - Mend

muaddib-scanner 2.10.64 → 2.10.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/bin/muaddib.js +30 -0
package/package.json +1 -1
package/src/ml/classifier.js +12 -5
package/src/monitor/auto-labeler.js +344 -0
package/src/monitor/daemon.js +13 -0
package/src/monitor/queue.js +36 -10

package/bin/muaddib.js CHANGED Viewed

@@ -687,6 +687,36 @@ if (command === 'version' || command === '--version' || command === '-v') {
     console.log('Usage: muaddib report --now | --status');
     process.exit(1);
   }
+} else if (command === 'relabel') {
+  if (wantHelp) {
+    console.log('Usage: muaddib relabel [--input <path>] [--output <path>] [--dry-run]');
+    console.log('');
+    console.log('Auto-relabel ML training data by checking registry takedown status.');
+    console.log('Verifies each package against npm/PyPI registries:');
+    console.log('  - npm 0.0.1-security → confirmed_malicious');
+    console.log('  - HTTP 404 + score >= 50 → confirmed_malicious');
+    console.log('  - Alive > 30 days + score < 20 → confirmed_benign');
+    console.log('');
+    console.log('Options:');
+    console.log('  --input <path>   Input JSONL file (default: data/ml-training.jsonl)');
+    console.log('  --output <path>  Output JSONL file (default: data/ml-training-relabeled.jsonl)');
+    console.log('  --dry-run        Log changes without writing');
+    process.exit(0);
+  }
+  const { relabelDataset } = require('../src/monitor/auto-labeler.js');
+  let inputPath, outputPath;
+  for (let i = 0; i < options.length; i++) {
+    if (options[i] === '--input' && options[i + 1]) { inputPath = options[++i]; }
+    else if (options[i] === '--output' && options[i + 1]) { outputPath = options[++i]; }
+  }
+  const dryRun = options.includes('--dry-run');
+  relabelDataset({ input: inputPath, output: outputPath, dryRun }).then(summary => {
+    console.log(JSON.stringify(summary, null, 2));
+    process.exit(0);
+  }).catch(err => {
+    console.error('[ERROR]', err.message);
+    process.exit(1);
+  });
 } else if (command === 'help') {
   // muaddib help <command> — show per-command help
   const helpCmd = options.filter(o => !o.startsWith('-'))[0];

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.10.64",
+  "version": "2.10.65",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/src/ml/classifier.js CHANGED Viewed

@@ -326,21 +326,28 @@ function classifyPackage(result, meta) {
       return { prediction: 'bypass', probability: 1, reason: 'high_confidence_threat' };
     }
-    // Guard rail 2b: bundler model available → let it decide
+    // Guard rail 2b: bundler model — LOG-ONLY mode
+    // DISABLED (2026-04-08): Model semi-collapsed — gives p≈0.37 for both bundler FPs
+    // and real malware (identical output despite 11/19 features diverging). Cannot
+    // discriminate. Safe (nothing filtered at threshold 0.1) but useless.
+    // Disabled until retrained alongside ML1 on corrected JSONL data.
     if (isBundlerModelAvailable()) {
       const bundlerVec = buildBundlerFeatureVector(result, meta);
       const bundlerResult = predictBundler(bundlerVec);
-      if (bundlerResult.prediction === 'clean') {
+      // Log-only: record prediction for retraining validation
+      const roundedP = Math.round(bundlerResult.probability * 1000) / 1000;
+      // When retrained and validated, remove the 'false &&' guard below.
+      if (false && bundlerResult.prediction === 'clean') {
         return {
           prediction: 'fp_bundler',
-          probability: Math.round(bundlerResult.probability * 1000) / 1000,
+          probability: roundedP,
           reason: 'ml_bundler_clean'
         };
       }
       return {
         prediction: 'bypass',
-        probability: Math.round(bundlerResult.probability * 1000) / 1000,
-        reason: 'ml_bundler_malicious'
+        probability: roundedP,
+        reason: bundlerResult.prediction === 'clean' ? 'ml_bundler_clean_disabled' : 'ml_bundler_malicious'
       };
     }

package/src/monitor/auto-labeler.js ADDED Viewed

@@ -0,0 +1,344 @@
+'use strict';
+/**
+ * Auto-labeler — registry takedown-based ML training label correction.
+ *
+ * Verifies packages in the JSONL training dataset against npm/PyPI registries:
+ * - npm `0.0.1-security` replacement → confirmed_malicious (npm Security takedown)
+ * - HTTP 404 + high score → confirmed_malicious (removed, high conviction)
+ * - HTTP 404 + low score → removed_unlabeled (removed, unknown intent)
+ * - Alive > 30 days + low score → confirmed_benign (survival heuristic)
+ * - Alive > 30 days + moderate score → likely_benign
+ *
+ * Never modifies the input JSONL — writes a new file.
+ * Reuses the shared HTTP semaphore to avoid starving monitor scans.
+ */
+const fs = require('fs');
+const path = require('path');
+const https = require('https');
+const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
+const { atomicWriteFileSync } = require('./state.js');
+// Default dataset locations: the `data` directory two levels above this module's directory.
+const DEFAULT_INPUT = path.join(__dirname, '..', '..', 'data', 'ml-training.jsonl');
+const DEFAULT_OUTPUT = path.join(__dirname, '..', '..', 'data', 'ml-training-relabeled.jsonl');
+// Default pause between registry requests in relabelDataset (overridable via options.delayMs).
+const DEFAULT_DELAY_MS = 200; // 5 req/s max — gentle on registries
+// Minimum record age, in days, before computeNewLabel applies a survival-based (benign) label.
+const SURVIVAL_DAYS = 30;
+// Labels eligible for auto-relabeling
+const RELABELABLE = new Set(['suspect', 'ml_clean', 'unconfirmed', 'clean']);
+// --- HTTP helper (minimal, avoids circular deps with ingestion.js) ---
+/**
+ * GET a URL over HTTPS and resolve with its JSON-decoded body.
+ *
+ * A 404 is treated as an answer rather than a failure: the promise resolves
+ * with `{ _httpStatus: 404 }` so callers can tell "not found" apart from errors.
+ * Any other non-2xx status, a socket error, a timeout, or an unparseable body
+ * rejects with an Error.
+ *
+ * @param {string} url - absolute https URL
+ * @param {number} [timeoutMs=15000] - timeout passed to https.get
+ * @returns {Promise<Object>} parsed JSON, or `{ _httpStatus: 404 }`
+ */
+function httpsGetJson(url, timeoutMs = 15000) {
+  return new Promise((resolve, reject) => {
+    const onResponse = (response) => {
+      const code = response.statusCode;
+      if (code === 404) {
+        // Drain the body so the socket is freed.
+        response.resume();
+        resolve({ _httpStatus: 404 });
+        return;
+      }
+      if (code < 200 || code >= 300) {
+        response.resume();
+        reject(new Error(`HTTP ${code} for ${url}`));
+        return;
+      }
+      const parts = [];
+      response.on('data', (part) => {
+        parts.push(part);
+      });
+      response.on('end', () => {
+        let parsed;
+        try {
+          parsed = JSON.parse(Buffer.concat(parts).toString('utf8'));
+        } catch (parseErr) {
+          reject(new Error(`JSON parse error for ${url}: ${parseErr.message}`));
+          return;
+        }
+        resolve(parsed);
+      });
+      response.on('error', reject);
+    };
+    const request = https.get(url, { timeout: timeoutMs }, onResponse);
+    request.on('error', reject);
+    request.on('timeout', () => {
+      // The 'timeout' event only signals idleness; the request must be torn down explicitly.
+      request.destroy();
+      reject(new Error(`Timeout for ${url}`));
+    });
+  });
+}
+/**
+ * Return a promise that resolves (with no value) after `ms` milliseconds.
+ * @param {number} ms - delay in milliseconds
+ * @returns {Promise<void>}
+ */
+function sleep(ms) {
+  return new Promise((done) => {
+    setTimeout(done, ms);
+  });
+}
+// --- Registry status checks ---
+/**
+ * Look up a package on the npm registry and classify its current state.
+ *
+ * Acquires a registry slot from the shared HTTP limiter before the request and
+ * always releases it afterwards. Never rejects once the slot is held: request
+ * or parsing failures are reported as `{ status: 'error', detail }`.
+ *
+ * Possible statuses: 'removed' (HTTP 404), 'security_takedown' (latest dist-tag
+ * is `0.0.1-security`), 'alive', 'error'.
+ *
+ * @param {string} name - package name
+ * @returns {Promise<{status: string, latestVersion?: string, detail?: string}>}
+ */
+async function checkNpmStatus(name) {
+  await acquireRegistrySlot();
+  try {
+    const registryUrl = `https://registry.npmjs.org/${encodeURIComponent(name)}`;
+    const packument = await httpsGetJson(registryUrl);
+    if (packument._httpStatus === 404) {
+      return { status: 'removed' };
+    }
+    const distTags = packument['dist-tags'];
+    const latestTag = distTags && distTags.latest;
+    const takenDown = latestTag === '0.0.1-security';
+    return takenDown
+      ? { status: 'security_takedown', latestVersion: latestTag }
+      : { status: 'alive', latestVersion: latestTag || 'unknown' };
+  } catch (failure) {
+    return { status: 'error', detail: failure.message };
+  } finally {
+    releaseRegistrySlot();
+  }
+}
+/**
+ * Check PyPI registry status for a package.
+ *
+ * Like checkNpmStatus, the request is made while holding a slot from the shared
+ * HTTP limiter, so relabeling PyPI packages is throttled together with other
+ * registry traffic instead of bypassing the limiter. The slot is always released.
+ * Never rejects once the slot is held: failures are reported as
+ * `{ status: 'error', detail }`.
+ *
+ * @param {string} name - package name
+ * @returns {Promise<{status: string, detail?: string}>} status is 'removed' (HTTP 404), 'alive', or 'error'
+ */
+async function checkPyPIStatus(name) {
+  await acquireRegistrySlot();
+  try {
+    const data = await httpsGetJson(`https://pypi.org/pypi/${encodeURIComponent(name)}/json`);
+    if (data._httpStatus === 404) {
+      return { status: 'removed' };
+    }
+    return { status: 'alive' };
+  } catch (err) {
+    return { status: 'error', detail: err.message };
+  } finally {
+    releaseRegistrySlot();
+  }
+}
+// --- Label computation ---
+/**
+ * Decide which label, if any, a training record should receive given the
+ * registry status of its package.
+ *
+ * Decision table:
+ * - label already confirmed_malicious / confirmed_benign / fp / confirmed → null
+ * - security_takedown → confirmed_malicious
+ * - removed, score >= 50 → confirmed_malicious (high conviction)
+ * - removed, score < 50 → removed_unlabeled (don't train on uncertain data)
+ * - alive, age >= 30d, score < 20 → confirmed_benign
+ * - alive, age >= 30d, score 20-34 → likely_benign
+ * - alive, age >= 30d, score >= 35 → null (sleeper risk)
+ * - alive, age < 30d (or no/unparseable timestamp) → null (too early)
+ * - any other status → null
+ *
+ * @param {Object} record - JSONL training record (reads: score, timestamp, label)
+ * @param {{status: string}} registryStatus - from checkNpmStatus/checkPyPIStatus
+ * @returns {{label: string, source: string} | null} new label or null if no change
+ */
+function computeNewLabel(record, registryStatus) {
+  const { status } = registryStatus;
+  const score = record.score || 0;
+  // Records that already carry a final verdict are left alone.
+  const settledLabels = ['confirmed_malicious', 'confirmed_benign', 'fp', 'confirmed'];
+  if (settledLabels.includes(record.label)) {
+    return null;
+  }
+  switch (status) {
+    // --- Takedown signals ---
+    case 'security_takedown':
+      return { label: 'confirmed_malicious', source: 'npm_security_takedown' };
+    case 'removed':
+      return score >= 50
+        ? { label: 'confirmed_malicious', source: 'registry_removed_high_score' }
+        : { label: 'removed_unlabeled', source: 'registry_removed_low_score' };
+    // --- Survival signals ---
+    case 'alive': {
+      const msPerDay = 1000 * 60 * 60 * 24;
+      let ageDays = 0;
+      if (record.timestamp) {
+        ageDays = (Date.now() - new Date(record.timestamp).getTime()) / msPerDay;
+      }
+      // Written as a negated >= so that a NaN age (unparseable timestamp) counts as "too early".
+      if (!(ageDays >= SURVIVAL_DAYS)) {
+        return null;
+      }
+      if (score < 20) {
+        return { label: 'confirmed_benign', source: 'survival_30d' };
+      }
+      if (score >= 20 && score < 35) {
+        return { label: 'likely_benign', source: 'survival_30d_moderate' };
+      }
+      // score >= 35: no change (sleeper risk)
+      return null;
+    }
+    default:
+      return null;
+  }
+}
+// --- Dataset relabeling ---
+/**
+ * Read a JSONL training dataset, check each unique eligible package against its
+ * registry, and write a relabeled copy of the dataset.
+ *
+ * Only records whose label is in RELABELABLE are considered. Records sharing the
+ * same ecosystem/name are checked once, using the highest score and the earliest
+ * timestamp seen for that package. Unparseable lines and records without a usable
+ * package name are copied through unchanged and never trigger a registry request.
+ * The input file is never modified.
+ *
+ * @param {Object} [options]
+ * @param {string} [options.input] - input JSONL path (default: DEFAULT_INPUT)
+ * @param {string} [options.output] - output JSONL path (default: DEFAULT_OUTPUT)
+ * @param {boolean} [options.dryRun] - log changes without writing
+ * @param {number} [options.delayMs] - ms between registry requests (default: DEFAULT_DELAY_MS)
+ * @returns {Promise<Object>} summary stats: checked, relabeled_malicious, relabeled_benign,
+ *   relabeled_likely_benign, removed_unlabeled, unchanged, errors, records_updated
+ * @throws {Error} if the input file does not exist
+ */
+async function relabelDataset(options = {}) {
+  const inputPath = options.input || DEFAULT_INPUT;
+  const outputPath = options.output || DEFAULT_OUTPUT;
+  const dryRun = options.dryRun || false;
+  const delayMs = options.delayMs != null ? options.delayMs : DEFAULT_DELAY_MS;
+  // Grouping key for a parsed record, or null when it has no usable package name.
+  // Without this guard a nameless record would be keyed as "<ecosystem>/undefined"
+  // and relabeled from the registry status of a package literally named "undefined".
+  const packageKeyOf = (data) => (
+    typeof data.name === 'string' && data.name !== ''
+      ? `${data.ecosystem || 'npm'}/${data.name}`
+      : null
+  );
+  // 1. Read records
+  if (!fs.existsSync(inputPath)) {
+    throw new Error(`Input file not found: ${inputPath}`);
+  }
+  const content = fs.readFileSync(inputPath, 'utf8');
+  const lines = content.split('\n');
+  const records = [];
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i].trim();
+    if (!line) continue;
+    try {
+      records.push({ idx: i, data: JSON.parse(line), raw: lines[i] });
+    } catch {
+      // Keep unparseable lines so they are copied through verbatim in step 4.
+      records.push({ idx: i, data: null, raw: lines[i] });
+    }
+  }
+  // 2. Extract unique packages eligible for relabeling
+  const packageMap = new Map(); // key → { name, ecosystem, score, timestamp, indices[] }
+  for (const rec of records) {
+    if (!rec.data) continue;
+    if (!RELABELABLE.has(rec.data.label)) continue;
+    const key = packageKeyOf(rec.data);
+    if (key === null) continue;
+    let pkg = packageMap.get(key);
+    if (!pkg) {
+      pkg = {
+        name: rec.data.name,
+        ecosystem: rec.data.ecosystem || 'npm',
+        score: rec.data.score || 0,
+        timestamp: rec.data.timestamp,
+        indices: []
+      };
+      packageMap.set(key, pkg);
+    }
+    pkg.indices.push(rec.idx);
+    // Use highest score seen for this package
+    if ((rec.data.score || 0) > pkg.score) {
+      pkg.score = rec.data.score;
+    }
+    // Use earliest timestamp
+    if (rec.data.timestamp && (!pkg.timestamp || rec.data.timestamp < pkg.timestamp)) {
+      pkg.timestamp = rec.data.timestamp;
+    }
+  }
+  console.log(`[RELABEL] ${records.length} records, ${packageMap.size} unique packages to check`);
+  // 3. Check each package against registry
+  const summary = {
+    checked: 0,
+    relabeled_malicious: 0,
+    relabeled_benign: 0,
+    relabeled_likely_benign: 0,
+    removed_unlabeled: 0,
+    unchanged: 0,
+    errors: 0,
+    records_updated: 0
+  };
+  const labelChanges = new Map(); // packageKey → { label, source }
+  for (const [key, pkg] of packageMap) {
+    let registryStatus;
+    try {
+      if (pkg.ecosystem === 'npm') {
+        registryStatus = await checkNpmStatus(pkg.name);
+      } else if (pkg.ecosystem === 'pypi') {
+        registryStatus = await checkPyPIStatus(pkg.name);
+      } else {
+        // Unknown ecosystem: no registry to consult, so no request and no delay.
+        summary.unchanged++;
+        summary.checked++;
+        continue;
+      }
+    } catch {
+      summary.errors++;
+      summary.checked++;
+      continue;
+    }
+    if (registryStatus.status === 'error') {
+      summary.errors++;
+      summary.checked++;
+      if (delayMs > 0) await sleep(delayMs);
+      continue;
+    }
+    // pkg carries the aggregated score/timestamp that computeNewLabel reads.
+    const newLabel = computeNewLabel(pkg, registryStatus);
+    summary.checked++;
+    if (newLabel) {
+      labelChanges.set(key, newLabel);
+      if (newLabel.label === 'confirmed_malicious') summary.relabeled_malicious++;
+      else if (newLabel.label === 'confirmed_benign') summary.relabeled_benign++;
+      else if (newLabel.label === 'likely_benign') summary.relabeled_likely_benign++;
+      else if (newLabel.label === 'removed_unlabeled') summary.removed_unlabeled++;
+      if (dryRun) {
+        console.log(`[RELABEL] DRY-RUN: ${key} → ${newLabel.label} (${newLabel.source}, score=${pkg.score}, status=${registryStatus.status})`);
+      }
+    } else {
+      summary.unchanged++;
+    }
+    if (delayMs > 0) await sleep(delayMs);
+  }
+  // 4. Apply label changes to records
+  // One timestamp per run so every record updated together carries the same value.
+  const relabelTimestamp = new Date().toISOString();
+  const outputLines = [];
+  for (const rec of records) {
+    if (!rec.data) {
+      outputLines.push(rec.raw);
+      continue;
+    }
+    const key = packageKeyOf(rec.data);
+    const change = key === null ? undefined : labelChanges.get(key);
+    if (change && RELABELABLE.has(rec.data.label)) {
+      rec.data.label = change.label;
+      rec.data.relabel_source = change.source;
+      rec.data.relabel_timestamp = relabelTimestamp;
+      outputLines.push(JSON.stringify(rec.data));
+      summary.records_updated++;
+    } else {
+      outputLines.push(rec.raw);
+    }
+  }
+  // 5. Write output
+  if (!dryRun) {
+    const dir = path.dirname(outputPath);
+    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
+    atomicWriteFileSync(outputPath, outputLines.join('\n'));
+    console.log(`[RELABEL] Written ${outputLines.length} records to ${path.basename(outputPath)} (${summary.records_updated} updated)`);
+  } else {
+    console.log(`[RELABEL] DRY-RUN complete: ${summary.records_updated} records would be updated`);
+  }
+  console.log(`[RELABEL] Summary: ${summary.relabeled_malicious} malicious, ${summary.relabeled_benign} benign, ${summary.relabeled_likely_benign} likely_benign, ${summary.removed_unlabeled} removed_unlabeled, ${summary.unchanged} unchanged, ${summary.errors} errors`);
+  return summary;
+}
+// Public API: the two registry checks, the label decision function, and the
+// dataset relabeling entry point.
+module.exports = {
+  checkNpmStatus,
+  checkPyPIStatus,
+  computeNewLabel,
+  relabelDataset,
+  // Constants (for testing)
+  RELABELABLE,
+  SURVIVAL_DAYS,
+  DEFAULT_INPUT,
+  DEFAULT_OUTPUT,
+  DEFAULT_DELAY_MS
+};

package/src/monitor/daemon.js CHANGED Viewed

@@ -558,6 +558,19 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
     // Daily webhook report at 08:00 Paris time
     if (isDailyReportDue(stats)) {
       await sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCache);
+      // Auto-relabel JSONL training data after daily report (once per day).
+      // Checks registry takedown status for unconfirmed packages.
+      try {
+        const { relabelDataset } = require('./auto-labeler.js');
+        const summary = await relabelDataset({});
+        const totalRelabeled = summary.relabeled_malicious + summary.relabeled_benign + summary.relabeled_likely_benign;
+        if (totalRelabeled > 0) {
+          console.log(`[MONITOR] Auto-relabel: ${summary.relabeled_malicious} malicious, ${summary.relabeled_benign} benign, ${summary.relabeled_likely_benign} likely_benign (${summary.checked} checked)`);
+        }
+      } catch (err) {
+        // Non-fatal: relabel failure must never crash the monitor
+        console.error(`[MONITOR] Auto-relabel failed: ${err.message}`);
+      }
     }
     // Short pause before re-checking queue — yields event loop for poll interval

package/src/monitor/queue.js CHANGED Viewed

@@ -413,11 +413,15 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
     // First-publish detection: used for sandbox priority below
     const isFirstPublish = cacheTrigger && cacheTrigger.reason === 'first_publish';
-    // ML Phase 2a: Fetch npm registry metadata once for packages with findings
-    // OR for first-publish packages (needed for isFirstPublishHighRisk decision).
-    // Reused for both training records (enriched features) and reputation scoring.
+    // Fetch npm registry metadata for ALL npm packages (not just those with findings).
+    // Needed for: (1) isFirstPublishHighRisk decision, (2) ML classifier features,
+    // (3) JSONL training records — clean packages MUST have metadata to prevent
+    // data leakage (model learning "metadata=0 → clean" instead of behavioral signals).
+    // Cost: near-zero for npm packages because temporal checks (line ~1014) already
+    // pre-fetch registry metadata into temporal-analysis._metadataCache, and
+    // getPackageMetadata() reads this cache first (npm-registry.js:87-95).
     let npmRegistryMeta = null;
-    if ((result.summary.total > 0 || isFirstPublish) && ecosystem === 'npm') {
+    if (ecosystem === 'npm') {
       try {
         const { getPackageMetadata } = require('../scanner/npm-registry.js');
         npmRegistryMeta = await getPackageMetadata(name);
@@ -589,19 +593,43 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
         console.log(`[MONITOR] FINDINGS: ${name}@${version} → ${formatFindings(result)}`);
         // ML Phase 2: classifier filter for T1 zone (score 20-34)
-        // Reduces FP webhook noise by filtering clean packages before sandbox/webhook.
         // Guard rails in classifyPackage() ensure HC types and high-score packages are never suppressed.
         // Hoisted so trySendWebhook can use ML result to prevent suppression (p >= 0.90).
-        // Applies to both T1a and T1b (ML can filter both sub-tiers in the [20,35) score range).
+        //
+        // DISABLED (2026-04-08): Model has collapsed — predicts p≈0.002 for ALL inputs (always "clean"),
+        // including clearly malicious patterns (lifecycle+exec+staged_payload). This suppresses real
+        // threats as ml_clean (false negatives). Disabled until model is retrained on corrected JSONL
+        // data with balanced labels. The classifier still runs in LOG-ONLY mode to collect data for
+        // retraining validation, but its prediction is never used for filtering.
+        //
+        // Guards added: ecosystem === 'npm' (PyPI has no npm registry metadata),
+        // npmRegistryMeta fallback fetch (ensure metadata is never null for ML features).
         let mlResult = null;
         const riskScore = result.summary.riskScore || 0;
-        if ((tier === '1a' || tier === '1b') && riskScore >= 20 && riskScore < 35) {
+        if ((tier === '1a' || tier === '1b') && riskScore >= 20 && riskScore < 35 && ecosystem === 'npm') {
           try {
             const { classifyPackage, isModelAvailable } = require('../ml/classifier.js');
             if (isModelAvailable()) {
+              // Defensive: ensure npmRegistryMeta is fetched (should already be from line ~420,
+              // but network failures can silently leave it null)
+              if (!npmRegistryMeta) {
+                try {
+                  const { getPackageMetadata } = require('../scanner/npm-registry.js');
+                  npmRegistryMeta = await getPackageMetadata(name);
+                  if (!npmRegistryMeta) {
+                    console.warn(`[ML] Registry metadata unavailable for ${name} — ML features will be zero-filled`);
+                  }
+                } catch (fetchErr) {
+                  console.warn(`[ML] Registry metadata fetch failed for ${name}: ${fetchErr.message}`);
+                }
+              }
               const enrichedMeta = { npmRegistryMeta, fileCountTotal, hasTests, unpackedSize: meta.unpackedSize, registryMeta: meta };
               mlResult = classifyPackage(result, enrichedMeta);
-              if (mlResult.prediction === 'clean') {
+              // LOG-ONLY: record ML prediction for retraining data but do NOT filter.
+              // When model is retrained and validated, remove the 'false &&' guard below.
+              console.log(`[MONITOR] ML LOG-ONLY: ${name}@${version} (prediction=${mlResult.prediction}, p=${mlResult.probability}, score=${riskScore})`);
+              if (false && mlResult.prediction === 'clean') {
+                // DISABLED: model collapsed (p≈0.002 for all inputs). Re-enable after retrain.
                 console.log(`[MONITOR] ML CLEAN: ${name}@${version} (p=${mlResult.probability}, score=${riskScore})`);
                 stats.mlFiltered++;
                 stats.scanned++;
@@ -612,8 +640,6 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
                 recordTrainingSample(result, { name, version, ecosystem, label: 'ml_clean', tier, registryMeta: meta, unpackedSize: meta.unpackedSize, npmRegistryMeta, fileCountTotal, hasTests });
                 return { sandboxResult: null, mlFiltered: true, tier };
               }
-              // Not clean — proceed normally
-              console.log(`[MONITOR] ML SUSPECT: ${name}@${version} (p=${mlResult.probability}, reason=${mlResult.reason})`);
             }
           } catch (err) {
             // Non-fatal: ML failure must never block the scan pipeline