npm - muaddib-scanner - Versions diffs - 2.10.31 → 2.10.33 - Mend

muaddib-scanner 2.10.31 → 2.10.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/package.json +2 -2
package/src/ml/classifier.js +94 -2
package/src/ml/feature-extractor.js +28 -2
package/src/ml/model-trees.js +4 -4
package/src/ml/train-bundler-detector.py +25 -4
package/src/ml/train-xgboost.py +60 -20
package/src/monitor/daemon.js +26 -1
package/src/sandbox/index.js +113 -60

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.10.31",
+  "version": "2.10.33",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {
@@ -55,7 +55,7 @@
   },
   "devDependencies": {
     "@eslint/js": "10.0.1",
-    "eslint": "10.0.3",
+    "eslint": "10.1.0",
     "eslint-plugin-security": "^4.0.0",
     "globals": "17.4.0"
   }

package/src/ml/classifier.js CHANGED Viewed

@@ -21,6 +21,10 @@ const { extractFeatures } = require('./feature-extractor.js');
 // Lazy-loaded models (allows resetModel for testing)
 let _model = undefined; // undefined = not yet loaded, null = absent
 let _bundlerModel = undefined; // undefined = not yet loaded, null = absent
+let _shadowModel = undefined; // undefined = not yet loaded, null = absent (load was attempted and produced nothing usable)
+// Shadow mode stats: number of comparisons run, and how many matched / differed from the main prediction (zeroed by resetShadowModel)
+const _shadowStats = { total: 0, agree: 0, disagree: 0 };
 // High-confidence malice types that must NEVER be suppressed by ML
 const HC_TYPES = new Set([
@@ -94,6 +98,80 @@ function resetBundlerModel() {
   _bundlerModel = undefined;
 }
+// --- Shadow model (ML1 v2, logs only, no filtering) ---
+/**
+ * Load shadow model from model-trees-shadow.js. Returns model object or null.
+ * Shadow model runs in parallel with the main model for comparison.
+ *
+ * The outcome is memoized in `_shadowModel`: `undefined` means no load has
+ * been attempted yet; any other value (including `null`) is returned as-is
+ * without calling require() again.
+ *
+ * @returns {Object|null} whatever the module exports, or null when require()
+ *   throws or the export is falsy
+ */
+function loadShadowModel() {
+  if (_shadowModel !== undefined) return _shadowModel; // already resolved (model or null)
+  try {
+    _shadowModel = require('./model-trees-shadow.js') || null; // a falsy export is treated as absent
+  } catch {
+    _shadowModel = null; // any require() failure, not only a missing file, is treated as absent
+  }
+  return _shadowModel;
+}
+function isShadowModelAvailable() { // true when loadShadowModel() yields anything other than null
+  return loadShadowModel() !== null; // performs the lazy load if it has not happened yet
+}
+function resetShadowModel() { // forget the cached shadow model and zero the comparison counters
+  _shadowModel = undefined; // undefined makes the next loadShadowModel() call go through require() again
+  _shadowStats.total = 0; // counters are mutated in place; the _shadowStats object itself is never replaced
+  _shadowStats.agree = 0;
+  _shadowStats.disagree = 0;
+}
+/**
+ * Run shadow model prediction and log comparison with main model.
+ * Never affects the actual classification decision.
+ *
+ * @param {Object} result - scan result
+ * @param {Object} meta - enriched metadata
+ * @param {string} mainPrediction - the main model's prediction
+ * @param {number} mainProbability - the main model's probability
+ * @param {string} packageName - for logging
+ */
+function runShadowComparison(result, meta, mainPrediction, mainProbability, packageName) {
+  const shadow = loadShadowModel();
+  if (!shadow) return;
+  const features = extractFeatures(result, meta || {});
+  const values = new Array(shadow.features.length);
+  for (let i = 0; i < shadow.features.length; i++) {
+    values[i] = features[shadow.features[i]] || 0;
+  }
+  let margin = 0;
+  for (const tree of shadow.trees) {
+    margin += traverseTree(tree, values);
+  }
+  const shadowProb = sigmoid(margin);
+  const shadowPred = shadowProb >= shadow.threshold ? 'malicious' : 'clean';
+  _shadowStats.total++;
+  if (shadowPred === mainPrediction) {
+    _shadowStats.agree++;
+  } else {
+    _shadowStats.disagree++;
+    console.log(`[ML-SHADOW] Disagreement on ${packageName}: main=${mainPrediction}(${mainProbability}) shadow=${shadowPred}(${Math.round(shadowProb * 1000) / 1000}) [${_shadowStats.disagree}/${_shadowStats.total} disagree]`);
+  }
+  // Periodic summary every 100 classifications
+  if (_shadowStats.total % 100 === 0) {
+    const agreeRate = ((_shadowStats.agree / _shadowStats.total) * 100).toFixed(1);
+    console.log(`[ML-SHADOW] Stats: ${_shadowStats.total} total, ${agreeRate}% agree, ${_shadowStats.disagree} disagree`);
+  }
+}
+function getShadowStats() { // snapshot of the shadow comparison counters
+  return { ..._shadowStats }; // shallow copy, so callers cannot mutate the module-level counters
+}
 /**
  * Sigmoid function: maps raw margin to probability [0, 1].
  * @param {number} x - raw margin (sum of tree outputs)
@@ -276,9 +354,18 @@ function classifyPackage(result, meta) {
   const featureValues = buildFeatureVector(result, meta);
   const { probability, prediction } = predict(featureValues);
+  const roundedProb = Math.round(probability * 1000) / 1000;
+  // Shadow model comparison (log-only, never affects decision)
+  if (isShadowModelAvailable()) {
+    const pkgName = (result && result.summary && result.summary.packageName) ||
+                    (meta && meta.name) || 'unknown';
+    runShadowComparison(result, meta, prediction, roundedProb, pkgName);
+  }
   return {
     prediction,
-    probability: Math.round(probability * 1000) / 1000,
+    probability: roundedProb,
     reason: prediction === 'clean' ? 'ml_clean' : 'ml_malicious'
   };
 }
@@ -298,5 +385,10 @@ module.exports = {
   resetBundlerModel,
   loadBundlerModel,
   predictBundler,
-  buildBundlerFeatureVector
+  buildBundlerFeatureVector,
+  // Shadow model (ML1 v2, log-only comparison)
+  isShadowModelAvailable,
+  resetShadowModel,
+  loadShadowModel,
+  getShadowStats
 };

package/src/ml/feature-extractor.js CHANGED Viewed

@@ -13,7 +13,11 @@
 // Top threat types by frequency in production (covers ~95% of all findings).
 // Types not in this list are aggregated into `threat_type_other`.
+// v2.10.32: expanded from 31 to 47 types — code exec bypasses, IoC, GlassWorm,
+// obfuscation patterns, module graph sinks. New features will be 0 for pre-existing
+// JSONL records; SHAP feature selection handles sparsity gracefully.
 const TOP_THREAT_TYPES = [
+  // --- Original 31 types ---
   'suspicious_dataflow',
   'env_access',
   'sensitive_string',
@@ -44,7 +48,29 @@ const TOP_THREAT_TYPES = [
   'curl_exec',
   'reverse_shell',
   'binary_dropper',
-  'mcp_config_injection'
+  'mcp_config_injection',
+  // --- Code execution bypasses (v2.9.x–v2.10.x) ---
+  'vm_code_execution',
+  'vm_dynamic_code',
+  'dangerous_constructor',
+  'module_load_bypass',
+  'require_process_mainmodule',
+  'proxy_globalthis_intercept',
+  'reflect_bind_code_execution',
+  // --- IoC / supply chain ---
+  'known_malicious_package',
+  'known_malicious_hash',
+  // --- GlassWorm (Unicode + Blockchain C2) ---
+  'unicode_invisible_injection',
+  'blockchain_c2_resolution',
+  // --- Shell / exec patterns ---
+  'dangerous_exec',
+  'node_inline_exec',
+  // --- Obfuscation patterns ---
+  'js_obfuscation_pattern',
+  // --- Module graph / WASM ---
+  'suspicious_module_sink',
+  'wasm_host_sink'
 ];
 const TOP_THREAT_TYPES_SET = new Set(TOP_THREAT_TYPES);
@@ -78,7 +104,7 @@ function extractFeatures(result, meta) {
   const distinctTypes = new Set(threats.map(t => t.type));
   features.distinct_threat_types = distinctTypes.size;
-  // --- Per-type counts (top 31 types) ---
+  // --- Per-type counts (top 47 types) ---
   const typeCounts = Object.create(null);
   for (const t of threats) {
     typeCounts[t.type] = (typeCounts[t.type] || 0) + 1;