npm - muaddib-scanner - Versions diffs - 2.10.67 → 2.10.68 - Mend

muaddib-scanner 2.10.67 → 2.10.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/package.json +1 -1
package/src/ml/train-xgboost.py +137 -9
package/src/response/playbooks.js +6 -0
package/src/rules/index.js +12 -0
package/src/scanner/ast-detectors/handle-call-expression.js +33 -4
package/src/scanner/ast-detectors/helpers.js +30 -0
package/src/scanner/ast.js +1 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.10.67",
+  "version": "2.10.68",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/src/ml/train-xgboost.py CHANGED Viewed

@@ -123,6 +123,25 @@ FEATURE_NAMES = [
 assert len(FEATURE_NAMES) == 87, f"Expected 87 features, got {len(FEATURE_NAMES)}"
+# Features to exclude: metadata/source-identity proxies that differ between
+# monitor (negatives) and Datadog (positives) for non-behavioral reasons.
+# See corrected retrain plan for full justification of each exclusion.
+# main() drops these columns in Step 2a, before the leak filter (Step 2b)
+# and the source discrimination gate (Step 2c) run.
+EXCLUDED_METADATA = {
+    # npm registry metadata — always 0 in Datadog positives (not fetched),
+    # 8-13% non-zero in monitor negatives → source leak
+    'package_age_days', 'weekly_downloads', 'version_count',
+    'author_package_count', 'has_repository', 'readme_size',
+    # Derived from corrupted npm metadata (age_days, version_count, downloads).
+    # Currently zero-variance (always 1.0) but becomes a leak when future
+    # records have actual computed values.
+    'reputation_factor',
+    # Package-level metadata not from behavioral scan —
+    # 88-95% non-zero in negatives, 0% in positives → massive source proxy
+    'unpacked_size_bytes', 'file_count_total',
+    # 13% non-zero in negatives, 0% in positives → source proxy
+    'has_tests',
+}
 # --- Data loading ---
@@ -300,10 +319,14 @@ def filter_leaky_features(X: pd.DataFrame, y: np.ndarray,
     retained = []
     excluded = []
+    # Iterate over columns actually present in X (metadata may have been
+    # dropped by Step 2a before this function is called).
+    available_features = list(X.columns)
     print(f"\n  {'Feature':<40s} {'Neg%':>6s} {'Pos%':>6s} {'All%':>6s} {'Status'}")
     print(f"  {'-' * 40} {'-' * 6} {'-' * 6} {'-' * 6} {'-' * 8}")
-    for feat in FEATURE_NAMES:
+    for feat in available_features:
         neg_nonzero = float((X.loc[neg_mask, feat] != 0).sum()) / max(n_neg, 1)
         pos_nonzero = float((X.loc[pos_mask, feat] != 0).sum()) / max(n_pos, 1)
         all_nonzero = float((X[feat] != 0).sum()) / max(n_total, 1)
@@ -328,7 +351,7 @@ def filter_leaky_features(X: pd.DataFrame, y: np.ndarray,
         print(f"  {feat:<40s} {neg_nonzero * 100:5.1f}% {pos_nonzero * 100:5.1f}% "
               f"{all_nonzero * 100:5.1f}% {status}")
-    print(f"\n  Retained: {len(retained)}/{len(FEATURE_NAMES)} features")
+    print(f"\n  Retained: {len(retained)}/{len(available_features)} features")
     if excluded:
         print(f"  Excluded ({len(excluded)}): {', '.join(excluded)}")
@@ -336,6 +359,85 @@ def filter_leaky_features(X: pd.DataFrame, y: np.ndarray,
     return X_filtered, retained
+def source_discrimination_gate(X: pd.DataFrame, y: np.ndarray,
+                                active_features: list,
+                                max_accuracy: float = 0.65) -> bool:
+    """
+    Step 2c: Hard gate — verify that retained behavioral features cannot
+    trivially distinguish data source (monitor vs Datadog).
+    Since all negatives come from monitor and all positives from Datadog,
+    y IS the source label. A shallow classifier whose held-out accuracy
+    exceeds max_accuracy (default 0.65) on the retained features indicates
+    residual source-identity leaks.
+
+    Args:
+        X: Feature frame; only the active_features columns are used.
+        y: Binary labels, treated here as the source label.
+        active_features: Column names the discriminator is trained on.
+        max_accuracy: Highest held-out accuracy that still passes the gate.
+
+    Returns: True if gate passes (accuracy <= max_accuracy), False if fails.
+    Prints the discriminator's top 10 features by mean |SHAP| in both cases,
+    and the top 5 as exclusion candidates when the gate fails.
+    """
+    print("\n" + "=" * 60)
+    print(f"[Step 2c/8] Source discrimination gate (threshold={max_accuracy:.0%})...")
+    print("=" * 60)
+    X_active = X[active_features]
+    # 70/30 split with different seed to avoid overlap with main split
+    X_tr, X_te, y_tr, y_te = train_test_split(
+        X_active, y, test_size=0.3, stratify=y, random_state=99
+    )
+    # Shallow model — depth=3, 50 rounds, no class weighting
+    # (we want to detect ANY discriminability, not optimize for one class)
+    params = {
+        'objective': 'binary:logistic',
+        'eval_metric': 'logloss',
+        'max_depth': 3,
+        'learning_rate': 0.1,
+        'subsample': 0.8,
+        'seed': 99,
+        'verbosity': 0,
+    }
+    dtrain = xgb.DMatrix(X_tr, label=y_tr, feature_names=active_features)
+    dtest = xgb.DMatrix(X_te, label=y_te, feature_names=active_features)
+    model = xgb.train(params, dtrain, num_boost_round=50)
+    probs = model.predict(dtest)
+    # Accuracy is measured at a fixed 0.5 probability cut-off on the 30% hold-out.
+    # NOTE(review): raw accuracy is not balance-adjusted; if one class makes up
+    # more than max_accuracy of the rows, a majority-class predictor alone would
+    # fail the gate — confirm the threshold against the dataset's class ratio.
+    preds = (probs >= 0.5).astype(int)
+    accuracy = float((preds == y_te).mean())
+    p = precision_score(y_te, preds, zero_division=0)
+    r = recall_score(y_te, preds, zero_division=0)
+    print(f"  Discrimination accuracy: {accuracy:.3f} (P={p:.3f} R={r:.3f})")
+    # SHAP analysis to identify which features drive discrimination
+    explainer = shap.TreeExplainer(model)
+    shap_values = explainer.shap_values(X_te)
+    # Rank features by mean absolute SHAP value over the hold-out rows
+    # (assumes shap_values is laid out as rows x features — TODO confirm).
+    mean_abs_shap = np.abs(shap_values).mean(axis=0)
+    importance = sorted(zip(active_features, mean_abs_shap),
+                        key=lambda x: x[1], reverse=True)
+    print(f"\n  Top 10 features driving source discrimination:")
+    for i, (name, val) in enumerate(importance[:10]):
+        flag = ""
+        # Flag non-behavioral features that shouldn't be discriminative
+        # NOTE(review): dep_count and dev_dep_count are flagged here but are not
+        # listed in EXCLUDED_METADATA — confirm which list is intended.
+        if name in ('unpacked_size_bytes', 'file_count_total', 'has_tests',
+                     'dep_count', 'dev_dep_count', 'reputation_factor'):
+            flag = " *** NON-BEHAVIORAL"
+        print(f"    {i + 1:2d}. {name:40s} {val:.6f}{flag}")
+    if accuracy <= max_accuracy:
+        print(f"\n  [GATE PASS] Accuracy {accuracy:.3f} <= {max_accuracy:.3f}")
+        print(f"  Behavioral features do not trivially encode source identity.")
+        return True
+    else:
+        print(f"\n  [GATE FAIL] Accuracy {accuracy:.3f} > {max_accuracy:.3f}")
+        print(f"  Retained features still encode source identity.")
+        print(f"  Offending features (exclude and re-run):")
+        # Only the five strongest contributors are listed as exclusion candidates.
+        for name, val in importance[:5]:
+            print(f"    - {name} (SHAP={val:.6f})")
+        return False
 def split_data(X: pd.DataFrame, y: np.ndarray) -> tuple:
     """
     Step 3: Stratified 80/20 split.
@@ -693,13 +795,15 @@ def main():
                         help='Path to negatives JSONL (clean/fp labels)')
     parser.add_argument('--positives', required=True,
                         help='Path to positives JSONL (malicious labels)')
-    parser.add_argument('--output', default='src/ml/model-trees.js',
-                        help='Output JS file path (default: src/ml/model-trees.js)')
-    parser.add_argument('--top-features', type=int, default=40,
-                        help='Number of top SHAP features to select (default: 40)')
+    parser.add_argument('--output', default='src/ml/model-trees-shadow.js',
+                        help='Output JS file path (default: src/ml/model-trees-shadow.js)')
+    parser.add_argument('--top-features', type=int, default=50,
+                        help='Number of top SHAP features to select (default: 50)')
     parser.add_argument('--common-only', action=argparse.BooleanOptionalAction,
                         default=True,
                         help='Only use features with >=1%% non-zero coverage in BOTH sources (default: on)')
+    parser.add_argument('--skip-gate', action='store_true',
+                        help='Skip source discrimination gate (dangerous — use only for debugging)')
     args = parser.parse_args()
     # Validate inputs
@@ -716,11 +820,34 @@ def main():
     # Step 2: Align features
     X, y, stats = align_features(negatives, positives)
-    # Step 2b: Filter leaky features
+    # Step 2a: Remove known metadata/source-proxy features BEFORE leak filter.
+    # These features differ between sources for non-behavioral reasons and would
+    # cause the model to learn source identity instead of malicious behavior.
+    metadata_cols = [f for f in FEATURE_NAMES if f in EXCLUDED_METADATA]
+    X = X.drop(columns=metadata_cols, errors='ignore')
+    remaining_features = [f for f in FEATURE_NAMES if f not in EXCLUDED_METADATA]
+    print(f"\n  [Step 2a] Excluded {len(metadata_cols)} metadata features: "
+          f"{', '.join(metadata_cols)}")
+    print(f"  Remaining: {len(remaining_features)} features")
+    # Step 2b: Filter dead/leaky features (on remaining behavioral features)
     if args.common_only:
         X, active_features = filter_leaky_features(X, y)
     else:
-        active_features = list(FEATURE_NAMES)
+        active_features = list(remaining_features)
+    # Step 2c: Source discrimination gate — HARD STOP if features encode source
+    if not args.skip_gate:
+        gate_pass = source_discrimination_gate(X, y, active_features)
+        if not gate_pass:
+            print("\n" + "=" * 60)
+            print("ABORTED: Source discrimination gate failed.")
+            print("The retained features still encode source identity.")
+            print("Add offending features to EXCLUDED_METADATA and re-run.")
+            print("=" * 60)
+            sys.exit(1)
+    else:
+        print("\n  [Step 2c] Source discrimination gate SKIPPED (--skip-gate)")
     # Class imbalance weight
     n_neg = stats['n_neg']
@@ -756,7 +883,8 @@ def main():
     print("TRAINING COMPLETE")
     print("=" * 60)
     print(f"  Samples: {n_neg} negatives + {n_pos} positives = {n_neg + n_pos}")
-    print(f"  Features: {len(selected)} selected (from {len(active_features)} active / {len(FEATURE_NAMES)} total)")
+    print(f"  Features: {len(selected)} selected (from {len(active_features)} active / "
+          f"{len(FEATURE_NAMES)} total, {len(EXCLUDED_METADATA)} metadata excluded)")
     print(f"  Threshold: {cv_metrics['threshold']:.3f}")
     print(f"  CV:      P={cv_metrics['precision']:.3f} R={cv_metrics['recall']:.3f} F1={cv_metrics['f1']:.3f}")
     print(f"  Holdout: P={holdout_metrics['precision']:.3f} R={holdout_metrics['recall']:.3f} F1={holdout_metrics['f1']:.3f}")

package/src/response/playbooks.js CHANGED Viewed

@@ -673,6 +673,12 @@ const PLAYBOOKS = {
     'Analyser le callback du timer pour identifier le payload retarde. ' +
     'Si delai > 24h: fort indicateur de time-bomb malware. NE PAS installer.',
+  timer_delayed_payload:
+    'Timer avec delai >= 60s contenant un sink dangereux (eval/exec/spawn) dans le callback. ' +
+    'Technique d\'evasion temporelle: le payload attend que les sandboxes timeout avant de s\'activer. ' +
+    'Analyser le contenu du callback: rechercher exfiltration de credentials, reverse shell, ou telechargement de payload. ' +
+    'Si delai >= 15min: forte probabilite de malware. NE PAS installer.',
   npm_publish_worm:
     'CRITIQUE: exec("npm publish") detecte — propagation worm. Le code utilise des tokens npm voles ' +
     'pour publier des versions infectees des packages de la victime. Technique Shai-Hulud 1.0 et 2.0. ' +

package/src/rules/index.js CHANGED Viewed

@@ -2194,6 +2194,18 @@ const RULES = {
     references: ['https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Reflect/apply'],
     mitre: 'T1059'
   },
+  timer_delayed_payload: {
+    id: 'MUADDIB-AST-085',
+    name: 'Timer Delayed Payload',
+    severity: 'HIGH',
+    confidence: 'high',
+    description: 'setTimeout/setInterval avec delai >= 60s contenant un sink dangereux (eval/exec/spawn/Function) dans le callback. Evasion temporelle: le payload s\'active apres le timeout des sandboxes. Technique PhantomRaven/timer-bomb-exfil.',
+    references: [
+      'https://attack.mitre.org/techniques/T1497/003/',
+      'https://www.sonatype.com/blog/phantomraven-supply-chain-attack'
+    ],
+    mitre: 'T1497.003'
+  },
   lifecycle_missing_script: {
     id: 'MUADDIB-PKG-017',
     name: 'Phantom Lifecycle Script',

package/src/scanner/ast-detectors/handle-call-expression.js CHANGED Viewed

@@ -47,7 +47,8 @@ const {
   extractStringValueDeep,
   hasOnlyStringLiteralArgs,
   hasDecodeArg,
-  containsDecodePattern
+  containsDecodePattern,
+  resolveNumericExpression
 } = require('./helpers.js');
 function handleCallExpression(node, ctx) {
@@ -1046,9 +1047,7 @@ function handleCallExpression(node, ctx) {
     if (node.arguments.length >= 2) {
       const delayArg = node.arguments[1];
       let delayMs = null;
-      if (delayArg.type === 'Literal' && typeof delayArg.value === 'number') {
-        delayMs = delayArg.value;
-      }
+      delayMs = resolveNumericExpression(delayArg);
       if (delayMs !== null && delayMs > 3600000) { // > 1 hour
         const hours = (delayMs / 3600000).toFixed(1);
         ctx.threats.push({
@@ -1058,6 +1057,36 @@ function handleCallExpression(node, ctx) {
           file: ctx.relFile
         });
       }
+      // timer_delayed_payload: delay >= 60s + dangerous sink in callback body
+      if (delayMs !== null && delayMs >= 60000) {
+        const callback = node.arguments[0];
+        if (callback && (callback.type === 'ArrowFunctionExpression' || callback.type === 'FunctionExpression')) {
+          const cbSrc = callback.start !== undefined && callback.end !== undefined
+            ? ctx._sourceCode?.slice(callback.start, callback.end) : '';
+          if (cbSrc) {
+            const hasDangerousSink =
+              /\beval\s*\(/.test(cbSrc) ||
+              /\bnew\s+Function\s*\(/.test(cbSrc) ||
+              /\b(execSync|spawn|spawnSync)\s*\(/.test(cbSrc) ||
+              /(?<!\.)\bexec\s*\(/.test(cbSrc) ||
+              /\brequire\s*\(\s*['"](?:node:)?child_process['"]\s*\)/.test(cbSrc) ||
+              /\bModule\._compile\s*\(/.test(cbSrc);
+            if (hasDangerousSink) {
+              const delayDesc = delayMs >= 3600000
+                ? `${(delayMs / 3600000).toFixed(1)}h`
+                : `${(delayMs / 60000).toFixed(0)}min`;
+              ctx.hasTimerDelayedPayload = true;
+              ctx.threats.push({
+                type: 'timer_delayed_payload',
+                severity: delayMs >= 900000 ? 'CRITICAL' : 'HIGH',
+                message: `${callName}() with ${delayDesc} delay (${delayMs}ms) contains dangerous sink in callback — time-delayed payload execution for sandbox evasion.`,
+                file: ctx.relFile
+              });
+            }
+          }
+        }
+      }
     }
   }

package/src/scanner/ast-detectors/helpers.js CHANGED Viewed

@@ -151,6 +151,35 @@ function resolveStringConcatWithVars(node, stringVarValues) {
   return null;
 }
+/**
+ * Recursively resolve a numeric expression AST node to a concrete number.
+ * Handles: Literal numbers, BinaryExpression (*, +, -, /), UnaryExpression (-).
+ * Returns null if the expression contains non-resolvable nodes: any other
+ * node type, any other operator, a non-number Literal, or a division by zero.
+ *
+ * Examples: 60000 → 60000, 60*1000 → 60000, 10*60*1000 → 600000
+ *
+ * @param {object} node - AST node to evaluate; a falsy value yields null.
+ * @returns {number|null} The computed value, or null when it cannot be resolved.
+ */
+function resolveNumericExpression(node) {
+  if (!node) return null;
+  if (node.type === 'Literal' && typeof node.value === 'number') return node.value;
+  if (node.type === 'UnaryExpression' && node.operator === '-') {
+    const val = resolveNumericExpression(node.argument);
+    return val !== null ? -val : null;
+  }
+  if (node.type === 'BinaryExpression') {
+    const left = resolveNumericExpression(node.left);
+    const right = resolveNumericExpression(node.right);
+    // One unresolvable operand makes the whole expression unresolvable.
+    if (left === null || right === null) return null;
+    switch (node.operator) {
+      case '*': return left * right;
+      case '+': return left + right;
+      case '-': return left - right;
+      // Division by zero is reported as unresolvable instead of Infinity/NaN.
+      case '/': return right !== 0 ? left / right : null;
+      default: return null;
+    }
+  }
+  return null;
+}
 /**
  * Extract string value from a node, including BinaryExpression resolution.
  * Falls back to extractStringValue if concat resolution fails.
@@ -253,6 +282,7 @@ module.exports = {
   countConcatOperands,
   resolveStringConcat,
   resolveStringConcatWithVars,
+  resolveNumericExpression,
   extractStringValueDeep,
   hasOnlyStringLiteralArgs,
   hasDecodeArg,

package/src/scanner/ast.js CHANGED Viewed

@@ -156,6 +156,7 @@ function analyzeFile(content, filePath, basePath) {
     hasDnsRequire: /\brequire\s*\(\s*['"]dns['"]\s*\)/.test(content) || /\bdns\s*\.\s*resolve/.test(content),
     hasBase64Encode: /\.toString\s*\(\s*['"]base64(url)?['"]\s*\)/.test(content),
     hasDnsLoop: false,  // set when dns call inside loop context detected
+    hasTimerDelayedPayload: false,  // set when setTimeout/setInterval >= 60s has dangerous sink in callback
     // SANDWORM_MODE P2: LLM API key harvesting
     llmApiKeyCount: 0,
     // Wave 4: path variable tracking for git hooks and IDE config injection