npm - muaddib-scanner - Versions diffs - 2.10.67 → 2.10.69 - Mend

muaddib-scanner 2.10.67 → 2.10.69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/package.json +1 -1
package/src/ml/train-xgboost.py +163 -9
package/src/response/playbooks.js +6 -0
package/src/rules/index.js +12 -0
package/src/scanner/ast-detectors/handle-call-expression.js +33 -4
package/src/scanner/ast-detectors/helpers.js +30 -0
package/src/scanner/ast.js +1 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.10.67",
+  "version": "2.10.69",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/src/ml/train-xgboost.py CHANGED Viewed

@@ -123,6 +123,25 @@ FEATURE_NAMES = [
 assert len(FEATURE_NAMES) == 87, f"Expected 87 features, got {len(FEATURE_NAMES)}"
+# Features to exclude: metadata/source-identity proxies that differ between
+# monitor (negatives) and Datadog (positives) for non-behavioral reasons.
+# See corrected retrain plan for full justification of each exclusion.
+# Consumed by Step 2a in main(), which drops these columns from X before
+# filter_leaky_features() runs, so the leak filter and every later step only
+# see the remaining features.
+EXCLUDED_METADATA = {
+    # npm registry metadata — always 0 in Datadog positives (not fetched),
+    # 8-13% non-zero in monitor negatives → source leak
+    'package_age_days', 'weekly_downloads', 'version_count',
+    'author_package_count', 'has_repository', 'readme_size',
+    # Derived from corrupted npm metadata (age_days, version_count, downloads).
+    # Currently zero-variance (always 1.0) but becomes a leak when future
+    # records have actual computed values.
+    'reputation_factor',
+    # Package-level metadata not from behavioral scan —
+    # 88-95% non-zero in negatives, 0% in positives → massive source proxy
+    'unpacked_size_bytes', 'file_count_total',
+    # 13% non-zero in negatives, 0% in positives → source proxy
+    'has_tests',
+}
 # --- Data loading ---
@@ -300,10 +319,14 @@ def filter_leaky_features(X: pd.DataFrame, y: np.ndarray,
     retained = []
     excluded = []
+    # Iterate over columns actually present in X (metadata may have been
+    # dropped by Step 2a before this function is called).
+    available_features = list(X.columns)
     print(f"\n  {'Feature':<40s} {'Neg%':>6s} {'Pos%':>6s} {'All%':>6s} {'Status'}")
     print(f"  {'-' * 40} {'-' * 6} {'-' * 6} {'-' * 6} {'-' * 8}")
-    for feat in FEATURE_NAMES:
+    for feat in available_features:
         neg_nonzero = float((X.loc[neg_mask, feat] != 0).sum()) / max(n_neg, 1)
         pos_nonzero = float((X.loc[pos_mask, feat] != 0).sum()) / max(n_pos, 1)
         all_nonzero = float((X[feat] != 0).sum()) / max(n_total, 1)
@@ -328,7 +351,7 @@ def filter_leaky_features(X: pd.DataFrame, y: np.ndarray,
         print(f"  {feat:<40s} {neg_nonzero * 100:5.1f}% {pos_nonzero * 100:5.1f}% "
               f"{all_nonzero * 100:5.1f}% {status}")
-    print(f"\n  Retained: {len(retained)}/{len(FEATURE_NAMES)} features")
+    print(f"\n  Retained: {len(retained)}/{len(available_features)} features")
     if excluded:
         print(f"  Excluded ({len(excluded)}): {', '.join(excluded)}")
@@ -336,6 +359,114 @@ def filter_leaky_features(X: pd.DataFrame, y: np.ndarray,
     return X_filtered, retained
+def source_discrimination_diagnostic(X: pd.DataFrame, y: np.ndarray,
+                                      active_features: list):
+    """
+    Step 2c: Source discrimination diagnostic (LOG-ONLY, non-blocking).
+    Trains a shallow XGBoost classifier on ``active_features`` and prints the
+    ten features with the highest mean |SHAP| value, flagging any that are
+    non-behavioral. The result is only printed: nothing is returned and the
+    outcome never gates the training run.
+    DESIGN NOTE: This test cannot function as a hard gate when source labels
+    are perfectly confounded with class labels (all negatives = monitor,
+    all positives = Datadog). In that case, legitimate behavioral features
+    (score, count_critical, type_*) will dominate the discriminator because
+    malware genuinely behaves differently from clean packages — this is
+    signal, not leak.
+    A true source discrimination test would require either:
+    (a) positives re-scanned through our own pipeline, or
+    (b) negatives and positives from the SAME source.
+    This diagnostic still serves a purpose: it flags NON-BEHAVIORAL features
+    that shouldn't appear in the top discriminators. If metadata features
+    (unpacked_size_bytes, file_count_total, etc.) appear despite being
+    excluded in Step 2a, something is wrong.
+    The real validation happens in shadow deployment on live production data.
+    Args:
+        X: Feature matrix; must contain every column named in active_features.
+        y: Labels aligned with the rows of X (also used to stratify the split).
+        active_features: Column names fed to the discriminator.
+    """
+    print("\n" + "=" * 60)
+    print("[Step 2c/8] Source discrimination diagnostic (log-only)...")
+    print("=" * 60)
+    print("  NOTE: source=Datadog correlates 100% with label=malicious.")
+    print("  This diagnostic checks for non-behavioral features in the")
+    print("  top discriminators, NOT for overall accuracy (which will")
+    print("  always be high due to the source/label confound).")
+    X_active = X[active_features]
+    # 70/30 split with different seed to avoid overlap with main split
+    X_tr, X_te, y_tr, y_te = train_test_split(
+        X_active, y, test_size=0.3, stratify=y, random_state=99
+    )
+    # Shallow model — depth=3, 50 rounds, no class weighting
+    params = {
+        'objective': 'binary:logistic',
+        'eval_metric': 'logloss',
+        'max_depth': 3,
+        'learning_rate': 0.1,
+        'subsample': 0.8,
+        'seed': 99,
+        'verbosity': 0,
+    }
+    dtrain = xgb.DMatrix(X_tr, label=y_tr, feature_names=active_features)
+    dtest = xgb.DMatrix(X_te, label=y_te, feature_names=active_features)
+    model = xgb.train(params, dtrain, num_boost_round=50)
+    probs = model.predict(dtest)
+    preds = (probs >= 0.5).astype(int)
+    accuracy = float((preds == y_te).mean())
+    p = precision_score(y_te, preds, zero_division=0)
+    r = recall_score(y_te, preds, zero_division=0)
+    print(f"\n  Discrimination accuracy: {accuracy:.3f} (P={p:.3f} R={r:.3f})")
+    print("  (Expected to be high due to source/label confound)")
+    # SHAP analysis — the diagnostic value is in WHICH features dominate
+    explainer = shap.TreeExplainer(model)
+    shap_values = explainer.shap_values(X_te)
+    mean_abs_shap = np.abs(shap_values).mean(axis=0)
+    importance = sorted(zip(active_features, mean_abs_shap),
+                        key=lambda x: x[1], reverse=True)
+    # Known behavioral features that SHOULD dominate (malware scores higher)
+    EXPECTED_BEHAVIORAL = {
+        'score', 'global_risk_score', 'max_file_score', 'package_score',
+        'count_total', 'count_critical', 'count_high', 'count_medium',
+        'count_low', 'distinct_threat_types', 'severity_ratio_high',
+        'max_single_points', 'points_concentration', 'file_count_with_threats',
+        'file_score_mean', 'file_score_max', 'threat_density',
+    }
+    # Features Step 2a is supposed to have removed. Reuse the module-level set
+    # instead of a hand-copied list so this check cannot drift from the real
+    # exclusion list.
+    excluded_in_step_2a = set(EXCLUDED_METADATA)
+    # Non-behavioral features that Step 2a does NOT remove. They are still
+    # reported as leaks, but with a message that does not claim Step 2a
+    # should already have dropped them.
+    NOT_EXCLUDED_NON_BEHAVIORAL = {'dep_count', 'dev_dep_count'}
+    print("\n  Top 10 features driving discrimination:")
+    has_leak = False
+    for rank, (name, val) in enumerate(importance[:10], start=1):
+        if name in excluded_in_step_2a:
+            flag = " *** LEAK — should have been excluded in Step 2a!"
+            has_leak = True
+        elif name in NOT_EXCLUDED_NON_BEHAVIORAL:
+            flag = " *** LEAK — non-behavioral, not covered by EXCLUDED_METADATA"
+            has_leak = True
+        elif name in EXPECTED_BEHAVIORAL:
+            flag = " (expected — behavioral)"
+        elif name.startswith(('type_', 'has_')):
+            flag = " (behavioral signal)"
+        else:
+            flag = ""
+        print(f"    {rank:2d}. {name:40s} {val:.6f}{flag}")
+    if has_leak:
+        print("\n  [WARNING] Non-behavioral features found in top discriminators!")
+        print("  Check EXCLUDED_METADATA — some metadata features leaked through.")
+    else:
+        print("\n  [OK] Top discriminators are all behavioral features.")
+        print("  No metadata/source-proxy leak detected.")
 def split_data(X: pd.DataFrame, y: np.ndarray) -> tuple:
     """
     Step 3: Stratified 80/20 split.
@@ -693,13 +824,15 @@ def main():
                         help='Path to negatives JSONL (clean/fp labels)')
     parser.add_argument('--positives', required=True,
                         help='Path to positives JSONL (malicious labels)')
-    parser.add_argument('--output', default='src/ml/model-trees.js',
-                        help='Output JS file path (default: src/ml/model-trees.js)')
-    parser.add_argument('--top-features', type=int, default=40,
-                        help='Number of top SHAP features to select (default: 40)')
+    parser.add_argument('--output', default='src/ml/model-trees-shadow.js',
+                        help='Output JS file path (default: src/ml/model-trees-shadow.js)')
+    parser.add_argument('--top-features', type=int, default=50,
+                        help='Number of top SHAP features to select (default: 50)')
     parser.add_argument('--common-only', action=argparse.BooleanOptionalAction,
                         default=True,
                         help='Only use features with >=1%% non-zero coverage in BOTH sources (default: on)')
+    parser.add_argument('--skip-gate', action='store_true',
+                        help='Skip source discrimination gate (dangerous — use only for debugging)')
     args = parser.parse_args()
     # Validate inputs
@@ -716,11 +849,31 @@ def main():
     # Step 2: Align features
     X, y, stats = align_features(negatives, positives)
-    # Step 2b: Filter leaky features
+    # Step 2a: Remove known metadata/source-proxy features BEFORE leak filter.
+    # These features differ between sources for non-behavioral reasons and would
+    # cause the model to learn source identity instead of malicious behavior.
+    metadata_cols = [f for f in FEATURE_NAMES if f in EXCLUDED_METADATA]
+    X = X.drop(columns=metadata_cols, errors='ignore')
+    remaining_features = [f for f in FEATURE_NAMES if f not in EXCLUDED_METADATA]
+    print(f"\n  [Step 2a] Excluded {len(metadata_cols)} metadata features: "
+          f"{', '.join(metadata_cols)}")
+    print(f"  Remaining: {len(remaining_features)} features")
+    # Step 2b: Filter dead/leaky features (on remaining behavioral features)
     if args.common_only:
         X, active_features = filter_leaky_features(X, y)
     else:
-        active_features = list(FEATURE_NAMES)
+        active_features = list(remaining_features)
+    # Step 2c: Source discrimination diagnostic (log-only).
+    # NOT a hard gate — source label is 100% confounded with class label
+    # (all positives = Datadog, all negatives = monitor), so behavioral
+    # features will always dominate the discriminator. The diagnostic
+    # checks that no METADATA features leaked through Step 2a.
+    if not args.skip_gate:
+        source_discrimination_diagnostic(X, y, active_features)
+    else:
+        print("\n  [Step 2c] Source discrimination diagnostic SKIPPED (--skip-gate)")
     # Class imbalance weight
     n_neg = stats['n_neg']
@@ -756,7 +909,8 @@ def main():
     print("TRAINING COMPLETE")
     print("=" * 60)
     print(f"  Samples: {n_neg} negatives + {n_pos} positives = {n_neg + n_pos}")
-    print(f"  Features: {len(selected)} selected (from {len(active_features)} active / {len(FEATURE_NAMES)} total)")
+    print(f"  Features: {len(selected)} selected (from {len(active_features)} active / "
+          f"{len(FEATURE_NAMES)} total, {len(EXCLUDED_METADATA)} metadata excluded)")
     print(f"  Threshold: {cv_metrics['threshold']:.3f}")
     print(f"  CV:      P={cv_metrics['precision']:.3f} R={cv_metrics['recall']:.3f} F1={cv_metrics['f1']:.3f}")
     print(f"  Holdout: P={holdout_metrics['precision']:.3f} R={holdout_metrics['recall']:.3f} F1={holdout_metrics['f1']:.3f}")

package/src/response/playbooks.js CHANGED Viewed

@@ -673,6 +673,12 @@ const PLAYBOOKS = {
     'Analyser le callback du timer pour identifier le payload retarde. ' +
     'Si delai > 24h: fort indicateur de time-bomb malware. NE PAS installer.',
+  timer_delayed_payload:
+    'Timer avec delai >= 60s contenant un sink dangereux (eval/exec/spawn) dans le callback. ' +
+    'Technique d\'evasion temporelle: le payload attend que les sandboxes timeout avant de s\'activer. ' +
+    'Analyser le contenu du callback: rechercher exfiltration de credentials, reverse shell, ou telechargement de payload. ' +
+    'Si delai >= 15min: forte probabilite de malware. NE PAS installer.',
   npm_publish_worm:
     'CRITIQUE: exec("npm publish") detecte — propagation worm. Le code utilise des tokens npm voles ' +
     'pour publier des versions infectees des packages de la victime. Technique Shai-Hulud 1.0 et 2.0. ' +

package/src/rules/index.js CHANGED Viewed

@@ -2194,6 +2194,18 @@ const RULES = {
     references: ['https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Reflect/apply'],
     mitre: 'T1059'
   },
+  timer_delayed_payload: {
+    id: 'MUADDIB-AST-085',
+    name: 'Timer Delayed Payload',
+    severity: 'HIGH',
+    confidence: 'high',
+    description: 'setTimeout/setInterval avec delai >= 60s contenant un sink dangereux (eval/exec/spawn/Function) dans le callback. Evasion temporelle: le payload s\'active apres le timeout des sandboxes. Technique PhantomRaven/timer-bomb-exfil.',
+    references: [
+      'https://attack.mitre.org/techniques/T1497/003/',
+      'https://www.sonatype.com/blog/phantomraven-supply-chain-attack'
+    ],
+    mitre: 'T1497.003'
+  },
   lifecycle_missing_script: {
     id: 'MUADDIB-PKG-017',
     name: 'Phantom Lifecycle Script',

package/src/scanner/ast-detectors/handle-call-expression.js CHANGED Viewed

@@ -47,7 +47,8 @@ const {
   extractStringValueDeep,
   hasOnlyStringLiteralArgs,
   hasDecodeArg,
-  containsDecodePattern
+  containsDecodePattern,
+  resolveNumericExpression
 } = require('./helpers.js');
 function handleCallExpression(node, ctx) {
@@ -1046,9 +1047,7 @@ function handleCallExpression(node, ctx) {
     if (node.arguments.length >= 2) {
       const delayArg = node.arguments[1];
       let delayMs = null;
-      if (delayArg.type === 'Literal' && typeof delayArg.value === 'number') {
-        delayMs = delayArg.value;
-      }
+      delayMs = resolveNumericExpression(delayArg);
       if (delayMs !== null && delayMs > 3600000) { // > 1 hour
         const hours = (delayMs / 3600000).toFixed(1);
         ctx.threats.push({
@@ -1058,6 +1057,36 @@ function handleCallExpression(node, ctx) {
           file: ctx.relFile
         });
       }
+      // timer_delayed_payload: delay >= 60s + dangerous sink in callback body
+      if (delayMs !== null && delayMs >= 60000) {
+        const callback = node.arguments[0];
+        if (callback && (callback.type === 'ArrowFunctionExpression' || callback.type === 'FunctionExpression')) {
+          const cbSrc = callback.start !== undefined && callback.end !== undefined
+            ? ctx._sourceCode?.slice(callback.start, callback.end) : '';
+          if (cbSrc) {
+            const hasDangerousSink =
+              /\beval\s*\(/.test(cbSrc) ||
+              /\bnew\s+Function\s*\(/.test(cbSrc) ||
+              /\b(execSync|spawn|spawnSync)\s*\(/.test(cbSrc) ||
+              /(?<!\.)\bexec\s*\(/.test(cbSrc) ||
+              /\brequire\s*\(\s*['"](?:node:)?child_process['"]\s*\)/.test(cbSrc) ||
+              /\bModule\._compile\s*\(/.test(cbSrc);
+            if (hasDangerousSink) {
+              const delayDesc = delayMs >= 3600000
+                ? `${(delayMs / 3600000).toFixed(1)}h`
+                : `${(delayMs / 60000).toFixed(0)}min`;
+              ctx.hasTimerDelayedPayload = true;
+              ctx.threats.push({
+                type: 'timer_delayed_payload',
+                severity: delayMs >= 900000 ? 'CRITICAL' : 'HIGH',
+                message: `${callName}() with ${delayDesc} delay (${delayMs}ms) contains dangerous sink in callback — time-delayed payload execution for sandbox evasion.`,
+                file: ctx.relFile
+              });
+            }
+          }
+        }
+      }
     }
   }

package/src/scanner/ast-detectors/helpers.js CHANGED Viewed

@@ -151,6 +151,35 @@ function resolveStringConcatWithVars(node, stringVarValues) {
   return null;
 }
+/**
+ * Recursively resolve a numeric expression AST node to a concrete number.
+ * Handles: Literal numbers, BinaryExpression (*, +, -, /, %, **),
+ * UnaryExpression (-, +).
+ * Returns null if the expression contains non-resolvable nodes, uses an
+ * unsupported operator, or divides / takes a remainder by zero.
+ *
+ * Examples: 60000 → 60000, 60*1000 → 60000, 10*60*1000 → 600000,
+ *           2**16 → 65536, +60000 → 60000
+ *
+ * @param {object|null|undefined} node - ESTree expression node to evaluate.
+ * @returns {number|null} The computed value, or null when it cannot be resolved.
+ */
+function resolveNumericExpression(node) {
+  if (!node) return null;
+  if (node.type === 'Literal' && typeof node.value === 'number') return node.value;
+  // Unary plus is a no-op on numbers, but without it `+60000` would be unresolved.
+  if (node.type === 'UnaryExpression' && (node.operator === '-' || node.operator === '+')) {
+    const val = resolveNumericExpression(node.argument);
+    if (val === null) return null;
+    return node.operator === '-' ? -val : val;
+  }
+  if (node.type === 'BinaryExpression') {
+    const left = resolveNumericExpression(node.left);
+    const right = resolveNumericExpression(node.right);
+    if (left === null || right === null) return null;
+    switch (node.operator) {
+      case '*': return left * right;
+      case '+': return left + right;
+      case '-': return left - right;
+      // Division and remainder by zero yield Infinity/NaN; treat as unresolvable.
+      case '/': return right !== 0 ? left / right : null;
+      case '%': return right !== 0 ? left % right : null;
+      case '**': return left ** right;
+      default: return null;
+    }
+  }
+  return null;
+}
 /**
  * Extract string value from a node, including BinaryExpression resolution.
  * Falls back to extractStringValue if concat resolution fails.
@@ -253,6 +282,7 @@ module.exports = {
   countConcatOperands,
   resolveStringConcat,
   resolveStringConcatWithVars,
+  resolveNumericExpression,
   extractStringValueDeep,
   hasOnlyStringLiteralArgs,
   hasDecodeArg,

package/src/scanner/ast.js CHANGED Viewed

@@ -156,6 +156,7 @@ function analyzeFile(content, filePath, basePath) {
     hasDnsRequire: /\brequire\s*\(\s*['"]dns['"]\s*\)/.test(content) || /\bdns\s*\.\s*resolve/.test(content),
     hasBase64Encode: /\.toString\s*\(\s*['"]base64(url)?['"]\s*\)/.test(content),
     hasDnsLoop: false,  // set when dns call inside loop context detected
+    hasTimerDelayedPayload: false,  // set when setTimeout/setInterval >= 60s has dangerous sink in callback
     // SANDWORM_MODE P2: LLM API key harvesting
     llmApiKeyCount: 0,
     // Wave 4: path variable tracking for git hooks and IDE config injection