muaddib-scanner 2.10.67 → 2.10.69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.67",
3
+ "version": "2.10.69",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -123,6 +123,25 @@ FEATURE_NAMES = [
123
123
 
124
124
  assert len(FEATURE_NAMES) == 87, f"Expected 87 features, got {len(FEATURE_NAMES)}"
125
125
 
126
# Source-identity proxies removed before any leak filtering or training.
# Each feature below differs between the monitor feed (negatives) and the
# Datadog feed (positives) for reasons unrelated to package behavior, so a
# model trained on it would learn "which source" instead of "is malicious".
# See the corrected retrain plan for the full justification of each entry.
EXCLUDED_METADATA = {
    # --- npm registry metadata ---
    # Never fetched for Datadog positives (always 0) but 8-13% non-zero in
    # monitor negatives, which makes it a source leak.
    'package_age_days',
    'weekly_downloads',
    'version_count',
    'author_package_count',
    'has_repository',
    'readme_size',

    # --- derived from the npm metadata above ---
    # Built from age_days / version_count / downloads. Zero-variance today
    # (always 1.0), but turns into a leak as soon as future records carry
    # real computed values.
    'reputation_factor',

    # --- package-level metadata, not produced by the behavioral scan ---
    # 88-95% non-zero in negatives vs 0% in positives: massive source proxy.
    'unpacked_size_bytes',
    'file_count_total',

    # 13% non-zero in negatives vs 0% in positives: source proxy.
    'has_tests',
}
144
+
126
145
 
127
146
  # --- Data loading ---
128
147
 
@@ -300,10 +319,14 @@ def filter_leaky_features(X: pd.DataFrame, y: np.ndarray,
300
319
  retained = []
301
320
  excluded = []
302
321
 
322
+ # Iterate over columns actually present in X (metadata may have been
323
+ # dropped by Step 2a before this function is called).
324
+ available_features = list(X.columns)
325
+
303
326
  print(f"\n {'Feature':<40s} {'Neg%':>6s} {'Pos%':>6s} {'All%':>6s} {'Status'}")
304
327
  print(f" {'-' * 40} {'-' * 6} {'-' * 6} {'-' * 6} {'-' * 8}")
305
328
 
306
- for feat in FEATURE_NAMES:
329
+ for feat in available_features:
307
330
  neg_nonzero = float((X.loc[neg_mask, feat] != 0).sum()) / max(n_neg, 1)
308
331
  pos_nonzero = float((X.loc[pos_mask, feat] != 0).sum()) / max(n_pos, 1)
309
332
  all_nonzero = float((X[feat] != 0).sum()) / max(n_total, 1)
@@ -328,7 +351,7 @@ def filter_leaky_features(X: pd.DataFrame, y: np.ndarray,
328
351
  print(f" {feat:<40s} {neg_nonzero * 100:5.1f}% {pos_nonzero * 100:5.1f}% "
329
352
  f"{all_nonzero * 100:5.1f}% {status}")
330
353
 
331
- print(f"\n Retained: {len(retained)}/{len(FEATURE_NAMES)} features")
354
+ print(f"\n Retained: {len(retained)}/{len(available_features)} features")
332
355
  if excluded:
333
356
  print(f" Excluded ({len(excluded)}): {', '.join(excluded)}")
334
357
 
@@ -336,6 +359,114 @@ def filter_leaky_features(X: pd.DataFrame, y: np.ndarray,
336
359
  return X_filtered, retained
337
360
 
338
361
 
362
def source_discrimination_diagnostic(X: pd.DataFrame, y: np.ndarray,
                                     active_features: list):
    """
    Step 2c: Source discrimination diagnostic (LOG-ONLY, non-blocking).

    DESIGN NOTE: This test cannot function as a hard gate when source labels
    are perfectly confounded with class labels (all negatives = monitor,
    all positives = Datadog). In that case, legitimate behavioral features
    (score, count_critical, type_*) will dominate the discriminator because
    malware genuinely behaves differently from clean packages — this is
    signal, not leak.

    A true source discrimination test would require either:
      (a) positives re-scanned through our own pipeline, or
      (b) negatives and positives from the SAME source.

    This diagnostic still serves a purpose: it flags NON-BEHAVIORAL features
    that shouldn't appear in the top discriminators. If metadata features
    (unpacked_size_bytes, file_count_total, etc.) appear despite being
    excluded in Step 2a, something is wrong.

    The real validation happens in shadow deployment on live production data.

    Args:
        X: Feature matrix; must contain every column named in
            ``active_features``.
        y: Binary labels aligned with the rows of ``X``.
        active_features: Names of the columns to train the shallow
            discriminator on. An empty list skips the diagnostic.

    Returns:
        None. All findings are printed; nothing is raised on a detected leak.
    """
    print("\n" + "=" * 60)
    print("[Step 2c/8] Source discrimination diagnostic (log-only)...")
    print("=" * 60)
    print(" NOTE: source=Datadog correlates 100% with label=malicious.")
    print(" This diagnostic checks for non-behavioral features in the")
    print(" top discriminators, NOT for overall accuracy (which will")
    print(" always be high due to the source/label confound).")

    # A log-only step must never abort the pipeline: with no columns there is
    # nothing to train on, so report and bail out instead of failing in XGBoost.
    if not active_features:
        print("\n [WARNING] No active features — diagnostic skipped.")
        return

    # Rank-independent sanity check: anything listed in EXCLUDED_METADATA should
    # already have been dropped in Step 2a, wherever it would rank by SHAP.
    not_dropped = [name for name in active_features if name in EXCLUDED_METADATA]
    if not_dropped:
        print(f"\n [WARNING] Excluded metadata features still active: "
              f"{', '.join(not_dropped)}")

    X_active = X[active_features]

    # 70/30 split with different seed to avoid overlap with main split
    X_tr, X_te, y_tr, y_te = train_test_split(
        X_active, y, test_size=0.3, stratify=y, random_state=99
    )

    # Shallow model — depth=3, 50 rounds, no class weighting
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'max_depth': 3,
        'learning_rate': 0.1,
        'subsample': 0.8,
        'seed': 99,
        'verbosity': 0,
    }

    dtrain = xgb.DMatrix(X_tr, label=y_tr, feature_names=active_features)
    dtest = xgb.DMatrix(X_te, label=y_te, feature_names=active_features)

    model = xgb.train(params, dtrain, num_boost_round=50)
    probs = model.predict(dtest)
    preds = (probs >= 0.5).astype(int)
    accuracy = float((preds == y_te).mean())

    p = precision_score(y_te, preds, zero_division=0)
    r = recall_score(y_te, preds, zero_division=0)

    print(f"\n Discrimination accuracy: {accuracy:.3f} (P={p:.3f} R={r:.3f})")
    print(" (Expected to be high due to source/label confound)")

    # SHAP analysis — the diagnostic value is in WHICH features dominate
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_te)
    mean_abs_shap = np.abs(shap_values).mean(axis=0)
    importance = sorted(zip(active_features, mean_abs_shap),
                        key=lambda pair: pair[1], reverse=True)

    # Known behavioral features that SHOULD dominate (malware scores higher)
    expected_behavioral = {
        'score', 'global_risk_score', 'max_file_score', 'package_score',
        'count_total', 'count_critical', 'count_high', 'count_medium',
        'count_low', 'distinct_threat_types', 'severity_ratio_high',
        'max_single_points', 'points_concentration', 'file_count_with_threats',
        'file_score_mean', 'file_score_max', 'threat_density',
    }
    # Features that should NOT appear in the top discriminators. Derived from
    # EXCLUDED_METADATA so the two lists cannot drift apart; the dependency
    # counts are not dropped in Step 2a but are still watched here.
    excluded_check = set(EXCLUDED_METADATA) | {'dep_count', 'dev_dep_count'}

    print("\n Top 10 features driving discrimination:")
    has_leak = False
    for rank, (name, val) in enumerate(importance[:10], start=1):
        if name in excluded_check:
            flag = " *** LEAK — should have been excluded in Step 2a!"
            has_leak = True
        elif name in expected_behavioral:
            flag = " (expected — behavioral)"
        elif name.startswith(('type_', 'has_')):
            flag = " (behavioral signal)"
        else:
            flag = ""
        print(f" {rank:2d}. {name:40s} {val:.6f}{flag}")

    if has_leak:
        print("\n [WARNING] Non-behavioral features found in top discriminators!")
        print(" Check EXCLUDED_METADATA — some metadata features leaked through.")
    elif not_dropped:
        # Do not print an all-clear while excluded features are still active.
        print("\n [WARNING] No leak in the top 10, but excluded metadata "
              "features are still active (see above).")
    else:
        print("\n [OK] Top discriminators are all behavioral features.")
        print(" No metadata/source-proxy leak detected.")
468
+
469
+
339
470
  def split_data(X: pd.DataFrame, y: np.ndarray) -> tuple:
340
471
  """
341
472
  Step 3: Stratified 80/20 split.
@@ -693,13 +824,15 @@ def main():
693
824
  help='Path to negatives JSONL (clean/fp labels)')
694
825
  parser.add_argument('--positives', required=True,
695
826
  help='Path to positives JSONL (malicious labels)')
696
- parser.add_argument('--output', default='src/ml/model-trees.js',
697
- help='Output JS file path (default: src/ml/model-trees.js)')
698
- parser.add_argument('--top-features', type=int, default=40,
699
- help='Number of top SHAP features to select (default: 40)')
827
+ parser.add_argument('--output', default='src/ml/model-trees-shadow.js',
828
+ help='Output JS file path (default: src/ml/model-trees-shadow.js)')
829
+ parser.add_argument('--top-features', type=int, default=50,
830
+ help='Number of top SHAP features to select (default: 50)')
700
831
  parser.add_argument('--common-only', action=argparse.BooleanOptionalAction,
701
832
  default=True,
702
833
  help='Only use features with >=1%% non-zero coverage in BOTH sources (default: on)')
834
+ parser.add_argument('--skip-gate', action='store_true',
835
+ help='Skip source discrimination gate (dangerous — use only for debugging)')
703
836
  args = parser.parse_args()
704
837
 
705
838
  # Validate inputs
@@ -716,11 +849,31 @@ def main():
716
849
  # Step 2: Align features
717
850
  X, y, stats = align_features(negatives, positives)
718
851
 
719
- # Step 2b: Filter leaky features
852
+ # Step 2a: Remove known metadata/source-proxy features BEFORE leak filter.
853
+ # These features differ between sources for non-behavioral reasons and would
854
+ # cause the model to learn source identity instead of malicious behavior.
855
+ metadata_cols = [f for f in FEATURE_NAMES if f in EXCLUDED_METADATA]
856
+ X = X.drop(columns=metadata_cols, errors='ignore')
857
+ remaining_features = [f for f in FEATURE_NAMES if f not in EXCLUDED_METADATA]
858
+ print(f"\n [Step 2a] Excluded {len(metadata_cols)} metadata features: "
859
+ f"{', '.join(metadata_cols)}")
860
+ print(f" Remaining: {len(remaining_features)} features")
861
+
862
+ # Step 2b: Filter dead/leaky features (on remaining behavioral features)
720
863
  if args.common_only:
721
864
  X, active_features = filter_leaky_features(X, y)
722
865
  else:
723
- active_features = list(FEATURE_NAMES)
866
+ active_features = list(remaining_features)
867
+
868
+ # Step 2c: Source discrimination diagnostic (log-only).
869
+ # NOT a hard gate — source label is 100% confounded with class label
870
+ # (all positives = Datadog, all negatives = monitor), so behavioral
871
+ # features will always dominate the discriminator. The diagnostic
872
+ # checks that no METADATA features leaked through Step 2a.
873
+ if not args.skip_gate:
874
+ source_discrimination_diagnostic(X, y, active_features)
875
+ else:
876
+ print("\n [Step 2c] Source discrimination diagnostic SKIPPED (--skip-gate)")
724
877
 
725
878
  # Class imbalance weight
726
879
  n_neg = stats['n_neg']
@@ -756,7 +909,8 @@ def main():
756
909
  print("TRAINING COMPLETE")
757
910
  print("=" * 60)
758
911
  print(f" Samples: {n_neg} negatives + {n_pos} positives = {n_neg + n_pos}")
759
- print(f" Features: {len(selected)} selected (from {len(active_features)} active / {len(FEATURE_NAMES)} total)")
912
+ print(f" Features: {len(selected)} selected (from {len(active_features)} active / "
913
+ f"{len(FEATURE_NAMES)} total, {len(EXCLUDED_METADATA)} metadata excluded)")
760
914
  print(f" Threshold: {cv_metrics['threshold']:.3f}")
761
915
  print(f" CV: P={cv_metrics['precision']:.3f} R={cv_metrics['recall']:.3f} F1={cv_metrics['f1']:.3f}")
762
916
  print(f" Holdout: P={holdout_metrics['precision']:.3f} R={holdout_metrics['recall']:.3f} F1={holdout_metrics['f1']:.3f}")
@@ -673,6 +673,12 @@ const PLAYBOOKS = {
673
673
  'Analyser le callback du timer pour identifier le payload retarde. ' +
674
674
  'Si delai > 24h: fort indicateur de time-bomb malware. NE PAS installer.',
675
675
 
676
+ timer_delayed_payload:
677
+ 'Timer avec delai >= 60s contenant un sink dangereux (eval/exec/spawn) dans le callback. ' +
678
+ 'Technique d\'evasion temporelle: le payload attend que les sandboxes timeout avant de s\'activer. ' +
679
+ 'Analyser le contenu du callback: rechercher exfiltration de credentials, reverse shell, ou telechargement de payload. ' +
680
+ 'Si delai >= 15min: forte probabilite de malware. NE PAS installer.',
681
+
676
682
  npm_publish_worm:
677
683
  'CRITIQUE: exec("npm publish") detecte — propagation worm. Le code utilise des tokens npm voles ' +
678
684
  'pour publier des versions infectees des packages de la victime. Technique Shai-Hulud 1.0 et 2.0. ' +
@@ -2194,6 +2194,18 @@ const RULES = {
2194
2194
  references: ['https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Reflect/apply'],
2195
2195
  mitre: 'T1059'
2196
2196
  },
2197
+ timer_delayed_payload: {
2198
+ id: 'MUADDIB-AST-085',
2199
+ name: 'Timer Delayed Payload',
2200
+ severity: 'HIGH',
2201
+ confidence: 'high',
2202
+ description: 'setTimeout/setInterval avec delai >= 60s contenant un sink dangereux (eval/exec/spawn/Function) dans le callback. Evasion temporelle: le payload s\'active apres le timeout des sandboxes. Technique PhantomRaven/timer-bomb-exfil.',
2203
+ references: [
2204
+ 'https://attack.mitre.org/techniques/T1497/003/',
2205
+ 'https://www.sonatype.com/blog/phantomraven-supply-chain-attack'
2206
+ ],
2207
+ mitre: 'T1497.003'
2208
+ },
2197
2209
  lifecycle_missing_script: {
2198
2210
  id: 'MUADDIB-PKG-017',
2199
2211
  name: 'Phantom Lifecycle Script',
@@ -47,7 +47,8 @@ const {
47
47
  extractStringValueDeep,
48
48
  hasOnlyStringLiteralArgs,
49
49
  hasDecodeArg,
50
- containsDecodePattern
50
+ containsDecodePattern,
51
+ resolveNumericExpression
51
52
  } = require('./helpers.js');
52
53
 
53
54
  function handleCallExpression(node, ctx) {
@@ -1046,9 +1047,7 @@ function handleCallExpression(node, ctx) {
1046
1047
  if (node.arguments.length >= 2) {
1047
1048
  const delayArg = node.arguments[1];
1048
1049
  let delayMs = null;
1049
- if (delayArg.type === 'Literal' && typeof delayArg.value === 'number') {
1050
- delayMs = delayArg.value;
1051
- }
1050
+ delayMs = resolveNumericExpression(delayArg);
1052
1051
  if (delayMs !== null && delayMs > 3600000) { // > 1 hour
1053
1052
  const hours = (delayMs / 3600000).toFixed(1);
1054
1053
  ctx.threats.push({
@@ -1058,6 +1057,36 @@ function handleCallExpression(node, ctx) {
1058
1057
  file: ctx.relFile
1059
1058
  });
1060
1059
  }
1060
+
1061
+ // timer_delayed_payload: delay >= 60s + dangerous sink in callback body
1062
+ if (delayMs !== null && delayMs >= 60000) {
1063
+ const callback = node.arguments[0];
1064
+ if (callback && (callback.type === 'ArrowFunctionExpression' || callback.type === 'FunctionExpression')) {
1065
+ const cbSrc = callback.start !== undefined && callback.end !== undefined
1066
+ ? ctx._sourceCode?.slice(callback.start, callback.end) : '';
1067
+ if (cbSrc) {
1068
+ const hasDangerousSink =
1069
+ /\beval\s*\(/.test(cbSrc) ||
1070
+ /\bnew\s+Function\s*\(/.test(cbSrc) ||
1071
+ /\b(execSync|spawn|spawnSync)\s*\(/.test(cbSrc) ||
1072
+ /(?<!\.)\bexec\s*\(/.test(cbSrc) ||
1073
+ /\brequire\s*\(\s*['"](?:node:)?child_process['"]\s*\)/.test(cbSrc) ||
1074
+ /\bModule\._compile\s*\(/.test(cbSrc);
1075
+ if (hasDangerousSink) {
1076
+ const delayDesc = delayMs >= 3600000
1077
+ ? `${(delayMs / 3600000).toFixed(1)}h`
1078
+ : `${(delayMs / 60000).toFixed(0)}min`;
1079
+ ctx.hasTimerDelayedPayload = true;
1080
+ ctx.threats.push({
1081
+ type: 'timer_delayed_payload',
1082
+ severity: delayMs >= 900000 ? 'CRITICAL' : 'HIGH',
1083
+ message: `${callName}() with ${delayDesc} delay (${delayMs}ms) contains dangerous sink in callback — time-delayed payload execution for sandbox evasion.`,
1084
+ file: ctx.relFile
1085
+ });
1086
+ }
1087
+ }
1088
+ }
1089
+ }
1061
1090
  }
1062
1091
  }
1063
1092
 
@@ -151,6 +151,35 @@ function resolveStringConcatWithVars(node, stringVarValues) {
151
151
  return null;
152
152
  }
153
153
 
154
/**
 * Recursively resolve a numeric expression AST node to a concrete number.
 *
 * Handles:
 *   - Literal numbers
 *   - UnaryExpression with `-` or `+` applied to a resolvable operand
 *   - BinaryExpression with `*`, `+`, `-`, `/`, `%`, `**`
 *
 * The wider operator set matters because callers compare the result to a
 * delay threshold: any constant expression that cannot be folded resolves
 * to null and is therefore never compared.
 *
 * Examples: 60000 → 60000, 60*1000 → 60000, 10*60*1000 → 600000,
 *           +60000 → 60000, 2**16 → 65536
 *
 * @param {object|null|undefined} node ESTree-style expression node.
 * @returns {number|null} The folded value, or null when the expression
 *   contains a non-resolvable node, an unsupported operator, a division or
 *   modulo by zero, or a `%` / `**` result that is NaN.
 */
function resolveNumericExpression(node) {
  if (!node) return null;

  if (node.type === 'Literal' && typeof node.value === 'number') return node.value;

  if (node.type === 'UnaryExpression' && (node.operator === '-' || node.operator === '+')) {
    const val = resolveNumericExpression(node.argument);
    if (val === null) return null;
    return node.operator === '-' ? -val : val;
  }

  if (node.type === 'BinaryExpression') {
    const left = resolveNumericExpression(node.left);
    const right = resolveNumericExpression(node.right);
    if (left === null || right === null) return null;
    switch (node.operator) {
      case '*': return left * right;
      case '+': return left + right;
      case '-': return left - right;
      case '/': return right !== 0 ? left / right : null;
      case '%': {
        // x % 0 is NaN in JavaScript; treat it as unresolvable like x / 0.
        if (right === 0) return null;
        const rem = left % right;
        return Number.isNaN(rem) ? null : rem;
      }
      case '**': {
        // e.g. (-8) ** 0.5 is NaN; report it as unresolvable.
        const pow = left ** right;
        return Number.isNaN(pow) ? null : pow;
      }
      default: return null;
    }
  }

  return null;
}
182
+
154
183
  /**
155
184
  * Extract string value from a node, including BinaryExpression resolution.
156
185
  * Falls back to extractStringValue if concat resolution fails.
@@ -253,6 +282,7 @@ module.exports = {
253
282
  countConcatOperands,
254
283
  resolveStringConcat,
255
284
  resolveStringConcatWithVars,
285
+ resolveNumericExpression,
256
286
  extractStringValueDeep,
257
287
  hasOnlyStringLiteralArgs,
258
288
  hasDecodeArg,
@@ -156,6 +156,7 @@ function analyzeFile(content, filePath, basePath) {
156
156
  hasDnsRequire: /\brequire\s*\(\s*['"]dns['"]\s*\)/.test(content) || /\bdns\s*\.\s*resolve/.test(content),
157
157
  hasBase64Encode: /\.toString\s*\(\s*['"]base64(url)?['"]\s*\)/.test(content),
158
158
  hasDnsLoop: false, // set when dns call inside loop context detected
159
+ hasTimerDelayedPayload: false, // set when setTimeout/setInterval >= 60s has dangerous sink in callback
159
160
  // SANDWORM_MODE P2: LLM API key harvesting
160
161
  llmApiKeyCount: 0,
161
162
  // Wave 4: path variable tracking for git hooks and IDE config injection