muaddib-scanner 2.10.31 → 2.10.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.31",
3
+ "version": "2.10.33",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -55,7 +55,7 @@
55
55
  },
56
56
  "devDependencies": {
57
57
  "@eslint/js": "10.0.1",
58
- "eslint": "10.0.3",
58
+ "eslint": "10.1.0",
59
59
  "eslint-plugin-security": "^4.0.0",
60
60
  "globals": "17.4.0"
61
61
  }
@@ -21,6 +21,10 @@ const { extractFeatures } = require('./feature-extractor.js');
21
21
  // Lazy-loaded models (allows resetModel for testing)
22
22
  let _model = undefined; // undefined = not yet loaded, null = absent
23
23
  let _bundlerModel = undefined; // undefined = not yet loaded, null = absent
24
+ let _shadowModel = undefined; // undefined = not yet loaded, null = absent
25
+
26
+ // Shadow comparison counters: total runs, agreements and disagreements with the main model (zeroed by resetShadowModel)
27
+ const _shadowStats = { total: 0, agree: 0, disagree: 0 };
24
28
 
25
29
  // High-confidence malice types that must NEVER be suppressed by ML
26
30
  const HC_TYPES = new Set([
@@ -94,6 +98,80 @@ function resetBundlerModel() {
94
98
  _bundlerModel = undefined;
95
99
  }
96
100
 
101
+ // --- Shadow model (ML1 v2, logs only, no filtering) ---
102
+
103
+ /**
104
+ * Lazily load the shadow model from ./model-trees-shadow.js, caching the outcome. Returns the model object, or null when it cannot be loaded.
105
+ * The shadow model is evaluated alongside the main model for comparison only.
106
+ */
107
+ function loadShadowModel() {
108
+ if (_shadowModel !== undefined) return _shadowModel; // undefined means "not tried yet"; a model or null is a cached outcome
109
+ try {
110
+ _shadowModel = require('./model-trees-shadow.js') || null; // a falsy export is treated as "no model"
111
+ } catch { // any load error (not only a missing file) is treated as "no model"
112
+ _shadowModel = null;
113
+ }
114
+ return _shadowModel;
115
+ }
116
+
117
+ function isShadowModelAvailable() { // true when loadShadowModel() yields a non-null model
118
+ return loadShadowModel() !== null; // the first call triggers the lazy load
119
+ }
120
+
121
+ function resetShadowModel() { // forget the cached shadow model and zero the comparison counters
122
+ _shadowModel = undefined; // back to "not yet loaded", so the next loadShadowModel() calls require() again; NOTE(review): require() may hand back its cached module — confirm
123
+ _shadowStats.total = 0;
124
+ _shadowStats.agree = 0;
125
+ _shadowStats.disagree = 0;
126
+ }
127
+
128
+ /**
129
+ * Score the package with the shadow model and log how its prediction compares with the main model's.
130
+ * Only updates _shadowStats and writes to console.log; it returns nothing, so the main prediction is not modified.
131
+ * NOTE(review): there is no error handling here — a shadow model lacking features/trees would throw into the caller; confirm callers guard this.
132
+ * @param {Object} result - scan result, passed to extractFeatures
133
+ * @param {Object} meta - enriched metadata; a falsy value is replaced with {}
134
+ * @param {string} mainPrediction - the main model's prediction, compared against 'malicious' / 'clean'
135
+ * @param {number} mainProbability - the main model's probability, used only in the disagreement log line
136
+ * @param {string} packageName - package identifier, used only in the disagreement log line
137
+ */
138
+ function runShadowComparison(result, meta, mainPrediction, mainProbability, packageName) {
139
+ const shadow = loadShadowModel();
140
+ if (!shadow) return; // no shadow model: nothing to compare
141
+
142
+ const features = extractFeatures(result, meta || {});
143
+ const values = new Array(shadow.features.length); // one slot per feature name, in the shadow model's own order
144
+ for (let i = 0; i < shadow.features.length; i++) {
145
+ values[i] = features[shadow.features[i]] || 0; // features the extractor did not produce (or any falsy value) become 0
146
+ }
147
+
148
+ let margin = 0;
149
+ for (const tree of shadow.trees) {
150
+ margin += traverseTree(tree, values); // margin accumulates the value returned for each tree
151
+ }
152
+
153
+ const shadowProb = sigmoid(margin);
154
+ const shadowPred = shadowProb >= shadow.threshold ? 'malicious' : 'clean'; // uses the shadow model's own threshold
155
+
156
+ _shadowStats.total++;
157
+ if (shadowPred === mainPrediction) {
158
+ _shadowStats.agree++;
159
+ } else {
160
+ _shadowStats.disagree++;
161
+ console.log(`[ML-SHADOW] Disagreement on ${packageName}: main=${mainPrediction}(${mainProbability}) shadow=${shadowPred}(${Math.round(shadowProb * 1000) / 1000}) [${_shadowStats.disagree}/${_shadowStats.total} disagree]`);
162
+ }
163
+
164
+ // Periodic summary every 100 classifications (counted since the last stats reset)
165
+ if (_shadowStats.total % 100 === 0) {
166
+ const agreeRate = ((_shadowStats.agree / _shadowStats.total) * 100).toFixed(1);
167
+ console.log(`[ML-SHADOW] Stats: ${_shadowStats.total} total, ${agreeRate}% agree, ${_shadowStats.disagree} disagree`);
168
+ }
169
+ }
170
+
171
+ function getShadowStats() { // snapshot of the shadow comparison counters
172
+ return { ..._shadowStats }; // shallow copy, so callers cannot mutate the live counters
173
+ }
174
+
97
175
  /**
98
176
  * Sigmoid function: maps raw margin to probability [0, 1].
99
177
  * @param {number} x - raw margin (sum of tree outputs)
@@ -276,9 +354,18 @@ function classifyPackage(result, meta) {
276
354
  const featureValues = buildFeatureVector(result, meta);
277
355
  const { probability, prediction } = predict(featureValues);
278
356
 
357
+ const roundedProb = Math.round(probability * 1000) / 1000;
358
+
359
+ // Shadow model comparison (log-only, never affects decision)
360
+ if (isShadowModelAvailable()) {
361
+ const pkgName = (result && result.summary && result.summary.packageName) ||
362
+ (meta && meta.name) || 'unknown';
363
+ runShadowComparison(result, meta, prediction, roundedProb, pkgName);
364
+ }
365
+
279
366
  return {
280
367
  prediction,
281
- probability: Math.round(probability * 1000) / 1000,
368
+ probability: roundedProb,
282
369
  reason: prediction === 'clean' ? 'ml_clean' : 'ml_malicious'
283
370
  };
284
371
  }
@@ -298,5 +385,10 @@ module.exports = {
298
385
  resetBundlerModel,
299
386
  loadBundlerModel,
300
387
  predictBundler,
301
- buildBundlerFeatureVector
388
+ buildBundlerFeatureVector,
389
+ // Shadow model (ML1 v2, log-only comparison)
390
+ isShadowModelAvailable,
391
+ resetShadowModel,
392
+ loadShadowModel,
393
+ getShadowStats
302
394
  };
@@ -13,7 +13,11 @@
13
13
 
14
14
  // Top threat types by frequency in production (covers ~95% of all findings).
15
15
  // Types not in this list are aggregated into `threat_type_other`.
16
+ // v2.10.32: expanded from 31 to 47 types — code exec bypasses, IoC, GlassWorm,
17
+ // obfuscation patterns, module graph sinks. New features will be 0 for pre-existing
18
+ // JSONL records; SHAP feature selection handles sparsity gracefully.
16
19
  const TOP_THREAT_TYPES = [
20
+ // --- Original 31 types ---
17
21
  'suspicious_dataflow',
18
22
  'env_access',
19
23
  'sensitive_string',
@@ -44,7 +48,29 @@ const TOP_THREAT_TYPES = [
44
48
  'curl_exec',
45
49
  'reverse_shell',
46
50
  'binary_dropper',
47
- 'mcp_config_injection'
51
+ 'mcp_config_injection',
52
+ // --- Code execution bypasses (v2.9.x–v2.10.x) ---
53
+ 'vm_code_execution',
54
+ 'vm_dynamic_code',
55
+ 'dangerous_constructor',
56
+ 'module_load_bypass',
57
+ 'require_process_mainmodule',
58
+ 'proxy_globalthis_intercept',
59
+ 'reflect_bind_code_execution',
60
+ // --- IoC / supply chain ---
61
+ 'known_malicious_package',
62
+ 'known_malicious_hash',
63
+ // --- GlassWorm (Unicode + Blockchain C2) ---
64
+ 'unicode_invisible_injection',
65
+ 'blockchain_c2_resolution',
66
+ // --- Shell / exec patterns ---
67
+ 'dangerous_exec',
68
+ 'node_inline_exec',
69
+ // --- Obfuscation patterns ---
70
+ 'js_obfuscation_pattern',
71
+ // --- Module graph / WASM ---
72
+ 'suspicious_module_sink',
73
+ 'wasm_host_sink'
48
74
  ];
49
75
 
50
76
  const TOP_THREAT_TYPES_SET = new Set(TOP_THREAT_TYPES);
@@ -78,7 +104,7 @@ function extractFeatures(result, meta) {
78
104
  const distinctTypes = new Set(threats.map(t => t.type));
79
105
  features.distinct_threat_types = distinctTypes.size;
80
106
 
81
- // --- Per-type counts (top 31 types) ---
107
+ // --- Per-type counts (top 47 types) ---
82
108
  const typeCounts = Object.create(null);
83
109
  for (const t of threats) {
84
110
  typeCounts[t.type] = (typeCounts[t.type] || 0) + 1;