muaddib-scanner 2.10.32 → 2.10.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/integrations/webhook.js +22 -0
- package/src/ml/classifier.js +94 -2
- package/src/ml/feature-extractor.js +28 -2
- package/src/ml/llm-detective.js +538 -0
- package/src/ml/model-trees.js +4 -4
- package/src/ml/train-bundler-detector.py +25 -4
- package/src/ml/train-xgboost.py +60 -20
- package/src/monitor/classify.js +15 -0
- package/src/monitor/daemon.js +11 -1
- package/src/monitor/queue.js +35 -2
- package/src/monitor/state.js +4 -0
- package/src/monitor/webhook.js +42 -7
package/package.json
CHANGED
|
@@ -223,6 +223,28 @@ function formatDiscord(results) {
|
|
|
223
223
|
});
|
|
224
224
|
}
|
|
225
225
|
|
|
226
|
+
// Add LLM Detective field if LLM analysis was performed
|
|
227
|
+
if (results.llm && results.llm.verdict) {
|
|
228
|
+
const verdictEmoji = results.llm.verdict === 'malicious' ? '\u274C'
|
|
229
|
+
: results.llm.verdict === 'benign' ? '\u2705' : '\u2753';
|
|
230
|
+
const modeTag = results.llm.mode === 'shadow' ? ' [shadow]' : '';
|
|
231
|
+
let llmValue = `${verdictEmoji} **${results.llm.verdict}** (${Math.round(results.llm.confidence * 100)}% confidence)${modeTag}`;
|
|
232
|
+
if (results.llm.attack_type) {
|
|
233
|
+
llmValue += `\nType: ${results.llm.attack_type}`;
|
|
234
|
+
}
|
|
235
|
+
if (results.llm.iocs_found && results.llm.iocs_found.length > 0) {
|
|
236
|
+
llmValue += `\nIOCs: ${results.llm.iocs_found.join(', ')}`;
|
|
237
|
+
}
|
|
238
|
+
if (results.llm.reasoning) {
|
|
239
|
+
llmValue += `\n${results.llm.reasoning}`;
|
|
240
|
+
}
|
|
241
|
+
fields.push({
|
|
242
|
+
name: 'LLM Analysis',
|
|
243
|
+
value: llmValue.slice(0, 1024),
|
|
244
|
+
inline: false
|
|
245
|
+
});
|
|
246
|
+
}
|
|
247
|
+
|
|
226
248
|
const titlePrefix = emoji ? `${emoji} ` : '';
|
|
227
249
|
const prioritySuffix = priority && priority.level ? ` [${priority.level}]` : '';
|
|
228
250
|
const ts = results.timestamp ? new Date(results.timestamp) : new Date();
|
package/src/ml/classifier.js
CHANGED
|
@@ -21,6 +21,10 @@ const { extractFeatures } = require('./feature-extractor.js');
|
|
|
21
21
|
// Lazy-loaded models (allows resetModel for testing)
|
|
22
22
|
let _model = undefined; // undefined = not yet loaded, null = absent
|
|
23
23
|
let _bundlerModel = undefined; // undefined = not yet loaded, null = absent
|
|
24
|
+
let _shadowModel = undefined; // undefined = not yet loaded, null = absent
|
|
25
|
+
|
|
26
|
+
// Shadow mode stats (reset on model reload)
|
|
27
|
+
const _shadowStats = { total: 0, agree: 0, disagree: 0 };
|
|
24
28
|
|
|
25
29
|
// High-confidence malice types that must NEVER be suppressed by ML
|
|
26
30
|
const HC_TYPES = new Set([
|
|
@@ -94,6 +98,80 @@ function resetBundlerModel() {
|
|
|
94
98
|
_bundlerModel = undefined;
|
|
95
99
|
}
|
|
96
100
|
|
|
101
|
+
// --- Shadow model (ML1 v2, logs only, no filtering) ---
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Load shadow model from model-trees-shadow.js. Returns model object or null.
|
|
105
|
+
* Shadow model runs in parallel with the main model for comparison.
|
|
106
|
+
*/
|
|
107
|
+
function loadShadowModel() {
|
|
108
|
+
if (_shadowModel !== undefined) return _shadowModel;
|
|
109
|
+
try {
|
|
110
|
+
_shadowModel = require('./model-trees-shadow.js') || null;
|
|
111
|
+
} catch {
|
|
112
|
+
_shadowModel = null;
|
|
113
|
+
}
|
|
114
|
+
return _shadowModel;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/**
 * Report whether a shadow model can be used.
 * Delegates to loadShadowModel(), so calling this may trigger the load attempt.
 *
 * @returns {boolean} true when loadShadowModel() returns anything other than null
 */
function isShadowModelAvailable() {
  const model = loadShadowModel();
  return model !== null;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Forget the cached shadow model and zero the comparison counters.
 * Setting `_shadowModel` back to `undefined` marks it as "not yet loaded".
 */
function resetShadowModel() {
  for (const counter of ['total', 'agree', 'disagree']) {
    _shadowStats[counter] = 0;
  }
  _shadowModel = undefined;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Run shadow model prediction and log comparison with main model.
 * Never affects the actual classification decision: nothing is returned, and
 * any failure while evaluating the shadow model is caught and logged here so
 * it cannot propagate into the caller's classification path.
 *
 * Side effects: updates the `_shadowStats` counters, logs every disagreement,
 * and logs a summary line each time the total reaches a multiple of 100.
 * A failed evaluation leaves the counters untouched.
 *
 * @param {Object} result - scan result
 * @param {Object} meta - enriched metadata (a falsy value is replaced by {})
 * @param {string} mainPrediction - the main model's prediction
 * @param {number} mainProbability - the main model's probability
 * @param {string} packageName - for logging
 * @returns {void}
 */
function runShadowComparison(result, meta, mainPrediction, mainProbability, packageName) {
  const shadow = loadShadowModel();
  if (!shadow) return;

  let shadowProb;
  try {
    const features = extractFeatures(result, meta || {});
    const values = new Array(shadow.features.length);
    for (let i = 0; i < shadow.features.length; i++) {
      // Features the extractor did not produce count as 0.
      values[i] = features[shadow.features[i]] || 0;
    }

    let margin = 0;
    for (const tree of shadow.trees) {
      margin += traverseTree(tree, values);
    }
    shadowProb = sigmoid(margin);
  } catch (err) {
    // A malformed shadow model or an extractor failure must not abort the
    // main classification; skip this comparison.
    const detail = err && err.message ? err.message : String(err);
    console.warn(`[ML-SHADOW] Comparison skipped for ${packageName}: ${detail}`);
    return;
  }

  const shadowPred = shadowProb >= shadow.threshold ? 'malicious' : 'clean';

  _shadowStats.total++;
  if (shadowPred === mainPrediction) {
    _shadowStats.agree++;
  } else {
    _shadowStats.disagree++;
    console.log(`[ML-SHADOW] Disagreement on ${packageName}: main=${mainPrediction}(${mainProbability}) shadow=${shadowPred}(${Math.round(shadowProb * 1000) / 1000}) [${_shadowStats.disagree}/${_shadowStats.total} disagree]`);
  }

  // Periodic summary every 100 classifications
  if (_shadowStats.total % 100 === 0) {
    const agreeRate = ((_shadowStats.agree / _shadowStats.total) * 100).toFixed(1);
    console.log(`[ML-SHADOW] Stats: ${_shadowStats.total} total, ${agreeRate}% agree, ${_shadowStats.disagree} disagree`);
  }
}
|
|
170
|
+
|
|
171
|
+
/**
 * Return a snapshot of the shadow comparison counters.
 * The returned object is a shallow copy, so mutating it does not change
 * the module's internal `_shadowStats`.
 *
 * @returns {Object} copy of `_shadowStats`
 */
function getShadowStats() {
  return Object.assign({}, _shadowStats);
}
|
|
174
|
+
|
|
97
175
|
/**
|
|
98
176
|
* Sigmoid function: maps raw margin to probability [0, 1].
|
|
99
177
|
* @param {number} x - raw margin (sum of tree outputs)
|
|
@@ -276,9 +354,18 @@ function classifyPackage(result, meta) {
|
|
|
276
354
|
const featureValues = buildFeatureVector(result, meta);
|
|
277
355
|
const { probability, prediction } = predict(featureValues);
|
|
278
356
|
|
|
357
|
+
const roundedProb = Math.round(probability * 1000) / 1000;
|
|
358
|
+
|
|
359
|
+
// Shadow model comparison (log-only, never affects decision)
|
|
360
|
+
if (isShadowModelAvailable()) {
|
|
361
|
+
const pkgName = (result && result.summary && result.summary.packageName) ||
|
|
362
|
+
(meta && meta.name) || 'unknown';
|
|
363
|
+
runShadowComparison(result, meta, prediction, roundedProb, pkgName);
|
|
364
|
+
}
|
|
365
|
+
|
|
279
366
|
return {
|
|
280
367
|
prediction,
|
|
281
|
-
probability:
|
|
368
|
+
probability: roundedProb,
|
|
282
369
|
reason: prediction === 'clean' ? 'ml_clean' : 'ml_malicious'
|
|
283
370
|
};
|
|
284
371
|
}
|
|
@@ -298,5 +385,10 @@ module.exports = {
|
|
|
298
385
|
resetBundlerModel,
|
|
299
386
|
loadBundlerModel,
|
|
300
387
|
predictBundler,
|
|
301
|
-
buildBundlerFeatureVector
|
|
388
|
+
buildBundlerFeatureVector,
|
|
389
|
+
// Shadow model (ML1 v2, log-only comparison)
|
|
390
|
+
isShadowModelAvailable,
|
|
391
|
+
resetShadowModel,
|
|
392
|
+
loadShadowModel,
|
|
393
|
+
getShadowStats
|
|
302
394
|
};
|
|
@@ -13,7 +13,11 @@
|
|
|
13
13
|
|
|
14
14
|
// Top threat types by frequency in production (covers ~95% of all findings).
|
|
15
15
|
// Types not in this list are aggregated into `threat_type_other`.
|
|
16
|
+
// v2.10.32: expanded from 31 to 47 types — code exec bypasses, IoC, GlassWorm,
|
|
17
|
+
// obfuscation patterns, module graph sinks. New features will be 0 for pre-existing
|
|
18
|
+
// JSONL records; SHAP feature selection handles sparsity gracefully.
|
|
16
19
|
const TOP_THREAT_TYPES = [
|
|
20
|
+
// --- Original 31 types ---
|
|
17
21
|
'suspicious_dataflow',
|
|
18
22
|
'env_access',
|
|
19
23
|
'sensitive_string',
|
|
@@ -44,7 +48,29 @@ const TOP_THREAT_TYPES = [
|
|
|
44
48
|
'curl_exec',
|
|
45
49
|
'reverse_shell',
|
|
46
50
|
'binary_dropper',
|
|
47
|
-
'mcp_config_injection'
|
|
51
|
+
'mcp_config_injection',
|
|
52
|
+
// --- Code execution bypasses (v2.9.x–v2.10.x) ---
|
|
53
|
+
'vm_code_execution',
|
|
54
|
+
'vm_dynamic_code',
|
|
55
|
+
'dangerous_constructor',
|
|
56
|
+
'module_load_bypass',
|
|
57
|
+
'require_process_mainmodule',
|
|
58
|
+
'proxy_globalthis_intercept',
|
|
59
|
+
'reflect_bind_code_execution',
|
|
60
|
+
// --- IoC / supply chain ---
|
|
61
|
+
'known_malicious_package',
|
|
62
|
+
'known_malicious_hash',
|
|
63
|
+
// --- GlassWorm (Unicode + Blockchain C2) ---
|
|
64
|
+
'unicode_invisible_injection',
|
|
65
|
+
'blockchain_c2_resolution',
|
|
66
|
+
// --- Shell / exec patterns ---
|
|
67
|
+
'dangerous_exec',
|
|
68
|
+
'node_inline_exec',
|
|
69
|
+
// --- Obfuscation patterns ---
|
|
70
|
+
'js_obfuscation_pattern',
|
|
71
|
+
// --- Module graph / WASM ---
|
|
72
|
+
'suspicious_module_sink',
|
|
73
|
+
'wasm_host_sink'
|
|
48
74
|
];
|
|
49
75
|
|
|
50
76
|
const TOP_THREAT_TYPES_SET = new Set(TOP_THREAT_TYPES);
|
|
@@ -78,7 +104,7 @@ function extractFeatures(result, meta) {
|
|
|
78
104
|
const distinctTypes = new Set(threats.map(t => t.type));
|
|
79
105
|
features.distinct_threat_types = distinctTypes.size;
|
|
80
106
|
|
|
81
|
-
// --- Per-type counts (top 31 types) ---
|
|
107
|
+
// --- Per-type counts (top 47 types) ---
|
|
82
108
|
const typeCounts = Object.create(null);
|
|
83
109
|
for (const t of threats) {
|
|
84
110
|
typeCounts[t.type] = (typeCounts[t.type] || 0) + 1;
|