muaddib-scanner 2.8.6 → 2.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/index.js +27 -0
- package/src/ml/feature-extractor.js +214 -0
- package/src/ml/jsonl-writer.js +187 -0
- package/src/response/playbooks.js +4 -0
- package/src/rules/index.js +12 -0
- package/src/scanner/ast-detectors.js +18 -0
- package/src/scoring.js +4 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "muaddib-scanner",
|
|
3
|
-
"version": "2.8.6",
|
|
3
|
+
"version": "2.8.8",
|
|
4
4
|
"description": "Supply-chain threat detection & response for npm & PyPI/Python",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
"node": ">=18.0.0"
|
|
45
45
|
},
|
|
46
46
|
"dependencies": {
|
|
47
|
-
"@inquirer/prompts": "8.3.
|
|
47
|
+
"@inquirer/prompts": "8.3.2",
|
|
48
48
|
"acorn": "8.16.0",
|
|
49
49
|
"acorn-walk": "8.3.5",
|
|
50
50
|
"adm-zip": "0.5.16",
|
package/src/index.js
CHANGED
|
@@ -567,6 +567,33 @@ async function run(targetPath, options = {}) {
|
|
|
567
567
|
}
|
|
568
568
|
} catch { /* graceful fallback */ }
|
|
569
569
|
|
|
570
|
+
// Cross-scanner compound: detached_process + suspicious_dataflow in same file
|
|
571
|
+
// Catches cases where credential flow is detected by dataflow scanner, not AST scanner
|
|
572
|
+
{
|
|
573
|
+
const fileMap = Object.create(null);
|
|
574
|
+
for (const t of deduped) {
|
|
575
|
+
if (t.file) {
|
|
576
|
+
if (!fileMap[t.file]) fileMap[t.file] = [];
|
|
577
|
+
fileMap[t.file].push(t);
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
for (const file of Object.keys(fileMap)) {
|
|
581
|
+
const fileThreats = fileMap[file];
|
|
582
|
+
const hasDetached = fileThreats.some(t => t.type === 'detached_process');
|
|
583
|
+
const hasCredFlow = fileThreats.some(t => t.type === 'suspicious_dataflow');
|
|
584
|
+
const alreadyCompound = fileThreats.some(t => t.type === 'detached_credential_exfil');
|
|
585
|
+
if (hasDetached && hasCredFlow && !alreadyCompound) {
|
|
586
|
+
deduped.push({
|
|
587
|
+
type: 'detached_credential_exfil',
|
|
588
|
+
severity: 'CRITICAL',
|
|
589
|
+
message: 'Detached process + credential dataflow — background exfiltration (cross-scanner compound).',
|
|
590
|
+
file,
|
|
591
|
+
count: 1
|
|
592
|
+
});
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
|
|
570
597
|
// FP reduction: legitimate frameworks produce high volumes of certain threat types.
|
|
571
598
|
// A malware package typically has 1-3 occurrences, not dozens.
|
|
572
599
|
applyFPReductions(deduped, reachableFiles, packageName, packageDeps);
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* ML Feature Extractor — extracts numeric/boolean features from scan results
|
|
5
|
+
* for ML classifier training (Phase 1 of FPR reduction pipeline).
|
|
6
|
+
*
|
|
7
|
+
* Features are designed to capture the discriminative signals between true
|
|
8
|
+
* positives and false positives: threat composition, severity distribution,
|
|
9
|
+
* scoring breakdown, and package metadata.
|
|
10
|
+
*
|
|
11
|
+
* Output: flat object with numeric/boolean values suitable for XGBoost/RF.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// Top threat types by frequency in production (covers ~95% of all findings).
|
|
15
|
+
// Types not in this list are aggregated into `threat_type_other`.
|
|
16
|
+
const TOP_THREAT_TYPES = [
|
|
17
|
+
'suspicious_dataflow',
|
|
18
|
+
'env_access',
|
|
19
|
+
'sensitive_string',
|
|
20
|
+
'dangerous_call_eval',
|
|
21
|
+
'dangerous_call_exec',
|
|
22
|
+
'dangerous_call_function',
|
|
23
|
+
'obfuscation_detected',
|
|
24
|
+
'high_entropy_string',
|
|
25
|
+
'dynamic_require',
|
|
26
|
+
'dynamic_import',
|
|
27
|
+
'lifecycle_script',
|
|
28
|
+
'typosquat_detected',
|
|
29
|
+
'staged_payload',
|
|
30
|
+
'staged_binary_payload',
|
|
31
|
+
'network_require',
|
|
32
|
+
'sandbox_evasion',
|
|
33
|
+
'credential_regex_harvest',
|
|
34
|
+
'remote_code_load',
|
|
35
|
+
'suspicious_domain',
|
|
36
|
+
'prototype_hook',
|
|
37
|
+
'intent_credential_exfil',
|
|
38
|
+
'intent_command_exfil',
|
|
39
|
+
'cross_file_dataflow',
|
|
40
|
+
'module_compile',
|
|
41
|
+
'crypto_decipher',
|
|
42
|
+
'env_charcode_reconstruction',
|
|
43
|
+
'lifecycle_shell_pipe',
|
|
44
|
+
'curl_exec',
|
|
45
|
+
'reverse_shell',
|
|
46
|
+
'binary_dropper',
|
|
47
|
+
'mcp_config_injection'
|
|
48
|
+
];
|
|
49
|
+
|
|
50
|
+
const TOP_THREAT_TYPES_SET = new Set(TOP_THREAT_TYPES);
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Extract ML features from a scan result object.
|
|
54
|
+
*
|
|
55
|
+
* @param {Object} result - scan result from run() with { threats, summary }
|
|
56
|
+
* @param {Object} meta - package metadata { name, version, ecosystem, unpackedSize, registryMeta }
|
|
57
|
+
* @returns {Object} flat feature vector with numeric/boolean values
|
|
58
|
+
*/
|
|
59
|
+
function extractFeatures(result, meta) {
|
|
60
|
+
const features = Object.create(null);
|
|
61
|
+
const threats = (result && result.threats) || [];
|
|
62
|
+
const summary = (result && result.summary) || {};
|
|
63
|
+
|
|
64
|
+
// --- Scoring features ---
|
|
65
|
+
features.score = summary.riskScore || 0;
|
|
66
|
+
features.max_file_score = summary.maxFileScore || 0;
|
|
67
|
+
features.package_score = summary.packageScore || 0;
|
|
68
|
+
features.global_risk_score = summary.globalRiskScore || 0;
|
|
69
|
+
|
|
70
|
+
// --- Severity counts ---
|
|
71
|
+
features.count_total = summary.total || 0;
|
|
72
|
+
features.count_critical = summary.critical || 0;
|
|
73
|
+
features.count_high = summary.high || 0;
|
|
74
|
+
features.count_medium = summary.medium || 0;
|
|
75
|
+
features.count_low = summary.low || 0;
|
|
76
|
+
|
|
77
|
+
// --- Distinct threat types ---
|
|
78
|
+
const distinctTypes = new Set(threats.map(t => t.type));
|
|
79
|
+
features.distinct_threat_types = distinctTypes.size;
|
|
80
|
+
|
|
81
|
+
// --- Per-type counts (top 31 types) ---
|
|
82
|
+
const typeCounts = Object.create(null);
|
|
83
|
+
for (const t of threats) {
|
|
84
|
+
typeCounts[t.type] = (typeCounts[t.type] || 0) + 1;
|
|
85
|
+
}
|
|
86
|
+
for (const type of TOP_THREAT_TYPES) {
|
|
87
|
+
features[`type_${type}`] = typeCounts[type] || 0;
|
|
88
|
+
}
|
|
89
|
+
// Aggregate count for types not in top list
|
|
90
|
+
let otherCount = 0;
|
|
91
|
+
for (const [type, count] of Object.entries(typeCounts)) {
|
|
92
|
+
if (!TOP_THREAT_TYPES_SET.has(type)) {
|
|
93
|
+
otherCount += count;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
features.type_other = otherCount;
|
|
97
|
+
|
|
98
|
+
// --- Boolean behavioral signals ---
|
|
99
|
+
features.has_lifecycle_script = threats.some(t => t.type === 'lifecycle_script' || t.type === 'lifecycle_shell_pipe') ? 1 : 0;
|
|
100
|
+
features.has_network_access = threats.some(t =>
|
|
101
|
+
t.type === 'network_require' || t.type === 'remote_code_load' ||
|
|
102
|
+
t.type === 'curl_exec' || t.type === 'suspicious_dataflow'
|
|
103
|
+
) ? 1 : 0;
|
|
104
|
+
features.has_obfuscation = threats.some(t =>
|
|
105
|
+
t.type === 'obfuscation_detected' || t.type === 'high_entropy_string' ||
|
|
106
|
+
t.type === 'js_obfuscation_pattern'
|
|
107
|
+
) ? 1 : 0;
|
|
108
|
+
features.has_env_access = threats.some(t => t.type === 'env_access' || t.type === 'env_charcode_reconstruction') ? 1 : 0;
|
|
109
|
+
features.has_eval = threats.some(t => t.type === 'dangerous_call_eval' || t.type === 'dangerous_call_function') ? 1 : 0;
|
|
110
|
+
features.has_staged_payload = threats.some(t => t.type === 'staged_payload' || t.type === 'staged_binary_payload') ? 1 : 0;
|
|
111
|
+
features.has_typosquat = threats.some(t => t.type === 'typosquat_detected' || t.type === 'pypi_typosquat_detected') ? 1 : 0;
|
|
112
|
+
features.has_ioc_match = threats.some(t => t.type === 'known_malicious_package' || t.type === 'known_malicious_hash' || t.type === 'pypi_malicious_package') ? 1 : 0;
|
|
113
|
+
features.has_intent_pair = threats.some(t => t.type === 'intent_credential_exfil' || t.type === 'intent_command_exfil') ? 1 : 0;
|
|
114
|
+
features.has_sandbox_finding = threats.some(t => t.type && t.type.startsWith('sandbox_')) ? 1 : 0;
|
|
115
|
+
|
|
116
|
+
// --- File distribution features ---
|
|
117
|
+
const fileScores = summary.fileScores || {};
|
|
118
|
+
const fileScoreValues = Object.values(fileScores);
|
|
119
|
+
features.file_count_with_threats = fileScoreValues.length;
|
|
120
|
+
features.file_score_mean = fileScoreValues.length > 0
|
|
121
|
+
? Math.round(fileScoreValues.reduce((a, b) => a + b, 0) / fileScoreValues.length)
|
|
122
|
+
: 0;
|
|
123
|
+
features.file_score_max = fileScoreValues.length > 0
|
|
124
|
+
? Math.max(...fileScoreValues)
|
|
125
|
+
: 0;
|
|
126
|
+
|
|
127
|
+
// --- Severity concentration: ratio of CRITICAL+HIGH vs total ---
|
|
128
|
+
features.severity_ratio_high = features.count_total > 0
|
|
129
|
+
? Math.round(((features.count_critical + features.count_high) / features.count_total) * 100) / 100
|
|
130
|
+
: 0;
|
|
131
|
+
|
|
132
|
+
// --- Points concentration: max single-threat points vs score ---
|
|
133
|
+
const breakdown = summary.breakdown || [];
|
|
134
|
+
features.max_single_points = breakdown.length > 0 ? breakdown[0].points : 0;
|
|
135
|
+
features.points_concentration = features.score > 0 && breakdown.length > 0
|
|
136
|
+
? Math.round((breakdown[0].points / features.score) * 100) / 100
|
|
137
|
+
: 0;
|
|
138
|
+
|
|
139
|
+
// --- Package metadata (from registry) ---
|
|
140
|
+
const registry = (meta && meta.registryMeta) || {};
|
|
141
|
+
features.unpacked_size_bytes = (meta && meta.unpackedSize) || registry.unpackedSize || 0;
|
|
142
|
+
features.dep_count = countDeps(registry.dependencies);
|
|
143
|
+
features.dev_dep_count = countDeps(registry.devDependencies);
|
|
144
|
+
|
|
145
|
+
// --- Reputation factor (if computed by monitor) ---
|
|
146
|
+
features.reputation_factor = summary.reputationFactor || 1.0;
|
|
147
|
+
|
|
148
|
+
return features;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
/**
|
|
152
|
+
* Count dependencies from a registry metadata dependencies object.
|
|
153
|
+
* Handles both object format ({name: version}) and number.
|
|
154
|
+
*/
|
|
155
|
+
function countDeps(deps) {
|
|
156
|
+
if (!deps) return 0;
|
|
157
|
+
if (typeof deps === 'number') return deps;
|
|
158
|
+
if (typeof deps === 'object') return Object.keys(deps).length;
|
|
159
|
+
return 0;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* Build a complete JSONL record for a scanned package.
|
|
164
|
+
*
|
|
165
|
+
* @param {Object} result - scan result from run()
|
|
166
|
+
* @param {Object} params - { name, version, ecosystem, unpackedSize, registryMeta, label, tier, sandboxResult }
|
|
167
|
+
* @returns {Object} complete record with metadata + features + label
|
|
168
|
+
*/
|
|
169
|
+
function buildTrainingRecord(result, params) {
|
|
170
|
+
const {
|
|
171
|
+
name, version, ecosystem,
|
|
172
|
+
unpackedSize, registryMeta,
|
|
173
|
+
label, tier, sandboxResult
|
|
174
|
+
} = params;
|
|
175
|
+
|
|
176
|
+
const features = extractFeatures(result, {
|
|
177
|
+
name, version, ecosystem,
|
|
178
|
+
unpackedSize, registryMeta
|
|
179
|
+
});
|
|
180
|
+
|
|
181
|
+
const record = Object.create(null);
|
|
182
|
+
|
|
183
|
+
// --- Identity (not features, for traceability) ---
|
|
184
|
+
record.name = name || '';
|
|
185
|
+
record.version = version || '';
|
|
186
|
+
record.ecosystem = ecosystem || 'npm';
|
|
187
|
+
record.timestamp = new Date().toISOString();
|
|
188
|
+
|
|
189
|
+
// --- Label ---
|
|
190
|
+
// 'clean' = no findings or T3 only
|
|
191
|
+
// 'suspect' = T1/T2 (pending manual review)
|
|
192
|
+
// 'confirmed' = manually confirmed malicious
|
|
193
|
+
// 'fp' = manually confirmed false positive
|
|
194
|
+
record.label = label || 'suspect';
|
|
195
|
+
record.tier = tier || null;
|
|
196
|
+
|
|
197
|
+
// --- Features ---
|
|
198
|
+
Object.assign(record, features);
|
|
199
|
+
|
|
200
|
+
// --- Sandbox score (if available) ---
|
|
201
|
+
record.sandbox_score = (sandboxResult && sandboxResult.score) || 0;
|
|
202
|
+
record.sandbox_finding_count = (sandboxResult && sandboxResult.findings)
|
|
203
|
+
? sandboxResult.findings.length
|
|
204
|
+
: 0;
|
|
205
|
+
|
|
206
|
+
return record;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
module.exports = {
|
|
210
|
+
extractFeatures,
|
|
211
|
+
buildTrainingRecord,
|
|
212
|
+
TOP_THREAT_TYPES,
|
|
213
|
+
TOP_THREAT_TYPES_SET
|
|
214
|
+
};
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* JSONL Writer — appends training records to data/ml-training.jsonl.
|
|
5
|
+
*
|
|
6
|
+
* One JSON object per line, newline-delimited (JSONL format).
|
|
7
|
+
* Uses append mode for crash-safe incremental writes.
|
|
8
|
+
* Auto-creates data/ directory if missing.
|
|
9
|
+
*
|
|
10
|
+
* File rotation: when the file exceeds MAX_JSONL_SIZE (100MB),
|
|
11
|
+
* it is renamed to ml-training-{timestamp}.jsonl and a fresh file starts.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
const fs = require('fs');
const path = require('path');

// Default output path: <repo-root>/data/ml-training.jsonl (resolved relative
// to src/ml/, two levels up from this file).
const DEFAULT_TRAINING_FILE = path.join(__dirname, '..', '..', 'data', 'ml-training.jsonl');
// Mutable so tests can redirect output via setTrainingFile()/resetTrainingFile().
let TRAINING_FILE = DEFAULT_TRAINING_FILE;
const MAX_JSONL_SIZE = 100 * 1024 * 1024; // 100MB rotation threshold
|
|
20
|
+
|
|
21
|
+
/**
 * Override the training file path (for testing).
 * Takes effect for all subsequent append/read/rotate/relabel calls.
 * @param {string} filePath - new file path
 */
function setTrainingFile(filePath) {
  TRAINING_FILE = filePath;
}

/**
 * Reset the training file path to the default
 * (data/ml-training.jsonl under the package root).
 */
function resetTrainingFile() {
  TRAINING_FILE = DEFAULT_TRAINING_FILE;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Append one training record to the JSONL file (one JSON object per line).
 * Creates the data/ directory on first use and rotates an oversized file
 * before writing. Failures are non-fatal: read-only filesystems are
 * silently ignored, anything else is logged — JSONL export must never
 * crash the monitor.
 * @param {Object} record - training record from buildTrainingRecord()
 */
function appendRecord(record) {
  try {
    const targetDir = path.dirname(TRAINING_FILE);
    if (!fs.existsSync(targetDir)) {
      fs.mkdirSync(targetDir, { recursive: true });
    }

    // Keep the active file under MAX_JSONL_SIZE before appending.
    maybeRotate();

    fs.appendFileSync(TRAINING_FILE, JSON.stringify(record) + '\n', 'utf8');
  } catch (err) {
    const silentCodes = ['EROFS', 'EACCES', 'EPERM'];
    if (silentCodes.includes(err.code)) {
      // Read-only filesystem — silently skip (same pattern as atomicWriteFileSync)
      return;
    }
    console.error(`[ML] Failed to append JSONL record: ${err.message}`);
  }
}
|
|
61
|
+
|
|
62
|
+
/**
 * Rename the active JSONL file to ml-training-{timestamp}.jsonl once it
 * grows past MAX_JSONL_SIZE, so the next append starts a fresh file.
 * Errors are logged and swallowed — a failed rotation must not block writes.
 */
function maybeRotate() {
  try {
    if (!fs.existsSync(TRAINING_FILE)) return;

    const { size } = fs.statSync(TRAINING_FILE);
    if (size < MAX_JSONL_SIZE) return;

    // ISO timestamp with ':' and '.' replaced, so it is filename-safe.
    const stamp = new Date().toISOString().replace(/[:.]/g, '-');
    const rotated = TRAINING_FILE.replace('.jsonl', `-${stamp}.jsonl`);
    fs.renameSync(TRAINING_FILE, rotated);
    console.log(`[ML] Rotated training file → ${path.basename(rotated)} (${(size / 1024 / 1024).toFixed(1)}MB)`);
  } catch (err) {
    console.error(`[ML] Rotation failed: ${err.message}`);
  }
}
|
|
80
|
+
|
|
81
|
+
/**
 * Load and parse every record in the current JSONL file.
 * Malformed lines are warned about and skipped; any I/O error yields [].
 * NOTE(review): the warned "line N" indexes non-empty lines only, so it can
 * differ from the physical file line when blank lines are present — confirm
 * whether callers rely on physical line numbers before changing.
 * @returns {Object[]} array of parsed records
 */
function readRecords() {
  try {
    if (!fs.existsSync(TRAINING_FILE)) return [];

    const candidates = fs
      .readFileSync(TRAINING_FILE, 'utf8')
      .split('\n')
      .filter(line => line.trim());

    const records = [];
    candidates.forEach((line, idx) => {
      try {
        records.push(JSON.parse(line));
      } catch {
        console.warn(`[ML] Skipping malformed JSONL line ${idx + 1}`);
      }
    });
    return records;
  } catch (err) {
    console.error(`[ML] Failed to read JSONL: ${err.message}`);
    return [];
  }
}
|
|
107
|
+
|
|
108
|
+
/**
 * Get stats about the current JSONL file.
 * @returns {{ recordCount: number, fileSizeBytes: number, fileSizeMB: string }}
 */
function getStats() {
  try {
    if (!fs.existsSync(TRAINING_FILE)) {
      return { recordCount: 0, fileSizeBytes: 0, fileSizeMB: '0.0' };
    }
    const stat = fs.statSync(TRAINING_FILE);
    // Count lines via fixed-size chunked reads: the previous implementation
    // claimed to avoid loading the whole file but readFileSync'd up to 100MB.
    const lineCount = countNonEmptyLines(TRAINING_FILE);
    return {
      recordCount: lineCount,
      fileSizeBytes: stat.size,
      fileSizeMB: (stat.size / 1024 / 1024).toFixed(1)
    };
  } catch {
    // Any I/O failure degrades to empty stats; stats must never throw.
    return { recordCount: 0, fileSizeBytes: 0, fileSizeMB: '0.0' };
  }
}

/**
 * Count non-empty lines in a file without loading it into memory,
 * reading in 64KB chunks. A line counts when it contains at least one
 * byte other than space/tab/CR (JSONL records always do; lines made of
 * exotic Unicode whitespace only would be counted — acceptable here).
 * A final line without a trailing newline is counted.
 * @param {string} filePath
 * @returns {number} number of non-empty lines
 * @throws propagates fs errors to the caller
 */
function countNonEmptyLines(filePath) {
  const CHUNK = 64 * 1024;
  const buf = Buffer.alloc(CHUNK);
  const fd = fs.openSync(filePath, 'r');
  let count = 0;
  let lineHasContent = false;
  try {
    let bytesRead;
    while ((bytesRead = fs.readSync(fd, buf, 0, CHUNK, null)) > 0) {
      for (let i = 0; i < bytesRead; i++) {
        const b = buf[i];
        if (b === 0x0a) { // '\n' — end of line
          if (lineHasContent) count++;
          lineHasContent = false;
        } else if (b !== 0x0d && b !== 0x20 && b !== 0x09) { // not CR/space/tab
          lineHasContent = true;
        }
      }
    }
  } finally {
    fs.closeSync(fd);
  }
  if (lineHasContent) count++; // last line lacked a trailing newline
  return count;
}
|
|
130
|
+
|
|
131
|
+
/**
 * Update the label of records matching a given package name.
 * Used when manual confirmation (fp/confirmed) is applied retroactively.
 *
 * Defense-in-depth: a 'confirmed' label is refused unless the sandbox
 * actually produced findings, so the training set never contains a
 * "confirmed malicious" record with zero dynamic evidence.
 *
 * @param {string} packageName - package name to relabel
 * @param {string} newLabel - 'fp' or 'confirmed'
 * @param {number} [sandboxFindingCount] - number of sandbox findings (must be > 0 for 'confirmed')
 * @returns {number} number of records updated
 */
function relabelRecords(packageName, newLabel, sandboxFindingCount) {
  // Defense-in-depth: never write 'confirmed' without real sandbox findings
  if (newLabel === 'confirmed' && (!sandboxFindingCount || sandboxFindingCount === 0)) {
    console.warn(`[ML] BLOCKED relabel to 'confirmed' for ${packageName}: sandbox_finding_count=${sandboxFindingCount || 0}`);
    return 0;
  }
  try {
    if (!fs.existsSync(TRAINING_FILE)) return 0;
    const lines = fs.readFileSync(TRAINING_FILE, 'utf8').split('\n');
    const { newLines, updated } = applyRelabel(lines, packageName, newLabel);

    if (updated > 0) {
      // Atomic replace: write to a temp file then rename, so a crash
      // mid-write cannot truncate the training set (a bare writeFileSync
      // over a ~100MB file would leave a partial file on failure).
      const tmpFile = `${TRAINING_FILE}.tmp`;
      fs.writeFileSync(tmpFile, newLines.join('\n'), 'utf8');
      fs.renameSync(tmpFile, TRAINING_FILE);
      console.log(`[ML] Relabeled ${updated} records for ${packageName} → ${newLabel}`);
    }
    return updated;
  } catch (err) {
    console.error(`[ML] Failed to relabel records: ${err.message}`);
    return 0;
  }
}

/**
 * Pure relabeling pass over raw JSONL lines.
 * Blank and unparseable lines pass through untouched; records already
 * carrying the new label are not counted as updates.
 * @param {string[]} lines - raw lines from the JSONL file
 * @param {string} packageName - package name to match against record.name
 * @param {string} newLabel - label to assign
 * @returns {{ newLines: string[], updated: number }}
 */
function applyRelabel(lines, packageName, newLabel) {
  let updated = 0;
  const newLines = lines.map(line => {
    if (!line.trim()) return line;
    try {
      const record = JSON.parse(line);
      if (record.name === packageName && record.label !== newLabel) {
        record.label = newLabel;
        updated++;
        return JSON.stringify(record);
      }
      return line;
    } catch {
      return line; // leave malformed lines exactly as found
    }
  });
  return { newLines, updated };
}
|
|
176
|
+
|
|
177
|
+
// Public API. TRAINING_FILE is exposed as a getter so callers always observe
// the current (possibly test-overridden) path instead of a stale copy taken
// at require() time.
module.exports = {
  appendRecord,
  readRecords,
  getStats,
  relabelRecords,
  maybeRotate,
  get TRAINING_FILE() { return TRAINING_FILE; },
  setTrainingFile,
  resetTrainingFile,
  MAX_JSONL_SIZE
};
|
|
@@ -501,6 +501,10 @@ const PLAYBOOKS = {
|
|
|
501
501
|
'CRITIQUE: Un Proxy JavaScript avec trap set/get/apply est combine avec un appel reseau. ' +
|
|
502
502
|
'Technique d\'interception: le Proxy capture toutes les ecritures de proprietes (credentials, tokens, config) ' +
|
|
503
503
|
'et les exfiltre via HTTPS/fetch/dgram. Supprimer le package. Auditer tous les modules qui importent ce package.',
|
|
504
|
+
detached_credential_exfil:
|
|
505
|
+
'CRITIQUE: Process detache avec acces aux credentials et exfiltration reseau. ' +
|
|
506
|
+
'Technique DPRK/Lazarus: le process fils survit au parent (detached:true, unref()) et exfiltre des secrets en arriere-plan. ' +
|
|
507
|
+
'Supprimer le package immediatement. Regenerer tous les tokens/credentials. Auditer les process en cours d\'execution.',
|
|
504
508
|
intent_credential_exfil:
|
|
505
509
|
'CRITIQUE: Coherence d\'intention detectee — lecture de credentials combinee avec exfiltration reseau. ' +
|
|
506
510
|
'Pattern multi-fichier DPRK/Lazarus: chaque fichier semble legitime individuellement mais le package ' +
|
package/src/rules/index.js
CHANGED
|
@@ -1408,6 +1408,18 @@ const RULES = {
|
|
|
1408
1408
|
},
|
|
1409
1409
|
|
|
1410
1410
|
// Intent Graph rules (v2.6.0)
|
|
1411
|
+
detached_credential_exfil: {
|
|
1412
|
+
id: 'MUADDIB-AST-047',
|
|
1413
|
+
name: 'Detached Process Credential Exfiltration',
|
|
1414
|
+
severity: 'CRITICAL',
|
|
1415
|
+
confidence: 'high',
|
|
1416
|
+
description: 'Process detache (survit au parent) avec acces aux credentials et appel reseau — technique DPRK/Lazarus pour exfiltrer des secrets en arriere-plan',
|
|
1417
|
+
references: [
|
|
1418
|
+
'https://attack.mitre.org/techniques/T1041/',
|
|
1419
|
+
'https://www.cisa.gov/news-events/cybersecurity-advisories/aa22-108a'
|
|
1420
|
+
],
|
|
1421
|
+
mitre: 'T1041'
|
|
1422
|
+
},
|
|
1411
1423
|
intent_credential_exfil: {
|
|
1412
1424
|
id: 'MUADDIB-INTENT-001',
|
|
1413
1425
|
name: 'Intent Credential Exfiltration',
|
|
@@ -2187,6 +2187,24 @@ function handlePostWalk(ctx) {
|
|
|
2187
2187
|
file: ctx.relFile
|
|
2188
2188
|
});
|
|
2189
2189
|
}
|
|
2190
|
+
|
|
2191
|
+
// DPRK/Lazarus compound: detached background process + credential env access + network
|
|
2192
|
+
// Pattern: spawn({detached:true}) reads secrets then exfils via network.
|
|
2193
|
+
// This combination is never legitimate — daemons don't read API keys and send them out.
|
|
2194
|
+
const hasDetachedInFile = ctx.threats.some(t =>
|
|
2195
|
+
t.file === ctx.relFile && t.type === 'detached_process'
|
|
2196
|
+
);
|
|
2197
|
+
const hasSensitiveEnvInFile = ctx.threats.some(t =>
|
|
2198
|
+
t.file === ctx.relFile && t.type === 'env_access'
|
|
2199
|
+
);
|
|
2200
|
+
if (hasDetachedInFile && hasSensitiveEnvInFile && ctx.hasNetworkCallInFile) {
|
|
2201
|
+
ctx.threats.push({
|
|
2202
|
+
type: 'detached_credential_exfil',
|
|
2203
|
+
severity: 'CRITICAL',
|
|
2204
|
+
message: 'Detached process + sensitive env access + network call — credential exfiltration via background process (DPRK/Lazarus evasion pattern).',
|
|
2205
|
+
file: ctx.relFile
|
|
2206
|
+
});
|
|
2207
|
+
}
|
|
2190
2208
|
}
|
|
2191
2209
|
|
|
2192
2210
|
function handleWithStatement(node, ctx) {
|
package/src/scoring.js
CHANGED
|
@@ -153,7 +153,8 @@ const DIST_EXEMPT_TYPES = new Set([
|
|
|
153
153
|
'download_exec_binary', // download + chmod + exec (binary dropper)
|
|
154
154
|
'cross_file_dataflow', // credential read → network exfil across files
|
|
155
155
|
'staged_eval_decode', // eval(atob(...)) (explicit payload staging)
|
|
156
|
-
'reverse_shell'
|
|
156
|
+
'reverse_shell', // net.Socket + connect + pipe (always malicious)
|
|
157
|
+
'detached_credential_exfil' // detached process + credential exfil (DPRK/Lazarus)
|
|
157
158
|
// P6: remote_code_load and proxy_data_intercept removed — in bundled dist/ files,
|
|
158
159
|
// fetch + eval co-occurrence is coincidental (bundler combines HTTP client + template compilation).
|
|
159
160
|
// fetch_decrypt_exec (fetch+decrypt+eval triple) remains exempt — never coincidental.
|
|
@@ -196,7 +197,8 @@ const REACHABILITY_EXEMPT_TYPES = new Set([
|
|
|
196
197
|
'cross_file_dataflow',
|
|
197
198
|
'typosquat_detected', 'pypi_typosquat_detected',
|
|
198
199
|
'pypi_malicious_package',
|
|
199
|
-
'ai_config_injection', 'ai_config_injection_compound'
|
|
200
|
+
'ai_config_injection', 'ai_config_injection_compound',
|
|
201
|
+
'detached_credential_exfil' // DPRK/Lazarus: invoked via lifecycle, not require/import
|
|
200
202
|
]);
|
|
201
203
|
|
|
202
204
|
// Custom class prototypes that HTTP frameworks legitimately extend.
|