muaddib-scanner 2.10.37 → 2.10.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/muaddib.js +4 -2
- package/package.json +1 -1
- package/scripts/ossf-benchmark.js +560 -0
- package/src/integrations/publish-anomaly.js +1 -1
- package/src/ioc/scraper.js +87 -0
- package/src/ioc/updater.js +9 -8
- package/src/ml/llm-detective.js +24 -0
- package/src/monitor/classify.js +31 -0
- package/src/monitor/queue.js +68 -10
- package/src/response/playbooks.js +9 -0
- package/src/rules/index.js +23 -0
- package/src/sandbox/index.js +46 -17
- package/src/sandbox/network-allowlist.js +162 -0
- package/src/scoring.js +17 -1
package/bin/muaddib.js
CHANGED
|
@@ -260,10 +260,12 @@ if (command === 'version' || command === '--version' || command === '-v') {
|
|
|
260
260
|
configPath: configPath,
|
|
261
261
|
autoSandbox: autoSandbox
|
|
262
262
|
}).then(exitCode => {
|
|
263
|
-
process.exit(
|
|
263
|
+
// Use process.exitCode instead of process.exit() to let pending async work
|
|
264
|
+
// (the non-blocking version update check) complete before the process exits.
|
|
265
|
+
process.exitCode = exitCode;
|
|
264
266
|
}).catch(err => {
|
|
265
267
|
console.error('[ERROR]', err.message);
|
|
266
|
-
process.
|
|
268
|
+
process.exitCode = 1;
|
|
267
269
|
});
|
|
268
270
|
} else if (command === 'feed') {
|
|
269
271
|
if (wantHelp) showHelp('feed');
|
package/package.json
CHANGED
|
@@ -0,0 +1,560 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* MUAD'DIB OpenSSF Benchmark
|
|
6
|
+
*
|
|
7
|
+
* Fetches the OpenSSF malicious-packages dataset (via OSV.dev API),
|
|
8
|
+
* downloads available npm packages, scans them with MUAD'DIB, and
|
|
9
|
+
* produces a benchmark results file consumed by `muaddib evaluate`.
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* node scripts/ossf-benchmark.js [--sample N] [--seed N] [--refresh]
|
|
13
|
+
*
|
|
14
|
+
* Options:
|
|
15
|
+
* --sample N Number of packages to sample (default: 5000)
|
|
16
|
+
* --seed N Random seed for reproducibility (default: 42)
|
|
17
|
+
* --refresh Force re-download of cached tarballs
|
|
18
|
+
*
|
|
19
|
+
* Output:
|
|
20
|
+
* datasets/real-world/ossf-benchmark-results.json
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
const fs = require('fs');
|
|
24
|
+
const path = require('path');
|
|
25
|
+
const zlib = require('zlib');
|
|
26
|
+
const { execSync } = require('child_process');
|
|
27
|
+
|
|
28
|
+
// Repository root: the directory that contains this script's folder.
const ROOT = path.resolve(__dirname, '..');

// Benchmark output file (the usage header describes it as consumed by `muaddib evaluate`).
const RESULTS_FILE = path.join(ROOT, 'datasets', 'real-world', 'ossf-benchmark-results.json');

// Per-package cache of extracted tarballs, reused across runs unless --refresh is given.
const CACHE_DIR = path.join(ROOT, '.muaddib-cache', 'ossf-tarballs');

// Time budgets, in milliseconds, for `npm pack` and for a single scan.
const PACK_TIMEOUT_MS = 30 * 1000;
const SCAN_TIMEOUT_MS = 30 * 1000;

// Package names accepted for download: optional @scope/ prefix, then word chars, dots, underscores, dashes.
const SAFE_PKG_RE = /^(@[\w._-]+\/)?[\w._-]+$/;
|
|
34
|
+
|
|
35
|
+
// --- CLI args ---
|
|
36
|
+
/**
 * Read an integer option of the form `--flag N` from an argv array.
 *
 * The value is the argument immediately following the first occurrence of
 * `flag`. When the flag is absent, has no value, is not an integer, or is
 * below `min`, the fallback is returned (with a warning for invalid values)
 * instead of letting NaN or a negative number leak into the benchmark.
 *
 * @param {string[]} argv - argument vector to inspect
 * @param {string} flag - option name, e.g. '--sample'
 * @param {number} fallback - value used when the option is missing or invalid
 * @param {number} [min] - smallest accepted value (no lower bound when omitted)
 * @returns {number} the parsed integer or the fallback
 */
function readIntOption(argv, flag, fallback, min) {
  const flagPos = argv.indexOf(flag);
  if (flagPos === -1 || flagPos + 1 >= argv.length) return fallback;

  const raw = argv[flagPos + 1];
  const parsed = Number.parseInt(raw, 10);
  const tooSmall = typeof min === 'number' && parsed < min;
  if (Number.isNaN(parsed) || tooSmall) {
    console.warn('[WARN] Ignoring invalid value for ' + flag + ': ' + raw + ' (using ' + fallback + ')');
    return fallback;
  }
  return parsed;
}

// Sample size must be at least 1: the sampler trims with a pop() loop that never ends for negative sizes.
const SAMPLE_SIZE = readIntOption(process.argv, '--sample', 5000, 1);
const SEED = readIntOption(process.argv, '--seed', 42);
const REFRESH = process.argv.includes('--refresh');
|
|
39
|
+
|
|
40
|
+
// --- Seeded PRNG (Mulberry32) ---
|
|
41
|
+
/**
 * Create a deterministic pseudo-random number generator from an integer seed.
 *
 * @param {number} seed - starting state; coerced to a 32-bit integer
 * @returns {function(): number} generator yielding values in [0, 1)
 */
function mulberry32(seed) {
  let state = seed | 0;

  const next = () => {
    // Advance the 32-bit state by a fixed odd increment.
    state = (state + 0x6D2B79F5) | 0;

    // Two multiply/xor-shift mixing rounds over the new state.
    const round1 = Math.imul(state ^ (state >>> 15), 1 | state);
    const round2 = (round1 + Math.imul(round1 ^ (round1 >>> 7), 61 | round1)) ^ round1;

    // Final xor-shift, reinterpret as unsigned, scale by 2^32.
    const unsigned = (round2 ^ (round2 >>> 14)) >>> 0;
    return unsigned / 4294967296;
  };

  return next;
}
|
|
50
|
+
|
|
51
|
+
// --- Native tgz extraction (same as evaluate.js) ---
|
|
52
|
+
/**
 * Extract the regular files of a gzip-compressed tar archive into a directory.
 *
 * Only non-empty regular-file entries (type flag '0' or NUL) are written;
 * directories are created on demand from the file paths. Entries whose path
 * would land outside `destDir` are skipped.
 *
 * Compared with a naive 100-byte name read, this also honours the POSIX
 * ustar `prefix` field, so paths longer than 100 bytes that were split across
 * prefix + name are restored instead of being written under a truncated name.
 * PAX extended headers are not interpreted.
 *
 * @param {string} tgzPath - path to the .tgz file
 * @param {string} destDir - directory to extract into
 * @returns {void}
 * @throws if the file cannot be read, is not valid gzip, or a write fails
 */
function extractTgz(tgzPath, destDir) {
  const BLOCK = 512;
  const tarData = zlib.gunzipSync(fs.readFileSync(tgzPath));
  const destRoot = path.resolve(destDir);

  // Header fields are NUL-terminated: stop at the FIRST NUL so any bytes
  // after the terminator never leak into the decoded value.
  const readField = function(header, start, end) {
    const field = header.subarray(start, end);
    const nul = field.indexOf(0);
    return field.subarray(0, nul === -1 ? field.length : nul).toString('utf8');
  };

  let offset = 0;
  while (offset + BLOCK <= tarData.length) {
    const header = tarData.subarray(offset, offset + BLOCK);
    // An all-zero header block marks the end of the archive.
    if (header.every(b => b === 0)) break;

    let name = readField(header, 0, 100);
    // POSIX ustar archives ("ustar" + NUL magic) may carry the leading part
    // of a long path in the prefix field. GNU-format headers use a different
    // magic ("ustar ") and a different layout there, so they are left alone.
    if (readField(header, 257, 263) === 'ustar') {
      const prefix = readField(header, 345, 500);
      if (prefix) name = prefix + '/' + name;
    }

    const size = parseInt(readField(header, 124, 136).trim(), 8) || 0;
    const typeFlag = String.fromCharCode(header[156]);

    const dataStart = offset + BLOCK;
    // File data is padded to a whole number of 512-byte blocks.
    const nextHeader = dataStart + Math.ceil(size / BLOCK) * BLOCK;

    // size > 0 is kept on purpose: legacy archives mark directories as
    // zero-length entries with a NUL type flag, which must not become files.
    if (name && (typeFlag === '0' || typeFlag === '\0') && size > 0) {
      const resolved = path.resolve(destRoot, name);
      const rel = path.relative(destRoot, resolved);
      const escapesRoot = rel === '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel);
      // rel === '' means the entry resolves to destDir itself; nothing to write.
      if (rel !== '' && !escapesRoot) {
        fs.mkdirSync(path.dirname(resolved), { recursive: true });
        fs.writeFileSync(resolved, tarData.subarray(dataStart, dataStart + size));
      }
    }

    offset = nextHeader;
  }
}
|
|
82
|
+
|
|
83
|
+
/**
 * Turn a package name and version into a flat, filesystem-friendly cache key.
 * Every '/' and '@' in "name@version" is replaced by '_'.
 *
 * @param {string} name - npm package name (may be scoped)
 * @param {string} version - package version
 * @returns {string} cache directory name
 */
function pkgToCacheName(name, version) {
  const spec = name + '@' + version;
  return spec.replace(/[\/@]/g, '_');
}
|
|
86
|
+
|
|
87
|
+
// --- Step 1: Fetch OSV npm MAL-* index via zip dump ---
|
|
88
|
+
/**
 * Download a URL over HTTPS into a single Buffer, printing progress.
 *
 * Follows 301/302/307/308 redirects up to `redirectsLeft` hops and rejects on
 * any other non-200 status, so an error page is never handed to the zip parser.
 *
 * @param {string} url - absolute https URL
 * @param {number} redirectsLeft - remaining redirect hops allowed
 * @returns {Promise<Buffer>} the response body
 */
function downloadOsvZip(url, redirectsLeft) {
  const https = require('https');
  const IDLE_TIMEOUT_MS = 60000;
  const PROGRESS_STEP = 10 * 1024 * 1024;

  return new Promise(function(resolve, reject) {
    const req = https.get(url, { headers: { 'User-Agent': 'MUADDIB-Scanner/3.0' } }, function(res) {
      const status = res.statusCode;

      if ([301, 302, 307, 308].includes(status) && res.headers.location) {
        res.resume(); // drain the redirect body so the socket is released
        if (redirectsLeft <= 0) {
          reject(new Error('OSV zip download failed: too many redirects'));
          return;
        }
        let target;
        try {
          // Location may be relative; resolve it against the current URL.
          target = new URL(res.headers.location, url).toString();
        } catch (err) {
          reject(err);
          return;
        }
        downloadOsvZip(target, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      if (status !== 200) {
        res.resume();
        reject(new Error('OSV zip download failed: HTTP ' + status));
        return;
      }

      const chunks = [];
      let totalBytes = 0;
      res.on('data', function(chunk) {
        chunks.push(chunk);
        totalBytes += chunk.length;
        // Print roughly once per 10 MB: true only for the chunk that crosses a boundary.
        if (totalBytes % PROGRESS_STEP < chunk.length) {
          process.stdout.write('\r Downloaded: ' + (totalBytes / 1024 / 1024).toFixed(1) + ' MB');
        }
      });
      res.on('end', function() {
        process.stdout.write('\r Downloaded: ' + (totalBytes / 1024 / 1024).toFixed(1) + ' MB\n');
        resolve(Buffer.concat(chunks));
      });
      res.on('error', reject);
    });

    // Abort a stalled connection instead of hanging the benchmark forever.
    req.setTimeout(IDLE_TIMEOUT_MS, function() {
      req.destroy(new Error('OSV zip download timed out (no data for ' + IDLE_TIMEOUT_MS + ' ms)'));
    });
    req.on('error', reject);
  });
}

/**
 * List the versions named by one OSV `affected` record.
 * Prefers the explicit `versions` list; otherwise collects every non-'0'
 * `introduced` event from `ranges`; falls back to the wildcard marker '*'.
 *
 * @param {object} affected - one element of an OSV record's `affected` array
 * @returns {string[]} at least one version string (possibly ['*'])
 */
function collectAffectedVersions(affected) {
  if (affected.versions && affected.versions.length > 0) {
    return Array.from(affected.versions);
  }
  const versions = [];
  for (const range of affected.ranges || []) {
    for (const evt of range.events || []) {
      if (evt.introduced && evt.introduced !== '0') versions.push(evt.introduced);
    }
  }
  return versions.length > 0 ? versions : ['*'];
}

/**
 * Fetch the OSV npm data dump and build a de-duplicated index of MAL-* reports.
 *
 * Downloads the npm `all.zip`, reads every `MAL-*.json` entry of at most
 * 10 MB, and emits one record per unique "name@version", keeping the first
 * report encountered for each key. Entries that fail to parse are skipped and
 * counted rather than aborting the run.
 *
 * @returns {Promise<Array<{name: string, version: string, osv_id: string,
 *   source: string, summary: string, published: (string|null)}>>}
 * @throws if the download fails or the archive cannot be opened
 */
async function fetchOSSFIndex() {
  console.log('\n[1/5] Fetching OSV npm malware index...');

  // The zip dump is used instead of the query API, which has pagination limits.
  const AdmZip = require('adm-zip');
  const zipUrl = 'https://osv-vulnerabilities.storage.googleapis.com/npm/all.zip';
  const MAX_ENTRY_BYTES = 10 * 1024 * 1024;
  const MAX_REDIRECTS = 5;

  console.log(' Downloading npm OSV zip...');
  const zipBuffer = await downloadOsvZip(zipUrl, MAX_REDIRECTS);

  console.log(' Parsing MAL-* entries...');
  const entries = new AdmZip(zipBuffer).getEntries();

  const dedupMap = new Map(); // key: "name@version" -> index record
  let malCount = 0;
  let skipped = 0;

  for (const entry of entries) {
    const entryName = entry.entryName;
    if (!entryName.startsWith('MAL-') || !entryName.endsWith('.json')) continue;

    // Size guard: skip implausibly large reports.
    const entrySize = entry.header ? entry.header.size : 0;
    if (entrySize > MAX_ENTRY_BYTES) continue;

    try {
      const vuln = JSON.parse(entry.getData().toString('utf8'));
      if (!vuln.affected) continue;

      // The report source is taken from the first listed origin, if any.
      let source = 'unknown';
      const origins = vuln.database_specific && vuln.database_specific['malicious-packages-origins'];
      if (origins && origins.length > 0 && origins[0].source) {
        source = origins[0].source;
      }

      for (const affected of vuln.affected) {
        if (!affected.package || affected.package.ecosystem !== 'npm') continue;

        const pkgName = affected.package.name;
        for (const ver of collectAffectedVersions(affected)) {
          const key = pkgName + '@' + ver;
          if (dedupMap.has(key)) continue;
          dedupMap.set(key, {
            name: pkgName,
            version: ver,
            osv_id: vuln.id,
            source: source,
            summary: (vuln.summary || '').slice(0, 200),
            published: vuln.published || null
          });
        }
      }

      malCount++;
    } catch {
      // Best effort: one malformed report must not abort the whole index.
      skipped++;
    }
  }

  const index = Array.from(dedupMap.values());
  console.log(' Parsed ' + malCount + ' MAL-* reports -> ' + index.length + ' unique name@version entries');
  if (skipped > 0) console.log(' Skipped ' + skipped + ' unparseable MAL-* entries');

  return index;
}
|
|
212
|
+
|
|
213
|
+
// --- Step 2: Stratified sampling ---
|
|
214
|
+
/**
 * Draw a reproducible sample of index entries, stratified by report source.
 *
 * Steps: drop wildcard versions and names failing SAFE_PKG_RE, drop
 * spam-looking names, group by `source`, shuffle each group with the seeded
 * generator, take a share of each group proportional to its size (at least
 * one entry per source), then truncate to the requested size.
 *
 * Groups live in a prototype-less object so a source called e.g.
 * "constructor" is treated as data instead of hitting Object.prototype.
 * For a valid positive integer `sampleSize` the selection and its order are
 * the same as with a plain object, so existing seeds stay reproducible.
 *
 * @param {Array<{name: string, version: string, source: string}>} index - full index
 * @param {number} sampleSize - maximum number of entries to return
 * @param {number} seed - seed for the shuffle
 * @returns {Array} sampled entries (same object references as in `index`)
 */
function stratifySample(index, sampleSize, seed) {
  console.log('\n[2/5] Stratified sampling (' + sampleSize + ' packages, seed=' + seed + ')...');

  // Non-finite or non-positive sizes yield an empty sample; a negative size
  // must never reach the trimming step, which could otherwise loop forever.
  const target = Number.isFinite(sampleSize) && sampleSize > 0 ? Math.floor(sampleSize) : 0;
  const rng = mulberry32(seed);

  // Wildcard versions cannot be downloaded; unsafe names are never passed to npm.
  const downloadable = index.filter(function(e) {
    return typeof e.name === 'string' && e.version !== '*' && SAFE_PKG_RE.test(e.name);
  });
  console.log(' Downloadable (non-wildcard, valid name): ' + downloadable.length);

  // Filter out spam packages (SEO junk uploaded to npm — never available, waste time)
  const SPAM_WORDS = /\b(watch|movie|free|generator|download|stream|online|full|episode|subtitle)\b/i;
  const filtered = downloadable.filter(function(e) {
    return e.name.length <= 100 && !/\s/.test(e.name) && !SPAM_WORDS.test(e.name);
  });
  const spamRemoved = downloadable.length - filtered.length;
  if (spamRemoved > 0) console.log(' Spam filtered: ' + spamRemoved + ' entries removed');
  console.log(' After spam filter: ' + filtered.length);

  // Group by source (null prototype: keys come from external data).
  const bySource = Object.create(null);
  for (const entry of filtered) {
    const src = entry.source || 'unknown';
    if (!bySource[src]) bySource[src] = [];
    bySource[src].push(entry);
  }
  const sources = Object.keys(bySource);

  console.log(' Sources: ' + sources.map(function(k) { return k + '=' + bySource[k].length; }).join(', '));

  // Fisher-Yates shuffle of every group, consuming the seeded RNG in key order.
  for (const src of sources) {
    const group = bySource[src];
    for (let i = group.length - 1; i > 0; i--) {
      const j = Math.floor(rng() * (i + 1));
      const tmp = group[i];
      group[i] = group[j];
      group[j] = tmp;
    }
  }

  // Proportional allocation per source, with a floor of one entry each.
  const sample = [];
  for (const src of sources) {
    const proportion = bySource[src].length / filtered.length;
    const count = Math.max(1, Math.round(proportion * target));
    sample.push(...bySource[src].slice(0, count));
  }

  // Rounding and the per-source floor can overshoot: trim from the end.
  if (sample.length > target) sample.length = target;

  console.log(' Sampled: ' + sample.length + ' packages');
  return sample;
}
|
|
274
|
+
|
|
275
|
+
// --- Step 3: Check npm availability ---
|
|
276
|
+
/**
 * Probe the npm registry for every sampled entry and record the outcome.
 *
 * Runs `npm view <name>@<version> version --json` per entry and sets
 * `entry.status` to 'available', 'unavailable' (stderr mentions 404 / not
 * found) or 'error' (anything else, with a truncated message in
 * `entry.error`).
 *
 * The name and version come from external OSV data and are interpolated into
 * a shell command, so any spec that is not a plain `name@version` made of
 * safe characters is marked as an error and never reaches the shell.
 *
 * @param {Array<{name: string, version: string}>} sample - entries to check (mutated)
 * @returns {Promise<Array>} the same `sample` array
 */
async function checkAvailability(sample) {
  console.log('\n[3/5] Checking npm availability...');

  // Optional @scope/, a name, '@', then a version limited to semver-style characters.
  const SAFE_SPEC_RE = /^(@[\w._-]+\/)?[\w._-]+@[0-9A-Za-z][0-9A-Za-z.+-]*$/;
  const npmCmd = process.platform === 'win32' ? 'npm.cmd' : 'npm';
  const counts = { available: 0, unavailable: 0, error: 0 };

  for (let i = 0; i < sample.length; i++) {
    const entry = sample[i];
    const spec = entry.name + '@' + entry.version;

    if (process.stdout.isTTY) {
      process.stdout.write('\r Checking [' + (i + 1) + '/' + sample.length + '] ' + spec + ' ');
    }

    if (!SAFE_SPEC_RE.test(spec)) {
      entry.status = 'error';
      entry.error = 'unsafe package spec';
      counts.error++;
      continue;
    }

    try {
      execSync(npmCmd + ' view ' + spec + ' version --json', {
        encoding: 'utf8',
        timeout: 10000,
        stdio: ['pipe', 'pipe', 'pipe']
      });
      entry.status = 'available';
    } catch (err) {
      // npm reports a missing package or version on stderr; any other failure is an error.
      const stderr = String(err.stderr || '').toLowerCase();
      const missing = stderr.includes('404') || stderr.includes('not found') || stderr.includes('not in this registry');
      if (missing) {
        entry.status = 'unavailable';
      } else {
        entry.status = 'error';
        entry.error = (err.message || '').slice(0, 100);
      }
    }
    counts[entry.status]++;
  }

  if (process.stdout.isTTY) {
    // Wipe the progress line.
    process.stdout.write('\r' + ''.padEnd(80) + '\r');
  }

  console.log(' Available: ' + counts.available + ', Unavailable: ' + counts.unavailable + ', Errors: ' + counts.error);
  return sample;
}
|
|
320
|
+
|
|
321
|
+
// --- Step 4: Download and scan ---
|
|
322
|
+
/**
 * Settle with `promise`, or reject with `message` after `ms` milliseconds.
 * The timer is always cleared, so finished scans do not leave pending
 * timeouts behind that keep the process alive.
 *
 * @param {Promise} promise - work to wait for
 * @param {number} ms - time budget in milliseconds
 * @param {string} message - error message used on timeout
 * @returns {Promise} result of `promise`, or a rejection on timeout
 */
function raceWithTimeout(promise, ms, message) {
  let timer = null;
  const deadline = new Promise(function(_, reject) {
    timer = setTimeout(function() { reject(new Error(message)); }, ms);
  });
  return Promise.race([promise, deadline]).finally(function() { clearTimeout(timer); });
}

/**
 * Fetch one package with `npm pack` and unpack it inside `pkgCacheDir`.
 *
 * @param {string} spec - validated "name@version" string
 * @param {string} pkgCacheDir - existing directory to download into
 * @returns {string|null} path of the extracted `package` directory, or null
 *   when the tarball is missing or extraction yields no `package` directory
 * @throws if `npm pack` or the extraction itself fails
 */
function packAndExtract(spec, pkgCacheDir) {
  const output = execSync('npm pack ' + spec, {
    cwd: pkgCacheDir,
    encoding: 'utf8',
    timeout: PACK_TIMEOUT_MS,
    stdio: ['pipe', 'pipe', 'pipe']
  });

  // npm prints the tarball filename on the last line; basename() keeps the
  // resulting path inside pkgCacheDir whatever that line contains.
  const lastLine = output.trim().split(/\r?\n/).pop().trim();
  const tgzFilename = path.basename(lastLine);
  if (!tgzFilename) return null;

  const tgzPath = path.join(pkgCacheDir, tgzFilename);
  if (!fs.existsSync(tgzPath)) return null;

  extractTgz(tgzPath, pkgCacheDir);
  try { fs.unlinkSync(tgzPath); } catch { /* tarball cleanup is best effort */ }

  const packageDir = path.join(pkgCacheDir, 'package');
  return fs.existsSync(packageDir) ? packageDir : null;
}

/**
 * Download (or reuse from cache) and scan every entry marked 'available'.
 *
 * Each scanned entry is updated in place with `score`, `detected`
 * (score >= 20), `threat_count`, up to 20 summarised `threats`, and
 * `status = 'scanned'`. Download failures mark the entry 'unavailable';
 * unsafe specs, extraction failures, scan exceptions and scan timeouts mark
 * it 'error'.
 *
 * @param {Array<object>} sample - sampled entries (mutated)
 * @returns {Promise<{scanned: number, detected: number, scanErrors: number}>}
 */
async function downloadAndScan(sample) {
  console.log('\n[4/5] Downloading and scanning available packages...');

  const { run } = require('../src/index.js');
  const { clearFileListCache } = require('../src/utils.js');

  // Name and version are interpolated into a shell command: accept only a
  // plain name@version built from safe characters.
  const SAFE_SPEC_RE = /^(@[\w._-]+\/)?[\w._-]+@[0-9A-Za-z][0-9A-Za-z.+-]*$/;
  const DETECTION_THRESHOLD = 20;

  fs.mkdirSync(CACHE_DIR, { recursive: true });

  const scannable = sample.filter(e => e.status === 'available');
  console.log(' Scannable: ' + scannable.length + ' packages');

  let scanned = 0;
  let detected = 0;
  let scanErrors = 0;
  let scanCount = 0;

  for (let i = 0; i < scannable.length; i++) {
    const entry = scannable[i];
    const spec = entry.name + '@' + entry.version;
    const progress = '[' + (i + 1) + '/' + scannable.length + ']';

    if (process.stdout.isTTY) {
      process.stdout.write('\r Scanning ' + progress + ' ' + spec + ' ');
    }

    if (!SAFE_SPEC_RE.test(spec)) {
      entry.status = 'error';
      entry.error = 'unsafe package spec';
      scanErrors++;
      continue;
    }

    // Download (or reuse the cached extraction)
    const pkgCacheDir = path.join(CACHE_DIR, pkgToCacheName(entry.name, entry.version));
    const cachedDir = path.join(pkgCacheDir, 'package');
    let extractedDir = null;

    if (!REFRESH && fs.existsSync(cachedDir)) {
      extractedDir = cachedDir;
    } else {
      // On --refresh, drop the old extraction first so files removed upstream
      // do not linger and get scanned.
      if (REFRESH) fs.rmSync(pkgCacheDir, { recursive: true, force: true });
      fs.mkdirSync(pkgCacheDir, { recursive: true });
      try {
        extractedDir = packAndExtract(spec, pkgCacheDir);
      } catch {
        // Download failed — mark as unavailable (possibly removed between check and download)
        entry.status = 'unavailable';
        entry.error = 'npm pack failed';
        fs.rmSync(pkgCacheDir, { recursive: true, force: true });
      }
    }

    if (!extractedDir) {
      if (entry.status === 'available') {
        entry.status = 'error';
        entry.error = 'extraction failed';
        scanErrors++;
      }
      continue;
    }

    // Scan
    try {
      const result = await raceWithTimeout(
        run(extractedDir, { _capture: true }),
        SCAN_TIMEOUT_MS,
        'scan timeout'
      );

      const score = result.summary.riskScore || 0;
      entry.score = score;
      entry.detected = score >= DETECTION_THRESHOLD;
      entry.threat_count = result.summary.total || 0;
      entry.threats = (result.threats || []).slice(0, 20).map(function(t) {
        return { type: t.type, severity: t.severity, file: t.file };
      });
      entry.status = 'scanned';

      scanned++;
      if (entry.detected) detected++;
    } catch (err) {
      entry.status = 'error';
      entry.error = (err.message || '').slice(0, 100);
      entry.score = 0;
      entry.detected = false;
      scanErrors++;
    }

    // Memory management: clear the scanner's file-list cache after every
    // scan, and force a GC every 20 scans when node runs with --expose-gc.
    clearFileListCache();
    scanCount++;
    if (scanCount % 20 === 0 && global.gc) {
      global.gc();
      const used = Math.round(process.memoryUsage().heapUsed / 1024 / 1024);
      console.log('\n [Memory] ' + used + ' MB after ' + scanCount + ' scans');
    }
  }

  if (process.stdout.isTTY) {
    process.stdout.write('\r' + ''.padEnd(80) + '\r');
  }

  console.log(' Scanned: ' + scanned + ', Detected: ' + detected + ', Errors: ' + scanErrors);
  return { scanned, detected, scanErrors };
}
|
|
434
|
+
|
|
435
|
+
// --- Step 5: Save results ---
|
|
436
|
+
/**
 * Aggregate the benchmark outcome and write it to RESULTS_FILE as JSON.
 *
 * Produces overall metadata (counts, TPR, coverage, score histogram), a
 * per-source breakdown, and one result row per sampled entry.
 *
 * The per-source table uses a prototype-less object because source names come
 * from external data; a source called "constructor" would otherwise resolve
 * to Object.prototype members and crash. Coverage is reported as 'N/A' for an
 * empty sample instead of "NaN%".
 *
 * @param {Array<object>} sample - sampled entries after checking and scanning
 * @param {Array<object>} index - full de-duplicated OSV index (only its length is used)
 * @param {{scanned: number, detected: number, scanErrors: number}} stats - scan totals
 * @returns {object} the results object that was written
 */
function saveResults(sample, index, stats) {
  console.log('\n[5/5] Saving results...');

  // Per-source breakdown
  const bySource = Object.create(null);
  for (const entry of sample) {
    const src = entry.source || 'unknown';
    if (!bySource[src]) bySource[src] = { total: 0, scanned: 0, detected: 0, unavailable: 0 };
    const bucket = bySource[src];
    bucket.total++;
    if (entry.status === 'scanned') {
      bucket.scanned++;
      if (entry.detected) bucket.detected++;
    } else if (entry.status === 'unavailable') {
      bucket.unavailable++;
    }
  }

  // True-positive rate per source, over scanned packages only.
  for (const bucket of Object.values(bySource)) {
    bucket.tpr = bucket.scanned > 0 ? bucket.detected / bucket.scanned : 0;
  }

  // Score histogram over scanned entries
  const scoreDistribution = { '0': 0, '1-9': 0, '10-19': 0, '20-49': 0, '50-74': 0, '75-100': 0 };
  for (const e of sample) {
    if (e.status !== 'scanned') continue;
    const s = e.score || 0;
    if (s === 0) scoreDistribution['0']++;
    else if (s <= 9) scoreDistribution['1-9']++;
    else if (s <= 19) scoreDistribution['10-19']++;
    else if (s <= 49) scoreDistribution['20-49']++;
    else if (s <= 74) scoreDistribution['50-74']++;
    else scoreDistribution['75-100']++;
  }

  const asPercent = function(part, whole) {
    return whole > 0 ? ((part / whole * 100).toFixed(1) + '%') : 'N/A';
  };
  const countStatus = function(wanted) {
    return sample.filter(e => wanted.includes(e.status)).length;
  };

  const results = {
    metadata: {
      benchmark: 'OpenSSF Malicious Packages',
      version: 'v1',
      repo: 'https://github.com/ossf/malicious-packages',
      scanned_at: new Date().toISOString(),
      seed: SEED,
      total_osv_npm_unique: index.length,
      sampled: sample.length,
      available_on_npm: countStatus(['scanned', 'available']),
      scanned: stats.scanned,
      detected: stats.detected,
      missed: stats.scanned - stats.detected,
      errors: stats.scanErrors,
      unavailable: countStatus(['unavailable']),
      threshold: 20,
      tpr: asPercent(stats.detected, stats.scanned),
      coverage: asPercent(stats.scanned, sample.length),
      by_source: bySource,
      score_distribution: scoreDistribution
    },
    results: sample.map(function(e) {
      return {
        name: e.name,
        version: e.version,
        osv_id: e.osv_id,
        source: e.source,
        status: e.status,
        score: e.score || 0,
        detected: e.detected || false,
        threat_count: e.threat_count || 0,
        threats: e.threats || [],
        // undefined is dropped by JSON.stringify, so rows without an error carry no key.
        error: e.error || undefined
      };
    })
  };

  fs.mkdirSync(path.dirname(RESULTS_FILE), { recursive: true });
  fs.writeFileSync(RESULTS_FILE, JSON.stringify(results, null, 2));
  console.log(' Saved to: ' + RESULTS_FILE);

  return results;
}
|
|
515
|
+
|
|
516
|
+
// --- Main ---
|
|
517
|
+
/**
 * Run the benchmark end to end: fetch the index, sample it, check npm
 * availability, download and scan, save the results, then print a summary.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const startedAt = Date.now();
  const rule = '='.repeat(60);

  console.log(rule);
  console.log(' MUAD\'DIB OpenSSF Benchmark');
  console.log(' Sample: ' + SAMPLE_SIZE + ', Seed: ' + SEED);
  console.log(rule);

  // Steps 1-5, in order; steps 3 and 4 annotate the sampled entries in place.
  const index = await fetchOSSFIndex();
  const sample = stratifySample(index, SAMPLE_SIZE, SEED);
  await checkAvailability(sample);
  const stats = await downloadAndScan(sample);
  const results = saveResults(sample, index, stats);

  const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1);

  const summaryLines = [
    ' Total OSV npm unique: ' + index.length,
    ' Sampled: ' + sample.length,
    ' Available on npm: ' + results.metadata.available_on_npm,
    ' Scanned: ' + stats.scanned,
    ' Detected (score>=20): ' + stats.detected,
    ' TPR: ' + results.metadata.tpr,
    ' Coverage: ' + results.metadata.coverage,
    ' Elapsed: ' + elapsed + 's'
  ];

  console.log('\n' + rule);
  console.log(' RESULTS');
  console.log(rule);
  for (const line of summaryLines) {
    console.log(line);
  }
  console.log(rule + '\n');
}
|
|
555
|
+
|
|
556
|
+
// Script entry point: report any failure with its stack trace and exit non-zero.
main().catch((err) => {
  console.error('[FATAL] ' + err.message);
  console.error(err.stack);
  process.exit(1);
});
|
|
@@ -132,7 +132,7 @@ async function detectPublishAnomaly(packageName) {
|
|
|
132
132
|
const spanHours = Math.round(spanMs / MS_PER_HOUR * 10) / 10;
|
|
133
133
|
findings.push({
|
|
134
134
|
type: 'publish_burst',
|
|
135
|
-
severity: '
|
|
135
|
+
severity: 'LOW',
|
|
136
136
|
description: `${inWindow.length} versions published in ${spanHours} hours (avg interval: ${stats.avgIntervalDays} days)`,
|
|
137
137
|
versions: inWindow.map(e => e.version)
|
|
138
138
|
});
|
package/src/ioc/scraper.js
CHANGED
|
@@ -1283,12 +1283,99 @@ async function runScraper() {
|
|
|
1283
1283
|
};
|
|
1284
1284
|
}
|
|
1285
1285
|
|
|
1286
|
+
// ============================================
|
|
1287
|
+
// SOURCE 5: OSV.dev Lightweight API
|
|
1288
|
+
// Used by `muaddib update` (fast, no zip download)
|
|
1289
|
+
// ============================================
|
|
1290
|
+
|
|
1291
|
+
/**
 * Lightweight OSV.dev query — fetches npm MAL-* entries via the REST API.
 * Used by `muaddib update` as a fast complement to the full zip scrape.
 *
 * Follows the API's `next_page_token` for a bounded number of pages, so a
 * paginated response is not silently truncated to its first page. Failures
 * are logged and whatever was collected so far is returned; this function
 * does not throw.
 *
 * NOTE(review): the request names only the ecosystem, no package name —
 * confirm against the OSV API that such a query is accepted.
 *
 * @returns {Promise<Array>} Parsed IOC package entries
 */
async function scrapeOSVLightweightAPI() {
  console.log('[SCRAPER] OSV.dev lightweight API...');
  const packages = [];
  const MAX_PAGES = 10; // keeps the "lightweight" path bounded

  try {
    let pageToken = null;

    for (let page = 0; page < MAX_PAGES; page++) {
      const body = { package: { ecosystem: 'npm' } };
      if (pageToken) body.page_token = pageToken;

      const resp = await fetchJSON('https://api.osv.dev/v1/query', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: body
      });

      if (!resp || resp.status !== 200) {
        // Report the failure instead of presenting it as "0 packages found".
        console.log('[SCRAPER] OSV API returned HTTP ' + (resp ? resp.status : 'no response'));
        break;
      }

      const data = resp.data || {};
      const vulns = Array.isArray(data.vulns) ? data.vulns : [];
      for (const vuln of vulns) {
        if (vuln && vuln.id && vuln.id.startsWith('MAL-')) {
          const parsed = parseOSVEntry(vuln, 'osv-api');
          for (const p of parsed) packages.push(p);
        }
      }

      pageToken = data.next_page_token || null;
      if (!pageToken) break;
    }

    console.log('[SCRAPER] ' + packages.length + ' MAL-* packages from OSV API');
  } catch (e) {
    console.log('[SCRAPER] OSV API error: ' + e.message);
  }

  return packages;
}
|
|
1323
|
+
|
|
1324
|
+
/**
 * Batch query OSV.dev for specific package names.
 * Returns all MAL-* entries matching the given packages.
 *
 * Names are sent to `/v1/querybatch` in groups of 1000 with a short pause
 * between groups. A failed group is logged and skipped; the remaining groups
 * are still queried. Missing or empty per-package results are tolerated.
 *
 * NOTE(review): each returned vuln is passed straight to parseOSVEntry —
 * confirm the batch endpoint returns enough of each record for that parser.
 *
 * @param {string[]} packageNames - npm package names to query
 * @returns {Promise<Array>} Parsed IOC entries
 */
async function queryOSVBatch(packageNames) {
  const BATCH_SIZE = 1000;
  const BATCH_DELAY_MS = 200;
  const allResults = [];

  if (!Array.isArray(packageNames) || packageNames.length === 0) return allResults;

  for (let i = 0; i < packageNames.length; i += BATCH_SIZE) {
    const queries = packageNames.slice(i, i + BATCH_SIZE).map(function(name) {
      return { package: { name: name, ecosystem: 'npm' } };
    });

    try {
      const resp = await fetchJSON('https://api.osv.dev/v1/querybatch', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: { queries: queries }
      });

      if (resp && resp.status === 200 && resp.data && Array.isArray(resp.data.results)) {
        for (const result of resp.data.results) {
          // A package without findings yields an empty (or missing) result.
          const vulns = (result && result.vulns) || [];
          for (const vuln of vulns) {
            if (vuln && vuln.id && vuln.id.startsWith('MAL-')) {
              const parsed = parseOSVEntry(vuln, 'osv-batch');
              for (const p of parsed) allResults.push(p);
            }
          }
        }
      } else {
        console.log('[SCRAPER] OSV batch unexpected response (offset ' + i + '): HTTP ' + (resp ? resp.status : 'no response'));
      }
    } catch (e) {
      console.log('[SCRAPER] OSV batch error (offset ' + i + '): ' + e.message);
    }

    // Courtesy delay between batches
    if (i + BATCH_SIZE < packageNames.length) {
      await new Promise(function(r) { setTimeout(r, BATCH_DELAY_MS); });
    }
  }

  return allResults;
}
|
|
1371
|
+
|
|
1286
1372
|
// Test helpers for aggregated warning counters
|
|
1287
1373
|
function getNoVersionSkipCount() { return _noVersionSkipCount; }
|
|
1288
1374
|
function resetNoVersionSkipCount() { _noVersionSkipCount = 0; }
|
|
1289
1375
|
|
|
1290
1376
|
module.exports = {
|
|
1291
1377
|
runScraper, scrapeShaiHuludDetector, scrapeDatadogIOCs,
|
|
1378
|
+
scrapeOSVLightweightAPI, queryOSVBatch,
|
|
1292
1379
|
// Pure utility functions (exported for testing)
|
|
1293
1380
|
parseCSVLine, parseCSV, extractVersions, parseOSVEntry,
|
|
1294
1381
|
createFreshness, isAllowedRedirect,
|
package/src/ioc/updater.js
CHANGED
|
@@ -39,24 +39,25 @@ async function updateIOCs() {
|
|
|
39
39
|
mergeIOCs(baseIOCs, yamlStandard);
|
|
40
40
|
console.log('[2/4] YAML IOCs: ' + yamlStandard.packages.length + ' packages, ' + yamlStandard.hashes.length + ' hashes');
|
|
41
41
|
|
|
42
|
-
// Step 3: Download additional IOCs from GitHub (GenSecAI + DataDog
|
|
43
|
-
const { scrapeShaiHuludDetector, scrapeDatadogIOCs } = require('./scraper.js');
|
|
44
|
-
console.log('[3/4] Downloading GitHub IOCs...');
|
|
42
|
+
// Step 3: Download additional IOCs from GitHub + OSV API (GenSecAI + DataDog + OSV lightweight)
|
|
43
|
+
const { scrapeShaiHuludDetector, scrapeDatadogIOCs, scrapeOSVLightweightAPI } = require('./scraper.js');
|
|
44
|
+
console.log('[3/4] Downloading GitHub + OSV API IOCs...');
|
|
45
45
|
|
|
46
|
-
const [shaiHulud, datadog] = await Promise.all([
|
|
46
|
+
const [shaiHulud, datadog, osvApi] = await Promise.all([
|
|
47
47
|
scrapeShaiHuludDetector(),
|
|
48
|
-
scrapeDatadogIOCs()
|
|
48
|
+
scrapeDatadogIOCs(),
|
|
49
|
+
scrapeOSVLightweightAPI()
|
|
49
50
|
]);
|
|
50
51
|
|
|
51
52
|
const githubIOCs = {
|
|
52
|
-
packages: [].concat(shaiHulud.packages, datadog.packages),
|
|
53
|
+
packages: [].concat(shaiHulud.packages, datadog.packages, osvApi),
|
|
53
54
|
pypi_packages: [],
|
|
54
55
|
hashes: [].concat(shaiHulud.hashes || [], datadog.hashes || []),
|
|
55
56
|
markers: [],
|
|
56
57
|
files: []
|
|
57
58
|
};
|
|
58
59
|
mergeIOCs(baseIOCs, githubIOCs);
|
|
59
|
-
console.log(' +' + shaiHulud.packages.length + ' GenSecAI, +' + datadog.packages.length + ' DataDog');
|
|
60
|
+
console.log(' +' + shaiHulud.packages.length + ' GenSecAI, +' + datadog.packages.length + ' DataDog, +' + osvApi.length + ' OSV API');
|
|
60
61
|
|
|
61
62
|
// Step 3b: Load existing cache IOCs (from bootstrap download or previous update)
|
|
62
63
|
if (fs.existsSync(CACHE_IOC_FILE)) {
|
|
@@ -90,7 +91,7 @@ async function updateIOCs() {
|
|
|
90
91
|
}
|
|
91
92
|
|
|
92
93
|
baseIOCs.updated = new Date().toISOString();
|
|
93
|
-
baseIOCs.sources = ['compact', 'yaml', 'shai-hulud-detector', 'datadog', 'cache'];
|
|
94
|
+
baseIOCs.sources = ['compact', 'yaml', 'shai-hulud-detector', 'datadog', 'osv-api', 'cache'];
|
|
94
95
|
|
|
95
96
|
// Clean internal dedup sets before serialization
|
|
96
97
|
delete baseIOCs._pkgKeys;
|
package/src/ml/llm-detective.js
CHANGED
|
@@ -92,9 +92,15 @@ function resetDailyCounter() {
|
|
|
92
92
|
_dailyCounter.resetDate = null;
|
|
93
93
|
}
|
|
94
94
|
|
|
95
|
+
// ── Credit exhaustion kill switch (session-level) ──
|
|
96
|
+
// When the Anthropic API returns a 400 with "credit balance is too low",
|
|
97
|
+
// we disable the LLM for the rest of the session to avoid error spam.
|
|
98
|
+
let _creditExhausted = false;
|
|
99
|
+
|
|
95
100
|
// ── Feature flags ──
|
|
96
101
|
|
|
97
102
|
function isLlmEnabled() {
|
|
103
|
+
if (_creditExhausted) return false;
|
|
98
104
|
if (!process.env.ANTHROPIC_API_KEY) return false;
|
|
99
105
|
const env = process.env.MUADDIB_LLM_ENABLED;
|
|
100
106
|
if (env !== undefined && env.toLowerCase() === 'false') return false;
|
|
@@ -441,6 +447,14 @@ async function callAnthropicAPI(system, messages) {
|
|
|
441
447
|
}
|
|
442
448
|
|
|
443
449
|
const errorText = await response.text().catch(() => '');
|
|
450
|
+
|
|
451
|
+
// Credit exhaustion: disable LLM for entire session (not just this call)
|
|
452
|
+
if (response.status === 400 && /credit balance is too low/i.test(errorText)) {
|
|
453
|
+
_creditExhausted = true;
|
|
454
|
+
console.warn('[LLM] API credits exhausted — LLM Detective disabled for this session');
|
|
455
|
+
throw new Error('API credits exhausted');
|
|
456
|
+
}
|
|
457
|
+
|
|
444
458
|
throw new Error(`API ${response.status}: ${errorText.slice(0, 200)}`);
|
|
445
459
|
} catch (err) {
|
|
446
460
|
clearTimeout(timeout);
|
|
@@ -602,6 +616,14 @@ function resetLlmLimiter() {
|
|
|
602
616
|
_semaphore.queue.length = 0;
|
|
603
617
|
}
|
|
604
618
|
|
|
619
|
+
/**
 * Clear the session-level credit-exhaustion flag.
 *
 * While the flag is set, isLlmEnabled() returns false; clearing it lets the
 * other enablement checks decide again.
 */
function resetCreditExhausted() {
  _creditExhausted = false;
}
|
|
622
|
+
|
|
623
|
+
/**
 * Report the session-level credit-exhaustion flag.
 *
 * @returns {boolean} Current value of the module-level `_creditExhausted` flag.
 */
function isCreditExhausted() {
  return _creditExhausted;
}
|
|
626
|
+
|
|
605
627
|
module.exports = {
|
|
606
628
|
investigatePackage,
|
|
607
629
|
isLlmEnabled,
|
|
@@ -614,6 +636,8 @@ module.exports = {
|
|
|
614
636
|
resetStats,
|
|
615
637
|
resetDailyCounter,
|
|
616
638
|
resetLlmLimiter,
|
|
639
|
+
resetCreditExhausted,
|
|
640
|
+
isCreditExhausted,
|
|
617
641
|
// Exported for testing
|
|
618
642
|
collectSourceContext,
|
|
619
643
|
buildPrompt,
|
package/src/monitor/classify.js
CHANGED
|
@@ -49,6 +49,7 @@ const HIGH_CONFIDENCE_MALICE_TYPES = new Set([
|
|
|
49
49
|
'cross_file_dataflow', // proven taint cross-modules
|
|
50
50
|
'canary_exfiltration', // canary sandbox exfiltrated
|
|
51
51
|
'sandbox_network_after_sensitive_read', // compound sandbox detection
|
|
52
|
+
'sandbox_known_exfil_domain', // known exfil/C2 domain contacted during install
|
|
52
53
|
'detached_credential_exfil', // detached process + credential exfil (DPRK/Lazarus)
|
|
53
54
|
'node_modules_write', // writeFile to node_modules/ (worm propagation)
|
|
54
55
|
'npm_publish_worm', // exec("npm publish") (worm propagation)
|
|
@@ -331,6 +332,35 @@ function evaluateCacheTrigger(name, docMeta, doc) {
|
|
|
331
332
|
return { shouldCache: false, reason: '', retentionDays: 0 };
|
|
332
333
|
}
|
|
333
334
|
|
|
335
|
+
/**
 * Decide whether a first-publish package should be sandboxed even though the
 * static scan reported no findings.
 *
 * Decision table (first matching row wins):
 *   - cacheTrigger is missing or its reason is not 'first_publish' -> false
 *   - no registry metadata is available                            -> true (precautionary)
 *   - metadata reports no linked repository                        -> true
 *   - metadata reports author_package_count <= 1 (new maintainer)  -> true
 *   - otherwise (repository present, maintainer has other packages)-> false
 *
 * @param {Object|null} cacheTrigger - From evaluateCacheTrigger()
 * @param {Object|null} npmRegistryMeta - From getPackageMetadata()
 * @returns {boolean} true if the package should be sandboxed regardless of static score
 */
function isFirstPublishHighRisk(cacheTrigger, npmRegistryMeta) {
  const flaggedFirstPublish = Boolean(cacheTrigger) && cacheTrigger.reason === 'first_publish';
  if (!flaggedFirstPublish) {
    return false;
  }

  // No metadata to weigh the risk with: err on the side of sandboxing.
  if (!npmRegistryMeta) {
    return true;
  }

  // Either signal alone is enough; the maintainer count is only consulted
  // when a repository is linked.
  return !npmRegistryMeta.has_repository || npmRegistryMeta.author_package_count <= 1;
}
|
|
363
|
+
|
|
334
364
|
module.exports = {
|
|
335
365
|
// Constants
|
|
336
366
|
IOC_MATCH_TYPES,
|
|
@@ -367,4 +397,5 @@ module.exports = {
|
|
|
367
397
|
setVerboseMode,
|
|
368
398
|
quickTyposquatCheck,
|
|
369
399
|
evaluateCacheTrigger,
|
|
400
|
+
isFirstPublishHighRisk,
|
|
370
401
|
};
|
package/src/monitor/queue.js
CHANGED
|
@@ -54,6 +54,7 @@ const {
|
|
|
54
54
|
classifyError,
|
|
55
55
|
formatFindings,
|
|
56
56
|
evaluateCacheTrigger,
|
|
57
|
+
isFirstPublishHighRisk,
|
|
57
58
|
POPULAR_THRESHOLD,
|
|
58
59
|
downloadsCache: classifyDownloadsCache,
|
|
59
60
|
DOWNLOADS_CACHE_TTL,
|
|
@@ -109,6 +110,11 @@ const SCAN_TIMEOUT_MS = 180_000; // 3 minutes per package
|
|
|
109
110
|
const STATIC_SCAN_TIMEOUT_MS = 45_000; // 45s for static analysis only
|
|
110
111
|
const LARGE_PACKAGE_SIZE = 10 * 1024 * 1024; // 10MB
|
|
111
112
|
|
|
113
|
+
/**
 * Parse an environment value as a non-negative integer.
 *
 * Unlike `parseInt(raw, 10) || fallback`, an explicit "0" is honoured instead
 * of being silently replaced by the fallback. Negative values are clamped to 0.
 *
 * @param {string|undefined} raw - Raw environment variable value
 * @param {number} fallback - Value used when raw is unset or not a number
 * @returns {number} Parsed non-negative integer, or fallback
 */
function parseNonNegativeIntEnv(raw, fallback) {
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    return fallback;
  }
  return Math.max(0, parsed);
}

// First-publish sandbox: max pending sandbox items before deferring first-publish clean scans.
// Prevents starving T1a sandbox capacity when many first-publish packages arrive at once.
// The limit is compared with `scanQueue.length < limit`, so 0 means no clean
// first-publish package is ever sandboxed.
const FIRST_PUBLISH_SANDBOX_MAX_QUEUE = parseNonNegativeIntEnv(process.env.MUADDIB_FIRST_PUBLISH_SANDBOX_MAX_QUEUE, 10);
// Enabled unless MUADDIB_FIRST_PUBLISH_SANDBOX is exactly '0'.
const FIRST_PUBLISH_SANDBOX_ENABLED = process.env.MUADDIB_FIRST_PUBLISH_SANDBOX !== '0';
|
|
117
|
+
|
|
112
118
|
// --- Bundled tooling false-positive filter ---
|
|
113
119
|
|
|
114
120
|
const KNOWN_BUNDLED_FILES = ['yarn.js', 'webpack.js', 'terser.js', 'esbuild.js', 'polyfills.js'];
|
|
@@ -401,10 +407,14 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
401
407
|
throw staticErr;
|
|
402
408
|
}
|
|
403
409
|
|
|
404
|
-
//
|
|
410
|
+
// First-publish detection: used for sandbox priority below
|
|
411
|
+
const isFirstPublish = cacheTrigger && cacheTrigger.reason === 'first_publish';
|
|
412
|
+
|
|
413
|
+
// ML Phase 2a: Fetch npm registry metadata once for packages with findings
|
|
414
|
+
// OR for first-publish packages (needed for isFirstPublishHighRisk decision).
|
|
405
415
|
// Reused for both training records (enriched features) and reputation scoring.
|
|
406
416
|
let npmRegistryMeta = null;
|
|
407
|
-
if (result.summary.total > 0 && ecosystem === 'npm') {
|
|
417
|
+
if ((result.summary.total > 0 || isFirstPublish) && ecosystem === 'npm') {
|
|
408
418
|
try {
|
|
409
419
|
const { getPackageMetadata } = require('../scanner/npm-registry.js');
|
|
410
420
|
npmRegistryMeta = await getPackageMetadata(name);
|
|
@@ -413,15 +423,61 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
413
423
|
}
|
|
414
424
|
}
|
|
415
425
|
|
|
426
|
+
// First-publish sandbox priority: sandbox even with 0 static findings
|
|
427
|
+
// if the package is from a new/unknown maintainer without a linked repository.
|
|
428
|
+
const firstPublishSandbox = isFirstPublish &&
|
|
429
|
+
FIRST_PUBLISH_SANDBOX_ENABLED &&
|
|
430
|
+
isFirstPublishHighRisk(cacheTrigger, npmRegistryMeta) &&
|
|
431
|
+
isSandboxEnabled() && sandboxAvailable &&
|
|
432
|
+
scanQueue.length < FIRST_PUBLISH_SANDBOX_MAX_QUEUE;
|
|
433
|
+
|
|
416
434
|
if (result.summary.total === 0) {
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
435
|
+
if (firstPublishSandbox) {
|
|
436
|
+
// First-publish sandbox priority: run sandbox even with 0 static findings
|
|
437
|
+
console.log(`[MONITOR] FIRST-PUBLISH SANDBOX: ${name}@${version} (0 findings, sandboxing anyway)`);
|
|
438
|
+
stats.firstPublishSandboxed = (stats.firstPublishSandboxed || 0) + 1;
|
|
439
|
+
|
|
440
|
+
let sandboxResult = null;
|
|
441
|
+
try {
|
|
442
|
+
const canary = isCanaryEnabled();
|
|
443
|
+
console.log(`[MONITOR] SANDBOX (first-publish): launching for ${name}@${version}${canary ? ' (canary: on)' : ''}...`);
|
|
444
|
+
sandboxResult = await runSandbox(name, { canary });
|
|
445
|
+
console.log(`[MONITOR] SANDBOX: ${name}@${version} → score: ${sandboxResult.score}, severity: ${sandboxResult.severity}`);
|
|
446
|
+
} catch (err) {
|
|
447
|
+
console.error(`[MONITOR] SANDBOX ERROR: ${name}@${version} — ${err.message}`);
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
const sandboxScore = sandboxResult ? (sandboxResult.score || 0) : 0;
|
|
451
|
+
if (sandboxScore > 0) {
|
|
452
|
+
// Sandbox found something — treat as suspect
|
|
453
|
+
stats.suspect++;
|
|
454
|
+
stats.scanned++;
|
|
455
|
+
const elapsed = Date.now() - startTime;
|
|
456
|
+
stats.totalTimeMs += elapsed;
|
|
457
|
+
updateScanStats('suspect');
|
|
458
|
+
recordTrainingSample(result, { name, version, ecosystem, label: 'suspect', registryMeta: meta, unpackedSize: meta.unpackedSize, npmRegistryMeta, fileCountTotal, hasTests });
|
|
459
|
+
return { sandboxResult, staticClean: false, firstPublishSandbox: true };
|
|
460
|
+
} else {
|
|
461
|
+
// Sandbox clean — still CLEAN
|
|
462
|
+
stats.scanned++;
|
|
463
|
+
const elapsed = Date.now() - startTime;
|
|
464
|
+
stats.totalTimeMs += elapsed;
|
|
465
|
+
stats.clean++;
|
|
466
|
+
console.log(`[MONITOR] CLEAN (first-publish sandbox OK): ${name}@${version} (${(elapsed / 1000).toFixed(1)}s)`);
|
|
467
|
+
updateScanStats('clean');
|
|
468
|
+
recordTrainingSample(result, { name, version, ecosystem, label: 'clean', registryMeta: meta, unpackedSize: meta.unpackedSize, npmRegistryMeta, fileCountTotal, hasTests });
|
|
469
|
+
return { sandboxResult, staticClean: true, firstPublishSandbox: true };
|
|
470
|
+
}
|
|
471
|
+
} else {
|
|
472
|
+
stats.scanned++;
|
|
473
|
+
const elapsed = Date.now() - startTime;
|
|
474
|
+
stats.totalTimeMs += elapsed;
|
|
475
|
+
stats.clean++;
|
|
476
|
+
console.log(`[MONITOR] CLEAN: ${name}@${version} (0 findings, ${(elapsed / 1000).toFixed(1)}s)`);
|
|
477
|
+
updateScanStats('clean');
|
|
478
|
+
recordTrainingSample(result, { name, version, ecosystem, label: 'clean', registryMeta: meta, unpackedSize: meta.unpackedSize, npmRegistryMeta, fileCountTotal, hasTests });
|
|
479
|
+
return { sandboxResult: null, staticClean: true };
|
|
480
|
+
}
|
|
425
481
|
} else {
|
|
426
482
|
const counts = [];
|
|
427
483
|
if (result.summary.critical > 0) counts.push(`${result.summary.critical} CRITICAL`);
|
|
@@ -1023,6 +1079,8 @@ module.exports = {
|
|
|
1023
1079
|
SCAN_TIMEOUT_MS,
|
|
1024
1080
|
STATIC_SCAN_TIMEOUT_MS,
|
|
1025
1081
|
LARGE_PACKAGE_SIZE,
|
|
1082
|
+
FIRST_PUBLISH_SANDBOX_MAX_QUEUE,
|
|
1083
|
+
FIRST_PUBLISH_SANDBOX_ENABLED,
|
|
1026
1084
|
KNOWN_BUNDLED_FILES,
|
|
1027
1085
|
KNOWN_BUNDLED_PATHS,
|
|
1028
1086
|
ML_EXCLUDED_DIRS,
|
|
@@ -243,6 +243,15 @@ const PLAYBOOKS = {
|
|
|
243
243
|
'Acces a des variables d\'environnement sensibles detecte via monkey-patching runtime (TOKEN, SECRET, KEY, PASSWORD). ' +
|
|
244
244
|
'Verifier si le package a une raison legitime d\'acceder a ces variables. Revoquer les credentials si necessaire.',
|
|
245
245
|
|
|
246
|
+
sandbox_network_outlier:
|
|
247
|
+
'Package contacte un domaine/IP hors allowlist pendant l\'installation. Seulement 0.027% des packages font du DNS hors infrastructure npm. ' +
|
|
248
|
+
'Verifier le domaine contacte. Si aucune raison legitime (CDN de binaires, service declare en dep), considerer comme suspect.',
|
|
249
|
+
|
|
250
|
+
sandbox_known_exfil_domain:
|
|
251
|
+
'CRITIQUE: Package contacte un domaine d\'exfiltration/C2 connu (OAST, webhook.site, infrastructure de campagne). ' +
|
|
252
|
+
'Taux de faux positif quasi-nul. Actions: 1. NE PAS installer. 2. Signaler au registry. ' +
|
|
253
|
+
'3. Si deja installe, isoler la machine et regenerer TOUS les secrets.',
|
|
254
|
+
|
|
246
255
|
high_entropy_string:
|
|
247
256
|
'Chaine a haute entropie detectee. Verifier si c\'est du base64, hex, ou un payload chiffre. Analyser le contexte d\'utilisation.',
|
|
248
257
|
js_obfuscation_pattern:
|
package/src/rules/index.js
CHANGED
|
@@ -1011,6 +1011,29 @@ const RULES = {
|
|
|
1011
1011
|
mitre: 'T1552.001'
|
|
1012
1012
|
},
|
|
1013
1013
|
|
|
1014
|
+
// Sandbox network outlier detections
|
|
1015
|
+
sandbox_network_outlier: {
|
|
1016
|
+
id: 'MUADDIB-SANDBOX-015',
|
|
1017
|
+
name: 'Sandbox: Network Outlier',
|
|
1018
|
+
severity: 'HIGH',
|
|
1019
|
+
confidence: 'medium',
|
|
1020
|
+
description: 'Package contacts a non-registry domain/IP during install. Only 0.027% of packages make DNS queries outside npm infrastructure — this is a high-precision outlier signal.',
|
|
1021
|
+
references: ['https://attack.mitre.org/techniques/T1071/001/'],
|
|
1022
|
+
mitre: 'T1071.001'
|
|
1023
|
+
},
|
|
1024
|
+
sandbox_known_exfil_domain: {
|
|
1025
|
+
id: 'MUADDIB-SANDBOX-016',
|
|
1026
|
+
name: 'Sandbox: Known Exfiltration Domain',
|
|
1027
|
+
severity: 'CRITICAL',
|
|
1028
|
+
confidence: 'high',
|
|
1029
|
+
description: 'Package contacts a known exfiltration/C2 domain during install (OAST, webhook sinks, campaign infrastructure). Near-zero false positive rate.',
|
|
1030
|
+
references: [
|
|
1031
|
+
'https://attack.mitre.org/techniques/T1041/',
|
|
1032
|
+
'https://attack.mitre.org/techniques/T1071/001/'
|
|
1033
|
+
],
|
|
1034
|
+
mitre: 'T1041'
|
|
1035
|
+
},
|
|
1036
|
+
|
|
1014
1037
|
// Entropy detections
|
|
1015
1038
|
high_entropy_string: {
|
|
1016
1039
|
id: 'MUADDIB-ENTROPY-001',
|
package/src/sandbox/index.js
CHANGED
|
@@ -15,6 +15,7 @@ const {
|
|
|
15
15
|
|
|
16
16
|
const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
|
|
17
17
|
const { analyzePreloadLog } = require('./analyzer.js');
|
|
18
|
+
const { classifyDomain } = require('./network-allowlist.js');
|
|
18
19
|
|
|
19
20
|
const DOCKER_IMAGE = 'muaddib-sandbox';
|
|
20
21
|
const CONTAINER_TIMEOUT = 120000; // 120 seconds
|
|
@@ -646,34 +647,56 @@ function scoreFindings(report) {
|
|
|
646
647
|
}
|
|
647
648
|
}
|
|
648
649
|
|
|
649
|
-
// 4a. DNS queries
|
|
650
|
+
// 4a. DNS queries — classify via network allowlist
|
|
650
651
|
for (const domain of (report.network?.dns_queries || [])) {
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
652
|
+
const cls = classifyDomain(domain);
|
|
653
|
+
if (cls === 'safe') continue;
|
|
654
|
+
if (cls === 'blacklisted') {
|
|
655
|
+
score += 50;
|
|
656
|
+
findings.push({ type: 'sandbox_known_exfil_domain', severity: 'CRITICAL', detail: `DNS query to known exfiltration domain: ${domain}`, evidence: domain });
|
|
657
|
+
} else if (cls === 'tunnel') {
|
|
658
|
+
score += 30;
|
|
659
|
+
findings.push({ type: 'sandbox_network_outlier', severity: 'HIGH', detail: `DNS query to tunnel/proxy domain: ${domain}`, evidence: domain });
|
|
660
|
+
} else {
|
|
661
|
+
score += 20;
|
|
662
|
+
findings.push({ type: 'sandbox_network_outlier', severity: 'HIGH', detail: `DNS query to non-registry domain: ${domain}`, evidence: domain });
|
|
663
|
+
}
|
|
654
664
|
}
|
|
655
665
|
|
|
656
666
|
// 4b. DNS resolutions — extra detail
|
|
657
667
|
for (const res of (report.network?.dns_resolutions || [])) {
|
|
658
|
-
|
|
668
|
+
const cls = classifyDomain(res.domain);
|
|
669
|
+
if (cls === 'safe') continue;
|
|
659
670
|
// Already scored in 4a via dns_queries, but flag the resolution for reporting
|
|
660
671
|
findings.push({ type: 'dns_resolution', severity: 'INFO', detail: `${res.domain} → ${res.ip}`, evidence: `${res.domain}:${res.ip}` });
|
|
661
672
|
}
|
|
662
673
|
|
|
663
|
-
// 5a. TCP connections
|
|
674
|
+
// 5a. TCP connections — classify via network allowlist
|
|
664
675
|
for (const conn of (report.network?.http_connections || [])) {
|
|
665
|
-
if (isSafeHost(conn.host)) continue;
|
|
666
676
|
if (SAFE_IPS.includes(conn.host)) continue;
|
|
667
677
|
if (PROBE_PORTS.includes(conn.port)) continue;
|
|
668
|
-
|
|
669
|
-
|
|
678
|
+
const cls = classifyDomain(conn.host);
|
|
679
|
+
if (cls === 'safe') continue;
|
|
680
|
+
if (cls === 'blacklisted') {
|
|
681
|
+
score += 50;
|
|
682
|
+
findings.push({ type: 'sandbox_known_exfil_domain', severity: 'CRITICAL', detail: `TCP connection to known exfiltration host: ${conn.host}:${conn.port}`, evidence: `${conn.host}:${conn.port}` });
|
|
683
|
+
} else {
|
|
684
|
+
score += 25;
|
|
685
|
+
findings.push({ type: 'suspicious_connection', severity: 'HIGH', detail: `TCP connection to ${conn.host}:${conn.port}`, evidence: `${conn.host}:${conn.port}` });
|
|
686
|
+
}
|
|
670
687
|
}
|
|
671
688
|
|
|
672
|
-
// 5b. TLS connections —
|
|
689
|
+
// 5b. TLS connections — classify via network allowlist
|
|
673
690
|
for (const tls of (report.network?.tls_connections || [])) {
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
691
|
+
const cls = classifyDomain(tls.domain);
|
|
692
|
+
if (cls === 'safe') continue;
|
|
693
|
+
if (cls === 'blacklisted') {
|
|
694
|
+
score += 50;
|
|
695
|
+
findings.push({ type: 'sandbox_known_exfil_domain', severity: 'CRITICAL', detail: `TLS to known exfiltration domain: ${tls.domain} (${tls.ip}:${tls.port})`, evidence: tls.domain });
|
|
696
|
+
} else {
|
|
697
|
+
score += 20;
|
|
698
|
+
findings.push({ type: 'suspicious_tls', severity: 'HIGH', detail: `TLS connection to ${tls.domain} (${tls.ip}:${tls.port})`, evidence: tls.domain });
|
|
699
|
+
}
|
|
677
700
|
}
|
|
678
701
|
|
|
679
702
|
// 5c. HTTP exfiltration detection — scan body snippets for sensitive data
|
|
@@ -692,11 +715,17 @@ function scoreFindings(report) {
|
|
|
692
715
|
}
|
|
693
716
|
}
|
|
694
717
|
|
|
695
|
-
// 5d. HTTP requests
|
|
718
|
+
// 5d. HTTP requests — classify via network allowlist
|
|
696
719
|
for (const req of (report.network?.http_requests || [])) {
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
720
|
+
const cls = classifyDomain(req.host);
|
|
721
|
+
if (cls === 'safe') continue;
|
|
722
|
+
if (cls === 'blacklisted') {
|
|
723
|
+
score += 50;
|
|
724
|
+
findings.push({ type: 'sandbox_known_exfil_domain', severity: 'CRITICAL', detail: `HTTP request to known exfiltration host: ${req.method} ${req.host}${req.path}`, evidence: `${req.method} ${req.host}${req.path}` });
|
|
725
|
+
} else {
|
|
726
|
+
score += 20;
|
|
727
|
+
findings.push({ type: 'suspicious_http_request', severity: 'HIGH', detail: `${req.method} ${req.host}${req.path}`, evidence: `${req.method} ${req.host}${req.path}` });
|
|
728
|
+
}
|
|
700
729
|
}
|
|
701
730
|
|
|
702
731
|
// 5e. Blocked connections (strict mode)
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ── Network Allowlist & Blacklist for Sandbox Analysis ──
|
|
4
|
+
//
|
|
5
|
+
// Classifies domains/IPs contacted during npm install into three categories:
|
|
6
|
+
// - safe: legitimate install-time traffic (registries, CDNs, GitHub)
|
|
7
|
+
// - blacklisted: known exfiltration/C2 infrastructure (OAST, webhook sinks, campaign IPs)
|
|
8
|
+
// - unknown: everything else — potential outlier requiring investigation
|
|
9
|
+
//
|
|
10
|
+
// Threat model: SafeDep found only 0.027% of 3M+ packages make DNS queries to
|
|
11
|
+
// non-npm domains during install. Network outliers are the highest-precision
|
|
12
|
+
// signal available for detecting supply chain attacks at install time.
|
|
13
|
+
|
|
14
|
+
// ── Safe domains: legitimate traffic during npm install ──
|
|
15
|
+
// These domains are expected during normal package installation.
|
|
16
|
+
// Subdomains are matched (e.g., foo.github.com matches github.com).
|
|
17
|
+
const SAFE_INSTALL_DOMAINS = [
|
|
18
|
+
// npm registry
|
|
19
|
+
'registry.npmjs.org',
|
|
20
|
+
'npmjs.com',
|
|
21
|
+
'npmjs.org',
|
|
22
|
+
// yarn registry
|
|
23
|
+
'registry.yarnpkg.com',
|
|
24
|
+
'yarnpkg.com',
|
|
25
|
+
// GitHub (source tarballs, git deps)
|
|
26
|
+
'github.com',
|
|
27
|
+
'api.github.com',
|
|
28
|
+
'objects.githubusercontent.com',
|
|
29
|
+
'raw.githubusercontent.com',
|
|
30
|
+
'codeload.github.com',
|
|
31
|
+
'github.githubassets.com',
|
|
32
|
+
// CDNs (native binary downloads via node-gyp, prebuild)
|
|
33
|
+
'cdn.jsdelivr.net',
|
|
34
|
+
'unpkg.com',
|
|
35
|
+
'cdnjs.cloudflare.com',
|
|
36
|
+
'cloudflare.com',
|
|
37
|
+
// AWS S3 (prebuild binaries: sharp, canvas, sqlite3, etc.)
|
|
38
|
+
'amazonaws.com',
|
|
39
|
+
// Google (googleapis client, protobuf downloads)
|
|
40
|
+
'googleapis.com',
|
|
41
|
+
'storage.googleapis.com',
|
|
42
|
+
// Node.js (node-gyp headers)
|
|
43
|
+
'nodejs.org',
|
|
44
|
+
// GitLab (git deps)
|
|
45
|
+
'gitlab.com',
|
|
46
|
+
// Bitbucket (git deps)
|
|
47
|
+
'bitbucket.org'
|
|
48
|
+
];
|
|
49
|
+
|
|
50
|
+
// ── Known exfiltration / C2 domains ──
|
|
51
|
+
// Any contact during install is near-certain malicious (quasi-zero FP).
|
|
52
|
+
// Sources: OAST tooling, known campaign C2, webhook sink services.
|
|
53
|
+
const KNOWN_EXFIL_DOMAINS = [
|
|
54
|
+
// OAST / Interactsh / BurpSuite
|
|
55
|
+
'oastify.com',
|
|
56
|
+
'oast.fun',
|
|
57
|
+
'oast.me',
|
|
58
|
+
'oast.live',
|
|
59
|
+
'oast.online',
|
|
60
|
+
'oast.site',
|
|
61
|
+
'burpcollaborator.net',
|
|
62
|
+
'interact.sh',
|
|
63
|
+
// Webhook sink services
|
|
64
|
+
'webhook.site',
|
|
65
|
+
'pipedream.net',
|
|
66
|
+
'requestbin.com',
|
|
67
|
+
'hookbin.com',
|
|
68
|
+
'canarytokens.com',
|
|
69
|
+
// GlassWorm C2 IPs (March 2026, 433+ packages)
|
|
70
|
+
'217.69.3.218',
|
|
71
|
+
'217.69.3.152',
|
|
72
|
+
'199.247.10.166',
|
|
73
|
+
'199.247.13.106',
|
|
74
|
+
'140.82.52.31',
|
|
75
|
+
'45.32.150.251',
|
|
76
|
+
// TeamPCP / CanisterWorm C2 (March 2026)
|
|
77
|
+
'icp0.io',
|
|
78
|
+
'raw.icp0.io',
|
|
79
|
+
'ic0.app',
|
|
80
|
+
'hackmoltrepeat.com',
|
|
81
|
+
'recv.hackmoltrepeat.com',
|
|
82
|
+
'scan.aquasecurtiy.org', // Trivy exfil C2 (typosquat of aquasecurity)
|
|
83
|
+
'api.telegram.org', // Telegram bot exfiltration
|
|
84
|
+
'checkmarx.zone',
|
|
85
|
+
'45.148.10.212',
|
|
86
|
+
'83.142.209.11'
|
|
87
|
+
];
|
|
88
|
+
|
|
89
|
+
// ── Regex patterns for wildcard exfil domains ──
|
|
90
|
+
// Matches subdomains of OAST/exfil infrastructure.
|
|
91
|
+
const KNOWN_EXFIL_PATTERNS = [
|
|
92
|
+
/\.oast\.(online|site|live|fun|me)$/i,
|
|
93
|
+
/\.oastify\.com$/i,
|
|
94
|
+
/\.burpcollaborator\.net$/i,
|
|
95
|
+
/\.interact\.sh$/i,
|
|
96
|
+
/\.webhook\.site$/i,
|
|
97
|
+
/\.pipedream\.net$/i,
|
|
98
|
+
/\.requestbin\.com$/i
|
|
99
|
+
];
|
|
100
|
+
|
|
101
|
+
// ── Suspicious tunnel/proxy domains (not blacklisted, but escalate unknown → suspicious) ──
|
|
102
|
+
const TUNNEL_DOMAINS = [
|
|
103
|
+
'ngrok.io',
|
|
104
|
+
'ngrok-free.app',
|
|
105
|
+
'serveo.net',
|
|
106
|
+
'localhost.run',
|
|
107
|
+
'loca.lt',
|
|
108
|
+
'trycloudflare.com'
|
|
109
|
+
];
|
|
110
|
+
|
|
111
|
+
/**
 * Read the user-supplied allowlist from MUADDIB_SANDBOX_NETWORK_ALLOWLIST.
 *
 * The variable holds comma-separated entries. Each entry is trimmed and
 * lower-cased; empty entries and entries of 256 or more characters are dropped.
 *
 * @returns {string[]} Normalized entries in their original order, or an empty
 *   array when the variable is unset or empty.
 */
function getCustomAllowlist() {
  const raw = process.env.MUADDIB_SANDBOX_NETWORK_ALLOWLIST;
  if (!raw) {
    return [];
  }

  const entries = [];
  for (const piece of raw.split(',')) {
    const entry = piece.trim().toLowerCase();
    if (entry.length > 0 && entry.length < 256) {
      entries.push(entry);
    }
  }
  return entries;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Classify a domain/IP contacted during sandbox install.
 *
 * Checks run in this order and the first hit wins:
 *   1. 'safe'        - built-in install domains plus the custom allowlist
 *   2. 'blacklisted' - known exfiltration/C2 hosts (list entries, then regex patterns)
 *   3. 'tunnel'      - tunnel/proxy services
 *   4. 'unknown'     - everything else, including non-string or empty input
 *
 * Because 'safe' is checked first, an allowlist entry takes precedence over
 * the blacklist. List entries match the host itself or any subdomain of it.
 * Matching is case-insensitive and ignores surrounding whitespace and
 * trailing dots.
 *
 * @param {string} domain - Domain name or IP address
 * @returns {'safe'|'blacklisted'|'tunnel'|'unknown'} classification
 */
function classifyDomain(domain) {
  if (!domain || typeof domain !== 'string') return 'unknown';

  // An absolute DNS name carries a trailing dot ("webhook.site.") yet names the
  // same host. Strip it so that form cannot slip past the lists below.
  const d = domain.toLowerCase().trim().replace(/\.+$/, '');
  if (d.length === 0) return 'unknown';

  // True when the host equals the entry or is a subdomain of it.
  const matchesHost = (entry) => d === entry || d.endsWith('.' + entry);

  // Safe domains: built-in list plus user allowlist (re-read from env on each call)
  const allSafe = SAFE_INSTALL_DOMAINS.concat(getCustomAllowlist());
  if (allSafe.some(matchesHost)) return 'safe';

  // Blacklisted hosts (exact or subdomain match)
  if (KNOWN_EXFIL_DOMAINS.some(matchesHost)) return 'blacklisted';

  // Blacklisted patterns (regex — catches subdomains like abc123.oast.online)
  if (KNOWN_EXFIL_PATTERNS.some((pat) => pat.test(d))) return 'blacklisted';

  // Tunnel/proxy services
  if (TUNNEL_DOMAINS.some(matchesHost)) return 'tunnel';

  return 'unknown';
}
|
|
154
|
+
|
|
155
|
+
module.exports = {
|
|
156
|
+
SAFE_INSTALL_DOMAINS,
|
|
157
|
+
KNOWN_EXFIL_DOMAINS,
|
|
158
|
+
KNOWN_EXFIL_PATTERNS,
|
|
159
|
+
TUNNEL_DOMAINS,
|
|
160
|
+
classifyDomain,
|
|
161
|
+
getCustomAllowlist
|
|
162
|
+
};
|
package/src/scoring.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
const { getRule } = require('./rules/index.js');
|
|
2
|
+
const { HIGH_CONFIDENCE_MALICE_TYPES } = require('./monitor/classify.js');
|
|
2
3
|
|
|
3
4
|
// ============================================
|
|
4
5
|
// SCORING CONSTANTS
|
|
@@ -873,7 +874,22 @@ function calculateRiskScore(deduped, intentResult) {
|
|
|
873
874
|
}
|
|
874
875
|
|
|
875
876
|
// 7. Final score = max file score + cross-file bonus + intent bonus + package-level score + lifecycle boost, capped at 100
|
|
876
|
-
|
|
877
|
+
let riskScore = Math.min(MAX_RISK_SCORE, maxFileScore + crossFileBonus + intentBonus + packageScore + lifecycleBoost);
|
|
878
|
+
|
|
879
|
+
// 7b. MT-1: Score ceiling for packages without lifecycle scripts.
|
|
880
|
+
// 56% of real malware uses install scripts. Packages without lifecycle that score high
|
|
881
|
+
// (minified bundles, frameworks) are quasi-exclusively false positives.
|
|
882
|
+
// Cap at 35 to prevent webhook triggers (threshold ~20-25 post-reputation).
|
|
883
|
+
// Bypass: HC malice types, compound detections — these are never benign regardless of lifecycle.
|
|
884
|
+
const _hasLifecycle = deduped.some(t =>
|
|
885
|
+
t.type === 'lifecycle_script' || t.type === 'lifecycle_file_exec' ||
|
|
886
|
+
t.type === 'lifecycle_shell_pipe' || t.type === 'lifecycle_remote_fetch'
|
|
887
|
+
);
|
|
888
|
+
const _hasHC = deduped.some(t => HIGH_CONFIDENCE_MALICE_TYPES.has(t.type));
|
|
889
|
+
const _hasCompound = deduped.some(t => t.compound === true);
|
|
890
|
+
if (!_hasLifecycle && !_hasHC && !_hasCompound) {
|
|
891
|
+
riskScore = Math.min(riskScore, 35);
|
|
892
|
+
}
|
|
877
893
|
|
|
878
894
|
// 8. Old global score for comparison (sum of ALL findings)
|
|
879
895
|
const globalRiskScore = computeGroupScore(deduped);
|