muaddib-scanner 1.6.5 → 1.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docker/Dockerfile +2 -2
- package/package.json +1 -1
- package/src/ioc/scraper.js +69 -49
- package/src/ioc/updater.js +62 -15
- package/src/utils.js +51 -1
package/docker/Dockerfile
CHANGED
|
@@ -10,9 +10,9 @@ RUN adduser -D sandboxuser
|
|
|
10
10
|
WORKDIR /sandbox
|
|
11
11
|
RUN chown sandboxuser:sandboxuser /sandbox
|
|
12
12
|
|
|
13
|
-
# Script d'analyse
|
|
13
|
+
# Script d'analyse (sed strips Windows CRLF line endings)
|
|
14
14
|
COPY sandbox-runner.sh /sandbox/
|
|
15
|
-
RUN chmod +x /sandbox/sandbox-runner.sh
|
|
15
|
+
RUN sed -i 's/\r$//' /sandbox/sandbox-runner.sh && chmod +x /sandbox/sandbox-runner.sh
|
|
16
16
|
|
|
17
17
|
USER sandboxuser
|
|
18
18
|
|
package/package.json
CHANGED
package/src/ioc/scraper.js
CHANGED
|
@@ -7,6 +7,7 @@ const IOC_FILE = path.join(__dirname, 'data/iocs.json');
|
|
|
7
7
|
const COMPACT_IOC_FILE = path.join(__dirname, 'data/iocs-compact.json');
|
|
8
8
|
const STATIC_IOCS_FILE = path.join(__dirname, 'data/static-iocs.json');
|
|
9
9
|
const { generateCompactIOCs } = require('./updater.js');
|
|
10
|
+
const { Spinner } = require('../utils.js');
|
|
10
11
|
|
|
11
12
|
// Allowed domains for redirections (SSRF security)
|
|
12
13
|
const ALLOWED_REDIRECT_DOMAINS = [
|
|
@@ -266,7 +267,7 @@ function fetchBuffer(url, redirectCount = 0) {
|
|
|
266
267
|
}
|
|
267
268
|
|
|
268
269
|
/**
|
|
269
|
-
* Download a large file with progress
|
|
270
|
+
* Download a large file with spinner progress (npm/ora style).
|
|
270
271
|
* Used for bulk zip downloads (OSV npm/PyPI ~50-100MB each).
|
|
271
272
|
*/
|
|
272
273
|
function fetchBufferWithProgress(url, label, redirectCount = 0) {
|
|
@@ -302,30 +303,27 @@ function fetchBufferWithProgress(url, label, redirectCount = 0) {
|
|
|
302
303
|
}
|
|
303
304
|
|
|
304
305
|
const totalSize = parseInt(res.headers['content-length'], 10) || 0;
|
|
306
|
+
const totalMb = totalSize > 0 ? Math.round(totalSize / 1024 / 1024) : null;
|
|
305
307
|
const chunks = [];
|
|
306
308
|
let received = 0;
|
|
307
|
-
|
|
309
|
+
|
|
310
|
+
const spinner = new Spinner();
|
|
311
|
+
spinner.start('Downloading ' + label + '...');
|
|
308
312
|
|
|
309
313
|
res.on('data', (chunk) => {
|
|
310
314
|
chunks.push(chunk);
|
|
311
315
|
received += chunk.length;
|
|
312
|
-
const
|
|
313
|
-
if (
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
const pct = Math.round((received / totalSize) * 100);
|
|
318
|
-
process.stdout.write(`\r[SCRAPER] ${label}: ${mb}MB / ${totalMb}MB (${pct}%)`);
|
|
319
|
-
} else {
|
|
320
|
-
process.stdout.write(`\r[SCRAPER] ${label}: ${mb}MB received`);
|
|
321
|
-
}
|
|
322
|
-
lastLog = now;
|
|
316
|
+
const mb = Math.round(received / 1024 / 1024);
|
|
317
|
+
if (totalMb) {
|
|
318
|
+
spinner.update('Downloading ' + label + '... ' + mb + 'MB/' + totalMb + 'MB');
|
|
319
|
+
} else {
|
|
320
|
+
spinner.update('Downloading ' + label + '... ' + mb + 'MB');
|
|
323
321
|
}
|
|
324
322
|
});
|
|
325
323
|
|
|
326
324
|
res.on('end', () => {
|
|
327
|
-
const mb = (received / 1024 / 1024)
|
|
328
|
-
|
|
325
|
+
const mb = Math.round(received / 1024 / 1024);
|
|
326
|
+
spinner.succeed('Downloaded ' + label + ' (' + mb + 'MB)');
|
|
329
327
|
resolve(Buffer.concat(chunks));
|
|
330
328
|
});
|
|
331
329
|
});
|
|
@@ -613,6 +611,12 @@ async function scrapeOSSFMaliciousPackages(knownIds) {
|
|
|
613
611
|
|
|
614
612
|
// Step 4: Batch fetch (50 concurrent, with small delay between batches for rate limit)
|
|
615
613
|
const BATCH_SIZE = 50;
|
|
614
|
+
let fetchSpinner = null;
|
|
615
|
+
if (toFetch.length > 0) {
|
|
616
|
+
fetchSpinner = new Spinner();
|
|
617
|
+
fetchSpinner.start('Fetching OSSF entries... 0/' + toFetch.length);
|
|
618
|
+
}
|
|
619
|
+
|
|
616
620
|
for (let i = 0; i < toFetch.length; i += BATCH_SIZE) {
|
|
617
621
|
const batch = toFetch.slice(i, i + BATCH_SIZE);
|
|
618
622
|
const results = await Promise.all(batch.map(function(entry) {
|
|
@@ -628,7 +632,7 @@ async function scrapeOSSFMaliciousPackages(knownIds) {
|
|
|
628
632
|
|
|
629
633
|
// Progress
|
|
630
634
|
const progress = Math.min(i + BATCH_SIZE, toFetch.length);
|
|
631
|
-
|
|
635
|
+
fetchSpinner.update('Fetching OSSF entries... ' + progress + '/' + toFetch.length);
|
|
632
636
|
|
|
633
637
|
// Small delay between batches to respect rate limits
|
|
634
638
|
if (i + BATCH_SIZE < toFetch.length) {
|
|
@@ -636,12 +640,12 @@ async function scrapeOSSFMaliciousPackages(knownIds) {
|
|
|
636
640
|
}
|
|
637
641
|
}
|
|
638
642
|
|
|
639
|
-
if (
|
|
643
|
+
if (fetchSpinner) {
|
|
644
|
+
fetchSpinner.succeed('Fetched OSSF entries: ' + packages.length + ' packages');
|
|
645
|
+
}
|
|
640
646
|
|
|
641
647
|
// Save tree SHA for next incremental run
|
|
642
648
|
try { fs.writeFileSync(shaFile, treeSha); } catch {}
|
|
643
|
-
|
|
644
|
-
console.log('[SCRAPER] ' + packages.length + ' packages extracted');
|
|
645
649
|
} catch (e) {
|
|
646
650
|
console.log('[SCRAPER] Error: ' + e.message);
|
|
647
651
|
}
|
|
@@ -654,7 +658,6 @@ async function scrapeOSSFMaliciousPackages(knownIds) {
|
|
|
654
658
|
// Bulk zip download — primary volume source
|
|
655
659
|
// ============================================
|
|
656
660
|
async function scrapeOSVDataDump() {
|
|
657
|
-
console.log('[SCRAPER] OSV.dev npm data dump (all.zip)...');
|
|
658
661
|
const packages = [];
|
|
659
662
|
const knownIds = new Set();
|
|
660
663
|
|
|
@@ -666,35 +669,42 @@ async function scrapeOSVDataDump() {
|
|
|
666
669
|
// Extract using adm-zip
|
|
667
670
|
const zip = new AdmZip(zipBuffer);
|
|
668
671
|
const entries = zip.getEntries();
|
|
672
|
+
const total = entries.length;
|
|
669
673
|
|
|
670
674
|
let malCount = 0;
|
|
671
675
|
let skippedCount = 0;
|
|
672
676
|
|
|
673
|
-
|
|
677
|
+
const spinner = new Spinner();
|
|
678
|
+
spinner.start('Parsing npm entries... 0/' + total);
|
|
679
|
+
|
|
680
|
+
for (let i = 0; i < entries.length; i++) {
|
|
681
|
+
const entry = entries[i];
|
|
674
682
|
const name = entry.entryName;
|
|
675
683
|
|
|
676
684
|
// Only process MAL-*.json files (malware), skip GHSA-*, CVE-*, PYSEC-* etc.
|
|
677
685
|
if (!name.startsWith('MAL-') || !name.endsWith('.json')) {
|
|
678
686
|
skippedCount++;
|
|
679
|
-
|
|
687
|
+
} else {
|
|
688
|
+
try {
|
|
689
|
+
const content = entry.getData().toString('utf8');
|
|
690
|
+
const vuln = JSON.parse(content);
|
|
691
|
+
const parsed = parseOSVEntry(vuln, 'osv-malicious');
|
|
692
|
+
packages.push(...parsed);
|
|
693
|
+
|
|
694
|
+
// Track known IDs so OSSF can skip them
|
|
695
|
+
knownIds.add(vuln.id || path.basename(name, '.json'));
|
|
696
|
+
malCount++;
|
|
697
|
+
} catch {
|
|
698
|
+
// Skip unparseable entries
|
|
699
|
+
}
|
|
680
700
|
}
|
|
681
701
|
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
const vuln = JSON.parse(content);
|
|
685
|
-
const parsed = parseOSVEntry(vuln, 'osv-malicious');
|
|
686
|
-
packages.push(...parsed);
|
|
687
|
-
|
|
688
|
-
// Track known IDs so OSSF can skip them
|
|
689
|
-
knownIds.add(vuln.id || path.basename(name, '.json'));
|
|
690
|
-
malCount++;
|
|
691
|
-
} catch {
|
|
692
|
-
// Skip unparseable entries
|
|
702
|
+
if ((i + 1) % 1000 === 0 || i === entries.length - 1) {
|
|
703
|
+
spinner.update('Parsing npm entries... ' + (i + 1) + '/' + total);
|
|
693
704
|
}
|
|
694
705
|
}
|
|
695
706
|
|
|
696
|
-
|
|
697
|
-
console.log('[SCRAPER] ' + packages.length + ' packages extracted');
|
|
707
|
+
spinner.succeed('Parsed npm entries: ' + malCount + ' MAL-* (' + skippedCount + ' skipped) \u2192 ' + packages.length + ' packages');
|
|
698
708
|
} catch (e) {
|
|
699
709
|
console.log('[SCRAPER] Error: ' + e.message);
|
|
700
710
|
}
|
|
@@ -707,7 +717,6 @@ async function scrapeOSVDataDump() {
|
|
|
707
717
|
// Bulk zip download — PyPI malicious packages
|
|
708
718
|
// ============================================
|
|
709
719
|
async function scrapeOSVPyPIDataDump() {
|
|
710
|
-
console.log('[SCRAPER] OSV.dev PyPI data dump (all.zip)...');
|
|
711
720
|
const packages = [];
|
|
712
721
|
|
|
713
722
|
try {
|
|
@@ -716,32 +725,39 @@ async function scrapeOSVPyPIDataDump() {
|
|
|
716
725
|
|
|
717
726
|
const zip = new AdmZip(zipBuffer);
|
|
718
727
|
const entries = zip.getEntries();
|
|
728
|
+
const total = entries.length;
|
|
719
729
|
|
|
720
730
|
let malCount = 0;
|
|
721
731
|
let skippedCount = 0;
|
|
722
732
|
|
|
723
|
-
|
|
733
|
+
const spinner = new Spinner();
|
|
734
|
+
spinner.start('Parsing PyPI entries... 0/' + total);
|
|
735
|
+
|
|
736
|
+
for (let i = 0; i < entries.length; i++) {
|
|
737
|
+
const entry = entries[i];
|
|
724
738
|
const name = entry.entryName;
|
|
725
739
|
|
|
726
740
|
// Only process MAL-*.json files (malware)
|
|
727
741
|
if (!name.startsWith('MAL-') || !name.endsWith('.json')) {
|
|
728
742
|
skippedCount++;
|
|
729
|
-
|
|
743
|
+
} else {
|
|
744
|
+
try {
|
|
745
|
+
const content = entry.getData().toString('utf8');
|
|
746
|
+
const vuln = JSON.parse(content);
|
|
747
|
+
const parsed = parseOSVEntry(vuln, 'osv-malicious-pypi', 'PyPI');
|
|
748
|
+
packages.push(...parsed);
|
|
749
|
+
malCount++;
|
|
750
|
+
} catch {
|
|
751
|
+
// Skip unparseable entries
|
|
752
|
+
}
|
|
730
753
|
}
|
|
731
754
|
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
const vuln = JSON.parse(content);
|
|
735
|
-
const parsed = parseOSVEntry(vuln, 'osv-malicious-pypi', 'PyPI');
|
|
736
|
-
packages.push(...parsed);
|
|
737
|
-
malCount++;
|
|
738
|
-
} catch {
|
|
739
|
-
// Skip unparseable entries
|
|
755
|
+
if ((i + 1) % 1000 === 0 || i === entries.length - 1) {
|
|
756
|
+
spinner.update('Parsing PyPI entries... ' + (i + 1) + '/' + total);
|
|
740
757
|
}
|
|
741
758
|
}
|
|
742
759
|
|
|
743
|
-
|
|
744
|
-
console.log('[SCRAPER] ' + packages.length + ' PyPI packages extracted');
|
|
760
|
+
spinner.succeed('Parsed PyPI entries: ' + malCount + ' MAL-* (' + skippedCount + ' skipped) \u2192 ' + packages.length + ' packages');
|
|
745
761
|
} catch (e) {
|
|
746
762
|
console.log('[SCRAPER] Error: ' + e.message);
|
|
747
763
|
}
|
|
@@ -1106,11 +1122,15 @@ async function runScraper() {
|
|
|
1106
1122
|
];
|
|
1107
1123
|
|
|
1108
1124
|
// Save enriched (full) IOCs
|
|
1125
|
+
const saveSpinner = new Spinner();
|
|
1126
|
+
saveSpinner.start('Saving IOCs...');
|
|
1109
1127
|
fs.writeFileSync(IOC_FILE, JSON.stringify(existingIOCs, null, 2));
|
|
1110
1128
|
|
|
1111
1129
|
// Save compact IOCs (lightweight, shipped in npm)
|
|
1130
|
+
saveSpinner.update('Generating compact IOCs...');
|
|
1112
1131
|
const compactIOCs = generateCompactIOCs(existingIOCs);
|
|
1113
1132
|
fs.writeFileSync(COMPACT_IOC_FILE, JSON.stringify(compactIOCs));
|
|
1133
|
+
saveSpinner.succeed('Saved IOCs + compact format');
|
|
1114
1134
|
|
|
1115
1135
|
// Display summary
|
|
1116
1136
|
console.log('\n' + '='.repeat(60));
|
|
@@ -1163,7 +1183,7 @@ async function runScraper() {
|
|
|
1163
1183
|
};
|
|
1164
1184
|
}
|
|
1165
1185
|
|
|
1166
|
-
module.exports = { runScraper };
|
|
1186
|
+
module.exports = { runScraper, scrapeShaiHuludDetector, scrapeDatadogIOCs };
|
|
1167
1187
|
|
|
1168
1188
|
// Direct execution if called as CLI
|
|
1169
1189
|
if (require.main === module) {
|
package/src/ioc/updater.js
CHANGED
|
@@ -8,30 +8,77 @@ const LOCAL_COMPACT_FILE = path.join(__dirname, 'data/iocs-compact.json');
|
|
|
8
8
|
const { loadYAMLIOCs } = require('./yaml-loader.js');
|
|
9
9
|
|
|
10
10
|
async function updateIOCs() {
|
|
11
|
-
console.log('[MUADDIB] Updating IOCs
|
|
11
|
+
console.log('[MUADDIB] Updating IOCs (fast mode)...\n');
|
|
12
12
|
|
|
13
|
-
//
|
|
14
|
-
|
|
15
|
-
const { runScraper } = require('./scraper.js');
|
|
16
|
-
const result = await runScraper();
|
|
13
|
+
// Step 1: Load compact IOCs shipped in package (~225K IOCs)
|
|
14
|
+
let baseIOCs = { packages: [], pypi_packages: [], hashes: [], markers: [], files: [] };
|
|
17
15
|
|
|
18
|
-
|
|
16
|
+
if (fs.existsSync(LOCAL_COMPACT_FILE)) {
|
|
17
|
+
try {
|
|
18
|
+
const compactData = JSON.parse(fs.readFileSync(LOCAL_COMPACT_FILE, 'utf8'));
|
|
19
|
+
baseIOCs = expandCompactIOCs(compactData);
|
|
20
|
+
console.log('[1/4] Compact IOCs: ' + baseIOCs.packages.length + ' npm + ' + (baseIOCs.pypi_packages || []).length + ' PyPI');
|
|
21
|
+
} catch (e) {
|
|
22
|
+
console.log('[1/4] Error loading compact IOCs: ' + e.message);
|
|
23
|
+
}
|
|
24
|
+
} else {
|
|
25
|
+
console.log('[1/4] Compact IOCs not found (run "muaddib scrape" first for full data)');
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
// Step 2: Load YAML IOCs (builtin.yaml, packages.yaml, hashes.yaml)
|
|
29
|
+
const yamlIOCs = loadYAMLIOCs();
|
|
30
|
+
const yamlStandard = {
|
|
31
|
+
packages: yamlIOCs.packages || [],
|
|
32
|
+
pypi_packages: [],
|
|
33
|
+
hashes: (yamlIOCs.hashes || []).map(function(h) { return h.sha256; }),
|
|
34
|
+
markers: (yamlIOCs.markers || []).map(function(m) { return m.pattern; }),
|
|
35
|
+
files: (yamlIOCs.files || []).map(function(f) { return f.name; })
|
|
36
|
+
};
|
|
37
|
+
mergeIOCs(baseIOCs, yamlStandard);
|
|
38
|
+
console.log('[2/4] YAML IOCs: ' + yamlStandard.packages.length + ' packages, ' + yamlStandard.hashes.length + ' hashes');
|
|
39
|
+
|
|
40
|
+
// Step 3: Download additional IOCs from GitHub (GenSecAI + DataDog — small files, fast)
|
|
41
|
+
const { scrapeShaiHuludDetector, scrapeDatadogIOCs } = require('./scraper.js');
|
|
42
|
+
console.log('[3/4] Downloading GitHub IOCs...');
|
|
43
|
+
|
|
44
|
+
const [shaiHulud, datadog] = await Promise.all([
|
|
45
|
+
scrapeShaiHuludDetector(),
|
|
46
|
+
scrapeDatadogIOCs()
|
|
47
|
+
]);
|
|
48
|
+
|
|
49
|
+
const githubIOCs = {
|
|
50
|
+
packages: [].concat(shaiHulud.packages, datadog.packages),
|
|
51
|
+
pypi_packages: [],
|
|
52
|
+
hashes: [].concat(shaiHulud.hashes || [], datadog.hashes || []),
|
|
53
|
+
markers: [],
|
|
54
|
+
files: []
|
|
55
|
+
};
|
|
56
|
+
mergeIOCs(baseIOCs, githubIOCs);
|
|
57
|
+
console.log(' +' + shaiHulud.packages.length + ' GenSecAI, +' + datadog.packages.length + ' DataDog');
|
|
58
|
+
|
|
59
|
+
// Step 4: Merge and save to cache
|
|
19
60
|
if (!fs.existsSync(CACHE_PATH)) {
|
|
20
61
|
fs.mkdirSync(CACHE_PATH, { recursive: true });
|
|
21
62
|
}
|
|
22
63
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
}
|
|
64
|
+
baseIOCs.updated = new Date().toISOString();
|
|
65
|
+
baseIOCs.sources = ['compact', 'yaml', 'shai-hulud-detector', 'datadog'];
|
|
26
66
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
67
|
+
// Clean internal dedup sets before serialization
|
|
68
|
+
delete baseIOCs._pkgKeys;
|
|
69
|
+
delete baseIOCs._pypiPkgKeys;
|
|
70
|
+
delete baseIOCs._hashSet;
|
|
71
|
+
delete baseIOCs._markerSet;
|
|
72
|
+
delete baseIOCs._fileSet;
|
|
73
|
+
|
|
74
|
+
fs.writeFileSync(CACHE_IOC_FILE, JSON.stringify(baseIOCs));
|
|
31
75
|
|
|
32
|
-
|
|
76
|
+
const totalNpm = baseIOCs.packages.length;
|
|
77
|
+
const totalPyPI = (baseIOCs.pypi_packages || []).length;
|
|
78
|
+
console.log('[4/4] Saved to cache: ' + CACHE_IOC_FILE);
|
|
79
|
+
console.log('\n[OK] IOCs updated: ' + totalNpm + ' npm + ' + totalPyPI + ' PyPI packages');
|
|
33
80
|
|
|
34
|
-
return
|
|
81
|
+
return { total: totalNpm, totalPyPI: totalPyPI };
|
|
35
82
|
}
|
|
36
83
|
|
|
37
84
|
/**
|
package/src/utils.js
CHANGED
|
@@ -156,6 +156,55 @@ function getCallName(node) {
|
|
|
156
156
|
return '';
|
|
157
157
|
}
|
|
158
158
|
|
|
159
|
+
/**
 * Minimal CLI spinner (npm/ora style, no external deps).
 *
 * On an interactive terminal (process.stdout.isTTY) a frame is drawn
 * immediately and then every 100ms via setInterval, using `\r` plus the
 * ANSI "erase line" escape to rewrite the current line in place.
 *
 * When stdout is not a TTY (pipe, file, CI log) no frames and no escape
 * sequences are written: start() prints its text once and succeed()/fail()
 * print one plain final line, so captured logs stay readable.
 *
 * Frames are drawn from the timer, so text passed to update() becomes
 * visible on the next frame rather than synchronously.
 */
class Spinner {
  constructor() {
    this._frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
    this._index = 0;
    this._interval = null;
    this._text = '';
  }

  /**
   * Begin showing `text`. Calling start() on a running spinner restarts it.
   * @param {string} text - Message displayed next to the spinner frame.
   * @returns {Spinner} this, for chaining.
   */
  start(text) {
    // Clear any previous timer first; otherwise a second start() would
    // orphan it and it would keep redrawing with no way to stop it.
    this._stop();
    this._text = text;
    this._index = 0;

    if (!this._isInteractive()) {
      process.stdout.write(text + '\n');
      return this;
    }

    this._render();
    this._interval = setInterval(() => this._render(), 100);
    // A spinner that is never stopped (e.g. the caller hit an error path)
    // must not be the only thing keeping the process alive.
    if (typeof this._interval.unref === 'function') {
      this._interval.unref();
    }
    return this;
  }

  /**
   * Replace the message shown on subsequent frames.
   * @param {string} text - New message.
   * @returns {Spinner} this, for chaining.
   */
  update(text) {
    this._text = text;
    return this;
  }

  /**
   * Stop the spinner and print a success line (green check mark on a TTY).
   * @param {string} text - Final message.
   * @returns {Spinner} this, for chaining.
   */
  succeed(text) {
    this._finish('\u2713', '\x1b[32m', text);
    return this;
  }

  /**
   * Stop the spinner and print a failure line (red cross on a TTY).
   * @param {string} text - Final message.
   * @returns {Spinner} this, for chaining.
   */
  fail(text) {
    this._finish('\u2717', '\x1b[31m', text);
    return this;
  }

  // True when stdout is an interactive terminal that can handle in-place redraws.
  _isInteractive() {
    return process.stdout.isTTY === true;
  }

  // Stop the timer and write the final status line.
  _finish(symbol, color, text) {
    this._stop();
    if (this._isInteractive()) {
      process.stdout.write('\r\x1b[K' + color + symbol + '\x1b[0m ' + text + '\n');
    } else {
      process.stdout.write(symbol + ' ' + text + '\n');
    }
  }

  // Draw the current frame over the current terminal line and advance.
  _render() {
    const frame = this._frames[this._index % this._frames.length];
    process.stdout.write('\r\x1b[K' + frame + ' ' + this._text);
    this._index++;
  }

  // Cancel the frame timer if one is running.
  _stop() {
    if (this._interval) {
      clearInterval(this._interval);
      this._interval = null;
    }
  }
}
|
|
207
|
+
|
|
159
208
|
module.exports = {
|
|
160
209
|
EXCLUDED_DIRS,
|
|
161
210
|
DEV_PATTERNS,
|
|
@@ -163,5 +212,6 @@ module.exports = {
|
|
|
163
212
|
findFiles,
|
|
164
213
|
findJsFiles,
|
|
165
214
|
escapeHtml,
|
|
166
|
-
getCallName
|
|
215
|
+
getCallName,
|
|
216
|
+
Spinner
|
|
167
217
|
};
|