muaddib-scanner 1.6.5 → 1.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/docker/Dockerfile CHANGED
@@ -10,9 +10,9 @@ RUN adduser -D sandboxuser
10
10
  WORKDIR /sandbox
11
11
  RUN chown sandboxuser:sandboxuser /sandbox
12
12
 
13
- # Script d'analyse
13
+ # Script d'analyse (sed strips Windows CRLF line endings)
14
14
  COPY sandbox-runner.sh /sandbox/
15
- RUN chmod +x /sandbox/sandbox-runner.sh
15
+ RUN sed -i 's/\r$//' /sandbox/sandbox-runner.sh && chmod +x /sandbox/sandbox-runner.sh
16
16
 
17
17
  USER sandboxuser
18
18
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "1.6.5",
3
+ "version": "1.6.7",
4
4
  "description": "Supply-chain threat detection & response for npm",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -7,6 +7,7 @@ const IOC_FILE = path.join(__dirname, 'data/iocs.json');
7
7
  const COMPACT_IOC_FILE = path.join(__dirname, 'data/iocs-compact.json');
8
8
  const STATIC_IOCS_FILE = path.join(__dirname, 'data/static-iocs.json');
9
9
  const { generateCompactIOCs } = require('./updater.js');
10
+ const { Spinner } = require('../utils.js');
10
11
 
11
12
  // Allowed domains for redirections (SSRF security)
12
13
  const ALLOWED_REDIRECT_DOMAINS = [
@@ -266,7 +267,7 @@ function fetchBuffer(url, redirectCount = 0) {
266
267
  }
267
268
 
268
269
  /**
269
- * Download a large file with progress feedback (MB received every 5s).
270
+ * Download a large file with spinner progress (npm/ora style).
270
271
  * Used for bulk zip downloads (OSV npm/PyPI ~50-100MB each).
271
272
  */
272
273
  function fetchBufferWithProgress(url, label, redirectCount = 0) {
@@ -302,30 +303,27 @@ function fetchBufferWithProgress(url, label, redirectCount = 0) {
302
303
  }
303
304
 
304
305
  const totalSize = parseInt(res.headers['content-length'], 10) || 0;
306
+ const totalMb = totalSize > 0 ? Math.round(totalSize / 1024 / 1024) : null;
305
307
  const chunks = [];
306
308
  let received = 0;
307
- let lastLog = Date.now();
309
+
310
+ const spinner = new Spinner();
311
+ spinner.start('Downloading ' + label + '...');
308
312
 
309
313
  res.on('data', (chunk) => {
310
314
  chunks.push(chunk);
311
315
  received += chunk.length;
312
- const now = Date.now();
313
- if (now - lastLog >= 5000) {
314
- const mb = (received / 1024 / 1024).toFixed(1);
315
- if (totalSize > 0) {
316
- const totalMb = (totalSize / 1024 / 1024).toFixed(1);
317
- const pct = Math.round((received / totalSize) * 100);
318
- process.stdout.write(`\r[SCRAPER] ${label}: ${mb}MB / ${totalMb}MB (${pct}%)`);
319
- } else {
320
- process.stdout.write(`\r[SCRAPER] ${label}: ${mb}MB received`);
321
- }
322
- lastLog = now;
316
+ const mb = Math.round(received / 1024 / 1024);
317
+ if (totalMb) {
318
+ spinner.update('Downloading ' + label + '... ' + mb + 'MB/' + totalMb + 'MB');
319
+ } else {
320
+ spinner.update('Downloading ' + label + '... ' + mb + 'MB');
323
321
  }
324
322
  });
325
323
 
326
324
  res.on('end', () => {
327
- const mb = (received / 1024 / 1024).toFixed(1);
328
- process.stdout.write(`\r[SCRAPER] ${label}: ${mb}MB done\n`);
325
+ const mb = Math.round(received / 1024 / 1024);
326
+ spinner.succeed('Downloaded ' + label + ' (' + mb + 'MB)');
329
327
  resolve(Buffer.concat(chunks));
330
328
  });
331
329
  });
@@ -613,6 +611,12 @@ async function scrapeOSSFMaliciousPackages(knownIds) {
613
611
 
614
612
  // Step 4: Batch fetch (50 concurrent, with small delay between batches for rate limit)
615
613
  const BATCH_SIZE = 50;
614
+ let fetchSpinner = null;
615
+ if (toFetch.length > 0) {
616
+ fetchSpinner = new Spinner();
617
+ fetchSpinner.start('Fetching OSSF entries... 0/' + toFetch.length);
618
+ }
619
+
616
620
  for (let i = 0; i < toFetch.length; i += BATCH_SIZE) {
617
621
  const batch = toFetch.slice(i, i + BATCH_SIZE);
618
622
  const results = await Promise.all(batch.map(function(entry) {
@@ -628,7 +632,7 @@ async function scrapeOSSFMaliciousPackages(knownIds) {
628
632
 
629
633
  // Progress
630
634
  const progress = Math.min(i + BATCH_SIZE, toFetch.length);
631
- process.stdout.write('\r[SCRAPER] Fetched ' + progress + '/' + toFetch.length);
635
+ fetchSpinner.update('Fetching OSSF entries... ' + progress + '/' + toFetch.length);
632
636
 
633
637
  // Small delay between batches to respect rate limits
634
638
  if (i + BATCH_SIZE < toFetch.length) {
@@ -636,12 +640,12 @@ async function scrapeOSSFMaliciousPackages(knownIds) {
636
640
  }
637
641
  }
638
642
 
639
- if (toFetch.length > 0) console.log('');
643
+ if (fetchSpinner) {
644
+ fetchSpinner.succeed('Fetched OSSF entries: ' + packages.length + ' packages');
645
+ }
640
646
 
641
647
  // Save tree SHA for next incremental run
642
648
  try { fs.writeFileSync(shaFile, treeSha); } catch {}
643
-
644
- console.log('[SCRAPER] ' + packages.length + ' packages extracted');
645
649
  } catch (e) {
646
650
  console.log('[SCRAPER] Error: ' + e.message);
647
651
  }
@@ -654,7 +658,6 @@ async function scrapeOSSFMaliciousPackages(knownIds) {
654
658
  // Bulk zip download — primary volume source
655
659
  // ============================================
656
660
  async function scrapeOSVDataDump() {
657
- console.log('[SCRAPER] OSV.dev npm data dump (all.zip)...');
658
661
  const packages = [];
659
662
  const knownIds = new Set();
660
663
 
@@ -666,35 +669,42 @@ async function scrapeOSVDataDump() {
666
669
  // Extract using adm-zip
667
670
  const zip = new AdmZip(zipBuffer);
668
671
  const entries = zip.getEntries();
672
+ const total = entries.length;
669
673
 
670
674
  let malCount = 0;
671
675
  let skippedCount = 0;
672
676
 
673
- for (const entry of entries) {
677
+ const spinner = new Spinner();
678
+ spinner.start('Parsing npm entries... 0/' + total);
679
+
680
+ for (let i = 0; i < entries.length; i++) {
681
+ const entry = entries[i];
674
682
  const name = entry.entryName;
675
683
 
676
684
  // Only process MAL-*.json files (malware), skip GHSA-*, CVE-*, PYSEC-* etc.
677
685
  if (!name.startsWith('MAL-') || !name.endsWith('.json')) {
678
686
  skippedCount++;
679
- continue;
687
+ } else {
688
+ try {
689
+ const content = entry.getData().toString('utf8');
690
+ const vuln = JSON.parse(content);
691
+ const parsed = parseOSVEntry(vuln, 'osv-malicious');
692
+ packages.push(...parsed);
693
+
694
+ // Track known IDs so OSSF can skip them
695
+ knownIds.add(vuln.id || path.basename(name, '.json'));
696
+ malCount++;
697
+ } catch {
698
+ // Skip unparseable entries
699
+ }
680
700
  }
681
701
 
682
- try {
683
- const content = entry.getData().toString('utf8');
684
- const vuln = JSON.parse(content);
685
- const parsed = parseOSVEntry(vuln, 'osv-malicious');
686
- packages.push(...parsed);
687
-
688
- // Track known IDs so OSSF can skip them
689
- knownIds.add(vuln.id || path.basename(name, '.json'));
690
- malCount++;
691
- } catch {
692
- // Skip unparseable entries
702
+ if ((i + 1) % 1000 === 0 || i === entries.length - 1) {
703
+ spinner.update('Parsing npm entries... ' + (i + 1) + '/' + total);
693
704
  }
694
705
  }
695
706
 
696
- console.log('[SCRAPER] Processed ' + malCount + ' MAL-* entries (' + skippedCount + ' non-malware skipped)');
697
- console.log('[SCRAPER] ' + packages.length + ' packages extracted');
707
+ spinner.succeed('Parsed npm entries: ' + malCount + ' MAL-* (' + skippedCount + ' skipped) \u2192 ' + packages.length + ' packages');
698
708
  } catch (e) {
699
709
  console.log('[SCRAPER] Error: ' + e.message);
700
710
  }
@@ -707,7 +717,6 @@ async function scrapeOSVDataDump() {
707
717
  // Bulk zip download — PyPI malicious packages
708
718
  // ============================================
709
719
  async function scrapeOSVPyPIDataDump() {
710
- console.log('[SCRAPER] OSV.dev PyPI data dump (all.zip)...');
711
720
  const packages = [];
712
721
 
713
722
  try {
@@ -716,32 +725,39 @@ async function scrapeOSVPyPIDataDump() {
716
725
 
717
726
  const zip = new AdmZip(zipBuffer);
718
727
  const entries = zip.getEntries();
728
+ const total = entries.length;
719
729
 
720
730
  let malCount = 0;
721
731
  let skippedCount = 0;
722
732
 
723
- for (const entry of entries) {
733
+ const spinner = new Spinner();
734
+ spinner.start('Parsing PyPI entries... 0/' + total);
735
+
736
+ for (let i = 0; i < entries.length; i++) {
737
+ const entry = entries[i];
724
738
  const name = entry.entryName;
725
739
 
726
740
  // Only process MAL-*.json files (malware)
727
741
  if (!name.startsWith('MAL-') || !name.endsWith('.json')) {
728
742
  skippedCount++;
729
- continue;
743
+ } else {
744
+ try {
745
+ const content = entry.getData().toString('utf8');
746
+ const vuln = JSON.parse(content);
747
+ const parsed = parseOSVEntry(vuln, 'osv-malicious-pypi', 'PyPI');
748
+ packages.push(...parsed);
749
+ malCount++;
750
+ } catch {
751
+ // Skip unparseable entries
752
+ }
730
753
  }
731
754
 
732
- try {
733
- const content = entry.getData().toString('utf8');
734
- const vuln = JSON.parse(content);
735
- const parsed = parseOSVEntry(vuln, 'osv-malicious-pypi', 'PyPI');
736
- packages.push(...parsed);
737
- malCount++;
738
- } catch {
739
- // Skip unparseable entries
755
+ if ((i + 1) % 1000 === 0 || i === entries.length - 1) {
756
+ spinner.update('Parsing PyPI entries... ' + (i + 1) + '/' + total);
740
757
  }
741
758
  }
742
759
 
743
- console.log('[SCRAPER] Processed ' + malCount + ' PyPI MAL-* entries (' + skippedCount + ' non-malware skipped)');
744
- console.log('[SCRAPER] ' + packages.length + ' PyPI packages extracted');
760
+ spinner.succeed('Parsed PyPI entries: ' + malCount + ' MAL-* (' + skippedCount + ' skipped) \u2192 ' + packages.length + ' packages');
745
761
  } catch (e) {
746
762
  console.log('[SCRAPER] Error: ' + e.message);
747
763
  }
@@ -1106,11 +1122,15 @@ async function runScraper() {
1106
1122
  ];
1107
1123
 
1108
1124
  // Save enriched (full) IOCs
1125
+ const saveSpinner = new Spinner();
1126
+ saveSpinner.start('Saving IOCs...');
1109
1127
  fs.writeFileSync(IOC_FILE, JSON.stringify(existingIOCs, null, 2));
1110
1128
 
1111
1129
  // Save compact IOCs (lightweight, shipped in npm)
1130
+ saveSpinner.update('Generating compact IOCs...');
1112
1131
  const compactIOCs = generateCompactIOCs(existingIOCs);
1113
1132
  fs.writeFileSync(COMPACT_IOC_FILE, JSON.stringify(compactIOCs));
1133
+ saveSpinner.succeed('Saved IOCs + compact format');
1114
1134
 
1115
1135
  // Display summary
1116
1136
  console.log('\n' + '='.repeat(60));
@@ -1163,7 +1183,7 @@ async function runScraper() {
1163
1183
  };
1164
1184
  }
1165
1185
 
1166
- module.exports = { runScraper };
1186
+ module.exports = { runScraper, scrapeShaiHuludDetector, scrapeDatadogIOCs };
1167
1187
 
1168
1188
  // Direct execution if called as CLI
1169
1189
  if (require.main === module) {
@@ -8,30 +8,77 @@ const LOCAL_COMPACT_FILE = path.join(__dirname, 'data/iocs-compact.json');
8
8
  const { loadYAMLIOCs } = require('./yaml-loader.js');
9
9
 
10
10
  async function updateIOCs() {
11
- console.log('[MUADDIB] Updating IOCs via live scrape (OSV + OSSF + all sources)...\n');
11
+ console.log('[MUADDIB] Updating IOCs (fast mode)...\n');
12
12
 
13
- // Run a full scrape this downloads directly from OSV/OSSF/etc.
14
- // and writes both iocs.json and iocs-compact.json
15
- const { runScraper } = require('./scraper.js');
16
- const result = await runScraper();
13
+ // Step 1: Load compact IOCs shipped in package (~225K IOCs)
14
+ let baseIOCs = { packages: [], pypi_packages: [], hashes: [], markers: [], files: [] };
17
15
 
18
- // Also copy results to cache for loadCachedIOCs
16
+ if (fs.existsSync(LOCAL_COMPACT_FILE)) {
17
+ try {
18
+ const compactData = JSON.parse(fs.readFileSync(LOCAL_COMPACT_FILE, 'utf8'));
19
+ baseIOCs = expandCompactIOCs(compactData);
20
+ console.log('[1/4] Compact IOCs: ' + baseIOCs.packages.length + ' npm + ' + (baseIOCs.pypi_packages || []).length + ' PyPI');
21
+ } catch (e) {
22
+ console.log('[1/4] Error loading compact IOCs: ' + e.message);
23
+ }
24
+ } else {
25
+ console.log('[1/4] Compact IOCs not found (run "muaddib scrape" first for full data)');
26
+ }
27
+
28
+ // Step 2: Load YAML IOCs (builtin.yaml, packages.yaml, hashes.yaml)
29
+ const yamlIOCs = loadYAMLIOCs();
30
+ const yamlStandard = {
31
+ packages: yamlIOCs.packages || [],
32
+ pypi_packages: [],
33
+ hashes: (yamlIOCs.hashes || []).map(function(h) { return h.sha256; }),
34
+ markers: (yamlIOCs.markers || []).map(function(m) { return m.pattern; }),
35
+ files: (yamlIOCs.files || []).map(function(f) { return f.name; })
36
+ };
37
+ mergeIOCs(baseIOCs, yamlStandard);
38
+ console.log('[2/4] YAML IOCs: ' + yamlStandard.packages.length + ' packages, ' + yamlStandard.hashes.length + ' hashes');
39
+
40
+ // Step 3: Download additional IOCs from GitHub (GenSecAI + DataDog — small files, fast)
41
+ const { scrapeShaiHuludDetector, scrapeDatadogIOCs } = require('./scraper.js');
42
+ console.log('[3/4] Downloading GitHub IOCs...');
43
+
44
+ const [shaiHulud, datadog] = await Promise.all([
45
+ scrapeShaiHuludDetector(),
46
+ scrapeDatadogIOCs()
47
+ ]);
48
+
49
+ const githubIOCs = {
50
+ packages: [].concat(shaiHulud.packages, datadog.packages),
51
+ pypi_packages: [],
52
+ hashes: [].concat(shaiHulud.hashes || [], datadog.hashes || []),
53
+ markers: [],
54
+ files: []
55
+ };
56
+ mergeIOCs(baseIOCs, githubIOCs);
57
+ console.log(' +' + shaiHulud.packages.length + ' GenSecAI, +' + datadog.packages.length + ' DataDog');
58
+
59
+ // Step 4: Merge and save to cache
19
60
  if (!fs.existsSync(CACHE_PATH)) {
20
61
  fs.mkdirSync(CACHE_PATH, { recursive: true });
21
62
  }
22
63
 
23
- if (fs.existsSync(LOCAL_IOC_FILE)) {
24
- fs.copyFileSync(LOCAL_IOC_FILE, CACHE_IOC_FILE);
25
- }
64
+ baseIOCs.updated = new Date().toISOString();
65
+ baseIOCs.sources = ['compact', 'yaml', 'shai-hulud-detector', 'datadog'];
26
66
 
27
- const compactCachePath = path.join(CACHE_PATH, 'iocs-compact.json');
28
- if (fs.existsSync(LOCAL_COMPACT_FILE)) {
29
- fs.copyFileSync(LOCAL_COMPACT_FILE, compactCachePath);
30
- }
67
+ // Clean internal dedup sets before serialization
68
+ delete baseIOCs._pkgKeys;
69
+ delete baseIOCs._pypiPkgKeys;
70
+ delete baseIOCs._hashSet;
71
+ delete baseIOCs._markerSet;
72
+ delete baseIOCs._fileSet;
73
+
74
+ fs.writeFileSync(CACHE_IOC_FILE, JSON.stringify(baseIOCs));
31
75
 
32
- console.log('\n[OK] IOCs updated: ' + result.total + ' npm + ' + result.totalPyPI + ' PyPI packages');
76
+ const totalNpm = baseIOCs.packages.length;
77
+ const totalPyPI = (baseIOCs.pypi_packages || []).length;
78
+ console.log('[4/4] Saved to cache: ' + CACHE_IOC_FILE);
79
+ console.log('\n[OK] IOCs updated: ' + totalNpm + ' npm + ' + totalPyPI + ' PyPI packages');
33
80
 
34
- return result;
81
+ return { total: totalNpm, totalPyPI: totalPyPI };
35
82
  }
36
83
 
37
84
  /**
package/src/utils.js CHANGED
@@ -156,6 +156,55 @@ function getCallName(node) {
156
156
  return '';
157
157
  }
158
158
 
159
+ /**
160
+ * Minimal CLI spinner (npm/ora style, no external deps).
161
+ * Frames rotate every 100ms via setInterval.
162
+ * Uses ANSI escapes to clear/rewrite the current line.
163
+ */
164
+ class Spinner {
165
+ constructor() {
166
+ this._frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
167
+ this._index = 0;
168
+ this._interval = null;
169
+ this._text = '';
170
+ }
171
+
172
+ start(text) {
173
+ this._text = text;
174
+ this._index = 0;
175
+ this._render();
176
+ this._interval = setInterval(() => this._render(), 100);
177
+ return this;
178
+ }
179
+
180
+ update(text) {
181
+ this._text = text;
182
+ }
183
+
184
+ succeed(text) {
185
+ this._stop();
186
+ process.stdout.write('\r\x1b[K\x1b[32m\u2713\x1b[0m ' + text + '\n');
187
+ }
188
+
189
+ fail(text) {
190
+ this._stop();
191
+ process.stdout.write('\r\x1b[K\x1b[31m\u2717\x1b[0m ' + text + '\n');
192
+ }
193
+
194
+ _render() {
195
+ const frame = this._frames[this._index % this._frames.length];
196
+ process.stdout.write('\r\x1b[K' + frame + ' ' + this._text);
197
+ this._index++;
198
+ }
199
+
200
+ _stop() {
201
+ if (this._interval) {
202
+ clearInterval(this._interval);
203
+ this._interval = null;
204
+ }
205
+ }
206
+ }
207
+
159
208
  module.exports = {
160
209
  EXCLUDED_DIRS,
161
210
  DEV_PATTERNS,
@@ -163,5 +212,6 @@ module.exports = {
163
212
  findFiles,
164
213
  findJsFiles,
165
214
  escapeHtml,
166
- getCallName
215
+ getCallName,
216
+ Spinner
167
217
  };