muaddib-scanner 2.10.21 → 2.10.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,326 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * MUAD'DIB Performance Benchmark
6
+ *
7
+ * Generates synthetic projects of different sizes and measures:
8
+ * - Scan time (wall-clock via process.hrtime.bigint())
9
+ * - Memory usage (process.memoryUsage() sampled after each run; the reported peak is the max across runs)
10
+ * - Per-scanner breakdown (each scanner module is invoked and timed individually on the medium project)
11
+ *
12
+ * Usage: node scripts/benchmark.js [--runs N] [--sizes small,medium,large]
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+ const os = require('os');
18
+
// ----- Config -----

/**
 * Returns the command-line argument that immediately follows `flag`.
 *
 * @param {string[]} argv - Argument vector to search (e.g. process.argv).
 * @param {string} flag - Flag name, including leading dashes (e.g. '--runs').
 * @returns {string|undefined} The following argument, or undefined when the
 *   flag is absent or is the last argument.
 */
function readFlagValue(argv, flag) {
  const flagIndex = argv.indexOf(flag);
  return flagIndex === -1 ? undefined : argv[flagIndex + 1];
}

/**
 * Parses the requested number of runs.
 *
 * Anything that is not a positive integer (missing value, non-numeric text,
 * zero, negative numbers) falls back to `fallback`, because the statistics
 * computed later need at least one timing sample.
 *
 * @param {string|undefined} raw - Raw value taken from the command line.
 * @param {number} [fallback=3] - Run count used when `raw` is unusable.
 * @returns {number} A positive integer run count.
 */
function parseRunCount(raw, fallback = 3) {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

const RUNS = parseRunCount(readFlagValue(process.argv, '--runs'));
const SIZE_ARG = readFlagValue(process.argv, '--sizes') || 'small,medium,large';
// Empty segments (e.g. a trailing comma in "small,") are dropped rather than
// being reported as an unknown size with an empty name.
const SIZES = SIZE_ARG.split(',').map((s) => s.trim()).filter(Boolean);

// Number of synthetic JS files generated for each named project size.
const SIZE_CONFIGS = {
  small: { files: 10, label: '10 JS files' },
  medium: { files: 100, label: '100 JS files' },
  large: { files: 500, label: '500 JS files (cap test)' }
};
29
+
30
+ // ----- Synthetic file templates -----
31
+ // Mix of benign-looking code with varied patterns to exercise all scanners
32
+ const TEMPLATES = [
33
+ // Standard module
34
+ (i) => `'use strict';
35
+ const path = require('path');
36
+ const fs = require('fs');
37
+
38
+ function process${i}(input) {
39
+ const result = input.toString().trim();
40
+ return path.resolve(result);
41
+ }
42
+
43
+ module.exports = { process${i} };
44
+ `,
45
+ // HTTP client usage
46
+ (i) => `'use strict';
47
+ const https = require('https');
48
+
49
+ function fetch${i}(url) {
50
+ return new Promise((resolve, reject) => {
51
+ https.get(url, (res) => {
52
+ let data = '';
53
+ res.on('data', (chunk) => { data += chunk; });
54
+ res.on('end', () => resolve(JSON.parse(data)));
55
+ }).on('error', reject);
56
+ });
57
+ }
58
+
59
+ module.exports = { fetch${i} };
60
+ `,
61
+ // Crypto usage
62
+ (i) => `'use strict';
63
+ const crypto = require('crypto');
64
+
65
+ function hash${i}(data) {
66
+ return crypto.createHash('sha256').update(data).digest('hex');
67
+ }
68
+
69
+ function verify${i}(data, expected) {
70
+ const actual = hash${i}(data);
71
+ return crypto.timingSafeEqual(Buffer.from(actual), Buffer.from(expected));
72
+ }
73
+
74
+ module.exports = { hash${i}, verify${i} };
75
+ `,
76
+ // Config/util module
77
+ (i) => `'use strict';
78
+ const os = require('os');
79
+
80
+ const CONFIG_${i} = {
81
+ tmpDir: os.tmpdir(),
82
+ cpus: os.cpus().length,
83
+ platform: os.platform(),
84
+ arch: os.arch()
85
+ };
86
+
87
+ function getConfig${i}() {
88
+ return { ...CONFIG_${i} };
89
+ }
90
+
91
+ module.exports = { getConfig${i}, CONFIG_${i} };
92
+ `,
93
+ // Event emitter pattern
94
+ (i) => `'use strict';
95
+ const { EventEmitter } = require('events');
96
+
97
+ class Service${i} extends EventEmitter {
98
+ constructor() {
99
+ super();
100
+ this.data = new Map();
101
+ }
102
+
103
+ add(key, value) {
104
+ this.data.set(key, value);
105
+ this.emit('added', { key, value });
106
+ }
107
+
108
+ remove(key) {
109
+ this.data.delete(key);
110
+ this.emit('removed', { key });
111
+ }
112
+ }
113
+
114
+ module.exports = { Service${i} };
115
+ `
116
+ ];
117
+
// ----- Generate synthetic project -----

/**
 * Writes a synthetic project to disk for the scanner to analyze.
 *
 * Layout: a package.json, `fileCount` files named `module-<n>.js` spread
 * round-robin over the project root and the src/lib/utils/helpers
 * subdirectories (content chosen round-robin from TEMPLATES), and an
 * index.js that requires the first ten (or fewer) modules.
 *
 * @param {string} tmpDir - Directory to create and populate.
 * @param {number} fileCount - Number of module files to generate.
 * @returns {string} `tmpDir`, unchanged.
 */
function generateProject(tmpDir, fileCount) {
  // The empty string stands for the project root itself.
  const subDirs = ['', 'src', 'lib', 'utils', 'helpers'];

  fs.mkdirSync(tmpDir, { recursive: true });

  const manifest = {
    name: `bench-project-${fileCount}`,
    version: '1.0.0',
    description: 'Synthetic benchmark project',
    main: 'index.js'
  };
  fs.writeFileSync(path.join(tmpDir, 'package.json'), JSON.stringify(manifest, null, 2));

  // Subdirectories for realism; the root already exists.
  subDirs.filter(Boolean).forEach((name) => {
    fs.mkdirSync(path.join(tmpDir, name), { recursive: true });
  });

  for (let n = 0; n < fileCount; n++) {
    const render = TEMPLATES[n % TEMPLATES.length];
    const target = path.join(tmpDir, subDirs[n % subDirs.length], `module-${n}.js`);
    fs.writeFileSync(target, render(n));
  }

  // index.js references up to ten of the generated modules.
  const requireLines = Array.from({ length: Math.min(10, fileCount) }, (_unused, n) => {
    const sub = subDirs[n % subDirs.length];
    const specifier = sub ? `./${sub}/module-${n}` : `./module-${n}`;
    return `const m${n} = require('${specifier}');`;
  });
  const indexSource = `'use strict';\n${requireLines.join('\n')}\n\nconsole.log('Loaded modules');\n`;
  fs.writeFileSync(path.join(tmpDir, 'index.js'), indexSource);

  return tmpDir;
}
154
+
// ----- Cleanup -----

/**
 * Best-effort recursive removal of a directory.
 *
 * @param {string} dir - Path to delete together with everything beneath it.
 * @returns {void}
 */
function cleanup(dir) {
  const removalOptions = { recursive: true, force: true };
  try {
    fs.rmSync(dir, removalOptions);
  } catch (_ignored) {
    // Intentionally ignored: a failed removal must not abort the caller.
  }
}
161
+
// ----- Run benchmark -----

// Scanner entry points timed individually in the per-scanner breakdown.
// `mod` is resolved with require() relative to this script; `fn` is the
// exported function name looked up on that module.
const SCANNER_MODULES = [
  { name: 'PackageJson', mod: '../src/scanner/package.js', fn: 'scanPackageJson' },
  { name: 'ShellScripts', mod: '../src/scanner/shell.js', fn: 'scanShellScripts' },
  { name: 'AST', mod: '../src/scanner/ast.js', fn: 'analyzeAST' },
  { name: 'Obfuscation', mod: '../src/scanner/obfuscation.js', fn: 'detectObfuscation' },
  { name: 'Dependencies', mod: '../src/scanner/dependencies.js', fn: 'scanDependencies' },
  { name: 'Hashes', mod: '../src/scanner/hash.js', fn: 'scanHashes' },
  { name: 'DataFlow', mod: '../src/scanner/dataflow.js', fn: 'analyzeDataFlow' },
  { name: 'Typosquat', mod: '../src/scanner/typosquat.js', fn: 'scanTyposquatting' },
  { name: 'GitHubActions', mod: '../src/scanner/github-actions.js', fn: 'scanGitHubActions' },
  { name: 'Entropy', mod: '../src/scanner/entropy.js', fn: 'scanEntropy' },
  { name: 'AIConfig', mod: '../src/scanner/ai-config.js', fn: 'scanAIConfig' }
];

/**
 * Converts a pair of process.hrtime.bigint() readings to milliseconds.
 *
 * @param {bigint} startNs - Reading taken before the measured work.
 * @param {bigint} endNs - Reading taken after the measured work.
 * @returns {number} Elapsed time in milliseconds.
 */
function elapsedMs(startNs, endNs) {
  return Number(endNs - startNs) / 1e6;
}

/**
 * Summarizes a list of durations without mutating it.
 *
 * The median of an even-sized sample is the average of the two middle
 * values. An empty sample yields all zeros so callers can still format it.
 *
 * @param {number[]} times - Durations in milliseconds, in any order.
 * @returns {{median: number, mean: number, min: number, max: number}}
 */
function summarizeTimes(times) {
  if (times.length === 0) {
    return { median: 0, mean: 0, min: 0, max: 0 };
  }
  const sorted = [...times].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  const median = sorted.length % 2 === 1
    ? sorted[mid]
    : (sorted[mid - 1] + sorted[mid]) / 2;
  const mean = sorted.reduce((a, b) => a + b, 0) / sorted.length;
  return { median, mean, min: sorted[0], max: sorted[sorted.length - 1] };
}

/**
 * Returns `part` as a percentage of `total`, or 0 when `total` is not
 * positive (avoids NaN in the report when nothing was measured).
 *
 * @param {number} part
 * @param {number} total
 * @returns {number}
 */
function percentOf(part, total) {
  return total > 0 ? (part / total) * 100 : 0;
}

/**
 * Runs the full benchmark and prints the report to stdout.
 *
 * For every size in SIZES it generates a synthetic project, scans it RUNS
 * times with caches cleared before each run, and records wall-clock time
 * plus heap/RSS sampled after each run. It then times each entry of
 * SCANNER_MODULES once on a medium-sized project and prints a summary table.
 * Temporary project directories are removed even when a step throws.
 *
 * @returns {Promise<void>} Resolves when the report has been printed.
 *   Rejects if the scanner modules cannot be loaded or project generation
 *   fails; errors thrown by individual scan runs are logged, not propagated.
 */
async function benchmark() {
  // Loaded here rather than at module load.
  const { run } = require('../src/index.js');
  const { clearFileListCache } = require('../src/utils.js');
  const { clearASTCache } = require('../src/shared/constants.js');

  const rule = '='.repeat(70);
  const toMB = (bytes) => (bytes / 1024 / 1024).toFixed(1);

  console.log(rule);
  console.log(' MUAD\'DIB Performance Benchmark');
  console.log(` Runs per size: ${RUNS} | Sizes: ${SIZES.join(', ')}`);
  console.log(` Node ${process.version} | ${os.cpus()[0]?.model || 'unknown CPU'} | ${os.platform()}`);
  console.log(rule);
  console.log();

  const results = {};

  for (const sizeName of SIZES) {
    // Own-property check so names like "constructor" are rejected instead of
    // resolving to something inherited from Object.prototype.
    const config = Object.prototype.hasOwnProperty.call(SIZE_CONFIGS, sizeName)
      ? SIZE_CONFIGS[sizeName]
      : undefined;
    if (!config) {
      console.error(`Unknown size: ${sizeName}`);
      continue;
    }

    console.log(`--- ${sizeName.toUpperCase()}: ${config.label} ---`);

    const tmpDir = path.join(os.tmpdir(), `muaddib-bench-${sizeName}-${Date.now()}`);
    const times = [];
    const memories = [];

    try {
      generateProject(tmpDir, config.files);

      for (let r = 0; r < RUNS; r++) {
        // Clear caches between runs for fair measurement
        clearFileListCache();
        clearASTCache();

        // Force GC if available (requires node --expose-gc)
        if (global.gc) global.gc();

        const start = process.hrtime.bigint();
        try {
          await run(tmpDir, { _capture: true });
        } catch (err) {
          // A failed run is still timed; the error is reported, not fatal.
          console.error(` Run ${r + 1} error: ${err.message}`);
        }
        const end = process.hrtime.bigint();
        const memAfter = process.memoryUsage();

        const durationMs = elapsedMs(start, end);
        times.push(durationMs);
        memories.push({ rss: memAfter.rss, heapUsed: memAfter.heapUsed });

        console.log(` Run ${r + 1}/${RUNS}: ${durationMs.toFixed(0)}ms | heap: ${toMB(memAfter.heapUsed)}MB | RSS: ${toMB(memAfter.rss)}MB`);
      }
    } finally {
      cleanup(tmpDir);
    }

    // Stats. "Peak" is the largest post-run sample, not a continuous high-water mark.
    const { median, mean, min, max } = summarizeTimes(times);
    const peakRss = memories.reduce((peak, m) => Math.max(peak, m.rss), 0);
    const peakHeap = memories.reduce((peak, m) => Math.max(peak, m.heapUsed), 0);

    results[sizeName] = { median, mean, min, max, peakRss, peakHeap, runs: RUNS, files: config.files };

    console.log(` => median: ${median.toFixed(0)}ms mean: ${mean.toFixed(0)}ms min: ${min.toFixed(0)}ms max: ${max.toFixed(0)}ms`);
    console.log(` => peak heap: ${toMB(peakHeap)}MB peak RSS: ${toMB(peakRss)}MB`);
    console.log();
  }

  // ----- Per-scanner timing (single run on medium) -----
  console.log('--- SCANNER BREAKDOWN (medium, single run) ---');
  const scannerTmpDir = path.join(os.tmpdir(), `muaddib-bench-scanner-${Date.now()}`);
  const scannerFiles = SIZE_CONFIGS.medium?.files || 100;
  const scannerTimings = [];

  try {
    generateProject(scannerTmpDir, scannerFiles);
    clearFileListCache();
    clearASTCache();

    // Each scanner is loaded and invoked on its own so its time can be
    // attributed to it.
    for (const s of SCANNER_MODULES) {
      try {
        const mod = require(s.mod);
        const fn = mod[s.fn];
        if (!fn) {
          scannerTimings.push({ name: s.name, ms: 0, note: 'not found' });
          continue;
        }

        clearFileListCache(); // each scanner gets fresh file list
        let note;
        const start = process.hrtime.bigint();
        try {
          await fn(scannerTmpDir, {});
        } catch (err) {
          // Some scanners may throw on benign input; keep the timing but
          // flag the row so a partial measurement is not mistaken for a full one.
          note = `threw: ${err?.message ?? String(err)}`;
        }
        const end = process.hrtime.bigint();
        scannerTimings.push(note
          ? { name: s.name, ms: elapsedMs(start, end), note }
          : { name: s.name, ms: elapsedMs(start, end) });
      } catch (err) {
        // require() failed: record the scanner with zero time and the reason.
        scannerTimings.push({ name: s.name, ms: 0, note: err.message });
      }
    }
  } finally {
    cleanup(scannerTmpDir);
  }

  // Sort by time descending
  scannerTimings.sort((a, b) => b.ms - a.ms);
  const totalScannerMs = scannerTimings.reduce((sum, s) => sum + s.ms, 0);

  for (const s of scannerTimings) {
    const share = percentOf(s.ms, totalScannerMs);
    const pct = share.toFixed(1);
    // Bar is scaled to 40 characters for 100%, with a minimum of one.
    const bar = '#'.repeat(Math.max(1, Math.round(share / 100 * 40)));
    console.log(` ${s.name.padEnd(15)} ${s.ms.toFixed(0).padStart(6)}ms ${pct.padStart(5)}% ${bar}${s.note ? ` (${s.note})` : ''}`);
  }
  console.log(` ${'TOTAL'.padEnd(15)} ${totalScannerMs.toFixed(0).padStart(6)}ms`);
  console.log();

  // ----- Summary table -----
  console.log(rule);
  console.log(' SUMMARY');
  console.log(rule);
  console.log(` ${'Size'.padEnd(10)} ${'Files'.padStart(6)} ${'Median'.padStart(8)} ${'Mean'.padStart(8)} ${'Min'.padStart(8)} ${'Max'.padStart(8)} ${'Heap'.padStart(8)} ${'RSS'.padStart(8)}`);
  console.log(` ${'-'.repeat(10)} ${'-'.repeat(6)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)}`);
  for (const [name, r] of Object.entries(results)) {
    console.log(` ${name.padEnd(10)} ${String(r.files).padStart(6)} ${(r.median.toFixed(0) + 'ms').padStart(8)} ${(r.mean.toFixed(0) + 'ms').padStart(8)} ${(r.min.toFixed(0) + 'ms').padStart(8)} ${(r.max.toFixed(0) + 'ms').padStart(8)} ${(toMB(r.peakHeap) + 'MB').padStart(8)} ${(toMB(r.peakRss) + 'MB').padStart(8)}`);
  }
  console.log();

  // Slowest scanner
  if (scannerTimings.length > 0) {
    const slowest = scannerTimings[0];
    console.log(` Slowest scanner: ${slowest.name} (${slowest.ms.toFixed(0)}ms, ${percentOf(slowest.ms, totalScannerMs).toFixed(1)}% of total)`);
  }
  console.log();
}
322
+
// Script entry point: run the benchmark; on rejection, report the error and
// exit with status 1.
function reportBenchmarkFailure(error) {
  console.error('Benchmark failed:', error);
  process.exit(1);
}

benchmark().catch(reportBenchmarkFailure);
package/src/index.js CHANGED
@@ -27,7 +27,7 @@ const { buildModuleGraph, annotateTaintedExports, detectCrossFileFlows, annotate
27
27
  const { computeReachableFiles } = require('./scanner/reachability.js');
28
28
  const { runTemporalAnalyses } = require('./temporal-runner.js');
29
29
  const { formatOutput } = require('./output-formatter.js');
30
- const { setExtraExcludes, getExtraExcludes, Spinner, listInstalledPackages, clearFileListCache, wasFilesCapped, debugLog } = require('./utils.js');
30
+ const { setExtraExcludes, getExtraExcludes, Spinner, listInstalledPackages, clearFileListCache, wasFilesCapped, getOverflowFiles, debugLog } = require('./utils.js');
31
31
  const { SEVERITY_WEIGHTS, RISK_THRESHOLDS, MAX_RISK_SCORE, isPackageLevelThreat, computeGroupScore, applyFPReductions, applyCompoundBoosts, calculateRiskScore, applyConfigOverrides, resetConfigOverrides, getSeverityWeights } = require('./scoring.js');
32
32
  const { resolveConfig } = require('./config.js');
33
33
  const { buildIntentPairs } = require('./intent-graph.js');
@@ -480,9 +480,39 @@ async function run(targetPath, options = {}) {
480
480
  aiConfigThreats
481
481
  ] = scanResult;
482
482
 
483
- // Emit warning if file count cap was hit
483
+ // Emit warning if file count cap was hit + quick-scan overflow files
484
+ const quickScanThreats = [];
484
485
  if (wasFilesCapped()) {
485
- warnings.push('File count cap reached (500 files) — some files were not scanned. Root-level files were prioritized.');
486
+ warnings.push('File count cap reached (500 files) — overflow files scanned in quick-scan mode (lifecycle + child_process only).');
487
+ const overflowFiles = getOverflowFiles();
488
+ const QUICK_SCAN_PATTERNS = [
489
+ { re: /\brequire\s*\(\s*['"]child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'HIGH', label: 'require("child_process")' },
490
+ { re: /\brequire\s*\(\s*['"]node:child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'HIGH', label: 'require("node:child_process")' },
491
+ { re: /\b(?:exec|execSync|spawn|spawnSync)\s*\(/, type: 'dangerous_exec', severity: 'HIGH', label: 'exec/spawn call' },
492
+ { re: /\bprocess\.mainModule\b/, type: 'dynamic_require', severity: 'HIGH', label: 'process.mainModule' },
493
+ { re: /\bModule\._load\b/, type: 'module_load_bypass', severity: 'CRITICAL', label: 'Module._load' }
494
+ ];
495
+ for (const filePath of overflowFiles) {
496
+ try {
497
+ const stat = fs.statSync(filePath);
498
+ if (stat.size > getMaxFileSize()) continue;
499
+ const content = fs.readFileSync(filePath, 'utf8');
500
+ const relFile = path.relative(targetPath, filePath);
501
+ for (const pat of QUICK_SCAN_PATTERNS) {
502
+ if (pat.re.test(content)) {
503
+ quickScanThreats.push({
504
+ type: pat.type,
505
+ severity: pat.severity,
506
+ message: `[quick-scan] ${pat.label} detected in overflow file.`,
507
+ file: relFile
508
+ });
509
+ }
510
+ }
511
+ } catch { /* skip unreadable files */ }
512
+ }
513
+ if (quickScanThreats.length > 0) {
514
+ debugLog(`Quick-scan found ${quickScanThreats.length} threats in ${overflowFiles.length} overflow files`);
515
+ }
486
516
  }
487
517
 
488
518
  // Stop spinner now that scanning is complete
@@ -494,6 +524,7 @@ async function run(targetPath, options = {}) {
494
524
  ...packageThreats,
495
525
  ...shellThreats,
496
526
  ...astThreats,
527
+ ...quickScanThreats,
497
528
  ...obfuscationThreats,
498
529
  ...dependencyThreats,
499
530
  ...hashThreats,
@@ -7,7 +7,6 @@ const AdmZip = require('adm-zip');
7
7
  const IOC_FILE = path.join(__dirname, 'data/iocs.json');
8
8
  const COMPACT_IOC_FILE = path.join(__dirname, 'data/iocs-compact.json');
9
9
  const HOME_IOC_FILE = path.join(os.homedir(), '.muaddib', 'data', 'iocs.json');
10
- const STATIC_IOCS_FILE = path.join(__dirname, '../../data/static-iocs.json');
11
10
  const { generateCompactIOCs, NEVER_WILDCARD } = require('./updater.js');
12
11
  const { Spinner } = require('../utils.js');
13
12
  const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
@@ -144,17 +143,6 @@ function parseCSV(csvContent, hasHeader = true) {
144
143
  return results;
145
144
  }
146
145
 
147
- function loadStaticIOCs() {
148
- try {
149
- if (fs.existsSync(STATIC_IOCS_FILE)) {
150
- return JSON.parse(fs.readFileSync(STATIC_IOCS_FILE, 'utf8'));
151
- }
152
- } catch (e) {
153
- console.log(`[WARN] Error loading static-iocs.json: ${e.message}`);
154
- }
155
- return { socket: [], phylum: [], npmRemoved: [] };
156
- }
157
-
158
146
  const MAX_REDIRECTS = 5;
159
147
  const MAX_RESPONSE_SIZE = 200 * 1024 * 1024; // 200MB
160
148
  const MAX_ENTRY_UNCOMPRESSED = 50 * 1024 * 1024; // 50MB per zip entry
@@ -972,124 +960,6 @@ async function scrapeGitHubAdvisory() {
972
960
  return packages;
973
961
  }
974
962
 
975
- // ============================================
976
- // SOURCE 5: Static IOCs (Socket, Phylum, npm removed)
977
- // Local file maintained manually
978
- // ============================================
979
- async function scrapeStaticIOCs() {
980
- console.log('[SCRAPER] Static IOCs (local file)...');
981
- const packages = [];
982
- const staticIOCs = loadStaticIOCs();
983
-
984
- // Socket.dev reports
985
- for (const pkg of staticIOCs.socket || []) {
986
- if (!pkg.version) continue; // Skip entries without version — avoids wildcard cascade
987
- packages.push({
988
- id: `SOCKET-${pkg.name}`,
989
- name: pkg.name,
990
- version: pkg.version,
991
- severity: pkg.severity || 'critical',
992
- confidence: 'high',
993
- source: 'socket-dev',
994
- description: pkg.description || 'Malicious package reported by Socket.dev',
995
- references: ['https://socket.dev/npm/package/' + pkg.name],
996
- mitre: 'T1195.002',
997
- freshness: createFreshness('socket', 'high')
998
- });
999
- }
1000
-
1001
- // Phylum Research
1002
- for (const pkg of staticIOCs.phylum || []) {
1003
- if (!pkg.version) continue; // Skip entries without version — avoids wildcard cascade
1004
- packages.push({
1005
- id: `PHYLUM-${pkg.name}`,
1006
- name: pkg.name,
1007
- version: pkg.version,
1008
- severity: pkg.severity || 'critical',
1009
- confidence: 'high',
1010
- source: 'phylum',
1011
- description: pkg.description || 'Malicious package reported by Phylum Research',
1012
- references: ['https://blog.phylum.io'],
1013
- mitre: 'T1195.002',
1014
- freshness: createFreshness('phylum', 'high')
1015
- });
1016
- }
1017
-
1018
- // npm removed packages
1019
- for (const pkg of staticIOCs.npmRemoved || []) {
1020
- if (!pkg.version) continue; // Skip entries without version — avoids wildcard cascade
1021
- packages.push({
1022
- id: `NPM-REMOVED-${pkg.name}`,
1023
- name: pkg.name,
1024
- version: pkg.version,
1025
- severity: 'critical',
1026
- confidence: 'high',
1027
- source: 'npm-removed',
1028
- description: 'Removed from npm: ' + (pkg.reason || 'security violation'),
1029
- references: ['https://www.npmjs.com/policies/security'],
1030
- mitre: 'T1195.002',
1031
- freshness: createFreshness('npm-removed', 'medium')
1032
- });
1033
- }
1034
-
1035
- console.log(`[SCRAPER] ${packages.length} packages`);
1036
- return packages;
1037
- }
1038
-
1039
- // ============================================
1040
- // SOURCE 6: Snyk Known Malware
1041
- // Historical attacks database
1042
- // ============================================
1043
- async function scrapeSnykMalware() {
1044
- console.log('[SCRAPER] Snyk Malware DB...');
1045
- const packages = [];
1046
-
1047
- const knownSnykMalware = [
1048
- { name: 'event-stream', version: '3.3.6', description: 'Flatmap-stream backdoor (2018)' },
1049
- { name: 'flatmap-stream', version: '*', description: 'Malicious dependency of event-stream' },
1050
- { name: 'eslint-scope', version: '3.7.2', description: 'Credential theft (2018)' },
1051
- { name: 'eslint-config-eslint', version: '5.0.2', description: 'Credential theft (2018)' },
1052
- { name: 'getcookies', version: '*', description: 'Backdoor malware' },
1053
- { name: 'mailparser', version: '2.3.0', description: 'Compromised version' },
1054
- { name: 'node-ipc', version: '10.1.1', description: 'Protestware - file deletion' },
1055
- { name: 'node-ipc', version: '10.1.2', description: 'Protestware - file deletion' },
1056
- { name: 'node-ipc', version: '10.1.3', description: 'Protestware - file deletion' },
1057
- { name: 'colors', version: '1.4.1', description: 'Protestware - infinite loop' },
1058
- { name: 'colors', version: '1.4.2', description: 'Protestware - infinite loop' },
1059
- { name: 'faker', version: '6.6.6', description: 'Protestware - breaking change' },
1060
- { name: 'ua-parser-js', version: '0.7.29', description: 'Cryptominer injection' },
1061
- { name: 'ua-parser-js', version: '0.8.0', description: 'Cryptominer injection' },
1062
- { name: 'ua-parser-js', version: '1.0.0', description: 'Cryptominer injection' },
1063
- { name: 'coa', version: '2.0.3', description: 'Malicious version' },
1064
- { name: 'coa', version: '2.0.4', description: 'Malicious version' },
1065
- { name: 'coa', version: '2.1.1', description: 'Malicious version' },
1066
- { name: 'coa', version: '2.1.3', description: 'Malicious version' },
1067
- { name: 'coa', version: '3.0.1', description: 'Malicious version' },
1068
- { name: 'coa', version: '3.1.3', description: 'Malicious version' },
1069
- { name: 'rc', version: '1.2.9', description: 'Malicious version' },
1070
- { name: 'rc', version: '1.3.9', description: 'Malicious version' },
1071
- { name: 'rc', version: '2.3.9', description: 'Malicious version' },
1072
- ];
1073
-
1074
- for (const pkg of knownSnykMalware) {
1075
- packages.push({
1076
- id: ('SNYK-' + pkg.name + '-' + pkg.version).replace(/[^a-zA-Z0-9-]/g, '-'),
1077
- name: pkg.name,
1078
- version: pkg.version,
1079
- severity: 'critical',
1080
- confidence: 'high',
1081
- source: 'snyk-known',
1082
- description: pkg.description,
1083
- references: ['https://snyk.io/advisor'],
1084
- mitre: 'T1195.002',
1085
- freshness: createFreshness('snyk', 'high')
1086
- });
1087
- }
1088
-
1089
- console.log(`[SCRAPER] ${packages.length} packages`);
1090
- return packages;
1091
- }
1092
-
1093
963
  // ============================================
1094
964
  // MAIN SCRAPER
1095
965
  // ============================================
@@ -1152,8 +1022,6 @@ async function runScraper() {
1152
1022
  scrapeDatadogIOCs(),
1153
1023
  scrapeOSSFMaliciousPackages(osvResult.knownIds),
1154
1024
  scrapeGitHubAdvisory(),
1155
- scrapeStaticIOCs(),
1156
- scrapeSnykMalware(),
1157
1025
  scrapeOSVPyPIDataDump()
1158
1026
  ]);
1159
1027
 
@@ -1161,9 +1029,7 @@ async function runScraper() {
1161
1029
  const datadogResult = results[1];
1162
1030
  const ossfPackages = results[2];
1163
1031
  const githubPackages = results[3];
1164
- const staticPackages = results[4];
1165
- const snykPackages = results[5];
1166
- const pypiPackages = results[6];
1032
+ const pypiPackages = results[4];
1167
1033
 
1168
1034
  // Log aggregated warnings
1169
1035
  if (_noVersionSkipCount > 0) {
@@ -1176,9 +1042,7 @@ async function runScraper() {
1176
1042
  ...shaiHuludResult.packages,
1177
1043
  ...datadogResult.packages,
1178
1044
  ...ossfPackages,
1179
- ...githubPackages,
1180
- ...staticPackages,
1181
- ...snykPackages
1045
+ ...githubPackages
1182
1046
  ];
1183
1047
 
1184
1048
  // Merge all hashes
@@ -1326,8 +1190,7 @@ async function runScraper() {
1326
1190
  'github-advisory',
1327
1191
  'socket-dev',
1328
1192
  'phylum',
1329
- 'npm-removed',
1330
- 'snyk-known'
1193
+ 'npm-removed'
1331
1194
  ];
1332
1195
 
1333
1196
  // Save enriched (full) IOCs — atomic write via .tmp + rename
@@ -1428,7 +1291,7 @@ module.exports = {
1428
1291
  runScraper, scrapeShaiHuludDetector, scrapeDatadogIOCs,
1429
1292
  // Pure utility functions (exported for testing)
1430
1293
  parseCSVLine, parseCSV, extractVersions, parseOSVEntry,
1431
- createFreshness, isAllowedRedirect, loadStaticIOCs,
1294
+ createFreshness, isAllowedRedirect,
1432
1295
  validateIOCEntry,
1433
1296
  getNoVersionSkipCount, resetNoVersionSkipCount,
1434
1297
  CONFIDENCE_ORDER, ALLOWED_REDIRECT_DOMAINS,
@@ -209,7 +209,7 @@ function loadCachedIOCs() {
209
209
  files: yamlIOCs.files.map(function(f) { return f.name; })
210
210
  };
211
211
 
212
- // Priority 2: Local scraped IOCs (full enriched file)
212
+ // Priority 2a: Local scraped IOCs (full enriched file)
213
213
  if (fs.existsSync(LOCAL_IOC_FILE)) {
214
214
  try {
215
215
  const localIOCs = JSON.parse(fs.readFileSync(LOCAL_IOC_FILE, 'utf8'));
@@ -217,8 +217,10 @@ function loadCachedIOCs() {
217
217
  } catch (e) {
218
218
  console.log('[WARN] Failed to load IOC database (iocs.json): ' + e.message);
219
219
  }
220
- } else if (fs.existsSync(LOCAL_COMPACT_FILE)) {
221
- // Priority 2b: Compact file (shipped in npm, lightweight)
220
+ }
221
+
222
+ // Priority 2b: Always merge compact file (contains manual IOCs not in full — TeamPCP, LiteLLM, etc.)
223
+ if (fs.existsSync(LOCAL_COMPACT_FILE)) {
222
224
  try {
223
225
  const compactData = JSON.parse(fs.readFileSync(LOCAL_COMPACT_FILE, 'utf8'));
224
226
  const expandedIOCs = expandCompactIOCs(compactData);
@@ -315,7 +317,9 @@ function createOptimizedIOCs(iocs) {
315
317
  const versions = pkg.version.split(',').map(v => v.trim()).filter(Boolean);
316
318
  for (const ver of versions) {
317
319
  const entry = Object.assign({}, pkg, { version: ver });
318
- if (ver === '*') pypiWildcardPackages.add(pkg.name);
320
+ if (ver === '*' && !NEVER_WILDCARD_PYPI.has(pkg.name)) {
321
+ pypiWildcardPackages.add(pkg.name);
322
+ }
319
323
  if (!pypiPackagesMap.has(pkg.name)) pypiPackagesMap.set(pkg.name, []);
320
324
  pypiPackagesMap.get(pkg.name).push(entry);
321
325
  }
@@ -323,7 +327,10 @@ function createOptimizedIOCs(iocs) {
323
327
  }
324
328
 
325
329
  if (pkg.version === '*') {
326
- pypiWildcardPackages.add(pkg.name);
330
+ // Defense-in-depth: NEVER_WILDCARD_PYPI packages must not be wildcarded
331
+ if (!NEVER_WILDCARD_PYPI.has(pkg.name)) {
332
+ pypiWildcardPackages.add(pkg.name);
333
+ }
327
334
  }
328
335
 
329
336
  if (!pypiPackagesMap.has(pkg.name)) {
@@ -377,6 +384,13 @@ const NEVER_WILDCARD = new Set([
377
384
  'posthog-node', 'posthog-js', 'ngx-bootstrap', '@asyncapi/specs'
378
385
  ]);
379
386
 
// PyPI counterpart of NEVER_WILDCARD. These are legitimate, very widely used
// packages for which only specific versions were compromised; a '*' entry
// for any of them would flag every version and cause mass false positives.
const NEVER_WILDCARD_PYPI = new Set([
  'flask',
  'django',
  'requests',
  'numpy',
  'pandas',
  'scipy',
  'tensorflow',
  'torch',
  'fastapi',
  'uvicorn'
]);
393
+
380
394
  function generateCompactIOCs(fullIOCs) {
381
395
  const wildcards = [];
382
396
  const versioned = Object.create(null);
@@ -416,6 +430,7 @@ function generateCompactIOCs(fullIOCs) {
416
430
 
417
431
  for (const p of fullIOCs.pypi_packages || []) {
418
432
  if (p.version === '*') {
433
+ if (NEVER_WILDCARD_PYPI.has(p.name)) continue;
419
434
  pypiWildcards.push(p.name);
420
435
  } else {
421
436
  if (!pypiVersioned[p.name]) pypiVersioned[p.name] = [];
@@ -481,9 +496,13 @@ function expandCompactIOCs(compact) {
481
496
  }
482
497
  }
483
498
 
484
- // Expand PyPI wildcards
499
+ // Expand PyPI wildcards (deduplicate via Set, enforce NEVER_WILDCARD_PYPI)
485
500
  const pypiPackages = [];
501
+ const seenPyPIWildcards = new Set();
486
502
  for (const name of compact.pypi_wildcards || []) {
503
+ if (seenPyPIWildcards.has(name)) continue;
504
+ if (NEVER_WILDCARD_PYPI.has(name)) continue;
505
+ seenPyPIWildcards.add(name);
487
506
  pypiPackages.push({ name: name, version: '*', severity: defaultSev });
488
507
  }
489
508
 
@@ -593,4 +612,4 @@ function verifyIOCHMAC(data, hmac) {
593
612
  }
594
613
  }
595
614
 
596
- module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, checkIOCStaleness, NEVER_WILDCARD };
615
+ module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, checkIOCStaleness, NEVER_WILDCARD, NEVER_WILDCARD_PYPI };