muaddib-scanner 2.10.39 → 2.10.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,190 +0,0 @@
1
- #!/usr/bin/env node
2
- 'use strict';
3
-
4
- /**
5
- * analyze-score0.js — Diagnostic script for score-0 malware investigation.
6
- *
7
- * Analyzes packages from the Datadog benchmark that scored 0 (zero threats detected).
8
- * Categorizes each package to identify blind spots vs expected non-detections.
9
- *
10
- * Categories:
11
- * - empty_package: no files found at all (node_modules, .git and directories nested deeper than the scan limit are not inspected)
12
- * - ts_only: only .ts files (no .js)
13
- * - binary_only: only .wasm/.node/.dll/.so
14
- * - non_code_assets: CSS/images/fonts/markdown only
15
- * - minimum_viable: package.json + README only
16
- * - python_in_npm: .py files in an npm package
17
- * - unknown: has .js but 0 detections — TRUE BLIND SPOT (also the fallback for packages that match no other category)
18
- *
19
- * Usage:
20
- * node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl
21
- * node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl --csv report.csv
22
- * node scripts/analyze-score0.js --dir .muaddib-cache/datadog-tarballs/
23
- */
24
-
25
- const fs = require('fs');
26
- const path = require('path');
27
-
28
// File-extension tables used to classify the content of a package.
const CODE_EXTENSIONS = new Set(['.js', '.cjs', '.mjs', '.jsx']);
const TS_EXTENSIONS = new Set(['.ts', '.tsx', '.cts', '.mts']);
const BINARY_EXTENSIONS = new Set(['.wasm', '.node', '.dll', '.so', '.dylib', '.exe']);
const ASSET_EXTENSIONS = new Set([
  '.css', '.scss', '.less', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico',
  '.woff', '.woff2', '.ttf', '.eot', '.otf', '.md', '.txt', '.html', '.htm', '.map'
]);
const PY_EXTENSIONS = new Set(['.py', '.pyx', '.pyi']);

// Lower-cased file names that count as pure package metadata.
const META_FILENAMES = new Set(['package.json', 'readme.md', 'readme', 'license', 'license.md', 'changelog.md']);

/**
 * Classify an extracted package directory by the kind of files it contains.
 *
 * The directory is walked recursively (skipping `node_modules` and `.git`,
 * and not descending past depth 5); only regular files are considered.
 *
 * @param {string} packageDir - Path of the extracted package.
 * @returns {string} One of 'missing', 'empty_package', 'minimum_viable',
 *   'unknown', 'ts_only', 'binary_only', 'python_in_npm', 'non_code_assets'.
 *   'unknown' is returned both when JavaScript files are present and as the
 *   fallback when no other category matches.
 */
function categorizePackage(packageDir) {
  if (!fs.existsSync(packageDir)) return 'missing';

  const files = [];
  function walk(dir, depth) {
    if (depth > 5) return; // Limit depth
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return; // best effort: unreadable directories (e.g. permission errors) are skipped
    }
    for (const entry of entries) {
      if (entry.name === 'node_modules' || entry.name === '.git') continue;
      if (entry.isDirectory()) {
        walk(path.join(dir, entry.name), depth + 1);
      } else if (entry.isFile()) {
        files.push(entry.name);
      }
    }
  }
  walk(packageDir, 0);

  if (files.length === 0) return 'empty_package';

  // Only package.json / README / LICENSE / CHANGELOG style files.
  if (files.every((f) => META_FILENAMES.has(f.toLowerCase()))) return 'minimum_viable';

  // Deduplicate extensions once so each category check is a single cheap pass.
  const extensions = [...new Set(files.map((f) => path.extname(f).toLowerCase()))];
  const hasAny = (known) => extensions.some((ext) => known.has(ext));

  // Checks are ordered by precedence; each one is only reached when every
  // earlier category was absent, so no extra negative guards are needed.
  if (hasAny(CODE_EXTENSIONS)) return 'unknown'; // TRUE BLIND SPOT: has JS but 0 detections
  if (hasAny(TS_EXTENSIONS)) return 'ts_only';
  if (hasAny(BINARY_EXTENSIONS)) return 'binary_only';
  if (hasAny(PY_EXTENSIONS)) return 'python_in_npm';
  if (hasAny(ASSET_EXTENSIONS)) return 'non_code_assets';

  return 'unknown'; // Fallback: nothing recognisable, keep it on the investigation list
}
78
-
79
/**
 * Load a JSONL benchmark file and keep only the score-0 records.
 *
 * A record is kept when both `score` and `threat_count` are exactly 0.
 * Blank lines are ignored. Lines that are not valid JSON are skipped, and
 * the number skipped is reported on stderr so that a damaged file does not
 * silently shrink the analysed population.
 *
 * Exits the process with code 1 when the file does not exist.
 *
 * @param {string} filepath - Path to the JSONL benchmark file.
 * @returns {object[]} Parsed records with score === 0 and threat_count === 0.
 */
function loadBenchmarkResults(filepath) {
  if (!fs.existsSync(filepath)) {
    console.error(`[SCORE0] File not found: ${filepath}`);
    process.exit(1);
  }

  const content = fs.readFileSync(filepath, 'utf8');
  const records = [];
  let malformed = 0;

  // Accept both LF and CRLF line endings.
  for (const line of content.split(/\r?\n/)) {
    if (!line.trim()) continue;

    let record;
    try {
      record = JSON.parse(line);
    } catch {
      malformed++;
      continue;
    }

    // Valid JSON that is not an object (null, numbers, strings) cannot be a record.
    if (record === null || typeof record !== 'object') continue;

    if (record.score === 0 && record.threat_count === 0) {
      records.push(record);
    }
  }

  if (malformed > 0) {
    console.error(`[SCORE0] Skipped ${malformed} malformed line(s) in ${filepath}`);
  }
  return records;
}
98
-
99
/**
 * Render one value as a CSV field (RFC 4180 quoting).
 * Values containing a comma, double quote, CR or LF are wrapped in double
 * quotes with embedded quotes doubled; everything else is emitted verbatim.
 *
 * @param {*} value - Value to render; it is converted with String().
 * @returns {string} The CSV-safe field.
 */
function csvField(value) {
  const text = String(value);
  return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
}

/**
 * CLI entry point: categorize score-0 packages and print a report.
 *
 * Options (read from process.argv):
 *   --benchmark <file>  JSONL benchmark results; only score-0 records are used.
 *   --dir <dir>         Directory holding one sub-directory per package. With
 *                       --benchmark it is where each record's package is looked
 *                       up; alone, every sub-directory is analysed.
 *   --csv <file>        Also write a name,version,category CSV report.
 *
 * Prints usage and exits with code 0 when neither --benchmark nor --dir is given.
 */
function main() {
  const args = process.argv.slice(2);
  const optionValue = (flag) => {
    const idx = args.indexOf(flag);
    return idx >= 0 ? args[idx + 1] : null;
  };

  const benchmarkFile = optionValue('--benchmark');
  const tarballDir = optionValue('--dir');
  const csvFile = optionValue('--csv');

  if (!benchmarkFile && !tarballDir) {
    console.log('Usage:');
    console.log(' node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl');
    console.log(' node scripts/analyze-score0.js --dir .muaddib-cache/datadog-tarballs/');
    console.log(' node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl --csv report.csv');
    process.exit(0);
  }

  let packages = [];

  if (benchmarkFile) {
    const records = loadBenchmarkResults(benchmarkFile);
    console.log(`[SCORE0] Loaded ${records.length} score-0 packages from benchmark`);
    packages = records.map((r) => {
      const name = r.name || r.package || 'unknown';
      return {
        name,
        version: r.version || '',
        // Without --dir there is nothing on disk to inspect; such packages end up as 'no_dir'.
        dir: tarballDir ? path.join(tarballDir, name) : null
      };
    });
  } else if (tarballDir) {
    // Direct directory scan mode
    if (!fs.existsSync(tarballDir)) {
      console.error(`[SCORE0] Directory not found: ${tarballDir}`);
      process.exit(1);
    }
    packages = fs.readdirSync(tarballDir, { withFileTypes: true })
      .filter((e) => e.isDirectory())
      .map((e) => ({ name: e.name, version: '', dir: path.join(tarballDir, e.name) }));
    console.log(`[SCORE0] Found ${packages.length} package directories`);
  }

  // Categorize
  const categories = {};
  const results = [];

  for (const pkg of packages) {
    let category = 'no_dir';
    if (pkg.dir && fs.existsSync(pkg.dir)) {
      category = categorizePackage(pkg.dir);
    }
    categories[category] = (categories[category] || 0) + 1;
    results.push({ name: pkg.name, version: pkg.version, category });
  }

  // Summary
  console.log('\n=== SCORE 0 INVESTIGATION REPORT ===\n');
  console.log(`Total score-0 packages: ${packages.length}\n`);

  // Most frequent category first. The loop only runs when at least one
  // package was counted, so packages.length is never 0 in the division.
  const sortedCategories = Object.entries(categories).sort((a, b) => b[1] - a[1]);
  for (const [cat, count] of sortedCategories) {
    const pct = ((count / packages.length) * 100).toFixed(1);
    const label = cat === 'unknown' ? `${cat} *** BLIND SPOT ***` : cat;
    console.log(` ${label}: ${count} (${pct}%)`);
  }

  const unknownCount = categories.unknown || 0;
  console.log(`\n Actionable blind spots: ${unknownCount} packages with JS code but 0 detections`);

  // CSV output
  if (csvFile) {
    const csvLines = ['name,version,category'];
    for (const r of results) {
      // Names and versions come from benchmark data / directory names, so quote them defensively.
      csvLines.push([r.name, r.version, r.category].map(csvField).join(','));
    }
    fs.writeFileSync(csvFile, csvLines.join('\n'), 'utf8');
    console.log(`\n CSV report written to: ${csvFile}`);
  }

  // List unknown packages (first 20)
  const unknowns = results.filter((r) => r.category === 'unknown');
  if (unknowns.length > 0) {
    console.log('\n First 20 "unknown" (blind spot) packages:');
    for (const u of unknowns.slice(0, 20)) {
      console.log(` - ${u.name}@${u.version}`);
    }
    if (unknowns.length > 20) {
      console.log(` ... and ${unknowns.length - 20} more`);
    }
  }
}
189
-
190
- main();
@@ -1,7 +0,0 @@
1
#!/bin/bash
# Delete archive directories older than 30 days, then log the remaining size.
ARCHIVE_DIR="/opt/muaddib/archive"

# Only the dated directories directly under ARCHIVE_DIR are candidates:
# -mindepth/-maxdepth 1 keeps find from matching nested "20*" directories
# and from descending into directories it has just removed.
find "$ARCHIVE_DIR" -mindepth 1 -maxdepth 1 -type d -name "20*" -mtime +30 -exec rm -rf {} + 2>/dev/null

# Log
TOTAL=$(du -sh "$ARCHIVE_DIR" 2>/dev/null | cut -f1)
echo "[Archive Cleanup] $(date -Iseconds) — Total size: $TOTAL"
@@ -1,45 +0,0 @@
1
#!/bin/bash
# Usage: ./audit-archive.sh [YYYY-MM-DD] [priority]
# Examples:
#   ./audit-archive.sh                 → summary of every archived date
#   ./audit-archive.sh 2026-03-29      → list the packages archived that day
#   ./audit-archive.sh 2026-03-29 P1   → same, filtered by priority

ARCHIVE_DIR="/opt/muaddib/archive"
DATE=$1
PRIORITY=$2

# Unmatched globs expand to nothing, so empty directories count as 0 and
# loops over them simply do not run.
shopt -s nullglob

if [ -z "$DATE" ]; then
  echo "=== Archive Summary ==="
  for dir in "$ARCHIVE_DIR"/20*; do
    [ -d "$dir" ] || continue
    day=$(basename "$dir")
    # Count via a glob array instead of parsing `ls` output.
    tarballs=("$dir"/*.tgz)
    count=${#tarballs[@]}
    size=$(du -sh "$dir" 2>/dev/null | cut -f1)
    echo "$day : $count packages ($size)"
  done
  echo "---"
  echo "Total: $(du -sh "$ARCHIVE_DIR" 2>/dev/null | cut -f1)"
  exit 0
fi

# The date is joined into a path below; refuse anything that could leave ARCHIVE_DIR.
case "$DATE" in
  */*|.|..)
    echo "Invalid date: $DATE" >&2
    exit 1
    ;;
esac

DIR="$ARCHIVE_DIR/$DATE"
if [ ! -d "$DIR" ]; then
  echo "No archive for $DATE"
  exit 1
fi

for json in "$DIR"/*.json; do
  [ -f "$json" ] || continue

  # One jq process per file instead of five. Fields are joined with the ASCII
  # unit separator (0x1f): unlike a tab it is not IFS whitespace, so empty
  # fields are preserved by `read`. Files jq cannot parse are skipped (jq
  # reports the error on stderr).
  IFS=$'\x1f' read -r pkg ver prio score llm < <(
    jq -r '[.package, .version, .priority, .score, (.llm_verdict // "none")] | map(tostring) | join("\u001f")' "$json"
  ) || continue

  if [ -n "$PRIORITY" ] && [ "$prio" != "$PRIORITY" ]; then
    continue
  fi

  printf "%-40s %-8s score=%-4s llm=%s\n" "$pkg@$ver" "$prio" "$score" "$llm"
done
@@ -1,326 +0,0 @@
1
- #!/usr/bin/env node
2
- 'use strict';
3
-
4
- /**
5
- * MUAD'DIB Performance Benchmark
6
- *
7
- * Generates synthetic projects of different sizes and measures:
8
- * - Scan time (wall-clock via process.hrtime.bigint())
9
- * - Memory usage sampled after each run (process.memoryUsage()); the highest sample is reported as the peak
10
- * - Per-scanner breakdown (each scanner module is invoked and timed individually)
11
- *
12
- * Usage: node scripts/benchmark.js [--runs N] [--sizes small,medium,large]
13
- */
14
-
15
- const fs = require('fs');
16
- const path = require('path');
17
- const os = require('os');
18
-
19
- // ----- Config -----
20
/**
 * Read the `--runs N` option from an argv array.
 *
 * @param {string[]} argv - Argument vector (e.g. process.argv).
 * @param {number} [fallback=3] - Value used when the option is absent or invalid.
 * @returns {number} A positive integer run count.
 */
function parseRunCount(argv, fallback = 3) {
  const flagIdx = argv.indexOf('--runs');
  if (flagIdx < 0 || argv[flagIdx + 1] === undefined) return fallback;

  const parsed = Number.parseInt(argv[flagIdx + 1], 10);
  if (Number.isNaN(parsed) || parsed < 1) {
    // Zero or non-numeric runs would leave the statistics with no samples.
    console.warn(`Invalid --runs value "${argv[flagIdx + 1]}", using ${fallback}`);
    return fallback;
  }
  return parsed;
}

// Number of timed runs per project size.
const RUNS = parseRunCount(process.argv);
// Comma-separated list of size names to benchmark.
const SIZE_ARG = process.argv.find((a, i) => process.argv[i - 1] === '--sizes') || 'small,medium,large';
// Empty entries (e.g. from a trailing comma) are dropped.
const SIZES = SIZE_ARG.split(',').map(s => s.trim()).filter(Boolean);

// Synthetic project sizes: number of generated JS files and a display label.
const SIZE_CONFIGS = {
  small: { files: 10, label: '10 JS files' },
  medium: { files: 100, label: '100 JS files' },
  large: { files: 500, label: '500 JS files (cap test)' }
};
29
-
30
- // ----- Synthetic file templates -----
31
- // Mix of benign-looking code with varied patterns to exercise all scanners
32
- const TEMPLATES = [
33
- // Standard module
34
- (i) => `'use strict';
35
- const path = require('path');
36
- const fs = require('fs');
37
-
38
- function process${i}(input) {
39
- const result = input.toString().trim();
40
- return path.resolve(result);
41
- }
42
-
43
- module.exports = { process${i} };
44
- `,
45
- // HTTP client usage
46
- (i) => `'use strict';
47
- const https = require('https');
48
-
49
- function fetch${i}(url) {
50
- return new Promise((resolve, reject) => {
51
- https.get(url, (res) => {
52
- let data = '';
53
- res.on('data', (chunk) => { data += chunk; });
54
- res.on('end', () => resolve(JSON.parse(data)));
55
- }).on('error', reject);
56
- });
57
- }
58
-
59
- module.exports = { fetch${i} };
60
- `,
61
- // Crypto usage
62
- (i) => `'use strict';
63
- const crypto = require('crypto');
64
-
65
- function hash${i}(data) {
66
- return crypto.createHash('sha256').update(data).digest('hex');
67
- }
68
-
69
- function verify${i}(data, expected) {
70
- const actual = hash${i}(data);
71
- return crypto.timingSafeEqual(Buffer.from(actual), Buffer.from(expected));
72
- }
73
-
74
- module.exports = { hash${i}, verify${i} };
75
- `,
76
- // Config/util module
77
- (i) => `'use strict';
78
- const os = require('os');
79
-
80
- const CONFIG_${i} = {
81
- tmpDir: os.tmpdir(),
82
- cpus: os.cpus().length,
83
- platform: os.platform(),
84
- arch: os.arch()
85
- };
86
-
87
- function getConfig${i}() {
88
- return { ...CONFIG_${i} };
89
- }
90
-
91
- module.exports = { getConfig${i}, CONFIG_${i} };
92
- `,
93
- // Event emitter pattern
94
- (i) => `'use strict';
95
- const { EventEmitter } = require('events');
96
-
97
- class Service${i} extends EventEmitter {
98
- constructor() {
99
- super();
100
- this.data = new Map();
101
- }
102
-
103
- add(key, value) {
104
- this.data.set(key, value);
105
- this.emit('added', { key, value });
106
- }
107
-
108
- remove(key) {
109
- this.data.delete(key);
110
- this.emit('removed', { key });
111
- }
112
- }
113
-
114
- module.exports = { Service${i} };
115
- `
116
- ];
117
-
118
- // ----- Generate synthetic project -----
119
/**
 * Write a synthetic project to disk for benchmarking.
 *
 * Creates `tmpDir` with a package.json, a handful of sub-directories,
 * `fileCount` generated modules (templates and target directories are
 * assigned round-robin) and an index.js requiring up to the first ten modules.
 *
 * @param {string} tmpDir - Directory to create the project in.
 * @param {number} fileCount - Number of module files to generate.
 * @returns {string} The project directory (`tmpDir`).
 */
function generateProject(tmpDir, fileCount) {
  // '' stands for the project root itself.
  const dirs = ['', 'src', 'lib', 'utils', 'helpers'];
  const manifest = {
    name: `bench-project-${fileCount}`,
    version: '1.0.0',
    description: 'Synthetic benchmark project',
    main: 'index.js'
  };

  fs.mkdirSync(tmpDir, { recursive: true });
  fs.writeFileSync(path.join(tmpDir, 'package.json'), JSON.stringify(manifest, null, 2));

  // Sub-directories for realism (the root already exists).
  dirs
    .filter((name) => name !== '')
    .forEach((name) => fs.mkdirSync(path.join(tmpDir, name), { recursive: true }));

  // Module files: template and directory both cycle with the file index.
  for (let n = 0; n < fileCount; n++) {
    const render = TEMPLATES[n % TEMPLATES.length];
    const target = path.join(tmpDir, dirs[n % dirs.length], `module-${n}.js`);
    fs.writeFileSync(target, render(n));
  }

  // index.js pulls in (at most) the first ten modules.
  const requireLines = Array.from({ length: Math.min(10, fileCount) }).map((_, n) => {
    const sub = dirs[n % dirs.length];
    const specifier = sub ? `./${sub}/module-${n}` : `./module-${n}`;
    return `const m${n} = require('${specifier}');`;
  });
  const indexSource = `'use strict';\n${requireLines.join('\n')}\n\nconsole.log('Loaded modules');\n`;
  fs.writeFileSync(path.join(tmpDir, 'index.js'), indexSource);

  return tmpDir;
}
154
-
155
- // ----- Cleanup -----
156
/**
 * Best-effort recursive removal of a directory.
 * Failures are deliberately ignored.
 *
 * @param {string} dir - Directory to delete.
 */
function cleanup(dir) {
  const removal = { recursive: true, force: true };
  try {
    fs.rmSync(dir, removal);
  } catch {
    // ignore
  }
}
161
-
162
- // ----- Run benchmark -----
163
/**
 * Summarize a list of run durations.
 *
 * @param {number[]} samples - Durations in milliseconds (not modified).
 * @returns {{median: number, mean: number, min: number, max: number}|null}
 *   The statistics, or null when there are no samples. For an even number of
 *   samples the median is the average of the two middle values.
 */
function summarizeTimings(samples) {
  if (samples.length === 0) return null;
  const sorted = [...samples].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  const median = sorted.length % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
  const mean = sorted.reduce((sum, t) => sum + t, 0) / sorted.length;
  return { median, mean, min: sorted[0], max: sorted[sorted.length - 1] };
}

/**
 * Run the full benchmark and print the report.
 *
 * For every requested size a synthetic project is generated and scanned RUNS
 * times; wall-clock time and memory usage are sampled around each scan.
 * Afterwards each scanner module is timed individually on a medium-sized
 * project, and a summary table is printed.
 *
 * @returns {Promise<void>}
 */
async function benchmark() {
  // Lazy-load the scanner
  const { run } = require('../src/index.js');
  const { clearFileListCache } = require('../src/utils.js');
  const { clearASTCache } = require('../src/shared/constants.js');

  console.log('='.repeat(70));
  console.log(' MUAD\'DIB Performance Benchmark');
  console.log(` Runs per size: ${RUNS} | Sizes: ${SIZES.join(', ')}`);
  console.log(` Node ${process.version} | ${os.cpus()[0]?.model || 'unknown CPU'} | ${os.platform()}`);
  console.log('='.repeat(70));
  console.log();

  const results = {};

  for (const sizeName of SIZES) {
    const config = SIZE_CONFIGS[sizeName];
    if (!config) {
      console.error(`Unknown size: ${sizeName}`);
      continue;
    }

    console.log(`--- ${sizeName.toUpperCase()}: ${config.label} ---`);

    const tmpDir = path.join(os.tmpdir(), `muaddib-bench-${sizeName}-${Date.now()}`);
    const times = [];
    const memories = [];

    try {
      generateProject(tmpDir, config.files);

      for (let r = 0; r < RUNS; r++) {
        // Clear caches between runs for fair measurement
        clearFileListCache();
        clearASTCache();

        // Force GC if available
        if (global.gc) global.gc();

        const memBefore = process.memoryUsage();
        const start = process.hrtime.bigint();

        try {
          await run(tmpDir, { _capture: true });
        } catch (err) {
          console.error(` Run ${r + 1} error: ${err.message}`);
        }

        const end = process.hrtime.bigint();
        const memAfter = process.memoryUsage();

        const durationMs = Number(end - start) / 1e6;
        const heapDelta = memAfter.heapUsed - memBefore.heapUsed;
        const rssAfter = memAfter.rss;

        times.push(durationMs);
        memories.push({ heapDelta, rss: rssAfter, heapUsed: memAfter.heapUsed });

        console.log(` Run ${r + 1}/${RUNS}: ${durationMs.toFixed(0)}ms | heap: ${(memAfter.heapUsed / 1024 / 1024).toFixed(1)}MB | RSS: ${(rssAfter / 1024 / 1024).toFixed(1)}MB`);
      }
    } finally {
      // Remove the synthetic project even when generation or a run throws.
      cleanup(tmpDir);
    }

    // Stats
    const stats = summarizeTimings(times);
    if (!stats) {
      console.error(` No runs recorded for ${sizeName}`);
      console.log();
      continue;
    }
    const { median, mean, min, max } = stats;
    const peakRss = Math.max(...memories.map(m => m.rss));
    const peakHeap = Math.max(...memories.map(m => m.heapUsed));

    results[sizeName] = { median, mean, min, max, peakRss, peakHeap, runs: RUNS, files: config.files };

    console.log(` => median: ${median.toFixed(0)}ms mean: ${mean.toFixed(0)}ms min: ${min.toFixed(0)}ms max: ${max.toFixed(0)}ms`);
    console.log(` => peak heap: ${(peakHeap / 1024 / 1024).toFixed(1)}MB peak RSS: ${(peakRss / 1024 / 1024).toFixed(1)}MB`);
    console.log();
  }

  // ----- Per-scanner timing (single run on medium) -----
  console.log('--- SCANNER BREAKDOWN (medium, single run) ---');
  const scannerTmpDir = path.join(os.tmpdir(), `muaddib-bench-scanner-${Date.now()}`);
  const scannerFiles = SIZE_CONFIGS.medium?.files || 100;
  const scannerTimings = [];

  // Each scanner is loaded and timed on its own against the same project.
  const scannerModules = [
    { name: 'PackageJson', mod: '../src/scanner/package.js', fn: 'scanPackageJson' },
    { name: 'ShellScripts', mod: '../src/scanner/shell.js', fn: 'scanShellScripts' },
    { name: 'AST', mod: '../src/scanner/ast.js', fn: 'analyzeAST' },
    { name: 'Obfuscation', mod: '../src/scanner/obfuscation.js', fn: 'detectObfuscation' },
    { name: 'Dependencies', mod: '../src/scanner/dependencies.js', fn: 'scanDependencies' },
    { name: 'Hashes', mod: '../src/scanner/hash.js', fn: 'scanHashes' },
    { name: 'DataFlow', mod: '../src/scanner/dataflow.js', fn: 'analyzeDataFlow' },
    { name: 'Typosquat', mod: '../src/scanner/typosquat.js', fn: 'scanTyposquatting' },
    { name: 'GitHubActions', mod: '../src/scanner/github-actions.js', fn: 'scanGitHubActions' },
    { name: 'Entropy', mod: '../src/scanner/entropy.js', fn: 'scanEntropy' },
    { name: 'AIConfig', mod: '../src/scanner/ai-config.js', fn: 'scanAIConfig' }
  ];

  try {
    generateProject(scannerTmpDir, scannerFiles);
    clearFileListCache();
    clearASTCache();

    for (const s of scannerModules) {
      try {
        const mod = require(s.mod);
        const fn = mod[s.fn];
        if (!fn) {
          scannerTimings.push({ name: s.name, ms: 0, note: 'not found' });
          continue;
        }

        clearFileListCache(); // each scanner gets fresh file list
        const start = process.hrtime.bigint();
        try {
          await fn(scannerTmpDir, {});
        } catch { /* some scanners may throw on benign input */ }
        const end = process.hrtime.bigint();
        const ms = Number(end - start) / 1e6;
        scannerTimings.push({ name: s.name, ms });
      } catch (err) {
        scannerTimings.push({ name: s.name, ms: 0, note: err.message });
      }
    }
  } finally {
    cleanup(scannerTmpDir);
  }

  // Sort by time descending
  scannerTimings.sort((a, b) => b.ms - a.ms);
  const totalScannerMs = scannerTimings.reduce((sum, s) => sum + s.ms, 0);
  // Fraction of the total spent in one scanner; 0 when nothing was measured (avoids NaN).
  const share = (ms) => (totalScannerMs > 0 ? ms / totalScannerMs : 0);

  for (const s of scannerTimings) {
    const pct = (share(s.ms) * 100).toFixed(1);
    const bar = '#'.repeat(Math.max(1, Math.round(share(s.ms) * 40)));
    console.log(` ${s.name.padEnd(15)} ${s.ms.toFixed(0).padStart(6)}ms ${pct.padStart(5)}% ${bar}${s.note ? ` (${s.note})` : ''}`);
  }
  console.log(` ${'TOTAL'.padEnd(15)} ${totalScannerMs.toFixed(0).padStart(6)}ms`);
  console.log();

  // ----- Summary table -----
  console.log('='.repeat(70));
  console.log(' SUMMARY');
  console.log('='.repeat(70));
  console.log(` ${'Size'.padEnd(10)} ${'Files'.padStart(6)} ${'Median'.padStart(8)} ${'Mean'.padStart(8)} ${'Min'.padStart(8)} ${'Max'.padStart(8)} ${'Heap'.padStart(8)} ${'RSS'.padStart(8)}`);
  console.log(` ${'-'.repeat(10)} ${'-'.repeat(6)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)}`);
  for (const [name, r] of Object.entries(results)) {
    console.log(` ${name.padEnd(10)} ${String(r.files).padStart(6)} ${(r.median.toFixed(0) + 'ms').padStart(8)} ${(r.mean.toFixed(0) + 'ms').padStart(8)} ${(r.min.toFixed(0) + 'ms').padStart(8)} ${(r.max.toFixed(0) + 'ms').padStart(8)} ${((r.peakHeap / 1024 / 1024).toFixed(1) + 'MB').padStart(8)} ${((r.peakRss / 1024 / 1024).toFixed(1) + 'MB').padStart(8)}`);
  }
  console.log();

  // Slowest scanner
  if (scannerTimings.length > 0) {
    console.log(` Slowest scanner: ${scannerTimings[0].name} (${scannerTimings[0].ms.toFixed(0)}ms, ${(share(scannerTimings[0].ms) * 100).toFixed(1)}% of total)`);
  }
  console.log();
}
322
-
323
- benchmark().catch(err => {
324
- console.error('Benchmark failed:', err);
325
- process.exit(1);
326
- });
@@ -1,81 +0,0 @@
1
- #!/usr/bin/env node
2
- 'use strict';
3
-
4
- /**
5
- * cleanup-fp-labels.js — One-shot script to convert contaminated 'fp' labels to 'unconfirmed'.
6
- *
7
- * Context: During 3 months of monitoring, sandbox score === 0 was automatically relabeled
8
- * as 'fp' (false positive). Without honey tokens, sandbox clean ≠ false positive.
9
- * This script converts all automated 'fp' labels to 'unconfirmed' so they are excluded
10
- * from ML training (neither positive nor negative).
11
- *
12
- * Usage:
13
- * node scripts/cleanup-fp-labels.js # Dry-run (default)
14
- * node scripts/cleanup-fp-labels.js --apply # Write changes
15
- * node scripts/cleanup-fp-labels.js --file path # Custom JSONL path
16
- */
17
-
18
- const fs = require('fs');
19
- const path = require('path');
20
-
21
- const DEFAULT_FILE = path.join(__dirname, '..', 'data', 'ml-training.jsonl');
22
-
23
/**
 * CLI entry point: convert 'fp' labels to 'unconfirmed' in a JSONL file.
 *
 * Options (read from process.argv):
 *   --apply        Write the converted file. Without it the script only
 *                  reports what would change (dry-run).
 *   --file <path>  JSONL file to process (defaults to DEFAULT_FILE).
 *
 * Blank lines, malformed lines and JSON values that are not objects are kept
 * byte-for-byte and are not counted as records. Exits with code 1 when the
 * file does not exist.
 */
function main() {
  const args = process.argv.slice(2);
  const apply = args.includes('--apply');
  const fileIdx = args.indexOf('--file');
  const filePath = fileIdx >= 0 && args[fileIdx + 1] ? args[fileIdx + 1] : DEFAULT_FILE;

  if (!fs.existsSync(filePath)) {
    console.log(`[CLEANUP] File not found: ${filePath}`);
    process.exit(1);
  }

  const content = fs.readFileSync(filePath, 'utf8');
  const lines = content.split('\n');

  let totalRecords = 0;
  let fpCount = 0;
  const convertedLines = [];

  for (const line of lines) {
    if (!line.trim()) {
      convertedLines.push(line);
      continue;
    }

    let record;
    try {
      record = JSON.parse(line);
    } catch {
      convertedLines.push(line); // Keep malformed lines as-is
      continue;
    }

    // `null` and scalar JSON values carry no label; keep them untouched.
    if (record === null || typeof record !== 'object') {
      convertedLines.push(line);
      continue;
    }

    totalRecords++;

    if (record.label !== 'fp') {
      convertedLines.push(line);
      continue;
    }

    fpCount++;
    if (apply) {
      record.label = 'unconfirmed';
      convertedLines.push(JSON.stringify(record));
    } else {
      convertedLines.push(line);
    }
  }

  console.log(`[CLEANUP] File: ${filePath}`);
  console.log(`[CLEANUP] Total records: ${totalRecords}`);
  console.log(`[CLEANUP] Records with label 'fp': ${fpCount}`);

  if (apply && fpCount > 0) {
    // Write to a sibling temp file and rename it over the original, so an
    // interrupted run cannot leave the training data half-written.
    // realpathSync keeps a symlinked input pointing at the rewritten file.
    const target = fs.realpathSync(filePath);
    const tmpPath = `${target}.tmp-${process.pid}`;
    try {
      fs.writeFileSync(tmpPath, convertedLines.join('\n'), { encoding: 'utf8', mode: fs.statSync(target).mode });
      fs.renameSync(tmpPath, target);
    } catch (err) {
      try {
        fs.unlinkSync(tmpPath);
      } catch { /* temp file was never created */ }
      throw err;
    }
    console.log(`[CLEANUP] APPLIED: Converted ${fpCount} 'fp' labels to 'unconfirmed'`);
  } else if (!apply && fpCount > 0) {
    console.log(`[CLEANUP] DRY-RUN: Would convert ${fpCount} labels. Use --apply to write.`);
  } else {
    console.log(`[CLEANUP] No 'fp' labels found. Nothing to do.`);
  }
}
80
-
81
- main();