npm - muaddib-scanner - Versions diffs - 2.10.39 → 2.10.41 - Mend

muaddib-scanner 2.10.39 → 2.10.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/package.json +1 -1
package/src/integrations/canary-tokens.js +53 -0
package/src/monitor/classify.js +1 -0
package/src/response/playbooks.js +9 -0
package/src/rules/index.js +23 -0
package/src/sandbox/gvisor-parser.js +348 -0
package/src/sandbox/index.js +133 -21
package/src/sandbox/network-allowlist.js +162 -0
package/iocs/builtin.yaml +0 -239
package/iocs/hashes.yaml +0 -214
package/iocs/packages.yaml +0 -481
package/scripts/analyze-score0.js +0 -190
package/scripts/archive-cleanup.sh +0 -7
package/scripts/audit-archive.sh +0 -45
package/scripts/benchmark.js +0 -326
package/scripts/cleanup-fp-labels.js +0 -81
package/scripts/ossf-benchmark.js +0 -548
package/scripts/sample-npm-random.js +0 -339
package/src/ioc/data/.ossf-tree-sha +0 -1

package/scripts/analyze-score0.js DELETED

@@ -1,190 +0,0 @@
-#!/usr/bin/env node
-'use strict';
-/**
- * analyze-score0.js — Diagnostic script for score-0 malware investigation.
- *
- * Analyzes packages from the Datadog benchmark that scored 0 (zero threats detected).
- * Categorizes each package to identify blind spots vs expected non-detections.
- *
- * Categories:
- *   - empty_package: no code files at all
- *   - ts_only: only .ts files (no .js)
- *   - binary_only: only .wasm/.node/.dll/.so
- *   - non_code_assets: CSS/images/fonts/markdown only
- *   - minimum_viable: package.json + README only
- *   - python_in_npm: .py files in an npm package
- *   - unknown: has .js but 0 detections — TRUE BLIND SPOT
- *
- * Usage:
- *   node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl
- *   node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl --csv report.csv
- *   node scripts/analyze-score0.js --dir .muaddib-cache/datadog-tarballs/
- */
-const fs = require('fs');
-const path = require('path');
-const CODE_EXTENSIONS = new Set(['.js', '.cjs', '.mjs', '.jsx']);
-const TS_EXTENSIONS = new Set(['.ts', '.tsx', '.cts', '.mts']);
-const BINARY_EXTENSIONS = new Set(['.wasm', '.node', '.dll', '.so', '.dylib', '.exe']);
-const ASSET_EXTENSIONS = new Set(['.css', '.scss', '.less', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico',
-  '.woff', '.woff2', '.ttf', '.eot', '.otf', '.md', '.txt', '.html', '.htm', '.map']);
-const PY_EXTENSIONS = new Set(['.py', '.pyx', '.pyi']);
-function categorizePackage(packageDir) {
-  if (!fs.existsSync(packageDir)) return 'missing';
-  const files = [];
-  function walk(dir, depth) {
-    if (depth > 5) return; // Limit depth
-    try {
-      const entries = fs.readdirSync(dir, { withFileTypes: true });
-      for (const entry of entries) {
-        if (entry.name === 'node_modules' || entry.name === '.git') continue;
-        const full = path.join(dir, entry.name);
-        if (entry.isDirectory()) {
-          walk(full, depth + 1);
-        } else if (entry.isFile()) {
-          files.push(entry.name);
-        }
-      }
-    } catch { /* skip permission errors */ }
-  }
-  walk(packageDir, 0);
-  if (files.length === 0) return 'empty_package';
-  const extensions = files.map(f => path.extname(f).toLowerCase());
-  const hasCode = extensions.some(e => CODE_EXTENSIONS.has(e));
-  const hasTs = extensions.some(e => TS_EXTENSIONS.has(e));
-  const hasBinary = extensions.some(e => BINARY_EXTENSIONS.has(e));
-  const hasPython = extensions.some(e => PY_EXTENSIONS.has(e));
-  const hasAssets = extensions.some(e => ASSET_EXTENSIONS.has(e));
-  // Only package.json + README
-  const nonMeta = files.filter(f => !['package.json', 'readme.md', 'readme', 'license', 'license.md', 'changelog.md'].includes(f.toLowerCase()));
-  if (nonMeta.length === 0) return 'minimum_viable';
-  if (hasCode) return 'unknown'; // TRUE BLIND SPOT: has JS but 0 detections
-  if (hasTs && !hasCode) return 'ts_only';
-  if (hasBinary && !hasCode && !hasTs) return 'binary_only';
-  if (hasPython && !hasCode) return 'python_in_npm';
-  if (hasAssets && !hasCode && !hasTs && !hasBinary) return 'non_code_assets';
-  return 'unknown'; // Fallback
-}
-function loadBenchmarkResults(filepath) {
-  if (!fs.existsSync(filepath)) {
-    console.error(`[SCORE0] File not found: ${filepath}`);
-    process.exit(1);
-  }
-  const content = fs.readFileSync(filepath, 'utf8');
-  const records = [];
-  for (const line of content.split('\n')) {
-    if (!line.trim()) continue;
-    try {
-      const record = JSON.parse(line);
-      if (record.score === 0 && record.threat_count === 0) {
-        records.push(record);
-      }
-    } catch { /* skip malformed */ }
-  }
-  return records;
-}
-function main() {
-  const args = process.argv.slice(2);
-  const benchmarkIdx = args.indexOf('--benchmark');
-  const dirIdx = args.indexOf('--dir');
-  const csvIdx = args.indexOf('--csv');
-  const benchmarkFile = benchmarkIdx >= 0 ? args[benchmarkIdx + 1] : null;
-  const tarballDir = dirIdx >= 0 ? args[dirIdx + 1] : null;
-  const csvFile = csvIdx >= 0 ? args[csvIdx + 1] : null;
-  if (!benchmarkFile && !tarballDir) {
-    console.log('Usage:');
-    console.log('  node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl');
-    console.log('  node scripts/analyze-score0.js --dir .muaddib-cache/datadog-tarballs/');
-    console.log('  node scripts/analyze-score0.js --benchmark data/datadog-benchmark.jsonl --csv report.csv');
-    process.exit(0);
-  }
-  let packages = [];
-  if (benchmarkFile) {
-    const records = loadBenchmarkResults(benchmarkFile);
-    console.log(`[SCORE0] Loaded ${records.length} score-0 packages from benchmark`);
-    packages = records.map(r => ({
-      name: r.name || r.package || 'unknown',
-      version: r.version || '',
-      dir: tarballDir ? path.join(tarballDir, r.name || r.package || 'unknown') : null
-    }));
-  } else if (tarballDir) {
-    // Direct directory scan mode
-    if (!fs.existsSync(tarballDir)) {
-      console.error(`[SCORE0] Directory not found: ${tarballDir}`);
-      process.exit(1);
-    }
-    const entries = fs.readdirSync(tarballDir, { withFileTypes: true });
-    packages = entries
-      .filter(e => e.isDirectory())
-      .map(e => ({ name: e.name, version: '', dir: path.join(tarballDir, e.name) }));
-    console.log(`[SCORE0] Found ${packages.length} package directories`);
-  }
-  // Categorize
-  const categories = {};
-  const results = [];
-  for (const pkg of packages) {
-    let category = 'no_dir';
-    if (pkg.dir && fs.existsSync(pkg.dir)) {
-      category = categorizePackage(pkg.dir);
-    }
-    categories[category] = (categories[category] || 0) + 1;
-    results.push({ name: pkg.name, version: pkg.version, category });
-  }
-  // Summary
-  console.log('\n=== SCORE 0 INVESTIGATION REPORT ===\n');
-  console.log(`Total score-0 packages: ${packages.length}\n`);
-  const sortedCategories = Object.entries(categories).sort((a, b) => b[1] - a[1]);
-  for (const [cat, count] of sortedCategories) {
-    const pct = ((count / packages.length) * 100).toFixed(1);
-    const label = cat === 'unknown' ? `${cat} *** BLIND SPOT ***` : cat;
-    console.log(`  ${label}: ${count} (${pct}%)`);
-  }
-  const unknownCount = categories.unknown || 0;
-  console.log(`\n  Actionable blind spots: ${unknownCount} packages with JS code but 0 detections`);
-  // CSV output
-  if (csvFile) {
-    const csvLines = ['name,version,category'];
-    for (const r of results) {
-      csvLines.push(`${r.name},${r.version},${r.category}`);
-    }
-    fs.writeFileSync(csvFile, csvLines.join('\n'), 'utf8');
-    console.log(`\n  CSV report written to: ${csvFile}`);
-  }
-  // List unknown packages (first 20)
-  const unknowns = results.filter(r => r.category === 'unknown');
-  if (unknowns.length > 0) {
-    console.log('\n  First 20 "unknown" (blind spot) packages:');
-    for (const u of unknowns.slice(0, 20)) {
-      console.log(`    - ${u.name}@${u.version}`);
-    }
-    if (unknowns.length > 20) {
-      console.log(`    ... and ${unknowns.length - 20} more`);
-    }
-  }
-}
-main();

package/scripts/archive-cleanup.sh DELETED

@@ -1,7 +0,0 @@
-#!/bin/bash
-# Supprime les archives de plus de 30 jours
-ARCHIVE_DIR="/opt/muaddib/archive"
-find "$ARCHIVE_DIR" -type d -name "20*" -mtime +30 -exec rm -rf {} + 2>/dev/null
-# Log
-TOTAL=$(du -sh "$ARCHIVE_DIR" 2>/dev/null | cut -f1)
-echo "[Archive Cleanup] $(date -Iseconds) — Total size: $TOTAL"

package/scripts/audit-archive.sh DELETED

@@ -1,45 +0,0 @@
-#!/bin/bash
-# Usage: ./audit-archive.sh [YYYY-MM-DD] [priority]
-# Exemples:
-#   ./audit-archive.sh                    → résumé de toutes les dates
-#   ./audit-archive.sh 2026-03-29         → liste les packages archivés ce jour
-#   ./audit-archive.sh 2026-03-29 P1      → filtre par priorité
-ARCHIVE_DIR="/opt/muaddib/archive"
-DATE=$1
-PRIORITY=$2
-if [ -z "$DATE" ]; then
-  echo "=== Archive Summary ==="
-  for dir in "$ARCHIVE_DIR"/20*; do
-    [ -d "$dir" ] || continue
-    day=$(basename "$dir")
-    count=$(ls "$dir"/*.tgz 2>/dev/null | wc -l)
-    size=$(du -sh "$dir" 2>/dev/null | cut -f1)
-    echo "$day : $count packages ($size)"
-  done
-  echo "---"
-  echo "Total: $(du -sh "$ARCHIVE_DIR" 2>/dev/null | cut -f1)"
-  exit 0
-fi
-DIR="$ARCHIVE_DIR/$DATE"
-if [ ! -d "$DIR" ]; then
-  echo "No archive for $DATE"
-  exit 1
-fi
-for json in "$DIR"/*.json; do
-  [ -f "$json" ] || continue
-  pkg=$(jq -r '.package' "$json")
-  ver=$(jq -r '.version' "$json")
-  prio=$(jq -r '.priority' "$json")
-  score=$(jq -r '.score' "$json")
-  llm=$(jq -r '.llm_verdict // "none"' "$json")
-  if [ -n "$PRIORITY" ] && [ "$prio" != "$PRIORITY" ]; then
-    continue
-  fi
-  printf "%-40s %-8s score=%-4s llm=%s\n" "$pkg@$ver" "$prio" "$score" "$llm"
-done

package/scripts/benchmark.js DELETED

@@ -1,326 +0,0 @@
-#!/usr/bin/env node
-'use strict';
-/**
- * MUAD'DIB Performance Benchmark
- *
- * Generates synthetic projects of different sizes and measures:
- * - Scan time (wall-clock via process.hrtime.bigint())
- * - Peak memory usage (process.memoryUsage())
- * - Per-scanner breakdown (via _capture mode timing)
- *
- * Usage: node scripts/benchmark.js [--runs N] [--sizes small,medium,large]
- */
-const fs = require('fs');
-const path = require('path');
-const os = require('os');
-// ----- Config -----
-const RUNS = parseInt(process.argv.find((a, i) => process.argv[i - 1] === '--runs') || '3', 10);
-const SIZE_ARG = process.argv.find((a, i) => process.argv[i - 1] === '--sizes') || 'small,medium,large';
-const SIZES = SIZE_ARG.split(',').map(s => s.trim());
-const SIZE_CONFIGS = {
-  small:  { files: 10,  label: '10 JS files' },
-  medium: { files: 100, label: '100 JS files' },
-  large:  { files: 500, label: '500 JS files (cap test)' }
-};
-// ----- Synthetic file templates -----
-// Mix of benign-looking code with varied patterns to exercise all scanners
-const TEMPLATES = [
-  // Standard module
-  (i) => `'use strict';
-const path = require('path');
-const fs = require('fs');
-function process${i}(input) {
-  const result = input.toString().trim();
-  return path.resolve(result);
-}
-module.exports = { process${i} };
-`,
-  // HTTP client usage
-  (i) => `'use strict';
-const https = require('https');
-function fetch${i}(url) {
-  return new Promise((resolve, reject) => {
-    https.get(url, (res) => {
-      let data = '';
-      res.on('data', (chunk) => { data += chunk; });
-      res.on('end', () => resolve(JSON.parse(data)));
-    }).on('error', reject);
-  });
-}
-module.exports = { fetch${i} };
-`,
-  // Crypto usage
-  (i) => `'use strict';
-const crypto = require('crypto');
-function hash${i}(data) {
-  return crypto.createHash('sha256').update(data).digest('hex');
-}
-function verify${i}(data, expected) {
-  const actual = hash${i}(data);
-  return crypto.timingSafeEqual(Buffer.from(actual), Buffer.from(expected));
-}
-module.exports = { hash${i}, verify${i} };
-`,
-  // Config/util module
-  (i) => `'use strict';
-const os = require('os');
-const CONFIG_${i} = {
-  tmpDir: os.tmpdir(),
-  cpus: os.cpus().length,
-  platform: os.platform(),
-  arch: os.arch()
-};
-function getConfig${i}() {
-  return { ...CONFIG_${i} };
-}
-module.exports = { getConfig${i}, CONFIG_${i} };
-`,
-  // Event emitter pattern
-  (i) => `'use strict';
-const { EventEmitter } = require('events');
-class Service${i} extends EventEmitter {
-  constructor() {
-    super();
-    this.data = new Map();
-  }
-  add(key, value) {
-    this.data.set(key, value);
-    this.emit('added', { key, value });
-  }
-  remove(key) {
-    this.data.delete(key);
-    this.emit('removed', { key });
-  }
-}
-module.exports = { Service${i} };
-`
-];
-// ----- Generate synthetic project -----
-function generateProject(tmpDir, fileCount) {
-  fs.mkdirSync(tmpDir, { recursive: true });
-  // Create package.json
-  fs.writeFileSync(path.join(tmpDir, 'package.json'), JSON.stringify({
-    name: `bench-project-${fileCount}`,
-    version: '1.0.0',
-    description: 'Synthetic benchmark project',
-    main: 'index.js'
-  }, null, 2));
-  // Create subdirectories for realism
-  const dirs = ['', 'src', 'lib', 'utils', 'helpers'];
-  for (const d of dirs) {
-    if (d) fs.mkdirSync(path.join(tmpDir, d), { recursive: true });
-  }
-  // Create JS files
-  for (let i = 0; i < fileCount; i++) {
-    const template = TEMPLATES[i % TEMPLATES.length];
-    const dir = dirs[i % dirs.length];
-    const filePath = path.join(tmpDir, dir, `module-${i}.js`);
-    fs.writeFileSync(filePath, template(i));
-  }
-  // Create index.js that references some modules
-  const imports = Array.from({ length: Math.min(10, fileCount) }, (_, i) => {
-    const dir = dirs[i % dirs.length];
-    const rel = dir ? `./${dir}/module-${i}` : `./module-${i}`;
-    return `const m${i} = require('${rel}');`;
-  }).join('\n');
-  fs.writeFileSync(path.join(tmpDir, 'index.js'), `'use strict';\n${imports}\n\nconsole.log('Loaded modules');\n`);
-  return tmpDir;
-}
-// ----- Cleanup -----
-function cleanup(dir) {
-  try {
-    fs.rmSync(dir, { recursive: true, force: true });
-  } catch { /* ignore */ }
-}
-// ----- Run benchmark -----
-async function benchmark() {
-  // Lazy-load the scanner
-  const { run } = require('../src/index.js');
-  const { clearFileListCache } = require('../src/utils.js');
-  const { clearASTCache } = require('../src/shared/constants.js');
-  console.log('='.repeat(70));
-  console.log('  MUAD\'DIB Performance Benchmark');
-  console.log(`  Runs per size: ${RUNS}  |  Sizes: ${SIZES.join(', ')}`);
-  console.log(`  Node ${process.version}  |  ${os.cpus()[0]?.model || 'unknown CPU'}  |  ${os.platform()}`);
-  console.log('='.repeat(70));
-  console.log();
-  const results = {};
-  for (const sizeName of SIZES) {
-    const config = SIZE_CONFIGS[sizeName];
-    if (!config) {
-      console.error(`Unknown size: ${sizeName}`);
-      continue;
-    }
-    console.log(`--- ${sizeName.toUpperCase()}: ${config.label} ---`);
-    const tmpDir = path.join(os.tmpdir(), `muaddib-bench-${sizeName}-${Date.now()}`);
-    generateProject(tmpDir, config.files);
-    const times = [];
-    const memories = [];
-    for (let r = 0; r < RUNS; r++) {
-      // Clear caches between runs for fair measurement
-      clearFileListCache();
-      clearASTCache();
-      // Force GC if available
-      if (global.gc) global.gc();
-      const memBefore = process.memoryUsage();
-      const start = process.hrtime.bigint();
-      try {
-        await run(tmpDir, { _capture: true });
-      } catch (err) {
-        console.error(`  Run ${r + 1} error: ${err.message}`);
-      }
-      const end = process.hrtime.bigint();
-      const memAfter = process.memoryUsage();
-      const durationMs = Number(end - start) / 1e6;
-      const heapDelta = memAfter.heapUsed - memBefore.heapUsed;
-      const rssAfter = memAfter.rss;
-      times.push(durationMs);
-      memories.push({ heapDelta, rss: rssAfter, heapUsed: memAfter.heapUsed });
-      console.log(`  Run ${r + 1}/${RUNS}: ${durationMs.toFixed(0)}ms  |  heap: ${(memAfter.heapUsed / 1024 / 1024).toFixed(1)}MB  |  RSS: ${(rssAfter / 1024 / 1024).toFixed(1)}MB`);
-    }
-    // Stats
-    times.sort((a, b) => a - b);
-    const median = times[Math.floor(times.length / 2)];
-    const mean = times.reduce((a, b) => a + b, 0) / times.length;
-    const min = times[0];
-    const max = times[times.length - 1];
-    const peakRss = Math.max(...memories.map(m => m.rss));
-    const peakHeap = Math.max(...memories.map(m => m.heapUsed));
-    results[sizeName] = { median, mean, min, max, peakRss, peakHeap, runs: RUNS, files: config.files };
-    console.log(`  => median: ${median.toFixed(0)}ms  mean: ${mean.toFixed(0)}ms  min: ${min.toFixed(0)}ms  max: ${max.toFixed(0)}ms`);
-    console.log(`  => peak heap: ${(peakHeap / 1024 / 1024).toFixed(1)}MB  peak RSS: ${(peakRss / 1024 / 1024).toFixed(1)}MB`);
-    console.log();
-    cleanup(tmpDir);
-  }
-  // ----- Per-scanner timing (single run on medium) -----
-  console.log('--- SCANNER BREAKDOWN (medium, single run) ---');
-  const scannerTmpDir = path.join(os.tmpdir(), `muaddib-bench-scanner-${Date.now()}`);
-  const scannerFiles = SIZE_CONFIGS.medium?.files || 100;
-  generateProject(scannerTmpDir, scannerFiles);
-  clearFileListCache();
-  clearASTCache();
-  // Monkey-patch Promise.allSettled to measure per-scanner time
-  const origAllSettled = Promise.allSettled.bind(Promise);
-  const scannerTimings = [];
-  // We'll measure by wrapping run() and parsing its internal flow
-  // Simpler approach: time each scanner individually
-  const scannerModules = [
-    { name: 'PackageJson', mod: '../src/scanner/package.js', fn: 'scanPackageJson' },
-    { name: 'ShellScripts', mod: '../src/scanner/shell.js', fn: 'scanShellScripts' },
-    { name: 'AST', mod: '../src/scanner/ast.js', fn: 'analyzeAST' },
-    { name: 'Obfuscation', mod: '../src/scanner/obfuscation.js', fn: 'detectObfuscation' },
-    { name: 'Dependencies', mod: '../src/scanner/dependencies.js', fn: 'scanDependencies' },
-    { name: 'Hashes', mod: '../src/scanner/hash.js', fn: 'scanHashes' },
-    { name: 'DataFlow', mod: '../src/scanner/dataflow.js', fn: 'analyzeDataFlow' },
-    { name: 'Typosquat', mod: '../src/scanner/typosquat.js', fn: 'scanTyposquatting' },
-    { name: 'GitHubActions', mod: '../src/scanner/github-actions.js', fn: 'scanGitHubActions' },
-    { name: 'Entropy', mod: '../src/scanner/entropy.js', fn: 'scanEntropy' },
-    { name: 'AIConfig', mod: '../src/scanner/ai-config.js', fn: 'scanAIConfig' }
-  ];
-  for (const s of scannerModules) {
-    try {
-      const mod = require(s.mod);
-      const fn = mod[s.fn];
-      if (!fn) {
-        scannerTimings.push({ name: s.name, ms: 0, note: 'not found' });
-        continue;
-      }
-      clearFileListCache(); // each scanner gets fresh file list
-      const start = process.hrtime.bigint();
-      try {
-        await fn(scannerTmpDir, {});
-      } catch { /* some scanners may throw on benign input */ }
-      const end = process.hrtime.bigint();
-      const ms = Number(end - start) / 1e6;
-      scannerTimings.push({ name: s.name, ms });
-    } catch (err) {
-      scannerTimings.push({ name: s.name, ms: 0, note: err.message });
-    }
-  }
-  // Sort by time descending
-  scannerTimings.sort((a, b) => b.ms - a.ms);
-  const totalScannerMs = scannerTimings.reduce((sum, s) => sum + s.ms, 0);
-  for (const s of scannerTimings) {
-    const pct = totalScannerMs > 0 ? ((s.ms / totalScannerMs) * 100).toFixed(1) : '0.0';
-    const bar = '#'.repeat(Math.max(1, Math.round(s.ms / totalScannerMs * 40)));
-    console.log(`  ${s.name.padEnd(15)} ${s.ms.toFixed(0).padStart(6)}ms  ${pct.padStart(5)}%  ${bar}${s.note ? ` (${s.note})` : ''}`);
-  }
-  console.log(`  ${'TOTAL'.padEnd(15)} ${totalScannerMs.toFixed(0).padStart(6)}ms`);
-  console.log();
-  cleanup(scannerTmpDir);
-  // ----- Summary table -----
-  console.log('='.repeat(70));
-  console.log('  SUMMARY');
-  console.log('='.repeat(70));
-  console.log(`  ${'Size'.padEnd(10)} ${'Files'.padStart(6)} ${'Median'.padStart(8)} ${'Mean'.padStart(8)} ${'Min'.padStart(8)} ${'Max'.padStart(8)} ${'Heap'.padStart(8)} ${'RSS'.padStart(8)}`);
-  console.log(`  ${'-'.repeat(10)} ${'-'.repeat(6)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)} ${'-'.repeat(8)}`);
-  for (const [name, r] of Object.entries(results)) {
-    console.log(`  ${name.padEnd(10)} ${String(r.files).padStart(6)} ${(r.median.toFixed(0) + 'ms').padStart(8)} ${(r.mean.toFixed(0) + 'ms').padStart(8)} ${(r.min.toFixed(0) + 'ms').padStart(8)} ${(r.max.toFixed(0) + 'ms').padStart(8)} ${((r.peakHeap / 1024 / 1024).toFixed(1) + 'MB').padStart(8)} ${((r.peakRss / 1024 / 1024).toFixed(1) + 'MB').padStart(8)}`);
-  }
-  console.log();
-  // Slowest scanner
-  if (scannerTimings.length > 0) {
-    console.log(`  Slowest scanner: ${scannerTimings[0].name} (${scannerTimings[0].ms.toFixed(0)}ms, ${((scannerTimings[0].ms / totalScannerMs) * 100).toFixed(1)}% of total)`);
-  }
-  console.log();
-}
-benchmark().catch(err => {
-  console.error('Benchmark failed:', err);
-  process.exit(1);
-});

package/scripts/cleanup-fp-labels.js DELETED

@@ -1,81 +0,0 @@
-#!/usr/bin/env node
-'use strict';
-/**
- * cleanup-fp-labels.js — One-shot script to convert contaminated 'fp' labels to 'unconfirmed'.
- *
- * Context: During 3 months of monitoring, sandbox score === 0 was automatically relabeled
- * as 'fp' (false positive). Without honey tokens, sandbox clean ≠ false positive.
- * This script converts all automated 'fp' labels to 'unconfirmed' so they are excluded
- * from ML training (neither positive nor negative).
- *
- * Usage:
- *   node scripts/cleanup-fp-labels.js                # Dry-run (default)
- *   node scripts/cleanup-fp-labels.js --apply        # Write changes
- *   node scripts/cleanup-fp-labels.js --file path    # Custom JSONL path
- */
-const fs = require('fs');
-const path = require('path');
-const DEFAULT_FILE = path.join(__dirname, '..', 'data', 'ml-training.jsonl');
-function main() {
-  const args = process.argv.slice(2);
-  const apply = args.includes('--apply');
-  const fileIdx = args.indexOf('--file');
-  const filePath = fileIdx >= 0 && args[fileIdx + 1] ? args[fileIdx + 1] : DEFAULT_FILE;
-  if (!fs.existsSync(filePath)) {
-    console.log(`[CLEANUP] File not found: ${filePath}`);
-    process.exit(1);
-  }
-  const content = fs.readFileSync(filePath, 'utf8');
-  const lines = content.split('\n');
-  let totalRecords = 0;
-  let fpCount = 0;
-  let convertedLines = [];
-  for (const line of lines) {
-    if (!line.trim()) {
-      convertedLines.push(line);
-      continue;
-    }
-    try {
-      const record = JSON.parse(line);
-      totalRecords++;
-      if (record.label === 'fp') {
-        fpCount++;
-        if (apply) {
-          record.label = 'unconfirmed';
-          convertedLines.push(JSON.stringify(record));
-        } else {
-          convertedLines.push(line);
-        }
-      } else {
-        convertedLines.push(line);
-      }
-    } catch {
-      convertedLines.push(line); // Keep malformed lines as-is
-    }
-  }
-  console.log(`[CLEANUP] File: ${filePath}`);
-  console.log(`[CLEANUP] Total records: ${totalRecords}`);
-  console.log(`[CLEANUP] Records with label 'fp': ${fpCount}`);
-  if (apply && fpCount > 0) {
-    fs.writeFileSync(filePath, convertedLines.join('\n'), 'utf8');
-    console.log(`[CLEANUP] APPLIED: Converted ${fpCount} 'fp' labels to 'unconfirmed'`);
-  } else if (!apply && fpCount > 0) {
-    console.log(`[CLEANUP] DRY-RUN: Would convert ${fpCount} labels. Use --apply to write.`);
-  } else {
-    console.log(`[CLEANUP] No 'fp' labels found. Nothing to do.`);
-  }
-}
-main();